UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.
A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list.
Update dport, daddr and checksums of each skb of the segment list
after __udp_gso_segment.
Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi <[email protected]>
---
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/
I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only. Should we consider
SNAT too?
net/ipv4/udp_offload.c | 45 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 41 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..7e24928 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -309,10 +309,12 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sk_buff *seg;
unsigned int mss;
__wsum csum;
- struct udphdr *uh;
- struct iphdr *iph;
+ struct udphdr *uh, *uh2;
+ struct iphdr *iph, *iph2;
+ bool is_fraglist = false;
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
@@ -327,8 +329,43 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
- return __udp_gso_segment(skb, features);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ is_fraglist = true;
+
+ segs = __udp_gso_segment(skb, features);
+ if (IS_ERR_OR_NULL(segs) || !is_fraglist)
+ return segs;
+
+ seg = segs;
+ uh = udp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ while ((seg = seg->next)) {
+ uh2 = udp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ if (uh->dest == uh2->dest && iph->daddr == iph2->daddr)
+ continue;
+
+ if (uh2->check) {
+ inet_proto_csum_replace4(&uh2->check, seg,
+ iph2->daddr,
+ iph->daddr, true);
+ inet_proto_csum_replace2(&uh2->check, seg,
+ uh2->dest, uh->dest,
+ false);
+ if (!uh2->check)
+ uh2->check = CSUM_MANGLED_0;
+ }
+ uh2->dest = uh->dest;
+
+ csum_replace4(&iph2->check, iph2->daddr, iph->daddr);
+ iph2->daddr = iph->daddr;
+ }
+
+ return segs;
+ }
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
--
2.7.4
On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> skb_gso_segment is updated but following frag_skbs are not updated.
>
> A call path skb_mac_gso_segment -> inet_gso_segment ->
> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> does not try to update UDP/IP header of the segment list.
We still need to find out why it works for Alexander, but not for you.
Different usecases?
We copy only the MAC header in skb_segment_list(), so I think
this is a valid bug when NAT changed the UDP header.
>
> Update dport, daddr and checksums of each skb of the segment list
> after __udp_gso_segment.
>
> Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> Signed-off-by: Dongseok Yi <[email protected]>
> ---
> Steffen Klassert said, there could be 2 options.
> https://lore.kernel.org/patchwork/patch/1362257/
>
> I was trying to write a quick fix, but it was not easy to forward
> segmented list. Currently, assuming DNAT only. Should we consider
> SNAT too?
If it is broken, then it is broken for both, so yes.
>
> net/ipv4/udp_offload.c | 45 +++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94..7e24928 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -309,10 +309,12 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> netdev_features_t features)
> {
> struct sk_buff *segs = ERR_PTR(-EINVAL);
> + struct sk_buff *seg;
> unsigned int mss;
> __wsum csum;
> - struct udphdr *uh;
> - struct iphdr *iph;
> + struct udphdr *uh, *uh2;
> + struct iphdr *iph, *iph2;
> + bool is_fraglist = false;
>
> if (skb->encapsulation &&
> (skb_shinfo(skb)->gso_type &
> @@ -327,8 +329,43 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> if (!pskb_may_pull(skb, sizeof(struct udphdr)))
> goto out;
>
> - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> - return __udp_gso_segment(skb, features);
> + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
> + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
> + is_fraglist = true;
> +
> + segs = __udp_gso_segment(skb, features);
> + if (IS_ERR_OR_NULL(segs) || !is_fraglist)
> + return segs;
> +
> + seg = segs;
> + uh = udp_hdr(seg);
> + iph = ip_hdr(seg);
> +
> + while ((seg = seg->next)) {
> + uh2 = udp_hdr(seg);
> + iph2 = ip_hdr(seg);
> +
> + if (uh->dest == uh2->dest && iph->daddr == iph2->daddr)
> + continue;
> +
> + if (uh2->check) {
> + inet_proto_csum_replace4(&uh2->check, seg,
> + iph2->daddr,
> + iph->daddr, true);
> + inet_proto_csum_replace2(&uh2->check, seg,
> + uh2->dest, uh->dest,
> + false);
> + if (!uh2->check)
> + uh2->check = CSUM_MANGLED_0;
> + }
> + uh2->dest = uh->dest;
> +
> + csum_replace4(&iph2->check, iph2->daddr, iph->daddr);
> + iph2->daddr = iph->daddr;
> + }
> +
> + return segs;
> + }
I would not like to add this to a generic codepath. I think we can
relatively easy copy the full headers in skb_segment_list().
I think about something like the (completely untested) patch below:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f62cae3f75d8..63ae7f79fad7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int offset)
{
struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int tnl_hlen = skb_tnl_header_len(skb);
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb;
- skb_push(skb, -skb_network_offset(skb) + offset);
+ skb_push(skb, doffset);
skb_shinfo(skb)->frag_list = NULL;
@@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
delta_len += nskb->len;
delta_truesize += nskb->truesize;
- skb_push(nskb, -skb_network_offset(nskb) + offset);
+ skb_push(nskb, doffset);
skb_release_head_state(nskb);
__copy_skb_header(nskb, skb);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94781bf..1181398378b8 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
netdev_features_t features)
{
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
unsigned int mss = skb_shinfo(skb)->gso_size;
+ unsigned int offset;
- skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
+
+ /* Check for header changes and copy the full header in that case. */
+ if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
+ (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
+ (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
+ (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
+ offset = skb_mac_header_len(skb);
+ else
+ offset = skb->data - skb_mac_header(skb);
+
+ skb = skb_segment_list(skb, features, offset);
if (IS_ERR(skb))
return skb;
After that you can apply the CSUM magic in __udp_gso_segment_list().
On 2021-01-15 17:12, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list.
>
> We still need to find out why it works for Alexander, but not for you.
> Different usecases?
This patch is not for
https://lore.kernel.org/patchwork/patch/1364544/
Alexander might want to call udp_gro_receive_segment even when
!sk and ~NETIF_F_GRO_FRAGLIST.
>
> We copy only the MAC header in skb_segment_list(), so I think
> this is a valid bug when NAT changed the UDP header.
>
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > after __udp_gso_segment.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi <[email protected]>
> > ---
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> >
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only. Should we consider
> > SNAT too?
>
> If it is broken, then it is broken for both, so yes.
Okay.
>
> >
> > net/ipv4/udp_offload.c | 45 +++++++++++++++++++++++++++++++++++++++++----
> > 1 file changed, 41 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94..7e24928 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -309,10 +309,12 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> > netdev_features_t features)
> > {
> > struct sk_buff *segs = ERR_PTR(-EINVAL);
> > + struct sk_buff *seg;
> > unsigned int mss;
> > __wsum csum;
> > - struct udphdr *uh;
> > - struct iphdr *iph;
> > + struct udphdr *uh, *uh2;
> > + struct iphdr *iph, *iph2;
> > + bool is_fraglist = false;
> >
> > if (skb->encapsulation &&
> > (skb_shinfo(skb)->gso_type &
> > @@ -327,8 +329,43 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> > if (!pskb_may_pull(skb, sizeof(struct udphdr)))
> > goto out;
> >
> > - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> > - return __udp_gso_segment(skb, features);
> > + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
> > + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
> > + is_fraglist = true;
> > +
> > + segs = __udp_gso_segment(skb, features);
> > + if (IS_ERR_OR_NULL(segs) || !is_fraglist)
> > + return segs;
> > +
> > + seg = segs;
> > + uh = udp_hdr(seg);
> > + iph = ip_hdr(seg);
> > +
> > + while ((seg = seg->next)) {
> > + uh2 = udp_hdr(seg);
> > + iph2 = ip_hdr(seg);
> > +
> > + if (uh->dest == uh2->dest && iph->daddr == iph2->daddr)
> > + continue;
> > +
> > + if (uh2->check) {
> > + inet_proto_csum_replace4(&uh2->check, seg,
> > + iph2->daddr,
> > + iph->daddr, true);
> > + inet_proto_csum_replace2(&uh2->check, seg,
> > + uh2->dest, uh->dest,
> > + false);
> > + if (!uh2->check)
> > + uh2->check = CSUM_MANGLED_0;
> > + }
> > + uh2->dest = uh->dest;
> > +
> > + csum_replace4(&iph2->check, iph2->daddr, iph->daddr);
> > + iph2->daddr = iph->daddr;
> > + }
> > +
> > + return segs;
> > + }
>
> I would not like to add this to a generic codepath. I think we can
> relatively easy copy the full headers in skb_segment_list().
I tried to copy the full headers with the similar approach, but it
copies length too. Can we keep the length of each skb of the fraglist?
>
> I think about something like the (completely untested) patch below:
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f62cae3f75d8..63ae7f79fad7 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> unsigned int offset)
> {
> struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> + unsigned int doffset = skb->data - skb_mac_header(skb);
> unsigned int tnl_hlen = skb_tnl_header_len(skb);
> unsigned int delta_truesize = 0;
> unsigned int delta_len = 0;
> struct sk_buff *tail = NULL;
> struct sk_buff *nskb;
>
> - skb_push(skb, -skb_network_offset(skb) + offset);
> + skb_push(skb, doffset);
>
> skb_shinfo(skb)->frag_list = NULL;
>
> @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> delta_len += nskb->len;
> delta_truesize += nskb->truesize;
>
> - skb_push(nskb, -skb_network_offset(nskb) + offset);
> + skb_push(nskb, doffset);
>
> skb_release_head_state(nskb);
> __copy_skb_header(nskb, skb);
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94781bf..1181398378b8 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
> static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> netdev_features_t features)
> {
> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> unsigned int mss = skb_shinfo(skb)->gso_size;
> + unsigned int offset;
>
> - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> + skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
> +
> + /* Check for header changes and copy the full header in that case. */
> + if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
> + (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
> + (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
> + (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
> + offset = skb_mac_header_len(skb);
> + else
> + offset = skb->data - skb_mac_header(skb);
> +
> + skb = skb_segment_list(skb, features, offset);
> if (IS_ERR(skb))
> return skb;
>
>
> After that you can apply the CSUM magic in __udp_gso_segment_list().
Sorry, I don't know CSUM magic well. Is it used for checksum
incremental update too?
On Fri, Jan 15, 2021 at 05:55:22PM +0900, Dongseok Yi wrote:
> On 2021-01-15 17:12, Steffen Klassert wrote:
> > On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list.
> >
> > We still need to find out why it works for Alexander, but not for you.
> > Different usecases?
>
> This patch is not for
> https://lore.kernel.org/patchwork/patch/1364544/
> Alexander might want to call udp_gro_receive_segment even when
> !sk and ~NETIF_F_GRO_FRAGLIST.
Yes, I know. But he said that fraglist GRO + NAT works for him.
I want to find out why it works for him, but not for you.
> >
> > I would not like to add this to a generic codepath. I think we can
> > relatively easy copy the full headers in skb_segment_list().
>
> I tried to copy the full headers with the similar approach, but it
> copies length too. Can we keep the length of each skb of the fraglist?
Ah yes, good point.
Then maybe you can move your approach into __udp_gso_segment_list()
so that we dont touch generic code.
>
> >
> > I think about something like the (completely untested) patch below:
> >
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index f62cae3f75d8..63ae7f79fad7 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> > unsigned int offset)
> > {
> > struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> > + unsigned int doffset = skb->data - skb_mac_header(skb);
> > unsigned int tnl_hlen = skb_tnl_header_len(skb);
> > unsigned int delta_truesize = 0;
> > unsigned int delta_len = 0;
> > struct sk_buff *tail = NULL;
> > struct sk_buff *nskb;
> >
> > - skb_push(skb, -skb_network_offset(skb) + offset);
> > + skb_push(skb, doffset);
> >
> > skb_shinfo(skb)->frag_list = NULL;
> >
> > @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> > delta_len += nskb->len;
> > delta_truesize += nskb->truesize;
> >
> > - skb_push(nskb, -skb_network_offset(nskb) + offset);
> > + skb_push(nskb, doffset);
> >
> > skb_release_head_state(nskb);
> > __copy_skb_header(nskb, skb);
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94781bf..1181398378b8 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> > netdev_features_t features)
> > {
> > + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> > unsigned int mss = skb_shinfo(skb)->gso_size;
> > + unsigned int offset;
> >
> > - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> > + skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
> > +
> > + /* Check for header changes and copy the full header in that case. */
> > + if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
> > + (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
> > + (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
> > + (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
> > + offset = skb_mac_header_len(skb);
> > + else
> > + offset = skb->data - skb_mac_header(skb);
> > +
> > + skb = skb_segment_list(skb, features, offset);
> > if (IS_ERR(skb))
> > return skb;
> >
> >
> > After that you can apply the CSUM magic in __udp_gso_segment_list().
>
> Sorry, I don't know CSUM magic well. Is it used for checksum
> incremental update too?
With that I meant the checksum updating you did in your patch.
From: Steffen Klassert <[email protected]>
Date: Fri, 15 Jan 2021 10:27:52 +0100
> On Fri, Jan 15, 2021 at 05:55:22PM +0900, Dongseok Yi wrote:
>> On 2021-01-15 17:12, Steffen Klassert wrote:
>>> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
>>>> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
>>>> forwarding. Only the header of head_skb from ip_finish_output_gso ->
>>>> skb_gso_segment is updated but following frag_skbs are not updated.
>>>>
>>>> A call path skb_mac_gso_segment -> inet_gso_segment ->
>>>> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
>>>> does not try to update UDP/IP header of the segment list.
>>>
>>> We still need to find out why it works for Alexander, but not for you.
>>> Different usecases?
>>
>> This patch is not for
>> https://lore.kernel.org/patchwork/patch/1364544/
>> Alexander might want to call udp_gro_receive_segment even when
>> !sk and ~NETIF_F_GRO_FRAGLIST.
>
> Yes, I know. But he said that fraglist GRO + NAT works for him.
> I want to find out why it works for him, but not for you.
I found that it worked for me because I advertised fraglist GSO
support in my driver (and added actual support for xmitting
fraglists). If so, kernel won't resegment GSO into a list of
plain packets, so no __udp_gso_segment_list() will be called.
I think it will break if I disable fraglist GSO feature through
Ethtool, so I could test your patches.
>>>
>>> I would not like to add this to a generic codepath. I think we can
>>> relatively easy copy the full headers in skb_segment_list().
>>
>> I tried to copy the full headers with the similar approach, but it
>> copies length too. Can we keep the length of each skb of the fraglist?
>
> Ah yes, good point.
>
> Then maybe you can move your approach into __udp_gso_segment_list()
> so that we dont touch generic code.
>
>>
>>>
>>> I think about something like the (completely untested) patch below:
>>>
>>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>>> index f62cae3f75d8..63ae7f79fad7 100644
>>> --- a/net/core/skbuff.c
>>> +++ b/net/core/skbuff.c
>>> @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>>> unsigned int offset)
>>> {
>>> struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
>>> + unsigned int doffset = skb->data - skb_mac_header(skb);
>>> unsigned int tnl_hlen = skb_tnl_header_len(skb);
>>> unsigned int delta_truesize = 0;
>>> unsigned int delta_len = 0;
>>> struct sk_buff *tail = NULL;
>>> struct sk_buff *nskb;
>>>
>>> - skb_push(skb, -skb_network_offset(skb) + offset);
>>> + skb_push(skb, doffset);
>>>
>>> skb_shinfo(skb)->frag_list = NULL;
>>>
>>> @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>>> delta_len += nskb->len;
>>> delta_truesize += nskb->truesize;
>>>
>>> - skb_push(nskb, -skb_network_offset(nskb) + offset);
>>> + skb_push(nskb, doffset);
>>>
>>> skb_release_head_state(nskb);
>>> __copy_skb_header(nskb, skb);
>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>> index ff39e94781bf..1181398378b8 100644
>>> --- a/net/ipv4/udp_offload.c
>>> +++ b/net/ipv4/udp_offload.c
>>> @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
>>> static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
>>> netdev_features_t features)
>>> {
>>> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
>>> unsigned int mss = skb_shinfo(skb)->gso_size;
>>> + unsigned int offset;
>>>
>>> - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
>>> + skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
>>> +
>>> + /* Check for header changes and copy the full header in that case. */
>>> + if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
>>> + (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
>>> + (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
>>> + (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
>>> + offset = skb_mac_header_len(skb);
>>> + else
>>> + offset = skb->data - skb_mac_header(skb);
>>> +
>>> + skb = skb_segment_list(skb, features, offset);
>>> if (IS_ERR(skb))
>>> return skb;
>>>
>>>
>>> After that you can apply the CSUM magic in __udp_gso_segment_list().
I'll test and let you know if it works. If doesn't, I think I'll be
able to get a working one based on this.
>> Sorry, I don't know CSUM magic well. Is it used for checksum
>> incremental update too?
>
> With that I meant the checksum updating you did in your patch.
Thanks,
Al
On 2021-01-15 19:51, Alexander Lobakin wrote:
> From: Steffen Klassert <[email protected]>
> Date: Fri, 15 Jan 2021 10:27:52 +0100
>
> > On Fri, Jan 15, 2021 at 05:55:22PM +0900, Dongseok Yi wrote:
> >> On 2021-01-15 17:12, Steffen Klassert wrote:
> >>> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> >>>> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> >>>> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> >>>> skb_gso_segment is updated but following frag_skbs are not updated.
> >>>>
> >>>> A call path skb_mac_gso_segment -> inet_gso_segment ->
> >>>> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> >>>> does not try to update UDP/IP header of the segment list.
> >>>
> >>> We still need to find out why it works for Alexander, but not for you.
> >>> Different usecases?
> >>
> >> This patch is not for
> >> https://lore.kernel.org/patchwork/patch/1364544/
> >> Alexander might want to call udp_gro_receive_segment even when
> >> !sk and ~NETIF_F_GRO_FRAGLIST.
> >
> > Yes, I know. But he said that fraglist GRO + NAT works for him.
> > I want to find out why it works for him, but not for you.
>
> I found that it worked for me because I advertised fraglist GSO
> support in my driver (and added actual support for xmitting
> fraglists). If so, kernel won't resegment GSO into a list of
> plain packets, so no __udp_gso_segment_list() will be called.
>
> I think it will break if I disable fraglist GSO feature through
> Ethtool, so I could test your patches.
Thanks for the reply. In my case I enabled NETIF_F_GRO_FRAGLIST on
my driver. It expected that NAT done on each skb of the forwarded
fraglist.
>
> >>>
> >>> I would not like to add this to a generic codepath. I think we can
> >>> relatively easy copy the full headers in skb_segment_list().
> >>
> >> I tried to copy the full headers with the similar approach, but it
> >> copies length too. Can we keep the length of each skb of the fraglist?
> >
> > Ah yes, good point.
> >
> > Then maybe you can move your approach into __udp_gso_segment_list()
> > so that we dont touch generic code.
> >
Okay, I will move it into __udp_gso_segment_list() on v3.
> >>
> >>>
> >>> I think about something like the (completely untested) patch below:
> >>>
> >>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> >>> index f62cae3f75d8..63ae7f79fad7 100644
> >>> --- a/net/core/skbuff.c
> >>> +++ b/net/core/skbuff.c
> >>> @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> >>> unsigned int offset)
> >>> {
> >>> struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> >>> + unsigned int doffset = skb->data - skb_mac_header(skb);
> >>> unsigned int tnl_hlen = skb_tnl_header_len(skb);
> >>> unsigned int delta_truesize = 0;
> >>> unsigned int delta_len = 0;
> >>> struct sk_buff *tail = NULL;
> >>> struct sk_buff *nskb;
> >>>
> >>> - skb_push(skb, -skb_network_offset(skb) + offset);
> >>> + skb_push(skb, doffset);
> >>>
> >>> skb_shinfo(skb)->frag_list = NULL;
> >>>
> >>> @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> >>> delta_len += nskb->len;
> >>> delta_truesize += nskb->truesize;
> >>>
> >>> - skb_push(nskb, -skb_network_offset(nskb) + offset);
> >>> + skb_push(nskb, doffset);
> >>>
> >>> skb_release_head_state(nskb);
> >>> __copy_skb_header(nskb, skb);
> >>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> >>> index ff39e94781bf..1181398378b8 100644
> >>> --- a/net/ipv4/udp_offload.c
> >>> +++ b/net/ipv4/udp_offload.c
> >>> @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >>> static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> >>> netdev_features_t features)
> >>> {
> >>> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> >>> unsigned int mss = skb_shinfo(skb)->gso_size;
> >>> + unsigned int offset;
> >>>
> >>> - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> >>> + skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
> >>> +
> >>> + /* Check for header changes and copy the full header in that case. */
> >>> + if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
> >>> + (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
> >>> + (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
> >>> + (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
> >>> + offset = skb_mac_header_len(skb);
> >>> + else
> >>> + offset = skb->data - skb_mac_header(skb);
> >>> +
> >>> + skb = skb_segment_list(skb, features, offset);
> >>> if (IS_ERR(skb))
> >>> return skb;
> >>>
> >>>
> >>> After that you can apply the CSUM magic in __udp_gso_segment_list().
>
> I'll test and let you know if it works. If doesn't, I think I'll be
> able to get a working one based on this.
>
> >> Sorry, I don't know CSUM magic well. Is it used for checksum
> >> incremental update too?
> >
> > With that I meant the checksum updating you did in your patch.
Ah, I see.
>
> Thanks,
> Al
From: Dongseok Yi <[email protected]>
Date: Fri, 15 Jan 2021 20:21:06 +0900
> On 2021-01-15 19:51, Alexander Lobakin wrote:
>> From: Steffen Klassert <[email protected]>
>> Date: Fri, 15 Jan 2021 10:27:52 +0100
>>
>>> On Fri, Jan 15, 2021 at 05:55:22PM +0900, Dongseok Yi wrote:
>>>> On 2021-01-15 17:12, Steffen Klassert wrote:
>>>>> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
>>>>>> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
>>>>>> forwarding. Only the header of head_skb from ip_finish_output_gso ->
>>>>>> skb_gso_segment is updated but following frag_skbs are not updated.
>>>>>>
>>>>>> A call path skb_mac_gso_segment -> inet_gso_segment ->
>>>>>> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
>>>>>> does not try to update UDP/IP header of the segment list.
>>>>>
>>>>> We still need to find out why it works for Alexander, but not for you.
>>>>> Different usecases?
>>>>
>>>> This patch is not for
>>>> https://lore.kernel.org/patchwork/patch/1364544/
>>>> Alexander might want to call udp_gro_receive_segment even when
>>>> !sk and ~NETIF_F_GRO_FRAGLIST.
>>>
>>> Yes, I know. But he said that fraglist GRO + NAT works for him.
>>> I want to find out why it works for him, but not for you.
>>
>> I found that it worked for me because I advertised fraglist GSO
>> support in my driver (and added actual support for xmitting
>> fraglists). If so, kernel won't resegment GSO into a list of
>> plain packets, so no __udp_gso_segment_list() will be called.
>>
>> I think it will break if I disable fraglist GSO feature through
>> Ethtool, so I could test your patches.
>
> Thanks for the reply. In my case I enabled NETIF_F_GRO_FRAGLIST on
> my driver. It expected that NAT done on each skb of the forwarded
> fraglist.
>
>>
>>>>>
>>>>> I would not like to add this to a generic codepath. I think we can
>>>>> relatively easy copy the full headers in skb_segment_list().
>>>>
>>>> I tried to copy the full headers with the similar approach, but it
>>>> copies length too. Can we keep the length of each skb of the fraglist?
>>>
>>> Ah yes, good point.
>>>
>>> Then maybe you can move your approach into __udp_gso_segment_list()
>>> so that we dont touch generic code.
>>>
>
> Okay, I will move it into __udp_gso_segment_list() on v3.
I'll take this one. We also need to cover cases with altered headroom
size, e.g. when the NIC is behind the switch and sends packets with
CPU tags that is being added prior to resegmentation.
The base suggested by Steffen in good enough, just needs a bit more
handling. I'll publish a final fix soon.
>>>>
>>>>>
>>>>> I think about something like the (completely untested) patch below:
>>>>>
>>>>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>>>>> index f62cae3f75d8..63ae7f79fad7 100644
>>>>> --- a/net/core/skbuff.c
>>>>> +++ b/net/core/skbuff.c
>>>>> @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>>>>> unsigned int offset)
>>>>> {
>>>>> struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
>>>>> + unsigned int doffset = skb->data - skb_mac_header(skb);
>>>>> unsigned int tnl_hlen = skb_tnl_header_len(skb);
>>>>> unsigned int delta_truesize = 0;
>>>>> unsigned int delta_len = 0;
>>>>> struct sk_buff *tail = NULL;
>>>>> struct sk_buff *nskb;
>>>>>
>>>>> - skb_push(skb, -skb_network_offset(skb) + offset);
>>>>> + skb_push(skb, doffset);
>>>>>
>>>>> skb_shinfo(skb)->frag_list = NULL;
>>>>>
>>>>> @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>>>>> delta_len += nskb->len;
>>>>> delta_truesize += nskb->truesize;
>>>>>
>>>>> - skb_push(nskb, -skb_network_offset(nskb) + offset);
>>>>> + skb_push(nskb, doffset);
>>>>>
>>>>> skb_release_head_state(nskb);
>>>>> __copy_skb_header(nskb, skb);
>>>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>>>> index ff39e94781bf..1181398378b8 100644
>>>>> --- a/net/ipv4/udp_offload.c
>>>>> +++ b/net/ipv4/udp_offload.c
>>>>> @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
>>>>> static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
>>>>> netdev_features_t features)
>>>>> {
>>>>> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
>>>>> unsigned int mss = skb_shinfo(skb)->gso_size;
>>>>> + unsigned int offset;
>>>>>
>>>>> - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
>>>>> + skb_headers_offset_update(list_skb, skb_headroom(list_skb) - skb_headroom(skb));
>>>>> +
>>>>> + /* Check for header changes and copy the full header in that case. */
>>>>> + if ((udp_hdr(skb)->dest == udp_hdr(list_skb)->dest) &&
>>>>> + (udp_hdr(skb)->source == udp_hdr(list_skb)->source) &&
>>>>> + (ip_hdr(skb)->daddr == ip_hdr(list_skb)->daddr) &&
>>>>> + (ip_hdr(skb)->saddr == ip_hdr(list_skb)->saddr))
>>>>> + offset = skb_mac_header_len(skb);
>>>>> + else
>>>>> + offset = skb->data - skb_mac_header(skb);
>>>>> +
>>>>> + skb = skb_segment_list(skb, features, offset);
>>>>> if (IS_ERR(skb))
>>>>> return skb;
>>>>>
>>>>>
>>>>> After that you can apply the CSUM magic in __udp_gso_segment_list().
>>
>> I'll test and let you know if it works. If doesn't, I think I'll be
>> able to get a working one based on this.
>>
>>>> Sorry, I don't know CSUM magic well. Is it used for checksum
>>>> incremental update too?
>>>
>>> With that I meant the checksum updating you did in your patch.
>
> Ah, I see.
>
>>
>> Thanks,
>> Al
Al