2024-04-26 06:52:28

by Felix Fietkau

[permalink] [raw]
Subject: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

Preparation for adding TCP fraglist GRO support. It expects packets to be
combined in a similar way as UDP fraglist GSO packets.
For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.

Signed-off-by: Felix Fietkau <[email protected]>
---
net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
net/ipv6/tcpv6_offload.c | 3 ++
2 files changed, 68 insertions(+)

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fab0973f995b..c493e95e09a5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
}
}

+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 *newip,
+ __be16 *oldport, __be16 *newport)
+{ /* Overwrite *oldip / *oldport with *newip / *newport and patch the TCP and IPv4 checksums of @seg to match. */
+ struct tcphdr *th;
+ struct iphdr *iph;
+
+ if (*oldip == *newip && *oldport == *newport) /* address and port both already match: nothing to patch */
+ return;
+
+ th = tcp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); /* account for the address change in the TCP checksum */
+ inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); /* account for the port change in the TCP checksum */
+ *oldport = *newport; /* the port is stored only after the checksum was adjusted with the old value */
+
+ csum_replace4(&iph->check, *oldip, *newip); /* the IPv4 header checksum is adjusted for the address as well */
+ *oldip = *newip; /* the address is stored last, once both checksums have used the old value */
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{ /* Copy the first segment's IPv4 addresses and TCP ports into every later segment of @segs, fixing checksums; returns @segs. */
+ struct sk_buff *seg;
+ struct tcphdr *th, *th2;
+ struct iphdr *iph, *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg); /* th / iph: headers of the first segment, used as the reference values below */
+ iph = ip_hdr(seg);
+ th2 = tcp_hdr(seg->next); /* NOTE(review): seg->next is dereferenced without a NULL check; assumes the list has at least two segments - confirm */
+ iph2 = ip_hdr(seg->next);
+
+ if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) && /* single 32-bit compare starting at 'source'; presumably covers source and dest ports together - confirm struct tcphdr layout */
+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
+ return segs; /* second segment already matches the first: list is returned untouched, later segments are not inspected */
+
+ while ((seg = seg->next)) { /* visits every segment after the first */
+ th2 = tcp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->saddr, &iph->saddr,
+ &th2->source, &th->source);
+ __tcpv4_gso_segment_csum(seg,
+ &iph2->daddr, &iph->daddr,
+ &th2->dest, &th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{ /* Split @skb via skb_segment_list(), then pass the resulting list through __tcpv4_gso_segment_list_csum(). */
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb)) /* segmentation failed: hand the error pointer back unchanged */
+ return skb;
+
+ return __tcpv4_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -37,6 +99,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);

+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __tcp4_gso_segment_list(skb, features);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 4b07d1e6c952..b3b8e1f6b92a 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -51,6 +51,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);

+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+ return skb_segment_list(skb, features, skb_mac_header_len(skb));
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
--
2.44.0



2024-04-26 07:45:07

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <[email protected]> wrote:
>
> Preparation for adding TCP fraglist GRO support. It expects packets to be
> combined in a similar way as UDP fraglist GSO packets.
> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
>
> Signed-off-by: Felix Fietkau <[email protected]>
> ---
> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
> net/ipv6/tcpv6_offload.c | 3 ++
> 2 files changed, 68 insertions(+)
>
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index fab0973f995b..c493e95e09a5 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
> }
> }
>
> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
> + __be32 *oldip, __be32 *newip,
> + __be16 *oldport, __be16 *newport)


Do we really need pointers for newip and newport ?

> +{
> + struct tcphdr *th;
> + struct iphdr *iph;
> +
> + if (*oldip == *newip && *oldport == *newport)
> + return;
> +
> + th = tcp_hdr(seg);
> + iph = ip_hdr(seg);
> +
> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
> + *oldport = *newport;
> +
> + csum_replace4(&iph->check, *oldip, *newip);
> + *oldip = *newip;
> +}
> +
> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
> +{
> + struct sk_buff *seg;
> + struct tcphdr *th, *th2;
> + struct iphdr *iph, *iph2;

I would probably add a const qualifier to th and iph

> +
> + seg = segs;
> + th = tcp_hdr(seg);
> + iph = ip_hdr(seg);
> + th2 = tcp_hdr(seg->next);
> + iph2 = ip_hdr(seg->next);
> +
> + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&


> + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
> + return segs;
> +
> + while ((seg = seg->next)) {
> + th2 = tcp_hdr(seg);
> + iph2 = ip_hdr(seg);
> +
> + __tcpv4_gso_segment_csum(seg,
> + &iph2->saddr, &iph->saddr,
> + &th2->source, &th->source);
> + __tcpv4_gso_segment_csum(seg,
> + &iph2->daddr, &iph->daddr,
> + &th2->dest, &th->dest);
> + }
> +
> + return segs;
> +}
>

2024-04-26 08:29:16

by Paolo Abeni

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
> Preparation for adding TCP fraglist GRO support. It expects packets to be
> combined in a similar way as UDP fraglist GSO packets.
> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
>
> Signed-off-by: Felix Fietkau <[email protected]>
> ---
> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
> net/ipv6/tcpv6_offload.c | 3 ++
> 2 files changed, 68 insertions(+)
>
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index fab0973f995b..c493e95e09a5 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
> }
> }
>
> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
> + __be32 *oldip, __be32 *newip,
> + __be16 *oldport, __be16 *newport)
> +{
> + struct tcphdr *th;
> + struct iphdr *iph;
> +
> + if (*oldip == *newip && *oldport == *newport)
> + return;
> +
> + th = tcp_hdr(seg);
> + iph = ip_hdr(seg);
> +
> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
> + *oldport = *newport;
> +
> + csum_replace4(&iph->check, *oldip, *newip);
> + *oldip = *newip;
> +}
> +
> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
> +{
> + struct sk_buff *seg;
> + struct tcphdr *th, *th2;
> + struct iphdr *iph, *iph2;
> +
> + seg = segs;
> + th = tcp_hdr(seg);
> + iph = ip_hdr(seg);
> + th2 = tcp_hdr(seg->next);
> + iph2 = ip_hdr(seg->next);
> +
> + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
> + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
> + return segs;

As mentioned in previous revisions, I think a problem with this
approach is that the stack could make other changes to the TCP header
after the GRO stage, that are unnoticed here and could cause csum
corruption, if the egress device does not recompute the packet csum.

Cheers,

Paolo


2024-04-26 09:28:54

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On 26.04.24 09:44, Eric Dumazet wrote:
> On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <[email protected]> wrote:
>>
>> Preparation for adding TCP fraglist GRO support. It expects packets to be
>> combined in a similar way as UDP fraglist GSO packets.
>> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
>>
>> Signed-off-by: Felix Fietkau <[email protected]>
>> ---
>> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
>> net/ipv6/tcpv6_offload.c | 3 ++
>> 2 files changed, 68 insertions(+)
>>
>> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
>> index fab0973f995b..c493e95e09a5 100644
>> --- a/net/ipv4/tcp_offload.c
>> +++ b/net/ipv4/tcp_offload.c
>> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
>> }
>> }
>>
>> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
>> + __be32 *oldip, __be32 *newip,
>> + __be16 *oldport, __be16 *newport)
>
>
> Do we really need pointers for newip and newport ?
>
>> +{
>> + struct tcphdr *th;
>> + struct iphdr *iph;
>> +
>> + if (*oldip == *newip && *oldport == *newport)
>> + return;
>> +
>> + th = tcp_hdr(seg);
>> + iph = ip_hdr(seg);
>> +
>> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
>> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
>> + *oldport = *newport;
>> +
>> + csum_replace4(&iph->check, *oldip, *newip);
>> + *oldip = *newip;
>> +}
>> +
>> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
>> +{
>> + struct sk_buff *seg;
>> + struct tcphdr *th, *th2;
>> + struct iphdr *iph, *iph2;
>
> I would probably add a const qualifier to th and iph

Will do, thanks.

- Felix


2024-04-26 09:39:53

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On 26.04.24 10:28, Paolo Abeni wrote:
> On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
>> Preparation for adding TCP fraglist GRO support. It expects packets to be
>> combined in a similar way as UDP fraglist GSO packets.
>> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
>>
>> Signed-off-by: Felix Fietkau <[email protected]>
>> ---
>> net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
>> net/ipv6/tcpv6_offload.c | 3 ++
>> 2 files changed, 68 insertions(+)
>>
>> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
>> index fab0973f995b..c493e95e09a5 100644
>> --- a/net/ipv4/tcp_offload.c
>> +++ b/net/ipv4/tcp_offload.c
>> @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
>> }
>> }
>>
>> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
>> + __be32 *oldip, __be32 *newip,
>> + __be16 *oldport, __be16 *newport)
>> +{
>> + struct tcphdr *th;
>> + struct iphdr *iph;
>> +
>> + if (*oldip == *newip && *oldport == *newport)
>> + return;
>> +
>> + th = tcp_hdr(seg);
>> + iph = ip_hdr(seg);
>> +
>> + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
>> + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
>> + *oldport = *newport;
>> +
>> + csum_replace4(&iph->check, *oldip, *newip);
>> + *oldip = *newip;
>> +}
>> +
>> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
>> +{
>> + struct sk_buff *seg;
>> + struct tcphdr *th, *th2;
>> + struct iphdr *iph, *iph2;
>> +
>> + seg = segs;
>> + th = tcp_hdr(seg);
>> + iph = ip_hdr(seg);
>> + th2 = tcp_hdr(seg->next);
>> + iph2 = ip_hdr(seg->next);
>> +
>> + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
>> + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
>> + return segs;
>
> As mentioned in previous revisions, I think a problem with this
> approach is that the stack could make other changes to the TCP header
> after the GRO stage, that are unnoticed here and could cause csum
> corruption, if the egress device does not recompute the packet csum.

On segmentation, each packet keeps its original TCP header and csum. If
the stack makes changes, they apply to the first packet only. I don't
see how we could get csum corruption.

- Felix

2024-04-26 10:40:55

by Paolo Abeni

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On Fri, 2024-04-26 at 11:39 +0200, Felix Fietkau wrote:
> On 26.04.24 10:28, Paolo Abeni wrote:
> > On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
> > > Preparation for adding TCP fraglist GRO support. It expects packets to be
> > > combined in a similar way as UDP fraglist GSO packets.
> > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
> > >
> > > Signed-off-by: Felix Fietkau <[email protected]>
> > > ---
> > > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
> > > net/ipv6/tcpv6_offload.c | 3 ++
> > > 2 files changed, 68 insertions(+)
> > >
> > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> > > index fab0973f995b..c493e95e09a5 100644
> > > --- a/net/ipv4/tcp_offload.c
> > > +++ b/net/ipv4/tcp_offload.c
> > > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
> > > }
> > > }
> > >
> > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
> > > + __be32 *oldip, __be32 *newip,
> > > + __be16 *oldport, __be16 *newport)
> > > +{
> > > + struct tcphdr *th;
> > > + struct iphdr *iph;
> > > +
> > > + if (*oldip == *newip && *oldport == *newport)
> > > + return;
> > > +
> > > + th = tcp_hdr(seg);
> > > + iph = ip_hdr(seg);
> > > +
> > > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
> > > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
> > > + *oldport = *newport;
> > > +
> > > + csum_replace4(&iph->check, *oldip, *newip);
> > > + *oldip = *newip;
> > > +}
> > > +
> > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
> > > +{
> > > + struct sk_buff *seg;
> > > + struct tcphdr *th, *th2;
> > > + struct iphdr *iph, *iph2;
> > > +
> > > + seg = segs;
> > > + th = tcp_hdr(seg);
> > > + iph = ip_hdr(seg);
> > > + th2 = tcp_hdr(seg->next);
> > > + iph2 = ip_hdr(seg->next);
> > > +
> > > + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
> > > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
> > > + return segs;
> >
> > As mentioned in previous revisions, I think a problem with this
> > approach is that the stack could make other changes to the TCP header
> > after the GRO stage, that are unnoticed here and could cause csum
> > corruption, if the egress device does not recompute the packet csum.
>
> On segmentation, each packet keeps its original TCP header and csum. If
> the stack makes changes, they apply to the first packet only. I don't
> see how we could get csum corruption.

You are right. I did not take into account that such changes (to the
first skb) are not reflected in the frag_list at segmentation time. The
end result could be different from what the user/admin is expecting,
but at least it should not cause drops.

Side note: like UDP, this does not support IPv6 NAT...

Thanks,

Paolo


2024-04-26 11:36:22

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH v3 net-next v3 2/6] net: add support for segmenting TCP fraglist GSO packets

On 26.04.24 12:40, Paolo Abeni wrote:
> On Fri, 2024-04-26 at 11:39 +0200, Felix Fietkau wrote:
>> On 26.04.24 10:28, Paolo Abeni wrote:
>> > On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
>> > > Preparation for adding TCP fraglist GRO support. It expects packets to be
>> > > combined in a similar way as UDP fraglist GSO packets.
>> > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO.
>> > >
>> > > Signed-off-by: Felix Fietkau <[email protected]>
>> > > ---
>> > > net/ipv4/tcp_offload.c | 65 ++++++++++++++++++++++++++++++++++++++++
>> > > net/ipv6/tcpv6_offload.c | 3 ++
>> > > 2 files changed, 68 insertions(+)
>> > >
>> > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
>> > > index fab0973f995b..c493e95e09a5 100644
>> > > --- a/net/ipv4/tcp_offload.c
>> > > +++ b/net/ipv4/tcp_offload.c
>> > > @@ -28,6 +28,68 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
>> > > }
>> > > }
>> > >
>> > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
>> > > + __be32 *oldip, __be32 *newip,
>> > > + __be16 *oldport, __be16 *newport)
>> > > +{
>> > > + struct tcphdr *th;
>> > > + struct iphdr *iph;
>> > > +
>> > > + if (*oldip == *newip && *oldport == *newport)
>> > > + return;
>> > > +
>> > > + th = tcp_hdr(seg);
>> > > + iph = ip_hdr(seg);
>> > > +
>> > > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true);
>> > > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false);
>> > > + *oldport = *newport;
>> > > +
>> > > + csum_replace4(&iph->check, *oldip, *newip);
>> > > + *oldip = *newip;
>> > > +}
>> > > +
>> > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
>> > > +{
>> > > + struct sk_buff *seg;
>> > > + struct tcphdr *th, *th2;
>> > > + struct iphdr *iph, *iph2;
>> > > +
>> > > + seg = segs;
>> > > + th = tcp_hdr(seg);
>> > > + iph = ip_hdr(seg);
>> > > + th2 = tcp_hdr(seg->next);
>> > > + iph2 = ip_hdr(seg->next);
>> > > +
>> > > + if (!(*(u32 *)&th->source ^ *(u32 *)&th2->source) &&
>> > > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
>> > > + return segs;
>> >
>> > As mentioned in previous revisions, I think a problem with this
>> > approach is that the stack could make other changes to the TCP header
>> > after the GRO stage, that are unnoticed here and could cause csum
>> > corruption, if the egress device does not recompute the packet csum.
>>
>> On segmentation, each packet keeps its original TCP header and csum. If
>> the stack makes changes, they apply to the first packet only. I don't
>> see how we could get csum corruption.
>
> You are right. I did not take into account that such changes (to the
> first skb) are not reflected in the frag_list at segmentation time. The
> end result could be different from what the user/admin is expecting,
> but at least it should not cause drops.
>
> Side note: like UDP, this does not support IPv6 NAT...

I will add that for both in the next version.

Thanks,

- Felix