This implements fraglist GRO similar to how it's handled in UDP, however
no functional changes are added yet. The next change adds a heuristic for
using fraglist GRO instead of regular GRO.
Signed-off-by: Felix Fietkau <[email protected]>
---
include/net/tcp.h | 3 ++-
net/ipv4/tcp_offload.c | 29 +++++++++++++++++++++++++++--
net/ipv6/tcpv6_offload.c | 11 ++++++++++-
3 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b935e1ae4caf..875cda53a7c9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2194,7 +2194,8 @@ void tcp_v4_destroy_sock(struct sock *sk);
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ bool fraglist);
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 06dbb2e2b2f3..6294e7a5c099 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -252,7 +252,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
return segs;
}
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ bool fraglist)
{
struct sk_buff *pp = NULL;
struct sk_buff *p;
@@ -289,6 +290,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
len = skb_gro_len(skb);
flags = tcp_flag_word(th);
+ NAPI_GRO_CB(skb)->is_flist = fraglist;
list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -308,6 +310,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
found:
/* Include the IP ID check below from the inner most IP hdr */
flush = NAPI_GRO_CB(p)->flush;
+ flush |= fraglist != NAPI_GRO_CB(p)->is_flist;
flush |= (__force int)(flags & TCP_FLAG_CWR);
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -341,6 +344,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
flush |= skb_cmp_decrypted(p, skb);
+ if (fraglist) {
+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
+ flush |= skb->ip_summed != p->ip_summed;
+ flush |= skb->csum_level != p->csum_level;
+ flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+ flush |= NAPI_GRO_CB(p)->count >= 64;
+
+ if (flush || skb_gro_receive_list(p, skb))
+ mss = 1;
+
+ goto out_check_final;
+ }
+
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
goto out_check_final;
@@ -399,7 +415,7 @@ struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
return NULL;
}
- return tcp_gro_receive(head, skb);
+ return tcp_gro_receive(head, skb, false);
}
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
@@ -407,6 +423,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 12fe79cb2c10..239588557dc4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -24,7 +24,7 @@ struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
return NULL;
}
- return tcp_gro_receive(head, skb);
+ return tcp_gro_receive(head, skb, false);
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
@@ -32,6 +32,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
--
2.44.0
On Wed, 2024-04-24 at 20:04 +0200, Felix Fietkau wrote:
> This implements fraglist GRO similar to how it's handled in UDP, however
> no functional changes are added yet. The next change adds a heuristic for
> using fraglist GRO instead of regular GRO.
>
> Signed-off-by: Felix Fietkau <[email protected]>
> ---
> include/net/tcp.h | 3 ++-
> net/ipv4/tcp_offload.c | 29 +++++++++++++++++++++++++++--
> net/ipv6/tcpv6_offload.c | 11 ++++++++++-
> 3 files changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index b935e1ae4caf..875cda53a7c9 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2194,7 +2194,8 @@ void tcp_v4_destroy_sock(struct sock *sk);
>
> struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
> netdev_features_t features);
> -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
> +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
> + bool fraglist);
> INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
> INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
> INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index 06dbb2e2b2f3..6294e7a5c099 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -252,7 +252,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
> return segs;
> }
>
> -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
> +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
> + bool fraglist)
> {
> struct sk_buff *pp = NULL;
> struct sk_buff *p;
> @@ -289,6 +290,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
> len = skb_gro_len(skb);
> flags = tcp_flag_word(th);
>
> + NAPI_GRO_CB(skb)->is_flist = fraglist;
> list_for_each_entry(p, head, list) {
> if (!NAPI_GRO_CB(p)->same_flow)
> continue;
> @@ -308,6 +310,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
> found:
> /* Include the IP ID check below from the inner most IP hdr */
> flush = NAPI_GRO_CB(p)->flush;
> + flush |= fraglist != NAPI_GRO_CB(p)->is_flist;
> flush |= (__force int)(flags & TCP_FLAG_CWR);
> flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
> ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
> @@ -341,6 +344,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
> flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
> flush |= skb_cmp_decrypted(p, skb);
>
> + if (fraglist) {
> + flush |= (__force int)(flags ^ tcp_flag_word(th2));
Don't we have this check already a few lines above?
> + flush |= skb->ip_summed != p->ip_summed;
> + flush |= skb->csum_level != p->csum_level;
> + flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
Why do we need this check? The earlier skb_gro_may_pull() should already
ensure that, right?
> + flush |= NAPI_GRO_CB(p)->count >= 64;
> +
> + if (flush || skb_gro_receive_list(p, skb))
> + mss = 1;
> +
> + goto out_check_final;
TCP flags processing needs some care. You need to propagate the current
packet's flags to the older one, and update the older packet's csum
accordingly.
Cheers,
Paolo