2018-11-28 12:17:50

by Yafang Shao

[permalink] [raw]
Subject: [PATCH net-next] tcp: introduce skb_rbtree_walk_safe() and use it in tcp_clean_rtx_queue()

When walk RB tree, we'd better use skb_rbtree_walk* helpers, that can make
the code more clear.
As skb_rbtree_walk() can't be used in tcp_clean_rtx_queue(), so the new
helper skb_rbtree_walk_safe() is introduced.

Signed-off-by: Yafang Shao <[email protected]>
---
include/linux/skbuff.h | 5 +++++
net/ipv4/tcp_input.c | 5 ++---
2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 73902ac..37ff792 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3256,6 +3256,11 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
for (skb = skb_rb_first(root); skb != NULL; \
skb = skb_rb_next(skb))

+#define skb_rbtree_walk_safe(skb, root, tmp) \
+ for (skb = skb_rb_first(root); \
+ tmp = skb ? skb_rb_next(skb) : NULL, skb != NULL; \
+ skb = tmp)
+
#define skb_rbtree_walk_from(skb) \
for (; skb != NULL; \
skb = skb_rb_next(skb))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f323978..ab6add2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3043,7 +3043,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
- struct sk_buff *skb, *next;
+ struct sk_buff *skb, *tmp;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
@@ -3055,7 +3055,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,

first_ackt = 0;

- for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
+ skb_rbtree_walk_safe(skb, &sk->tcp_rtx_queue, tmp) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
const u32 start_seq = scb->seq;
u8 sacked = scb->sacked;
@@ -3126,7 +3126,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked)
break;

- next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
--
1.8.3.1



2018-11-28 14:46:03

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH net-next] tcp: introduce skb_rbtree_walk_safe() and use it in tcp_clean_rtx_queue()



On 11/28/2018 04:16 AM, Yafang Shao wrote:
> When walk RB tree, we'd better use skb_rbtree_walk* helpers, that can make
> the code more clear.
> As skb_rbtree_walk() can't be used in tcp_clean_rtx_queue(), so the new
> helper skb_rbtree_walk_safe() is introduced.

This makes things slower. Let me explain inline.

>
> Signed-off-by: Yafang Shao <[email protected]>
> ---
> include/linux/skbuff.h | 5 +++++
> net/ipv4/tcp_input.c | 5 ++---
> 2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 73902ac..37ff792 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3256,6 +3256,11 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
> for (skb = skb_rb_first(root); skb != NULL; \
> skb = skb_rb_next(skb))
>
> +#define skb_rbtree_walk_safe(skb, root, tmp) \
> + for (skb = skb_rb_first(root); \
> + tmp = skb ? skb_rb_next(skb) : NULL, skb != NULL; \
> + skb = tmp)
> +
> #define skb_rbtree_walk_from(skb) \
> for (; skb != NULL; \
> skb = skb_rb_next(skb))
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index f323978..ab6add2 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3043,7 +3043,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> struct tcp_sock *tp = tcp_sk(sk);
> u32 prior_sacked = tp->sacked_out;
> u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
> - struct sk_buff *skb, *next;
> + struct sk_buff *skb, *tmp;
> bool fully_acked = true;
> long sack_rtt_us = -1L;
> long seq_rtt_us = -1L;
> @@ -3055,7 +3055,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
>
> first_ackt = 0;
>
> - for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
> + skb_rbtree_walk_safe(skb, &sk->tcp_rtx_queue, tmp) {
> struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
> const u32 start_seq = scb->seq;
> u8 sacked = scb->sacked;
> @@ -3126,7 +3126,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> if (!fully_acked)
> break;
>
> - next = skb_rb_next(skb);

We call skb_rb_next() here only, not at the beginning of the loop.

Why ?

Because we can break of the loop if the current skb is not fully acked.

So your patch would add unnecessary overhead, since the extra sk_rb_next()
could add more extra cache line misses.



2018-11-28 16:55:14

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH net-next] tcp: introduce skb_rbtree_walk_safe() and use it in tcp_clean_rtx_queue()



On 11/28/2018 06:44 AM, Eric Dumazet wrote:
>

> Because we can break of the loop if the current skb is not fully acked.
>
> So your patch would add unnecessary overhead, since the extra sk_rb_next()
> could add more extra cache line misses.

I am testing the following optimization, since we can avoid the rb_next() call
when we reached snd_una

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f32397890b6dcbc34976954c4be142108efa04d8..6829e470f0c186a73c34dca414cd4a2b379baded 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3126,7 +3126,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked)
break;

- next = skb_rb_next(skb);
+ next = (scb->end_seq == tp->snd_una) ? NULL : skb_rb_next(skb);
+
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))


2018-11-28 17:27:47

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH net-next] tcp: introduce skb_rbtree_walk_safe() and use it in tcp_clean_rtx_queue()



On 11/28/2018 08:53 AM, Eric Dumazet wrote:
>
>
> On 11/28/2018 06:44 AM, Eric Dumazet wrote:
>>
>
>> Because we can break of the loop if the current skb is not fully acked.
>>
>> So your patch would add unnecessary overhead, since the extra sk_rb_next()
>> could add more extra cache line misses.
>
> I am testing the following optimization, since we can avoid the rb_next() call
> when we reached snd_una
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index f32397890b6dcbc34976954c4be142108efa04d8..6829e470f0c186a73c34dca414cd4a2b379baded 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3126,7 +3126,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> if (!fully_acked)
> break;
>
> - next = skb_rb_next(skb);
> + next = (scb->end_seq == tp->snd_una) ? NULL : skb_rb_next(skb);
> +
> if (unlikely(skb == tp->retransmit_skb_hint))
> tp->retransmit_skb_hint = NULL;
> if (unlikely(skb == tp->lost_skb_hint))
>

This does not work since we use next skb after the loop :

1) if (!skb)
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);

This test could use tcp_rtx_queue_empty()

2) SACK reneging
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;

3) FLAG_SET_XMIT_TIMER
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
tcp_skb_timestamp_us(skb))) {
flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */

So this idea is not feasible.

2018-11-29 02:35:08

by Yafang Shao

[permalink] [raw]
Subject: Re: [PATCH net-next] tcp: introduce skb_rbtree_walk_safe() and use it in tcp_clean_rtx_queue()

On Wed, Nov 28, 2018 at 10:44 PM Eric Dumazet <[email protected]> wrote:
>
>
>
> On 11/28/2018 04:16 AM, Yafang Shao wrote:
> > When walk RB tree, we'd better use skb_rbtree_walk* helpers, that can make
> > the code more clear.
> > As skb_rbtree_walk() can't be used in tcp_clean_rtx_queue(), so the new
> > helper skb_rbtree_walk_safe() is introduced.
>
> This makes things slower. Let me explain inline.
>
> >
> > Signed-off-by: Yafang Shao <[email protected]>
> > ---
> > include/linux/skbuff.h | 5 +++++
> > net/ipv4/tcp_input.c | 5 ++---
> > 2 files changed, 7 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 73902ac..37ff792 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -3256,6 +3256,11 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
> > for (skb = skb_rb_first(root); skb != NULL; \
> > skb = skb_rb_next(skb))
> >
> > +#define skb_rbtree_walk_safe(skb, root, tmp) \
> > + for (skb = skb_rb_first(root); \
> > + tmp = skb ? skb_rb_next(skb) : NULL, skb != NULL; \
> > + skb = tmp)
> > +
> > #define skb_rbtree_walk_from(skb) \
> > for (; skb != NULL; \
> > skb = skb_rb_next(skb))
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index f323978..ab6add2 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -3043,7 +3043,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> > struct tcp_sock *tp = tcp_sk(sk);
> > u32 prior_sacked = tp->sacked_out;
> > u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
> > - struct sk_buff *skb, *next;
> > + struct sk_buff *skb, *tmp;
> > bool fully_acked = true;
> > long sack_rtt_us = -1L;
> > long seq_rtt_us = -1L;
> > @@ -3055,7 +3055,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> >
> > first_ackt = 0;
> >
> > - for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
> > + skb_rbtree_walk_safe(skb, &sk->tcp_rtx_queue, tmp) {
> > struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
> > const u32 start_seq = scb->seq;
> > u8 sacked = scb->sacked;
> > @@ -3126,7 +3126,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
> > if (!fully_acked)
> > break;
> >
> > - next = skb_rb_next(skb);
>
> We call skb_rb_next() here only, not at the beginning of the loop.
>
> Why ?
>
> Because we can break of the loop if the current skb is not fully acked.
>
> So your patch would add unnecessary overhead, since the extra sk_rb_next()
> could add more extra cache line misses.
>

I thought this extra sk_rb_next() doesn't add much overhead before,
since it won't take long time to execute.
Seems I made a mistake.

Thanks
Yafang