Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754099AbeAAO7Y (ORCPT + 1 other); Mon, 1 Jan 2018 09:59:24 -0500 Received: from mail.linuxfoundation.org ([140.211.169.12]:46336 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932689AbeAAOnz (ORCPT ); Mon, 1 Jan 2018 09:43:55 -0500 From: Greg Kroah-Hartman To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Yousuk Seung , Neal Cardwell , Yuchung Cheng , Soheil Hassas Yeganeh , Eric Dumazet , Priyaranjan Jha , "David S. Miller" Subject: [PATCH 4.14 087/146] tcp: invalidate rate samples during SACK reneging Date: Mon, 1 Jan 2018 15:37:58 +0100 Message-Id: <20180101140136.630652015@linuxfoundation.org> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20180101140123.743014891@linuxfoundation.org> References: <20180101140123.743014891@linuxfoundation.org> User-Agent: quilt/0.65 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: 4.14-stable review patch. If anyone has any objections, please let me know. ------------------ From: Yousuk Seung [ Upstream commit d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 ] Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 10 ++++++++-- net/ipv4/tcp_rate.c | 10 +++++++--- 5 files changed, 19 insertions(+), 7 deletions(-) --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -214,7 +214,8 @@ struct tcp_sock { u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ - unused:4; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:3; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1085,7 +1085,7 @@ void tcp_rate_skb_sent(struct sock *sk, void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs); + bool is_sack_reneg, struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2356,6 +2356,7 @@ int tcp_disconnect(struct sock *sk, int tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1975,6 +1975,8 @@ void tcp_enter_loss(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; tp->fackets_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2428,6 +2430,7 @@ static bool tcp_try_undo_recovery(struct return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2459,8 +2462,10 @@ static bool tcp_try_undo_loss(struct soc NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3551,6 +3556,7 @@ static int tcp_ack(struct sock *sk, cons struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3666,7 +3672,7 @@ static int tcp_ack(struct sock *sk, cons delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, sack_state.rate); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs) + bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 d rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return;