This patch enables fast retransmissions after one dupACK for TCP if the stream is identified as thin. This will reduce latencies for thin streams that are not able to trigger fast retransmissions due to high packet interarrival time. This mechanism is only active if enabled by socket option (TCP_THIN_DUPACK) or sysctl (tcp_force_thin_dupack) and the stream is identified as thin.
Signed-off-by: Andreas Petlund <[email protected]>
---
include/linux/tcp.h | 4 +++-
include/net/tcp.h | 1 +
net/ipv4/sysctl_net_ipv4.c | 8 ++++++++
net/ipv4/tcp.c | 5 +++++
net/ipv4/tcp_input.c | 8 ++++++++
5 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e64368d..f4a05ff 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -97,6 +97,7 @@ enum {
#define TCP_CONGESTION 13 /* Congestion control algorithm */
#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
#define TCP_THIN_RM_EXPB 15 /* Remove exp. backoff for thin streams*/
+#define TCP_THIN_DUPACK 16 /* Fast retrans. after 1 dupack */
#define TCPI_OPT_TIMESTAMPS 1
#define TCPI_OPT_SACK 2
@@ -301,7 +302,8 @@ struct tcp_sock {
u8 frto_counter; /* Number of new acks after RTO */
u8 nonagle; /* Disable Nagle algorithm? */
u8 thin_rm_expb:1, /* Remove exp. backoff for thin streams */
- thin_undef : 7;
+ thin_dupack : 1,/* Fast retransmit on first dupack */
+ thin_undef : 6;
/* RTT measurement */
u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 412c1bd..41f3a5e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -238,6 +238,7 @@ extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_force_thin_rm_expb;
+extern int sysctl_tcp_force_thin_dupack;
extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7458f37..8653867 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -721,6 +721,14 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_force_thin_dupack",
+ .data = &sysctl_tcp_force_thin_dupack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.ctl_name = CTL_UNNUMBERED,
.procname = "udp_mem",
.data = &sysctl_udp_mem,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b4b0931..de190db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2139,6 +2139,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->thin_rm_expb = 1;
break;
+ case TCP_THIN_DUPACK:
+ if (val)
+ tp->thin_dupack = 1;
+ break;
+
case TCP_CORK:
/* When set indicates to always queue non-full frames.
* Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784b..b71eb89 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;
+int sysctl_tcp_force_thin_dupack __read_mostly;
+
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;
@@ -2447,6 +2449,12 @@ static int tcp_time_to_recover(struct sock *sk)
return 1;
}
+ /* If a thin stream is detected, retransmit after first
+ * received dupack */
+ if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
+ tcp_dupack_heurestics(tp) > 1 && tcp_stream_is_thin(tp))
+ return 1;
+
return 0;
}
--
1.6.0.4
Andreas Petlund wrote:
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index e64368d..f4a05ff 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -97,6 +97,7 @@ enum {
> #define TCP_CONGESTION 13 /* Congestion control algorithm */
> #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
> #define TCP_THIN_RM_EXPB 15 /* Remove exp. backoff for thin streams*/
> +#define TCP_THIN_DUPACK 16 /* Fast retrans. after 1 dupack */
>
I've not had the chance to examine the rest, but I've been poking at a
patch series that's used 15 for over a year, so could you try 16 and 17?
On Tue, 27 Oct 2009, Andreas Petlund wrote:
> This patch enables fast retransmissions after one dupACK for TCP if the
> stream is identified as thin. This will reduce latencies for thin
> streams that are not able to trigger fast retransmissions due to high
> packet interarrival time. This mechanism is only active if enabled by
> iocontrol or syscontrol and the stream is identified as thin.
>
>
> Signed-off-by: Andreas Petlund <[email protected]>
> ---
> include/linux/tcp.h | 4 +++-
> include/net/tcp.h | 1 +
> net/ipv4/sysctl_net_ipv4.c | 8 ++++++++
> net/ipv4/tcp.c | 5 +++++
> net/ipv4/tcp_input.c | 8 ++++++++
> 5 files changed, 25 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index e64368d..f4a05ff 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -97,6 +97,7 @@ enum {
> #define TCP_CONGESTION 13 /* Congestion control algorithm */
> #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
> #define TCP_THIN_RM_EXPB 15 /* Remove exp. backoff for thin streams*/
> +#define TCP_THIN_DUPACK 16 /* Fast retrans. after 1 dupack */
>
> #define TCPI_OPT_TIMESTAMPS 1
> #define TCPI_OPT_SACK 2
> @@ -301,7 +302,8 @@ struct tcp_sock {
> u8 frto_counter; /* Number of new acks after RTO */
> u8 nonagle; /* Disable Nagle algorithm? */
> u8 thin_rm_expb:1, /* Remove exp. backoff for thin streams */
> - thin_undef : 7;
> + thin_dupack : 1,/* Fast retransmit on first dupack */
> + thin_undef : 6;
>
> /* RTT measurement */
> u32 srtt; /* smoothed round trip time << 3 */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 412c1bd..41f3a5e 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -238,6 +238,7 @@ extern int sysctl_tcp_workaround_signed_windows;
> extern int sysctl_tcp_slow_start_after_idle;
> extern int sysctl_tcp_max_ssthresh;
> extern int sysctl_tcp_force_thin_rm_expb;
> +extern int sysctl_tcp_force_thin_dupack;
>
> extern atomic_t tcp_memory_allocated;
> extern struct percpu_counter tcp_sockets_allocated;
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 7458f37..8653867 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -721,6 +721,14 @@ static struct ctl_table ipv4_table[] = {
> .proc_handler = proc_dointvec
> },
> {
> + .ctl_name = CTL_UNNUMBERED,
> + .procname = "tcp_force_thin_dupack",
> + .data = &sysctl_tcp_force_thin_dupack,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec
> + },
> + {
> .ctl_name = CTL_UNNUMBERED,
> .procname = "udp_mem",
> .data = &sysctl_udp_mem,
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index b4b0931..de190db 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2139,6 +2139,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
> tp->thin_rm_expb = 1;
> break;
>
> + case TCP_THIN_DUPACK:
> + if (val)
> + tp->thin_dupack = 1;
> + break;
> +
> case TCP_CORK:
> /* When set indicates to always queue non-full frames.
> * Later the user clears this option and we transmit
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index d86784b..b71eb89 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
> int sysctl_tcp_frto_response __read_mostly;
> int sysctl_tcp_nometrics_save __read_mostly;
>
> +int sysctl_tcp_force_thin_dupack __read_mostly;
> +
> int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
> int sysctl_tcp_abc __read_mostly;
>
> @@ -2447,6 +2449,12 @@ static int tcp_time_to_recover(struct sock *sk)
> return 1;
> }
>
> + /* If a thin stream is detected, retransmit after first
> + * received dupack */
> + if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
> + tcp_dupack_heurestics(tp) > 1 && tcp_stream_is_thin(tp))
> + return 1;
> +
> return 0;
> }
Have you tested it? ...I doubt this will work like you say and retransmit
something when the window is small. ...Besides, you should have built this
patch on top of the function rename you submitted earlier as after DaveM
applied that this will no longer even compile...
--
i.
Den 28. okt. 2009 kl. 03.43 skrev William Allen Simpson:
> Andreas Petlund wrote:
>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
>> index e64368d..f4a05ff 100644
>> --- a/include/linux/tcp.h
>> +++ b/include/linux/tcp.h
>> @@ -97,6 +97,7 @@ enum {
>> #define TCP_CONGESTION 13 /* Congestion control algorithm */
>> #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
>> #define TCP_THIN_RM_EXPB 15 /* Remove exp. backoff for thin streams*/
>> +#define TCP_THIN_DUPACK 16 /* Fast retrans. after 1 dupack */
>>
> I've not had the chance to examine the rest, but I've been poking at a
> patch series that's used 15 for over a year, so could you try 16 and
> 17?
Thank you for the feedback. I will address this in the next patch
iteration.
I apologise that some of you received this mail more than once. My email
client played a HTML-trick on me.
>> + /* If a thin stream is detected, retransmit after first
>> + * received dupack */
>> + if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
>> + tcp_dupack_heurestics(tp) > 1 && tcp_stream_is_thin(tp))
>> + return 1;
>> +
>> return 0;
>> }
>
> Have you tested it? ...I doubt this will work like you say and retransmit
> something when the window is small. ...Besides, you should have built this
> patch on top of the function rename you submitted earlier as after DaveM
> applied that this will no longer even compile...
>
> --
> i.
>
We have performed extensive tests mapping the effect of the patch you
commented on some months ago. Since then, the only change was the one you
requested of switching tcp_fackets_out() with tcp_dupack_heurestics().
After inspecting the code, I believed the effect should be equal to the
previous, only making considerations for SACK and FACK availability.
Please tell if this will break the intended effect, and I will modify the
patch accordingly.
Graphs from our tests of the original patch can be found at the location
linked to below. I have tested the new one for functionality, but have
not yet performed tests of this scope as the changes were minor. I will, of
course, fix the function rename in the next iteration. Sorry for that.
http://folk.uio.no/apetlund/lktmp/
-AP
On Thu, 29 Oct 2009, [email protected] wrote:
> I apologise that some of you received this mail more than once. My email
> client played a HTML-trick on me.
>
> >> + /* If a thin stream is detected, retransmit after first
> >> + * received dupack */
> >> + if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
> >> + tcp_dupack_heurestics(tp) > 1 && tcp_stream_is_thin(tp))
> >> + return 1;
> >> +
> >> return 0;
> >> }
> >
> > Have you tested it? ...I doubt this will work like you say and retransmit
> > something when the window is small. ...Besides, you should have built this
> > patch on top of the function rename you submitted earlier as after DaveM
> > applied that this will no longer even compile...
> >
> > --
> > i.
> >
>
> We have performed extensive tests mapping the effect of the patch you
> commented on some months ago. Since then, the only change was the one you
> requested of switching tcp_fackets_out() with tcp_dupack_heurestics().
> After inspecting the code, I believed the effect should be equal to the
> previous, only making considerations for SACK and FACK availability.
> Please tell if this will break the intended effect, and I will modify the
> patch accordingly.
Ah, you're of course right. FACK retransmits the head always but RFC3517
mode doesn't. I think you'd need to artificially lower (i.e., to calculate)
the dupthresh (from tp->reordering) to be 1 for it to work as intended.
> Graphs from our tests of the original patch can be found at the location
> linked to below. I have tested the new one for functionality, but have
> not et performed tests on this scope as the changes were minor. I will, of
> course, fix the function rename in the next iteration. Sorry for that.
>
> http://folk.uio.no/apetlund/lktmp/
Out of curiosity, have you run this more aggressive form of early retransmit
against the one ID gives? ...I checked your results but if I understood
them correctly the IDish early retransmit wasn't among the variants used.
--
i.
> On Thu, 29 Oct 2009, [email protected] wrote:
>
>> I apologise that some of you received this mail more than once. My email
>> client played a HTML-trick on me.
>> >> + /* If a thin stream is detected, retransmit after first
>> >> + * received dupack */
>> >> + if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
>> >> + tcp_dupack_heurestics(tp) > 1 && tcp_stream_is_thin(tp))
>> >> + return 1;
>> >> +
>> >> return 0;
>> >> }
>> >
>> > Have you tested it? ...I doubt this will work like you say and retransmit
>> > something when the window is small. ...Besides, you should have built this
>> > patch on top of the function rename you submitted earlier as after DaveM
>> > applied that this will no longer even compile...
>> >
>> > --
>> > i.
>> >
>> We have performed extensive tests mapping the effect of the patch you
>> commented on some months ago. Since then, the only change was the one you
>> requested of switching tcp_fackets_out() with tcp_dupack_heurestics().
>> After inspecting the code, I believed the effect should be equal to the
>> previous, only making considerations for SACK and FACK availability.
>> Please tell if this will break the intended effect, and I will modify the
>> patch accordingly.
>
> Ah, you're of course right. FACK retransmits the head always but RFC3517
> mode doesn't. I think you'd need to artificially lower (ie., to calculate)
> the dupthresh (from tp->reordering) to be 1 for it to work as intented.
>
>> Graphs from our tests of the original patch can be found at the location
>> linked to below. I have tested the new one for functionality, but have
>> not et performed tests on this scope as the changes were minor. I will, of
>> course, fix the function rename in the next iteration. Sorry for that.
>> http://folk.uio.no/apetlund/lktmp/
>
> You curiousity, have you run this more aggressive form of early retransmit
> against the one ID gives? ...I checked your results but if I understood
> them correctly the IDish early retransmit wasn't among the variants used.
We have not implemented EFR for Linux TCP. We have, however, performed
tests where we compare the FreeBSD implementation on SCTP with SCTP using
our proposed exponential backoff and dupACK modifications. I know that this
is not directly comparable, and link to this as a digression:
http://folk.uio.no/apetlund/lktmp/SCTP_thin_compare.pdf
If you are interested in our set of SCTP experiments, it is summarised in
the paper linked to below:
http://simula.no/research/networks/publications/Simula.ND.311/simula_pdf_file
Regards,
Andreas