Major changes:
-Possible to disable mechanisms by socket option
-Socket option value boundary check
Signed-off-by: Andreas Petlund <[email protected]>
---
include/linux/sysctl.h | 1 +
include/linux/tcp.h | 4 +++-
include/net/tcp.h | 1 +
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv4/tcp.c | 7 +++++++
net/ipv4/tcp_input.c | 11 +++++++++++
6 files changed, 30 insertions(+), 1 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d840d75..ded3f20 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -426,6 +426,7 @@ enum
NET_TCP_MAX_SSTHRESH=124,
NET_TCP_FRTO_RESPONSE=125,
NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
+ NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,
};
enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 67da706..c30ed17 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -104,6 +104,7 @@ enum {
#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */
#define TCP_THIN_LT 16 /* Use linear timeouts for thin streams*/
+#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
/* for TCP_INFO socket option */
#define TCPI_OPT_TIMESTAMPS 1
@@ -343,7 +344,8 @@ struct tcp_sock {
u8 frto_counter; /* Number of new acks after RTO */
u8 nonagle; /* Disable Nagle algorithm? */
u8 thin_lt : 1,/* Use linear timeouts for thin streams */
- thin_undef : 7;
+ thin_dupack : 1,/* Fast retransmit on first dupack */
+ thin_undef : 6;
/* RTT measurement */
u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bc5856a..af1253c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -245,6 +245,7 @@ extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_max_ssthresh;
extern int sysctl_tcp_cookie_size;
extern int sysctl_tcp_force_thin_linear_timeouts;
+extern int sysctl_tcp_force_thin_dupack;
extern atomic_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index cb2ed35..b097a58 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -582,6 +582,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_force_thin_dupack",
+ .data = &sysctl_tcp_force_thin_dupack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{
.procname = "udp_mem",
.data = &sysctl_udp_mem,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ce9aeb0..6d7cb9c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2236,6 +2236,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->thin_lt = val;
break;
+ case TCP_THIN_DUPACK:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->thin_dupack = val;
+ break;
+
case TCP_CORK:
/* When set indicates to always queue non-full frames.
* Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 28e0296..c5a73ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;
+int sysctl_tcp_force_thin_dupack __read_mostly;
+
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_abc __read_mostly;
@@ -2447,6 +2449,15 @@ static int tcp_time_to_recover(struct sock *sk)
return 1;
}
+ /* If a thin stream is detected, retransmit after first
+ * received dupack. Employ only if SACK is supported in order
+ * to avoid possible corner-case series of spurious retransmissions
+ * Use only if there are no unsent data. */
+ if ((tp->thin_dupack || sysctl_tcp_force_thin_dupack) &&
+ tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
+ tcp_is_sack(tp) && sk->sk_send_head == NULL)
+ return 1;
+
return 0;
}
--
1.6.3.3
Last year, I'm pretty sure I was on record as thinking this is *not* a
good idea. But at least it now requires a sysctl to turn on, and
should default to off.
Also that naming was a bit dicey. Now the names are more descriptive,
but the "force" is a bit overkill.
How about:
NET_TCP_FORCE_THIN_LINEAR_DUPACK -> NET_TCP_THIN_LINEAR_DUPACK
tcp_force_thin_dupack -> tcp_thin_linear_dupack
sysctl_tcp_force_thin_dupack -> sysctl_tcp_thin_linear_dupack
Andreas Petlund wrote:
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 28e0296..c5a73ab 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
> int sysctl_tcp_frto_response __read_mostly;
> int sysctl_tcp_nometrics_save __read_mostly;
>
> +int sysctl_tcp_force_thin_dupack __read_mostly;
> +
> int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
Where is the sysctl initialized?
On Fri, 12 Feb 2010, William Allen Simpson wrote:
> Andreas Petlund wrote:
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 28e0296..c5a73ab 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
> > int sysctl_tcp_frto_response __read_mostly;
> > int sysctl_tcp_nometrics_save __read_mostly;
> > +int sysctl_tcp_force_thin_dupack __read_mostly;
> > +
> > int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
>
> Where is the sysctl initialized?
...That's offloaded to the compiler, like is done with some of the above
ones too. There's nothing to worry in that.
--
i.
Andreas Petlund <[email protected]> writes:
> Major changes:
> -Possible to disable mechanisms by socket option
> -Socket option value boundary check
>
>
> Signed-off-by: Andreas Petlund <[email protected]>
> ---
> include/linux/sysctl.h | 1 +
> include/linux/tcp.h | 4 +++-
> include/net/tcp.h | 1 +
> net/ipv4/sysctl_net_ipv4.c | 7 +++++++
> net/ipv4/tcp.c | 7 +++++++
> net/ipv4/tcp_input.c | 11 +++++++++++
> 6 files changed, 30 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index d840d75..ded3f20 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -426,6 +426,7 @@ enum
> NET_TCP_MAX_SSTHRESH=124,
> NET_TCP_FRTO_RESPONSE=125,
> NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
> + NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,
There is no need to allocate a binary sysctl here.
Eric
On 13. feb. 2010 03:13, Eric W. Biederman wrote:
> Andreas Petlund <[email protected]> writes:
>
>> Major changes:
>> -Possible to disable mechanisms by socket option
>> -Socket option value boundary check
>>
>>
>> Signed-off-by: Andreas Petlund <[email protected]>
>> ---
>> include/linux/sysctl.h | 1 +
>> include/linux/tcp.h | 4 +++-
>> include/net/tcp.h | 1 +
>> net/ipv4/sysctl_net_ipv4.c | 7 +++++++
>> net/ipv4/tcp.c | 7 +++++++
>> net/ipv4/tcp_input.c | 11 +++++++++++
>> 6 files changed, 30 insertions(+), 1 deletions(-)
>>
>> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
>> index d840d75..ded3f20 100644
>> --- a/include/linux/sysctl.h
>> +++ b/include/linux/sysctl.h
>> @@ -426,6 +426,7 @@ enum
>> NET_TCP_MAX_SSTHRESH=124,
>> NET_TCP_FRTO_RESPONSE=125,
>> NET_TCP_FORCE_THIN_LINEAR_TIMEOUTS=126,
>> + NET_TCP_FORCE_THIN_LINEAR_DUPACK=127,
>
> There is no need to allocate a binary sysctl here.
>
> Eric
Thanks. I'll address this in the next iteration.
Best regrads,
Andreas
On 12. feb. 2010 12:19, William Allen Simpson wrote:
> Last year, I'm pretty sure I was on record as thinking this is *not* a
> good idea. But at least it now requires a sysctl to turn on, and
> should default to off.
>
> Also that naming was a bit dicey. Now the names are more descriptive,
> but the "force" is a bit overkill.
>
> How about:
> NET_TCP_FORCE_THIN_LINEAR_DUPACK -> NET_TCP_THIN_LINEAR_DUPACK
> tcp_force_thin_dupack -> tcp_thin_linear_dupack
> sysctl_tcp_force_thin_dupack -> sysctl_tcp_thin_linear_dupack
You uncovered a copy/paste/edit-typo there. The term "linear" had snuck
in even though it does not make sense for this patch. I think that
NET_TCP_THIN_DUPACK, tcp_thin_dupack and sysctl_tcp_thin_dupack will
be better.
Best regards,
Andreas