2004-04-23 14:28:48

by Wesley Eddy

[permalink] [raw]
Subject: TCP rto estimation patch

The TCP RTO estimation algorithm defined in RFC 2988 is followed properly
in the kernel, however the constants alpha and beta in the specification
are hardcoded as 3 and 2 everywhere they occur in the kernel. Since these
constants crop up multiple times, this is poor programming practice. This
patch binds the numeric values to symbols RTT_ALPHA and RTTVAR_BETA, and
uses those symbolic values throughout the code. Since using 2 and 3 for
these values is not mandatory, this makes tweaking them much easier.

-Wes


diff -ur linux-2.6.5/include/linux/tcp.h linux-2.6.5-weddy/include/linux/tcp.h
--- linux-2.6.5/include/linux/tcp.h 2004-04-03 22:37:06.000000000 -0500
+++ linux-2.6.5-weddy/include/linux/tcp.h 2004-04-21 16:05:38.000000000 -0
400
@@ -406,4 +406,8 @@

#endif

+/* weights for RTO estimator from RFC 2988 */
+#define RTT_ALPHA 3
+#define RTTVAR_BETA 2
+
#endif /* _LINUX_TCP_H */
diff -ur linux-2.6.5/net/ipv4/tcp.c linux-2.6.5-weddy/net/ipv4/tcp.c
--- linux-2.6.5/net/ipv4/tcp.c 2004-04-03 22:36:53.000000000 -0500
+++ linux-2.6.5-weddy/net/ipv4/tcp.c 2004-04-21 16:00:29.000000000 -0400
@@ -2538,8 +2538,8 @@

info.tcpi_pmtu = tp->pmtu_cookie;
info.tcpi_rcv_ssthresh = tp->rcv_ssthresh;
- info.tcpi_rtt = ((1000000 * tp->srtt) / HZ) >> 3;
- info.tcpi_rttvar = ((1000000 * tp->mdev) / HZ) >> 2;
+ info.tcpi_rtt = ((1000000 * tp->srtt) / HZ) >> RTT_ALPHA;
+ info.tcpi_rttvar = ((1000000 * tp->mdev) / HZ) >> RTTVAR_BETA;
info.tcpi_snd_ssthresh = tp->snd_ssthresh;
info.tcpi_snd_cwnd = tp->snd_cwnd;
info.tcpi_advmss = tp->advmss;
diff -ur linux-2.6.5/net/ipv4/tcp_diag.c linux-2.6.5-weddy/net/ipv4/tcp_diag.c
--- linux-2.6.5/net/ipv4/tcp_diag.c 2004-04-03 22:36:14.000000000 -0500
+++ linux-2.6.5-weddy/net/ipv4/tcp_diag.c 2004-04-21 16:01:08.000000000 -0
400
@@ -188,8 +188,8 @@

info->tcpi_pmtu = tp->pmtu_cookie;
info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
- info->tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3;
- info->tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2;
+ info->tcpi_rtt = ((1000000*tp->srtt)/HZ)>>RTT_ALPHA;
+ info->tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>RTTVAR_BETA;
info->tcpi_snd_ssthresh = tp->snd_ssthresh;
info->tcpi_snd_cwnd = tp->snd_cwnd;
info->tcpi_advmss = tp->advmss;
diff -ur linux-2.6.5/net/ipv4/tcp_input.c linux-2.6.5-weddy/net/ipv4/tcp_input.c
--- linux-2.6.5/net/ipv4/tcp_input.c 2004-04-03 22:37:39.000000000 -0500
+++ linux-2.6.5-weddy/net/ipv4/tcp_input.c 2004-04-21 15:59:25.000000000 -0
400
@@ -435,11 +435,12 @@
if(m == 0)
m = 1;
if (tp->srtt != 0) {
- m -= (tp->srtt >> 3); /* m is now error in rtt est */
+ m -= (tp->srtt >> RTT_ALPHA); /* m is now error in rtt est */
tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0) {
m = -m; /* m is now abs(error) */
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ /* similar update on mdev */
+ m -= (tp->mdev >> RTTVAR_BETA);
/* This is similar to one of Eifel findings.
* Eifel blocks mdev updates when rtt decreases.
* This solution is a bit different: we use finer gain
@@ -449,9 +450,10 @@
* happening in pure Eifel.
*/
if (m > 0)
- m >>= 3;
+ m >>= RTT_ALPHA;
} else {
- m -= (tp->mdev >> 2); /* similar update on mdev */
+ /* similar update on mdev */
+ m -= (tp->mdev >> RTTVAR_BETA);
}
tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */
if (tp->mdev > tp->mdev_max) {
@@ -461,19 +463,21 @@
}
if (after(tp->snd_una, tp->rtt_seq)) {
if (tp->mdev_max < tp->rttvar)
- tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+ tp->rttvar -= (tp->rttvar-tp->mdev_max) >>
+ RTTVAR_BETA;
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max = TCP_RTO_MIN;
}
} else {
/* no previous measure. */
- tp->srtt = m<<3; /* take the measured time to be rtt */
- tp->mdev = m<<1; /* make sure rto = 3*rtt */
+ /* take the measured time to be rtt */
+ tp->srtt = m<<RTT_ALPHA;
+ tp->mdev = m<< (RTTVAR_BETA-1); /* make sure rto = 3*rtt */
tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
tp->rtt_seq = tp->snd_nxt;
}

- tcp_westwood_update_rtt(tp, tp->srtt >> 3);
+ tcp_westwood_update_rtt(tp, tp->srtt >> RTT_ALPHA);
}

/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -491,7 +495,7 @@
* is invisible. Actually, Linux-2.4 also generates erratic
* ACKs in some curcumstances.
*/
- tp->rto = (tp->srtt >> 3) + tp->rttvar;
+ tp->rto = (tp->srtt >> RTT_ALPHA) + tp->rttvar;

/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them,
@@ -543,7 +547,7 @@
if (m <= 0)
dst->metrics[RTAX_RTT-1] = tp->srtt;
else
- dst->metrics[RTAX_RTT-1] -= (m>>3);
+ dst->metrics[RTAX_RTT-1] -= (m>>RTT_ALPHA);
}

if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
@@ -551,7 +555,7 @@
m = -m;

/* Scale deviation to rttvar fixed point */
- m >>= 1;
+ m >>= (RTT_ALPHA-RTTVAR_BETA);
if (m < tp->mdev)
m = tp->mdev;

@@ -559,7 +563,8 @@
dst->metrics[RTAX_RTTVAR-1] = m;
else
dst->metrics[RTAX_RTTVAR-1] -=
- (dst->metrics[RTAX_RTTVAR-1] - m)>>2;
+ (dst->metrics[RTAX_RTTVAR-1] - m ) >>
+ RTTVAR_BETA;
}

if (tp->snd_ssthresh >= 0xFFFF) {
@@ -641,7 +646,8 @@
if (dst_metric(dst, RTAX_RTT) == 0)
goto reset;

- if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ if (!tp->srtt && dst_metric(dst, RTAX_RTT) <
+ (TCP_TIMEOUT_INIT << RTT_ALPHA))
goto reset;

/* Initial rtt is determined from SYN,SYN-ACK.
@@ -2083,7 +2089,7 @@

static inline __u32 westwood_do_filter(__u32 a, __u32 b)
{
- return (((7 * a) + b) >> 3);
+ return ((7 * a) + b) >> 3;
}

static void westwood_filter(struct sock *sk, __u32 delta)
diff -ur linux-2.6.5/net/ipv4/tcp_output.c linux-2.6.5-weddy/net/ipv4/tcp_output
.c
--- linux-2.6.5/net/ipv4/tcp_output.c 2004-04-03 22:37:36.000000000 -0500
+++ linux-2.6.5-weddy/net/ipv4/tcp_output.c 2004-04-21 16:00:39.000000000 -0
400
@@ -1356,7 +1356,7 @@
* directly.
*/
if (tp->srtt) {
- int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
+ int rtt = max(tp->srtt>>RTT_ALPHA, TCP_DELACK_MIN);

if (rtt < max_ato)
max_ato = rtt;


Attachments:
(No filename) (7.80 kB)
(No filename) (189.00 B)
Download all attachments

2004-04-23 17:35:56

by Horst H. von Brand

[permalink] [raw]
Subject: Re: TCP rto estimation patch

Wesley Eddy <[email protected]> said:
> The TCP RTO estimation algorithm defined in RFC 2988 is followed properly
> in the kernel, however the constants alpha and beta in the specification
> are hardcoded as 3 and 2 everywhere they occur in the kernel. Since these
> constants crop up multiple times, this is poor programming practice. This
> patch binds the numeric values to symbols RTT_ALPHA and RTTVAR_BETA, and
> uses those symbolic values throughout the code. Since using 2 and 3 for
> these values is not mandatory, this makes tweaking them much easier.

Why RTT_ALPHA and RTTVAR_BETA, and not just RTT_BETA? Or even RTO_xxx?

Is there any reason to change them, ever? What happens if you change them?
Restrictions on values? All this should go with such a patch IMHO (at least
pointers to relevant discussion).

Must go over it with a fine comb to make sure no unrelated 2 or 3 got
replaced... out of my league, sorry.

Your patch is MIME-mangled, =3D, =20, =\n shows up all over the place here.
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2004-04-23 20:48:05

by Wesley Eddy

[permalink] [raw]
Subject: Re: TCP rto estimation patch

On Fri, Apr 23, 2004 at 01:35:27PM -0400, Horst von Brand wrote:
>
> Why RTT_ALPHA and RTTVAR_BETA, and not just RTT_BETA? Or even RTO_xxx?
>

Per the spec, alpha is used to compute rtt and beta likewise for rttvar.

> Is there any reason to change them, ever? What happens if you change them?
> Restrictions on values? All this should go with such a patch IMHO (at least
> pointers to relevant discussion).

The information isn't relevant since the patch didn't change them, and if
you were interested in changing the values then simple google search
would return a plethora of research on RTT estimation.

> Must go over it with a fine comb to make sure no unrelated 2 or 3 got
> replaced... out of my league, sorry.

There are stray constants all over the place, which is something that
even freshman computer science students are taught to avoid. This patch
represents a minor improvement to that situation.

-Wes


Attachments:
(No filename) (921.00 B)
(No filename) (189.00 B)
Download all attachments

2004-04-23 21:06:59

by David Stevens

[permalink] [raw]
Subject: Re: TCP rto estimation patch

I believe "2" and "3" are the scale factors for the fixed-point
representation
of the data, not the "alpha" and "beta" I remember from the estimator
paper.

+-DLS

2004-04-23 21:09:50

by David Miller

[permalink] [raw]
Subject: Re: TCP rto estimation patch

On Fri, 23 Apr 2004 14:06:39 -0700
David Stevens <[email protected]> wrote:

> I believe "2" and "3" are the scale factors for the fixed-point
> representation
> of the data, not the "alpha" and "beta" I remember from the estimator
> paper.

That's correct.

2004-04-23 23:22:36

by Wesley Eddy

[permalink] [raw]
Subject: Re: TCP rto estimation patch

On Fri, Apr 23, 2004 at 02:06:39PM -0700, David Stevens wrote:
> I believe "2" and "3" are the scale factors for the fixed-point
> representation
> of the data, not the "alpha" and "beta" I remember from the estimator
> paper.
>

The math is done with bit shifts instead of floating point multiplications.
It's the same thing though. The 2 means beta of 1/4 and the 3 means alpha
of 1/8.

-Wes


Attachments:
(No filename) (398.00 B)
(No filename) (189.00 B)
Download all attachments