LinuxLists.cc - [PATCH 1/3] Support arbitrary initial TCP timestamps

2008-02-15 17:39:41

Subject: [PATCH 1/3] Support arbitrary initial TCP timestamps

Introduce the ability to send arbitrary initial tcp timestamps that are not
tied directly to jiffies. The basic conecpt is every tcp_request_sock and
tcp_sock now has a ts_off offset that represents the difference between
tcp_time_stamp and the timestamp we send and expect to
receive.

This has the advantage of not divulging system information (uptime)
depending on the policy chosen for the initial timestamps.

A policy where ts_off is always set to zero should produce no change in
behavior. The policy implemented is not intended for real use, but just as
a simple example. A realistic example would probably be similar to the tcp
init sequence generator.

Signed-off-by: Glenn Griffin <[email protected]>
---
include/linux/tcp.h | 6 ++++++
include/net/tcp.h | 2 +-
net/ipv4/tcp_input.c | 12 ++++++------
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_minisocks.c | 5 +++--
net/ipv4/tcp_output.c | 8 +++++---
net/ipv6/tcp_ipv6.c | 2 +-
7 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 08027f1..68387dc 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -232,6 +232,7 @@ struct tcp_request_sock {
#endif
u32 rcv_isn;
u32 snt_isn;
+ u32 ts_off;
};

static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -311,6 +312,11 @@ struct tcp_sock {
struct tcp_options_received rx_opt;

/*
+ * offset of transmitted tcp timestamp from jiffies
+ */
+ u32 ts_off;
+
+/*
* Slow start and congestion control (see also Nagle, and Karn & Partridge)
*/
u32 snd_ssthresh; /* Slow start size threshold */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7de4ea3..c935de7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -395,7 +395,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,

extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx,
- int estab);
+ int estab, u32 tsecr_off);

/*
* TCP v4 functions exported for the inet6 API
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c449f..95fa8dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3264,7 +3264,7 @@ uninteresting_ack:
* the fast version below fails.
*/
void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
- int estab)
+ int estab, u32 tsecr_off)
{
unsigned char *ptr;
struct tcphdr *th = tcp_hdr(skb);
@@ -3322,7 +3322,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
(!estab && sysctl_tcp_timestamps))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
- opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+ opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))) - tsecr_off;
}
break;
case TCPOPT_SACK_PERM:
@@ -3374,11 +3374,11 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
++ptr;
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
- tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+ tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->ts_off;
return 1;
}
}
- tcp_parse_options(skb, &tp->rx_opt, 1);
+ tcp_parse_options(skb, &tp->rx_opt, 1, tp->ts_off);
return 1;
}

@@ -4615,7 +4615,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
++ptr;
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
- tp->rx_opt.rcv_tsecr = ntohl(*ptr);
+ tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->ts_off;

/* If PAWS failed, check it more carefully in slow path */
if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
@@ -4824,7 +4824,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct inet_connection_sock *icsk = inet_csk(sk);
int saved_clamp = tp->rx_opt.mss_clamp;

- tcp_parse_options(skb, &tp->rx_opt, 0);
+ tcp_parse_options(skb, &tp->rx_opt, 0, tp->ts_off);

if (th->ack) {
/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 63414ea..267eda9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1295,7 +1295,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.mss_clamp = 536;
tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;

- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, 0, 0);

if (want_cookie) {
tcp_clear_options(&tmp_opt);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b61b768..648f84e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -102,7 +102,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,

tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, 0, 0);

if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tcptw->tw_ts_recent;
@@ -442,6 +442,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
inet_csk_reset_keepalive_timer(newsk,
keepalive_time_when(newtp));

+ newtp->ts_off = treq->ts_off;
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
if (sysctl_tcp_fack)
@@ -502,7 +503,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,

tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, 0, tcp_rsk(req)->ts_off);

if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed750f9..81f1383 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -583,7 +583,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
(sysctl_flags & SYSCTL_FLAG_SACK),
(sysctl_flags & SYSCTL_FLAG_WSCALE),
tp->rx_opt.rcv_wscale,
- tcb->when,
+ tcb->when + tp->ts_off,
tp->rx_opt.ts_recent,

#ifdef CONFIG_TCP_MD5SIG
@@ -592,7 +592,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
NULL);
} else {
tcp_build_and_update_options((__be32 *)(th + 1),
- tp, tcb->when,
+ tp, tcb->when + tp->ts_off,
#ifdef CONFIG_TCP_MD5SIG
md5 ? &md5_hash_location :
#endif
@@ -2227,9 +2227,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rcv_wnd, 65535U));

TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ tcp_rsk(req)->ts_off = 1000 - TCP_SKB_CB(skb)->when;
tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
- TCP_SKB_CB(skb)->when,
+ TCP_SKB_CB(skb)->when + tcp_rsk(req)->ts_off,
req->ts_recent,
(
#ifdef CONFIG_TCP_MD5SIG
@@ -2335,6 +2336,7 @@ int tcp_connect(struct sock *sk)

/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
+ tp->ts_off = 2000 - TCP_SKB_CB(buff)->when;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
skb_header_release(buff);
__tcp_add_write_queue_tail(sk, buff);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 12750f2..896ebd0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1264,7 +1264,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;

- tcp_parse_options(skb, &tmp_opt, 0);
+ tcp_parse_options(skb, &tmp_opt, 0, 0);

tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
--
1.5.4

2008-02-15 17:39:58

by Glenn Griffin

[permalink] [raw]

Subject: [PATCH 2/3] Enable the use of TCP options with syncookies

Allow the use of extended tcp options when syncookies are in use and the
client supports tcp timestamps.

The general concept is to encode the tcp options as the initial timestamp
that we send to the requestor in our synack. We can then decode the
timestamp echo reply value on the returning ack.

This implementation encodes the following options in the timestamp,
snd_wscale, rcv_wscale, sack_ok, and also the necessary bits to calculate
the rtt, and ts_off accurately. Note that there are still 5 unused bits
that could be used for future options.

Signed-off-by: Glenn Griffin <[email protected]>
---
include/linux/tcp.h | 3 ++
include/net/tcp.h | 4 ++
net/ipv4/syncookies.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++--
net/ipv4/tcp_ipv4.c | 5 +--
net/ipv4/tcp_output.c | 10 +++++-
5 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 68387dc..f5a23e1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -233,6 +233,9 @@ struct tcp_request_sock {
u32 rcv_isn;
u32 snt_isn;
u32 ts_off;
+#ifdef CONFIG_SYN_COOKIES
+ u8 want_cookie_ts;
+#endif
};

static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c935de7..42b36f5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -438,6 +438,7 @@ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
__u16 *mss);
+extern __u32 cookie_init_timestamp(struct request_sock *req);

/* tcp_output.c */

@@ -958,6 +959,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
ireq->acked = 0;
ireq->ecn_ok = 0;
ireq->rmt_port = tcp_hdr(skb)->source;
+#ifdef CONFIG_SYN_COOKIES
+ tcp_rsk(req)->want_cookie_ts = 0;
+#endif
}

extern void tcp_enter_memory_pressure(void);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f470fe4..eb5d804 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -35,6 +35,9 @@ module_init(init_syncookies);
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

+#define TSBITS 18
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
u32 count, int c)
{
@@ -144,6 +147,23 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
jiffies / (HZ * 60), mssind);
}

+__u32 cookie_init_timestamp(struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ __u32 init_ts = 0;
+
+ if (ireq->wscale_ok) {
+ init_ts |= ireq->snd_wscale << TSBITS;
+ init_ts |= ireq->rcv_wscale << (4 + TSBITS);
+ }
+ init_ts |= ireq->sack_ok << (8 + TSBITS);
+
+ init_ts |= tcp_time_stamp & TSMASK;
+
+ return init_ts;
+}
+
+
/*
* This (misnamed) value is the age of syncookie which is permitted.
* Its ideal value should be dependent on TCP_TIMEOUT_INIT and
@@ -184,6 +204,52 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
return child;
}

+/* when syncookies are in effect and tcp timestamps are enabled we will store
+ * additional tcp options encoded in the timestamp value.
+ *
+ * The 18 lsb of tcp timestamp is stored in the 18 lsb of the tcp timestamp sent
+ *
+ * 18 bits should be enough to contain a very conservative 2*MSL on a 1000 HZ
+ * system
+ *
+ * The next 4 lsb are for snd_wscale
+ * The next 4 lsb are for rcv_wscale
+ * The next 1 lsb are for sack_ok
+ */
+static __u32 cookie_check_timestamp(__u32 ts,
+ struct tcp_options_received *tcp_opt)
+{
+ __u32 init_ts = ts & TSMASK;
+
+ /*
+ * check if tcp_time_stamp overflowed 18bits
+ * while waiting for response
+ */
+ if ((tcp_time_stamp & TSMASK) >= init_ts)
+ /* likely no overflow occurred */
+ init_ts |= tcp_time_stamp & (~TSMASK);
+ else
+ /*
+ * overflow occurred, but we can be sure it's only one bit
+ * since the cookie_check verifies the cookie was generated
+ * within the last 4 minutes
+ */
+ init_ts |= ((tcp_time_stamp >> TSBITS) - 1) << TSBITS;
+
+ tcp_opt->tstamp_ok = 1;
+ tcp_opt->snd_wscale = (ts >> TSBITS) & 0xf;
+ tcp_opt->rcv_wscale = (ts >> (TSBITS + 4)) & 0xf;
+ tcp_opt->sack_ok = (ts >> (TSBITS + 8)) & 0x1;
+
+ if (tcp_opt->sack_ok)
+ tcp_sack_reset(tcp_opt);
+
+ if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
+ tcp_opt->wscale_ok = 1;
+
+ return init_ts;
+}
+
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
{
@@ -197,6 +263,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
int mss;
struct rtable *rt;
__u8 rcv_wscale;
+ struct tcp_options_received tcp_opt;
+ __u32 xmit_ts = 0;

if (!sysctl_tcp_syncookies || !th->ack)
goto out;
@@ -209,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,

NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);

+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, 0, 0);
+
+ if (tcp_opt.saw_tstamp)
+ xmit_ts = cookie_check_timestamp(tcp_opt.rcv_tsecr, &tcp_opt);
+
ret = NULL;
req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
if (!req)
@@ -222,11 +297,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
treq = tcp_rsk(req);
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->ts_off = xmit_ts ? tcp_opt.rcv_tsecr - xmit_ts : 0;
req->mss = mss;
ireq->rmt_port = th->source;
ireq->loc_addr = ip_hdr(skb)->daddr;
ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->rcv_wscale = tcp_opt.rcv_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.tstamp_ok;
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;

/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -241,8 +323,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
}

- ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
- ireq->wscale_ok = ireq->sack_ok = 0;
req->expires = 0UL;
req->retrans = 0;

@@ -271,10 +351,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}

/* Try to redo what tcp_v4_send_synack did. */
- req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW);
+ req->window_clamp = tcp_sk(sk)->window_clamp ?
+ : dst_metric(&rt->u.dst, RTAX_WINDOW);
+
tcp_select_initial_window(tcp_full_space(sk), req->mss,
&req->rcv_wnd, &req->window_clamp,
- 0, &rcv_wscale);
+ ireq->wscale_ok, &rcv_wscale);
/* BTW win scale with syncookies is 0 by definition */
ireq->rcv_wscale = rcv_wscale;

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 267eda9..e20f78e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1297,10 +1297,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)

tcp_parse_options(skb, &tmp_opt, 0, 0);

- if (want_cookie) {
+ if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
- tmp_opt.saw_tstamp = 0;
- }

if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
/* Some OSes (unknown ones, but I see them on web server, which
@@ -1328,6 +1326,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
syn_flood_warning(skb);
+ tcp_rsk(req)->want_cookie_ts = tmp_opt.saw_tstamp;
#endif
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
} else if (!isn) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 81f1383..0c900e0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2227,7 +2227,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rcv_wnd, 65535U));

TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_rsk(req)->ts_off = 1000 - TCP_SKB_CB(skb)->when;
+
+#ifdef CONFIG_SYN_COOKIES
+ if (tcp_rsk(req)->want_cookie_ts)
+ tcp_rsk(req)->ts_off = cookie_init_timestamp(req) -
+ TCP_SKB_CB(skb)->when;
+ else
+#endif
+ tcp_rsk(req)->ts_off = 1000 - TCP_SKB_CB(skb)->when;
+
tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
TCP_SKB_CB(skb)->when + tcp_rsk(req)->ts_off,
--
1.5.4

2008-02-15 17:40:32

by Glenn Griffin

[permalink] [raw]

Subject: [PATCH 3/3] Add IPv6 Support to TCP SYN cookies

Support IPv6 syncookies

Signed-off-by: Glenn Griffin <[email protected]>
---
include/net/tcp.h | 10 ++
net/ipv4/syncookies.c | 9 +-
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_minisocks.c | 2 +
net/ipv4/tcp_output.c | 1 +
net/ipv6/Makefile | 1 +
net/ipv6/syncookies.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/tcp_ipv6.c | 78 +++++++++----
8 files changed, 359 insertions(+), 27 deletions(-)
create mode 100644 net/ipv6/syncookies.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 42b36f5..18bc5f2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -29,6 +29,7 @@
#include <linux/skbuff.h>
#include <linux/dmaengine.h>
#include <linux/crypto.h>
+#include <linux/cryptohash.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
@@ -434,11 +435,19 @@ extern int tcp_disconnect(struct sock *sk, int flags);
extern void tcp_unhash(struct sock *sk);

/* From syncookies.c */
+extern __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt);
extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
__u16 *mss);
extern __u32 cookie_init_timestamp(struct request_sock *req);
+extern __u32 cookie_check_timestamp(__u32 ts,
+ struct tcp_options_received *tcp_opt);
+
+/* From net/ipv6/syncookies.c */
+extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
+extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
+ __u16 *mss);

/* tcp_output.c */

@@ -1336,6 +1345,7 @@ extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;
+extern struct request_sock_ops tcp6_request_sock_ops;

extern int tcp_v4_destroy_sock(struct sock *sk);

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index eb5d804..4915d8d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
* 2 of the License, or (at your option) any later version.
*
* $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
- *
- * Missing: IPv6 support.
*/

#include <linux/tcp.h>
@@ -23,7 +21,8 @@

extern int sysctl_tcp_syncookies;

-static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+__u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+EXPORT_SYMBOL(syncookie_secret);

static __init int init_syncookies(void)
{
@@ -216,8 +215,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
* The next 4 lsb are for rcv_wscale
* The next 1 lsb are for sack_ok
*/
-static __u32 cookie_check_timestamp(__u32 ts,
- struct tcp_options_received *tcp_opt)
+__u32 cookie_check_timestamp(__u32 ts, struct tcp_options_received *tcp_opt)
{
__u32 init_ts = ts & TSMASK;

@@ -249,6 +247,7 @@ static __u32 cookie_check_timestamp(__u32 ts,

return init_ts;
}
+EXPORT_SYMBOL(cookie_check_timestamp);

struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct ip_options *opt)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 95fa8dc..018ffc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5326,6 +5326,7 @@ discard:

EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 648f84e..b4d4fb8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
#endif

int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+EXPORT_SYMBOL(sysctl_tcp_syncookies);
+
int sysctl_tcp_abort_on_overflow __read_mostly;

struct inet_timewait_death_row tcp_death_row = {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0c900e0..e07c32f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2570,6 +2570,7 @@ void tcp_send_probe0(struct sock *sk)
}
}

+EXPORT_SYMBOL(tcp_select_initial_window);
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 24f3aa0..ae14617 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -16,6 +16,7 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
+ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o

ipv6-objs += $(ipv6-y)

diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
new file mode 100644
index 0000000..9f64d9a
--- /dev/null
+++ b/net/ipv6/syncookies.c
@@ -0,0 +1,284 @@
+/*
+ * IPv6 Syncookies implementation for the Linux kernel
+ *
+ * Authors:
+ * Glenn Griffin <[email protected]>
+ *
+ * Based on IPv4 implementation by Andi Kleen
+ * linux/net/ipv4/syncookies.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/tcp.h>
+#include <linux/random.h>
+#include <linux/cryptohash.h>
+#include <linux/kernel.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+extern __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS];
+
+#define COOKIEBITS 24 /* Upper bits store count */
+#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+
+/*
+ * This table has to be sorted and terminated with (__u16)-1.
+ * XXX generate a better table.
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ *
+ * Taken directly from ipv4 implementation.
+ * Should this list be modified for ipv6 use or is it close enough?
+ * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
+ */
+static __u16 const msstab[] = {
+ 64 - 1,
+ 256 - 1,
+ 512 - 1,
+ 536 - 1,
+ 1024 - 1,
+ 1440 - 1,
+ 1460 - 1,
+ 4312 - 1,
+ (__u16)-1
+};
+/* The number doesn't include the -1 terminator */
+#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
+
+/*
+ * This (misnamed) value is the age of syncookie which is permitted.
+ * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula (exponential
+ * backoff) to compute at runtime so it's currently hardcoded here.
+ */
+#define COUNTER_TRIES 4
+
+static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sock *child;
+
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+ if (child)
+ inet_csk_reqsk_queue_add(sk, req, child);
+ else
+ reqsk_free(req);
+
+ return child;
+}
+
+static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
+
+static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr,
+ __be16 sport, __be16 dport, u32 count, int c)
+{
+ __u32 *tmp = __get_cpu_var(cookie_scratch);
+
+ /*
+ * we have 320 bits of information to hash, copy in the remaining
+ * 192 bits required for sha_transform, from the syncookie_secret
+ * and overwrite the digest with the secret
+ */
+ memcpy(tmp + 10, syncookie_secret[c], 44);
+ memcpy(tmp, saddr, 16);
+ memcpy(tmp + 4, daddr, 16);
+ tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
+ tmp[9] = count;
+ sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5);
+
+ return tmp[17];
+}
+
+static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr,
+ struct in6_addr *daddr,
+ __be16 sport, __be16 dport, __u32 sseq,
+ __u32 count, __u32 data)
+{
+ return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
+ sseq + (count << COOKIEBITS) +
+ ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
+ & COOKIEMASK));
+}
+
+static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr,
+ struct in6_addr *daddr, __be16 sport,
+ __be16 dport, __u32 sseq, __u32 count,
+ __u32 maxdiff)
+{
+ __u32 diff;
+
+ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
+
+ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
+ if (diff >= maxdiff)
+ return (__u32)-1;
+
+ return (cookie -
+ cookie_hash(saddr, daddr, sport, dport, count - diff, 1))
+ & COOKIEMASK;
+}
+
+__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ int mssind;
+ const __u16 mss = *mssp;
+
+ tcp_sk(sk)->last_synq_overflow = jiffies;
+
+ for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
+ ;
+ *mssp = msstab[mssind] + 1;
+
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+
+ return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
+ th->dest, ntohl(th->seq),
+ jiffies / (HZ * 60), mssind);
+}
+
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 seq = ntohl(th->seq) - 1;
+ __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq,
+ jiffies / (HZ * 60), COUNTER_TRIES);
+
+ return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct inet_request_sock *ireq;
+ struct inet6_request_sock *ireq6;
+ struct tcp_request_sock *treq;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 cookie = ntohl(th->ack_seq) - 1;
+ struct sock *ret = sk;
+ struct request_sock *req;
+ int mss;
+ struct dst_entry *dst;
+ __u8 rcv_wscale;
+ struct tcp_options_received tcp_opt;
+ __u32 xmit_ts = 0;
+
+ if (!sysctl_tcp_syncookies || !th->ack)
+ goto out;
+
+ if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+ (mss = cookie_check(skb, cookie)) == 0) {
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+ goto out;
+ }
+
+ NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+
+ /* check for timestamp cookie support */
+ memset(&tcp_opt, 0, sizeof(tcp_opt));
+ tcp_parse_options(skb, &tcp_opt, 0, 0);
+
+ if (tcp_opt.saw_tstamp)
+ xmit_ts = cookie_check_timestamp(tcp_opt.rcv_tsecr, &tcp_opt);
+
+ ret = NULL;
+ req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
+ if (!req)
+ goto out;
+
+ ireq = inet_rsk(req);
+ ireq6 = inet6_rsk(req);
+ treq = tcp_rsk(req);
+ ireq6->pktopts = NULL;
+
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+ goto out;
+ }
+
+ req->mss = mss;
+ ireq->rmt_port = th->source;
+ ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ ireq6->pktopts = skb;
+ }
+
+ ireq6->iif = sk->sk_bound_dev_if;
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq6->iif = inet6_iif(skb);
+
+ req->expires = 0UL;
+ req->retrans = 0;
+ ireq->snd_wscale = tcp_opt.snd_wscale;
+ ireq->rcv_wscale = tcp_opt.rcv_wscale;
+ ireq->sack_ok = tcp_opt.sack_ok;
+ ireq->wscale_ok = tcp_opt.wscale_ok;
+ ireq->tstamp_ok = tcp_opt.tstamp_ok;
+ req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+ treq->ts_off = xmit_ts ? tcp_opt.rcv_tsecr - xmit_ts : 0;
+
+ /*
+ * We need to lookup the dst_entry to get the correct window size.
+ * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten
+ * me if there is a preferred way.
+ */
+ {
+ struct in6_addr *final_p = NULL, final;
+ struct flowi fl;
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+ ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+ fl.fl_ip_sport = inet_sk(sk)->sport;
+ security_req_classify_flow(req, &fl);
+ if (ip6_dst_lookup(sk, &dst, &fl)) {
+ reqsk_free(req);
+ goto out;
+ }
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+ if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ goto out;
+ }
+
+ req->window_clamp = tcp_sk(sk)->window_clamp ?
+ : dst_metric(dst, RTAX_WINDOW);
+
+ tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ &req->rcv_wnd, &req->window_clamp,
+ ireq->wscale_ok, &rcv_wscale);
+
+ ireq->rcv_wscale = rcv_wscale;
+
+ ret = get_cookie_sock(sk, skb, req, dst);
+
+out: return ret;
+}
+
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 896ebd0..7a48263 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -514,6 +514,20 @@ done:
return err;
}

+static inline void syn_flood_warning(struct sk_buff *skb)
+{
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies)
+ printk(KERN_INFO
+ "TCPv6: Possible SYN flooding on port %d. "
+ "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
+ else
+#endif
+ printk(KERN_INFO
+ "TCPv6: Possible SYN flooding on port %d. "
+ "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
+}
+
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
if (inet6_rsk(req)->pktopts)
@@ -917,7 +931,7 @@ done_opts:
}
#endif

-static struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
.rtx_syn_ack = tcp_v6_send_synack,
@@ -1215,9 +1229,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
return NULL;
}

-#if 0 /*def CONFIG_SYN_COOKIES*/
+#ifdef CONFIG_SYN_COOKIES
if (!th->rst && !th->syn && th->ack)
- sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
+ sk = cookie_v6_check(sk, skb);
#endif
return sk;
}
@@ -1233,6 +1247,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req = NULL;
__u32 isn = TCP_SKB_CB(skb)->when;
+#ifdef CONFIG_SYN_COOKIES
+ int want_cookie = 0;
+#else
+#define want_cookie 0
+#endif

if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -1240,12 +1259,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;

- /*
- * There are no SYN attacks on IPv6, yet...
- */
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
if (net_ratelimit())
- printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
+ syn_flood_warning(skb);
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies)
+ want_cookie = 1;
+ else
+#endif
goto drop;
}

@@ -1266,29 +1287,40 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)

tcp_parse_options(skb, &tmp_opt, 0, 0);

+ if (want_cookie && !tmp_opt.saw_tstamp)
+ tcp_clear_options(&tmp_opt);
+
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);

treq = inet6_rsk(req);
ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
- TCP_ECN_create_request(req, tcp_hdr(skb));
treq->pktopts = NULL;
- if (ipv6_opt_accepted(sk, skb) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
- atomic_inc(&skb->users);
- treq->pktopts = skb;
- }
- treq->iif = sk->sk_bound_dev_if;
+ if (!want_cookie)
+ TCP_ECN_create_request(req, tcp_hdr(skb));
+
+ if (want_cookie) {
+#ifdef CONFIG_SYN_COOKIES
+ tcp_rsk(req)->want_cookie_ts = tmp_opt.saw_tstamp;
+#endif
+ isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+ } else if (!isn) {
+ if (ipv6_opt_accepted(sk, skb) ||
+ np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
+ np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
+ atomic_inc(&skb->users);
+ treq->pktopts = skb;
+ }
+ treq->iif = sk->sk_bound_dev_if;

- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ treq->iif = inet6_iif(skb);

- if (isn == 0)
isn = tcp_v6_init_sequence(skb);
+ }

tcp_rsk(req)->snt_isn = isn;

@@ -1297,8 +1329,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (tcp_v6_send_synack(sk, req, NULL))
goto drop;

- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
+ if (!want_cookie) {
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ return 0;
+ }

drop:
if (req)
--
1.5.4

2008-02-18 07:33:08

by David Miller

[permalink] [raw]

Subject: Re: [PATCH 1/3] Support arbitrary initial TCP timestamps

From: Glenn Griffin <[email protected]>
Date: Fri, 15 Feb 2008 09:47:08 -0800

> Introduce the ability to send arbitrary initial tcp timestamps that are not
> tied directly to jiffies. The basic conecpt is every tcp_request_sock and
> tcp_sock now has a ts_off offset that represents the difference between
> tcp_time_stamp and the timestamp we send and expect to
> receive.
>
> This has the advantage of not divulging system information (uptime)
> depending on the policy chosen for the initial timestamps.
>
> A policy where ts_off is always set to zero should produce no change in
> behavior. The policy implemented is not intended for real use, but just as
> a simple example. A realistic example would probably be similar to the tcp
> init sequence generator.
>
> Signed-off-by: Glenn Griffin <[email protected]>

Adding yet another member to the already bloated tcp_sock structure to
implement this is too high a cost.

I would instead prefer that there be some global random number
calculated when the first TCP socket is created, and use that as a
global offset. You can even recompute it every few hours if you
like.

We do similar things already elsewhere.

2008-02-18 22:38:36

by Glenn Griffin

[permalink] [raw]

Subject: Re: [PATCH 1/3] Support arbitrary initial TCP timestamps

> Adding yet another member to the already bloated tcp_sock structure to
> implement this is too high a cost.

Yes, I was worried that would be deemed too high of a cost, but it was
the most efficient way I could think to accomplish what I wanted.

> I would instead prefer that there be some global random number
> calculated when the first TCP socket is created, and use that as a
> global offset. You can even recompute it every few hours if you
> like.

That would work fine if my mine purpose was to randomize the tcp
timestamp to mitigate the leak in information regarding uptime, but
despite the brief description, that's only a side effect of what I
intended to do. What I wanted was a way to be able to choose an initial
tcp timestamp for a particular connection that was not tied directly to
jiffies.

The two patches following this show my intended use case. I intend to
enhance syncookie support to allow it to support advanced tcp options
(sack and window scaling). Normally syncookies encode the bare minimum
state of a connection in the ISN they choose, but the 32bit ISN isn't
enough to encode advanced tcp options so you are left with a working but
crippled tcp stack during a synflood attack. If in addition to choosing
an ISN we are able to choose an initial tcp timestamp, we are then able
to encode an additional 32 bits of information that can contain more of
the advanced tcp options.

This stems from a discussion about implementing IPv6 support for
syncookies, and the main concern being that syncookies disabled too many
valuable tcp features to be relevant on modern systems. Many people
stood in opposition to that statement, but it did not seem as though a
general consensus was reached. http://lkml.org/lkml/2008/2/4/396

I'm always open to alternatives.

--Glenn

2008-02-19 17:54:47

by Glenn Griffin

[permalink] [raw]

Subject: Re: [PATCH 1/3] Support arbitrary initial TCP timestamps

> > Adding yet another member to the already bloated tcp_sock structure to
> > implement this is too high a cost.
>
> Yes, I was worried that would be deemed too high of a cost, but it was
> the most efficient way I could think to accomplish what I wanted.
>
> > I would instead prefer that there be some global random number
> > calculated when the first TCP socket is created, and use that as a
> > global offset. You can even recompute it every few hours if you
> > like.
>
> That would work fine if my mine purpose was to randomize the tcp
> timestamp to mitigate the leak in information regarding uptime, but
> despite the brief description, that's only a side effect of what I
> intended to do. What I wanted was a way to be able to choose an initial
> tcp timestamp for a particular connection that was not tied directly to
> jiffies.
>
> The two patches following this show my intended use case. I intend to
> enhance syncookie support to allow it to support advanced tcp options
> (sack and window scaling). Normally syncookies encode the bare minimum
> state of a connection in the ISN they choose, but the 32bit ISN isn't
> enough to encode advanced tcp options so you are left with a working but
> crippled tcp stack during a synflood attack. If in addition to choosing
> an ISN we are able to choose an initial tcp timestamp, we are then able
> to encode an additional 32 bits of information that can contain more of
> the advanced tcp options.

Perhaps I should clarify. I don't see a way to implement the additional
syncookie enhancements without storing an offset in some type of
per-socket structure. Given that, is it still deemed too high of a cost?
Is enhancing syncookies not deemed important enough to warrant the
additional 4 bytes? What if there was an additional config variable to
support arbitrary/random tcp timestamps, and then syncookies only used
the additional options when that setting was chosen? Is there some
possiblity I'm missing that could get this feature in a form suitable
for inclusion? Thanks.

--Glenn