2010-01-10 13:03:05

by William Allen Simpson

[permalink] [raw]
Subject: query: redundant tcp header length checks?

On a related note, back on Nov 10th, Ilpo brought to my attention --
*hidden* inside the pskb_may_pull() -- the tcp header length is range
checked for being too short (skb->len < th->doff * 4).

tcp_v4_rcv()
...
1585 if (!pskb_may_pull(skb, th->doff * 4))
1586 goto discard_it;
1587

But the next comment in tcp_ipv4.c (missing from tcp_ipv6.c) says:

1588 /* An explanation is required here, I think.
1589 * Packet length and doff are validated by header prediction,
1590 * provided case of th->doff==0 is eliminated.
1591 * So, we defer the checks. */

That's not correct anymore, as the checks aren't deferred! But the
redundant deferred checks _are_ still present in tcp_input.c:

tcp_rcv_established()
...
5210 if (len <= tcp_header_len) {
5211 /* Bulk data transfer: sender */
5212 if (len == tcp_header_len) {
...
5229 } else { /* Header too small */
5230 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5231 goto discard;
5232 }

Also:

5325 slow_path:
5326 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5327 goto csum_error;

Apparently tcp_rcv_established() might be called through some other path
that requires this extra header length checking?

Its callers also have another reference, for example in tcp_ipv4.c:

2432 .backlog_rcv = tcp_v4_do_rcv,

And tcp_ipv6.c:

213 sk->sk_backlog_rcv = tcp_v4_do_rcv;

223 sk->sk_backlog_rcv = tcp_v6_do_rcv;

1302 newsk->sk_backlog_rcv = tcp_v4_do_rcv;

2073 .backlog_rcv = tcp_v6_do_rcv,

Can anybody explain these?


2010-01-12 10:05:29

by William Allen Simpson

[permalink] [raw]
Subject: [PATCH] tcp: harmonize tcp_vx_rcv header length assumptions

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebf..7ea2cff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1559,6 +1559,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

+ /* Transformed? re-check too short header and options before checksum */
if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;

@@ -1601,14 +1602,14 @@ csum_err:
}

/*
- * From tcp_input.c
+ * Called by ip_input.c: ip_local_deliver_finish()
*/
-
int tcp_v4_rcv(struct sk_buff *skb)
{
const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

@@ -1618,20 +1619,23 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr) / 4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff * 4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

- /* An explanation is required here, I think.
- * Packet length and doff are validated by header prediction,
- * provided case of th->doff==0 is eliminated.
- * So, we defer the checks. */
+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;

@@ -1639,7 +1643,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff * 4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = iph->tos;
@@ -1682,14 +1686,14 @@ process:
bh_unlock_sock(sk);

sock_put(sk);
-
return ret;

no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Transformed? re-check too short header and options before checksum */
+ if (skb->len < tcp_header_len_th(th) || tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1711,18 +1715,20 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Transformed? re-check too short header and options before checksum */
+ if (skb->len < tcp_header_len_th(th) || tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
+
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
- if (sk2) {
+ if (sk2 != NULL) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
inet_twsk_put(inet_twsk(sk));
sk = sk2;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index febfd59..268bc8a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1594,6 +1594,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

+ /* Transformed? re-check too short header and options before checksum */
if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;

@@ -1664,38 +1665,47 @@ ipv6_pktoptions:
return 0;
}

+/*
+ * Called by ip6_input.c: ip6_input_finish()
+ */
static int tcp_v6_rcv(struct sk_buff *skb)
{
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

if (skb->pkt_type != PACKET_HOST)
goto discard_it;

- /*
- * Count it even if it's bad.
- */
+ /* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr)/4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff*4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
goto bad_packet;

th = tcp_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff*4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
@@ -1743,7 +1753,8 @@ no_tcp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Transformed? re-check too short header and options before checksum */
+ if (skb->len < tcp_header_len_th(th) || tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1751,11 +1762,7 @@ bad_packet:
}

discard_it:
-
- /*
- * Discard frame
- */
-
+ /* Discard frame. */
kfree_skb(skb);
return 0;

@@ -1769,24 +1776,23 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Transformed? re-check too short header and options before checksum */
+ if (skb->len < tcp_header_len_th(th) || tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}

switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
- case TCP_TW_SYN:
- {
- struct sock *sk2;
-
- sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
- &ipv6_hdr(skb)->daddr,
- ntohs(th->dest), inet6_iif(skb));
+ case TCP_TW_SYN: {
+ struct sock *sk2 = inet6_lookup_listener(dev_net(skb->dev),
+ &tcp_hashinfo,
+ &ipv6_hdr(skb)->daddr,
+ ntohs(th->dest),
+ inet6_iif(skb));
if (sk2 != NULL) {
- struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
- inet_twsk_put(tw);
+ inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
}
--
1.6.3.3


Attachments:
len_th+3v3.patch (6.75 kB)

2010-01-12 10:46:13

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH] tcp: harmonize tcp_vx_rcv header length assumptions

Le 12/01/2010 11:05, William Allen Simpson a ?crit :
> Harmonize tcp_v4_rcv() and tcp_v6_rcv() -- better document tcp doff and
> header length assumptions.
>
> Reduces multiply/shifts, marginally improving speed.
>
> Retains redundant tcp header length checks before checksumming.
>
> Stand-alone patch, originally developed for TCPCT.
>
> Requires:
> net: tcp_header_len_th and tcp_option_len_th
>
> Signed-off-by: [email protected]
> ---
> net/ipv4/tcp_ipv4.c | 36 +++++++++++++++++++-------------
> net/ipv6/tcp_ipv6.c | 56
> ++++++++++++++++++++++++++++----------------------
> 2 files changed, 52 insertions(+), 40 deletions(-)

Seems fine, but :

1) What means the "Transformed ?" you wrote several times ?

2) This part :

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr)/4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff*4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;


could be : (no need for 4 multiplies/divides)

tcp_header_len = tcp_header_len_th(tcp_hdr(skb));
if (tcp_header_len < sizeof(struct tcphdr))
goto bad_packet;
/* Check too short header and options */
if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

2010-01-12 17:11:39

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH] tcp: harmonize tcp_vx_rcv header length assumptions

Eric Dumazet wrote:
> Seems fine, but :
>
> 1) What means the "Transformed ?" you wrote several times ?
>
The only reason that I've been able to figure out for having the
skb->len test in those places is the preceding xfrm4_policy_check()
or xfrm6_policy_check() must be able to shrink the skb->len?

When I did the original transform stuff in other code circa 1995, I'd
envisioned IP length or link layer (PPP) length shrinking (removing
padding after block ciphers) -- and apparently this implementation
extended that concept to transport layer, too.

Personally, I'd prefer that a single test be placed in the appropriate
spot in the xfrm* functions, instead. Anybody know where?

2010-01-12 17:14:24

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH] tcp: harmonize tcp_vx_rcv header length assumptions

Eric Dumazet wrote:
> 2) This part :
>
> - th = tcp_hdr(skb);
> -
> - if (th->doff < sizeof(struct tcphdr)/4)
> + /* Check bad doff, compare doff directly to constant value */
> + tcp_header_len = tcp_hdr(skb)->doff;
> + if (tcp_header_len < (sizeof(struct tcphdr) / 4))
> goto bad_packet;
> - if (!pskb_may_pull(skb, th->doff*4))
> +
> + /* Check too short header and options */
> + tcp_header_len *= 4;
> + if (!pskb_may_pull(skb, tcp_header_len))
> goto discard_it;
>
>
> could be : (no need for 4 multiplies/divides)
>
> tcp_header_len = tcp_header_len_th(tcp_hdr(skb));
> if (tcp_header_len < sizeof(struct tcphdr))
> goto bad_packet;
> /* Check too short header and options */
> if (!pskb_may_pull(skb, tcp_header_len))
> goto discard_it;
>
Actually, tcp_header_len_th() has a multiply by 4 in it, too.

My code exactly parallels the existing code. That is slightly faster
for bad packets, as it does the raw comparison first, saving the
multiply by 4 until it has passed that test.

My change just saves a store (and maybe a register load) over the
existing code. This is supposed to be "fast path" after all....

Also adds comments that explain what we're doing. As I mentioned
earlier in the thread:

... back on Nov 10th, Ilpo brought to my attention --
*hidden* inside the pskb_may_pull() -- the tcp header length is range
checked for being too short (skb->len < th->doff * 4).

Anytime I find the code isn't obvious to me, I figure the next person
will benefit from some more comments....

(Of course, this patch also fixes existing comments that are not true!)

2010-01-13 09:50:18

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH] tcp: harmonize tcp_vx_rcv header length assumptions

William Allen Simpson wrote:
> Eric Dumazet wrote:
>> Seems fine, but :
>>
>> 1) What means the "Transformed ?" you wrote several times ?
>>
> The only reason that I've been able to figure out for having the
> skb->len test in those places is the preceding xfrm4_policy_check()
> or xfrm6_policy_check() must be able to shrink the skb->len?
>
> When I did the original transform stuff in other code circa 1995, I'd
> envisioned IP length or link layer (PPP) length shrinking (removing
> padding after block ciphers) -- and apparently this implementation
> extended that concept to transport layer, too.
>
> Personally, I'd prefer that a single test be placed in the appropriate
> spot in the xfrm* functions, instead. Anybody know where?
>
I've spent another day staring at the xfrm* functions. Since nobody
familiar with them has answered my recent questions, it seems I'm on my
own.... So, here are my conclusions:

The current xfrm* code shouldn't change the TCP header. If anything did,
the current tests wouldn't work anyway. For example:

tcp_ipv4:
tcp_v4_rcv()
...
1645 no_tcp_socket:
1646 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1647 goto discard_it;
1648
1649 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {

This code depends on the *th pointer remaining unchanged. A pullup or
skb clone could make the pointer invalid.

Likewise, the checksum occurs after the xfrm* code. Thus, the xfrm*
cannot alter, decrypt, or tunnel the input data.

Therefore, I'll remove those existing extraneous skb->len tests. And
I'll add these criteria to the include/net/xfrm.h for future reference.

2010-01-13 10:48:27

by William Allen Simpson

[permalink] [raw]
Subject: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6d85861..89c5465 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -975,6 +975,13 @@ xfrm_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x, unsigned short
}

#ifdef CONFIG_XFRM
+/*
+ * For transport, the policy is checked before the presumed more expensive
+ * checksum. The transport header has already been checked for size, and is
+ * guaranteed to be contiguous. These policies must not alter the header or
+ * its position in the buffer, and should not shorten the buffer length
+ * without ensuring the length remains >= the header size.
+ */
extern int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family);

static inline int __xfrm_policy_check2(struct sock *sk, int dir,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebf..b3bd25f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1559,7 +1559,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb))
goto csum_err;

if (sk->sk_state == TCP_LISTEN) {
@@ -1601,14 +1602,14 @@ csum_err:
}

/*
- * From tcp_input.c
+ * Called by ip_input.c: ip_local_deliver_finish()
*/
-
int tcp_v4_rcv(struct sk_buff *skb)
{
const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

@@ -1618,20 +1619,23 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr) / 4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff * 4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

- /* An explanation is required here, I think.
- * Packet length and doff are validated by header prediction,
- * provided case of th->doff==0 is eliminated.
- * So, we defer the checks. */
+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;

@@ -1639,7 +1643,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff * 4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = iph->tos;
@@ -1682,14 +1686,14 @@ process:
bh_unlock_sock(sk);

sock_put(sk);
-
return ret;

no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1711,18 +1715,20 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
+
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo,
iph->daddr, th->dest,
inet_iif(skb));
- if (sk2) {
+ if (sk2 != NULL) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
inet_twsk_put(inet_twsk(sk));
sk = sk2;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index febfd59..1439192 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1594,7 +1594,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb))
goto csum_err;

if (sk->sk_state == TCP_LISTEN) {
@@ -1664,38 +1665,47 @@ ipv6_pktoptions:
return 0;
}

+/*
+ * Called by ip6_input.c: ip6_input_finish()
+ */
static int tcp_v6_rcv(struct sk_buff *skb)
{
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

if (skb->pkt_type != PACKET_HOST)
goto discard_it;

- /*
- * Count it even if it's bad.
- */
+ /* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr)/4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff*4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
goto bad_packet;

th = tcp_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff*4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
@@ -1743,7 +1753,8 @@ no_tcp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1751,11 +1762,7 @@ bad_packet:
}

discard_it:
-
- /*
- * Discard frame
- */
-
+ /* Discard frame. */
kfree_skb(skb);
return 0;

@@ -1769,24 +1776,23 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}

switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
- case TCP_TW_SYN:
- {
- struct sock *sk2;
-
- sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
- &ipv6_hdr(skb)->daddr,
- ntohs(th->dest), inet6_iif(skb));
+ case TCP_TW_SYN: {
+ struct sock *sk2 = inet6_lookup_listener(dev_net(skb->dev),
+ &tcp_hashinfo,
+ &ipv6_hdr(skb)->daddr,
+ ntohs(th->dest),
+ inet6_iif(skb));
if (sk2 != NULL) {
- struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
- inet_twsk_put(tw);
+ inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
}
--
1.6.3.3


Attachments:
len_th+3v4.patch (7.48 kB)

2010-01-13 11:56:28

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

On Wed, Jan 13, 2010 at 05:48:20AM -0500, William Allen Simpson wrote:
> Harmonize tcp_v4_rcv() and tcp_v6_rcv() -- better document tcp doff
> and header length assumptions.
>
> Reduces multiply/shifts, marginally improving speed.
>
> Removes redundant tcp header length checks before checksumming.

I wonder if this actually improves performance on x86. On x86 and several
other architecture there's a addressing mode which allows to scale
numbers by small factors (like 4). So doing a *4 is very cheap.

That's likely cheaper than using another register for the scaled value,
especially on 32bit x86 which doesn't have many.

It's difficult to benchmark this code, but did you check that the code
shrinks after applying this patch at least?

-andi

2010-01-13 15:36:44

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

Andi Kleen wrote:
> It's difficult to benchmark this code, but did you check that the code
> shrinks after applying this patch at least?
>
Good question. Only on a gross scale with ls -l.

Before: 332612
After: 332384

I've just spent an hour Googling and then trying to decipher the make file
looking for a standard way to generate the -S (.s output), but no joy.

What make command would you suggest to get the interleaved asm?

2010-01-13 15:53:30

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
> Andi Kleen wrote:
>> It's difficult to benchmark this code, but did you check that the code
>> shrinks after applying this patch at least?
>>
> Good question. Only on a gross scale with ls -l.
>
> Before: 332612
> After: 332384

size is better.

>
> I've just spent an hour Googling and then trying to decipher the make file
> looking for a standard way to generate the -S (.s output), but no joy.
>
> What make command would you suggest to get the interleaved asm?

make path/to/file.lst

(or .S -- you might need uptodate binutils if the lst listing is messed up)

-Andi

--
[email protected] -- Speaking for myself only.

2010-01-13 16:40:41

by Joe Perches

[permalink] [raw]
Subject: [PATCH] Makefile: Document ability to make file.lst and file.S

On Wed, 2010-01-13 at 16:53 +0100, Andi Kleen wrote:
> On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
> > What make command would you suggest to get the interleaved asm?
> make path/to/file.lst
> (or .S -- you might need uptodate binutils if the lst listing is messed up)

I didn't know that was possible.

Signed-off-by: Joe Perches <[email protected]>
---
diff --git a/Makefile b/Makefile
index 9f64552..4557d58 100644
--- a/Makefile
+++ b/Makefile
@@ -1248,7 +1248,9 @@ help:
@echo ' firmware_install- Install all firmware to INSTALL_FW_PATH'
@echo ' (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
@echo ' dir/ - Build all files in dir and below'
- @echo ' dir/file.[ois] - Build specified target only'
+ @echo ' dir/file.[oisS] - Build specified target only'
+ @echo ' dir/file.lst - Build specified mixed source/assembly target only'
+ @echo ' (needs a previous build and System.map)'
@echo ' dir/file.ko - Build module including final link'
@echo ' modules_prepare - Set up for building external modules'
@echo ' tags/TAGS - Generate tags file for editors'


2010-01-13 17:14:18

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] Makefile: Document ability to make file.lst and file.S

On Wed, Jan 13, 2010 at 08:40:35AM -0800, Joe Perches wrote:
> On Wed, 2010-01-13 at 16:53 +0100, Andi Kleen wrote:
> > On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
> > > What make command would you suggest to get the interleaved asm?
> > make path/to/file.lst
> > (or .S -- you might need uptodate binutils if the lst listing is messed up)
>
> I didn't know that was possible.

The trap is that it doesn't work well on most systems with too
old binutils (unless the gcc is really ancient)

-Andi

2010-01-13 17:31:47

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] Makefile: Document ability to make file.lst and file.S

On Wed, 2010-01-13 at 18:14 +0100, Andi Kleen wrote:
> On Wed, Jan 13, 2010 at 08:40:35AM -0800, Joe Perches wrote:
> > On Wed, 2010-01-13 at 16:53 +0100, Andi Kleen wrote:
> > > On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
> > > > What make command would you suggest to get the interleaved asm?
> > > make path/to/file.lst
> > > (or .S -- you might need uptodate binutils if the lst listing is messed up)
> > I didn't know that was possible.
> The trap is that it doesn't work well on most systems with too
> old binutils (unless the gcc is really ancient)

Then that might as well go into the help.

Signed-off-by: Joe Perches <[email protected]>
---
diff --git a/Makefile b/Makefile
index 9f64552..484cff4 100644
--- a/Makefile
+++ b/Makefile
@@ -1248,7 +1248,9 @@ help:
@echo ' firmware_install- Install all firmware to INSTALL_FW_PATH'
@echo ' (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
@echo ' dir/ - Build all files in dir and below'
- @echo ' dir/file.[ois] - Build specified target only'
+ @echo ' dir/file.[oisS] - Build specified target only'
+ @echo ' dir/file.lst - Build specified mixed source/assembly target only'
+ @echo ' (requires a recent binutils and recent build (System.map))'
@echo ' dir/file.ko - Build module including final link'
@echo ' modules_prepare - Set up for building external modules'
@echo ' tags/TAGS - Generate tags file for editors'

2010-01-13 19:49:58

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

Andi Kleen wrote:
> On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
>> What make command would you suggest to get the interleaved asm?
>
> make path/to/file.lst
>
Thank you. So simple. I wish I'd asked you first! (Or that it was
in the README or at kernelnewbies or something.)

The short answer is yes, it saves a bit more than I'd expected!

Of course, much of the savings is due to the elimination of the
various skb->len tests. But the initial code is hit more often.

My obvious and simple load of doff, save, then multiply by 4, save,
has 2 stores into tcp_header_len -- versus 1 old store into th:

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr) / 4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff * 4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

Existing tcp_v4_rcv() initially has 3 extra loads, a shift, and several
register swaps compared to mine.

Eric's suggestion to use an immediate multiply eliminates my store,
but that turns out to be at the cost of many additional indexed loads!

In my version, the compiler has cleverly kept tcp_hdr(skb) in %edi.
Eric's idea ends up using %edi for tcp_header_len, so it recalculates
tcp_hdr(skb) later, and temporarily stores it on the stack, requiring
%edx for the loads and stores, and preventing the usage of %dl for
comparisons, making it about 3 loads and 3 stores and a large number of
other instructions longer....

My later savings is even better, completely obviating my 1 store.
For example:

==old==
th = tcp_hdr(skb);
iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
c04f2621: 8d 43 20 lea 0x20(%ebx),%eax
c04f2624: 8b 4f 04 mov 0x4(%edi),%ecx
c04f2627: 0f c9 bswap %ecx
c04f2629: 89 4d e8 mov %ecx,-0x18(%ebp)
c04f262c: 89 48 18 mov %ecx,0x18(%eax)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
c04f262f: 0f b6 4f 0d movzbl 0xd(%edi),%ecx
c04f2633: 89 ca mov %ecx,%edx
c04f2635: d0 ea shr %dl
c04f2637: 83 e2 01 and $0x1,%edx
c04f263a: 89 55 e4 mov %edx,-0x1c(%ebp)
c04f263d: 89 ca mov %ecx,%edx
c04f263f: 0f b6 4f 0c movzbl 0xc(%edi),%ecx
c04f2643: 83 e2 01 and $0x1,%edx
c04f2646: 03 55 e4 add -0x1c(%ebp),%edx
c04f2649: 03 53 50 add 0x50(%ebx),%edx
c04f264c: c0 e9 04 shr $0x4,%cl
c04f264f: 0f b6 c9 movzbl %cl,%ecx
c04f2652: c1 e1 02 shl $0x2,%ecx
c04f2655: 29 ca sub %ecx,%edx
c04f2657: 03 55 e8 add -0x18(%ebp),%edx
c04f265a: 89 50 1c mov %edx,0x1c(%eax)
c04f265d: 8b 57 08 mov 0x8(%edi),%edx
skb->len - th->doff * 4);

==new==
th = tcp_hdr(skb);
iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
c04f25f2: 8d 43 20 lea 0x20(%ebx),%eax
c04f25f5: 8b 4f 04 mov 0x4(%edi),%ecx
c04f25f8: 0f c9 bswap %ecx
c04f25fa: 89 48 18 mov %ecx,0x18(%eax)
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
c04f25fd: 0f b6 57 0d movzbl 0xd(%edi),%edx
c04f2601: d0 ea shr %dl
c04f2603: 83 e2 01 and $0x1,%edx
c04f2606: 89 55 e0 mov %edx,-0x20(%ebp)
c04f2609: 0f b6 57 0d movzbl 0xd(%edi),%edx
c04f260d: 83 e2 01 and $0x1,%edx
c04f2610: 03 55 e0 add -0x20(%ebp),%edx
c04f2613: 03 53 50 add 0x50(%ebx),%edx
c04f2616: 2b 55 e8 sub -0x18(%ebp),%edx
c04f2619: 01 ca add %ecx,%edx
c04f261b: 89 50 1c mov %edx,0x1c(%eax)
c04f261e: 8b 57 08 mov 0x8(%edi),%edx
skb->len - tcp_header_len);

2010-01-13 19:51:36

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH] Makefile: Document ability to make file.lst and file.S

Thanks, Joe! Just the kind of thing I was wishing!

2010-01-13 20:19:41

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

> The short answer is yes, it saves a bit more than I'd expected!

Thanks for checking.

-Andi

2010-01-13 21:13:51

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

I did today's tests on tcp_ipv4, but part of this harmonization is to
make v4 and v6 match up better. As I was examining line by line, one of
the things that leaps out is that ipv4 has nf_reset(skb):

tcp_ipv4:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
nf_reset(skb);

if (sk_filter(sk, skb))
goto discard_and_relse;

tcp_ipv6:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;

if (sk_filter(sk, skb))
goto discard_and_relse;

Does anybody know why? Should ipv6 have it? Or at least a comment
explaining the reasoning for the omission?

2010-01-14 01:03:14

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

On Wed, 2010-01-13 at 16:13 -0500, William Allen Simpson wrote:
> I did today's tests on tcp_ipv4, but part of this harmonization is to
> make v4 and v6 match up better. As I was examining line by line, one of
> the things that leaps out is that ipv4 has nf_reset(skb):
>
> tcp_ipv4:
> if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
> goto discard_and_relse;
> nf_reset(skb);
>
> if (sk_filter(sk, skb))
> goto discard_and_relse;
>
> tcp_ipv6:
> if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
> goto discard_and_relse;
>
> if (sk_filter(sk, skb))
> goto discard_and_relse;
>
> Does anybody know why? Should ipv6 have it? Or at least a comment
> explaining the reasoning for the omission?

You should ask the person that put the line in, but
it might be because no IPv6 NAT support exists.

$ git blame -L 1658,1658 net/ipv4/tcp_ipv4.c
b59c2701 (Patrick McHardy 2006-01-06 23:06:10 -0800 1658) nf_reset(skb);

And

$ git log -p -1 b59c2701
commit b59c270104f03960069596722fea70340579244d
Author: Patrick McHardy <[email protected]>
Date: Fri Jan 6 23:06:10 2006 -0800

[NETFILTER]: Keep conntrack reference until IPsec policy checks are done

Keep the conntrack reference until policy checks have been performed for
IPsec NAT support. The reference needs to be dropped before a packet is
queued to avoid having the conntrack module unloadable.

Signed-off-by: Patrick McHardy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>




2010-01-14 03:26:10

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH] Makefile: Document ability to make file.lst and file.S

On Thu, Jan 14, 2010 at 1:31 AM, Joe Perches <[email protected]> wrote:
> On Wed, 2010-01-13 at 18:14 +0100, Andi Kleen wrote:
>> On Wed, Jan 13, 2010 at 08:40:35AM -0800, Joe Perches wrote:
>> > On Wed, 2010-01-13 at 16:53 +0100, Andi Kleen wrote:
>> > > On Wed, Jan 13, 2010 at 10:36:39AM -0500, William Allen Simpson wrote:
>> > > > What make command would you suggest to get the interleaved asm?
>> > > make path/to/file.lst
>> > > (or .S -- you might need uptodate binutils if the lst listing is messed up)
>> > I didn't know that was possible.
>> The trap is that it doesn't work well on most systems with too
>> old binutils (unless the gcc is really ancient)
>
> Then that might as well go into the help.
>
> Signed-off-by: Joe Perches <[email protected]>

Good patch,

Acked-by: WANG Cong <[email protected]>

> ---
> diff --git a/Makefile b/Makefile
> index 9f64552..484cff4 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1248,7 +1248,9 @@ help:
>        @echo  '  firmware_install- Install all firmware to INSTALL_FW_PATH'
>        @echo  '                    (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
>        @echo  '  dir/            - Build all files in dir and below'
> -       @echo  '  dir/file.[ois]  - Build specified target only'
> +       @echo  '  dir/file.[oisS] - Build specified target only'
> +       @echo  '  dir/file.lst    - Build specified mixed source/assembly target only'
> +       @echo  '                    (requires a recent binutils and recent build (System.map))'
>        @echo  '  dir/file.ko     - Build module including final link'
>        @echo  '  modules_prepare - Set up for building external modules'
>        @echo  '  tags/TAGS       - Generate tags file for editors'
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

2010-01-14 08:39:19

by Patrick McHardy

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

Joe Perches wrote:
> On Wed, 2010-01-13 at 16:13 -0500, William Allen Simpson wrote:
>> I did today's tests on tcp_ipv4, but part of this harmonization is to
>> make v4 and v6 match up better. As I was examining line by line, one of
>> the things that leaps out is that ipv4 has nf_reset(skb):
>>
>> tcp_ipv4:
>> if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
>> goto discard_and_relse;
>> nf_reset(skb);
>>
>> if (sk_filter(sk, skb))
>> goto discard_and_relse;
>>
>> tcp_ipv6:
>> if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
>> goto discard_and_relse;
>>
>> if (sk_filter(sk, skb))
>> goto discard_and_relse;
>>
>> Does anybody know why? Should ipv6 have it? Or at least a comment
>> explaining the reasoning for the omission?
>
> You should ask the person that put the line in, but
> it might be because no IPv6 NAT support exists.
>
> $ git blame -L 1658,1658 net/ipv4/tcp_ipv4.c
> b59c2701 (Patrick McHardy 2006-01-06 23:06:10 -0800 1658) nf_reset(skb);
>
> And
>
> $ git log -p -1 b59c2701
> commit b59c270104f03960069596722fea70340579244d
> Author: Patrick McHardy <[email protected]>
> Date: Fri Jan 6 23:06:10 2006 -0800
>
> [NETFILTER]: Keep conntrack reference until IPsec policy checks are done
>
> Keep the conntrack reference until policy checks have been performed for
> IPsec NAT support. The reference needs to be dropped before a packet is
> queued to avoid having the conntrack module unloadable.

In IPv6 it is released in ip6_input.c before the packet is handed to
the protocol handler.

2010-01-14 15:03:29

by William Allen Simpson

[permalink] [raw]
Subject: Re: [PATCH v4] tcp: harmonize tcp_vx_rcv header length assumptions

Patrick McHardy wrote:
> In IPv6 it is released in ip6_input.c before the packet is handed to
> the protocol handler.
>
Thanks, I made a note of it.

Thanks to Andi, I've been using his suggestion to compare the IPv4 and
IPv6 generated code. Another minor difference between them was IPv4
stored *iph.

So, I added *ip6h, and discovered that it made the code longer. Later
inline function invocations had also calculated it, so adding it merely
calculated it earlier and stored it. The code thrashes registers
making room for the additional variable. I'm guessing that the IPv6
folks had already looked at the issue.... Good work!

So, I tried removing *iph, and that shrunk the IPv4 code some, too.

Thanks to everybody for their comments and review.

2010-01-14 15:10:53

by William Allen Simpson

[permalink] [raw]
Subject: [PATCH v5] tcp: harmonize tcp_vx_rcv header length assumptions

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6d85861..89c5465 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -975,6 +975,13 @@ xfrm_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x, unsigned short
}

#ifdef CONFIG_XFRM
+/*
+ * For transport, the policy is checked before the presumed more expensive
+ * checksum. The transport header has already been checked for size, and is
+ * guaranteed to be contiguous. These policies must not alter the header or
+ * its position in the buffer, and should not shorten the buffer length
+ * without ensuring the length remains >= the header size.
+ */
extern int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family);

static inline int __xfrm_policy_check2(struct sock *sk, int dir,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebf..0a76e41 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1559,7 +1559,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb))
goto csum_err;

if (sk->sk_state == TCP_LISTEN) {
@@ -1601,14 +1602,13 @@ csum_err:
}

/*
- * From tcp_input.c
+ * Called by ip_input.c: ip_local_deliver_finish()
*/
-
int tcp_v4_rcv(struct sk_buff *skb)
{
- const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

@@ -1618,31 +1618,33 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr) / 4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff * 4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

- /* An explanation is required here, I think.
- * Packet length and doff are validated by header prediction,
- * provided case of th->doff==0 is eliminated.
- * So, we defer the checks. */
+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;

th = tcp_hdr(skb);
- iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff * 4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = iph->tos;
+ TCP_SKB_CB(skb)->flags = ip_hdr(skb)->tos;
TCP_SKB_CB(skb)->sacked = 0;

sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1682,14 +1684,14 @@ process:
bh_unlock_sock(sk);

sock_put(sk);
-
return ret;

no_tcp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1711,18 +1713,21 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
+
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
&tcp_hashinfo,
- iph->daddr, th->dest,
+ ip_hdr(skb)->daddr,
+ th->dest,
inet_iif(skb));
- if (sk2) {
+ if (sk2 != NULL) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
inet_twsk_put(inet_twsk(sk));
sk = sk2;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index febfd59..b76939a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1594,7 +1594,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb))
goto csum_err;

if (sk->sk_state == TCP_LISTEN) {
@@ -1664,38 +1665,47 @@ ipv6_pktoptions:
return 0;
}

+/*
+ * Called by ip6_input.c: ip6_input_finish()
+ */
static int tcp_v6_rcv(struct sk_buff *skb)
{
struct tcphdr *th;
struct sock *sk;
+ int tcp_header_len;
int ret;
struct net *net = dev_net(skb->dev);

if (skb->pkt_type != PACKET_HOST)
goto discard_it;

- /*
- * Count it even if it's bad.
- */
+ /* Count it even if it's bad */
TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

+ /* Check too short header */
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;

- th = tcp_hdr(skb);
-
- if (th->doff < sizeof(struct tcphdr)/4)
+ /* Check bad doff, compare doff directly to constant value */
+ tcp_header_len = tcp_hdr(skb)->doff;
+ if (tcp_header_len < (sizeof(struct tcphdr) / 4))
goto bad_packet;
- if (!pskb_may_pull(skb, th->doff*4))
+
+ /* Check too short header and options */
+ tcp_header_len *= 4;
+ if (!pskb_may_pull(skb, tcp_header_len))
goto discard_it;

+ /* Packet length and doff are validated by header prediction,
+ * provided case of th->doff == 0 is eliminated (above).
+ */
if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
goto bad_packet;

th = tcp_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff*4);
+ skb->len - tcp_header_len);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
@@ -1711,6 +1721,7 @@ process:

if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+ /* nf_reset(skb); in ip6_input.c ip6_input_finish() */

if (sk_filter(sk, skb))
goto discard_and_relse;
@@ -1743,7 +1754,8 @@ no_tcp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
bad_packet:
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
@@ -1751,11 +1763,7 @@ bad_packet:
}

discard_it:
-
- /*
- * Discard frame
- */
-
+ /* Discard frame. */
kfree_skb(skb);
return 0;

@@ -1769,24 +1777,23 @@ do_time_wait:
goto discard_it;
}

- if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ /* Assumes header and options unchanged since checksum_init() */
+ if (tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}

switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
- case TCP_TW_SYN:
- {
- struct sock *sk2;
-
- sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
- &ipv6_hdr(skb)->daddr,
- ntohs(th->dest), inet6_iif(skb));
+ case TCP_TW_SYN: {
+ struct sock *sk2 = inet6_lookup_listener(dev_net(skb->dev),
+ &tcp_hashinfo,
+ &ipv6_hdr(skb)->daddr,
+ ntohs(th->dest),
+ inet6_iif(skb));
if (sk2 != NULL) {
- struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
- inet_twsk_put(tw);
+ inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
}
--
1.6.3.3


Attachments:
len_th+3v5.patch (7.85 kB)

2010-01-18 12:30:04

by Michal Marek

[permalink] [raw]
Subject: Re: [PATCH] Makefile: Document ability to make file.lst and file.S

On 13.1.2010 18:31, Joe Perches wrote:
> Then that might as well go into the help.
>
> Signed-off-by: Joe Perches <[email protected]>
> ---
> diff --git a/Makefile b/Makefile
> index 9f64552..484cff4 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1248,7 +1248,9 @@ help:
> @echo ' firmware_install- Install all firmware to INSTALL_FW_PATH'
> @echo ' (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
> @echo ' dir/ - Build all files in dir and below'
> - @echo ' dir/file.[ois] - Build specified target only'
> + @echo ' dir/file.[oisS] - Build specified target only'
> + @echo ' dir/file.lst - Build specified mixed source/assembly target only'
> + @echo ' (requires a recent binutils and recent build (System.map))'

Applied, thanks.

Michal