In message <[email protected]> you write:
> A. Paul, look into your own code doing the same thing which you wrote before.
> Seems, you have lost the skill of copying skbs owned by sockets.
Argh... that explains the crashes, of course. You have a good memory!
> Actually, skbs owned by sockets should not be copied at all, if
> mangle does not involve change of data.
The NAT code is special: it will always mangle packet the same way, so
it doesn't *REALLY* matter if we mangle the original packet for local
output (if we change IP headers in different ways for retransmission
of the same packet, we would have much bigger problems).
> B. It is very strange that a copy happened in local output path.
> The problem which was supposed to be cured by this was at input.
My solution was too generic: *any* cloned skb was copied. Repairing
is simple.
Does this work for everyone? Particularly while running tcpdump?
Rusty.
--
Premature optmztion is rt of all evl. --DK
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ip_fw_compat.c working-2.4.13-uml-proc/net/ipv4/netfilter/ip_fw_compat.c
--- linux-2.4.14-official/net/ipv4/netfilter/ip_fw_compat.c Sun Apr 29 06:17:11 2001
+++ working-2.4.13-uml-proc/net/ipv4/netfilter/ip_fw_compat.c Tue Nov 13 20:58:33 2001
@@ -78,11 +78,21 @@
{
int ret = FW_BLOCK;
u_int16_t redirpt;
+ struct sk_buff *nskb;
/* Assume worse case: any hook could change packet */
(*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
if ((*pskb)->ip_summed == CHECKSUM_HW)
(*pskb)->ip_summed = CHECKSUM_NONE;
+
+ /* Firewall rules can alter TOS: raw socket (tcpdump) may have
+ clone of incoming skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ *pskb = nskb;
+ }
switch (hooknum) {
case NF_IP_PRE_ROUTING:
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ip_nat_core.c working-2.4.13-uml-proc/net/ipv4/netfilter/ip_nat_core.c
--- linux-2.4.14-official/net/ipv4/netfilter/ip_nat_core.c Mon May 28 12:43:00 2001
+++ working-2.4.13-uml-proc/net/ipv4/netfilter/ip_nat_core.c Tue Nov 13 20:57:56 2001
@@ -734,6 +734,17 @@
synchronize_bh()) can vanish. */
READ_LOCK(&ip_nat_lock);
for (i = 0; i < info->num_manips; i++) {
+ /* raw socket (tcpdump) may have clone of incoming
+ skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb) {
+ READ_UNLOCK(&ip_nat_lock);
+ return NF_DROP;
+ }
+ *pskb = nskb;
+ }
+
if (info->manips[i].direction == dir
&& info->manips[i].hooknum == hooknum) {
DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ipt_TCPMSS.c working-2.4.13-uml-proc/net/ipv4/netfilter/ipt_TCPMSS.c
--- linux-2.4.14-official/net/ipv4/netfilter/ipt_TCPMSS.c Thu Oct 25 11:29:50 2001
+++ working-2.4.13-uml-proc/net/ipv4/netfilter/ipt_TCPMSS.c Tue Nov 13 20:59:42 2001
@@ -44,11 +44,22 @@
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
struct tcphdr *tcph;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph;
u_int16_t tcplen, newtotlen, oldval, newmss;
unsigned int i;
u_int8_t *opt;
+ struct sk_buff *nskb;
+ /* raw socket (tcpdump) may have clone of incoming skb: don't
+ disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ *pskb = nskb;
+ }
+
+ iph = (*pskb)->nh.iph;
tcplen = (*pskb)->len - iph->ihl*4;
tcph = (void *)iph + iph->ihl*4;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ipt_TOS.c working-2.4.13-uml-proc/net/ipv4/netfilter/ipt_TOS.c
--- linux-2.4.14-official/net/ipv4/netfilter/ipt_TOS.c Thu Oct 25 11:29:50 2001
+++ working-2.4.13-uml-proc/net/ipv4/netfilter/ipt_TOS.c Tue Nov 13 21:02:20 2001
@@ -19,7 +19,18 @@
const struct ipt_tos_target_info *tosinfo = targinfo;
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
+ struct sk_buff *nskb;
u_int16_t diffs[2];
+
+ /* raw socket (tcpdump) may have clone of incoming
+ skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ *pskb = nskb;
+ iph = (*pskb)->nh.iph;
+ }
diffs[0] = htons(iph->tos) ^ 0xFFFF;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
Hello!
> The NAT code is special: it will always mangle packet the same way, so
> it doesn't *REALLY* matter if we mangle the original packet for local
> output (if we change IP headers in different ways for retransmission
> of the same packet, we would have much bigger problems).
No, this does not matter at all if you change only headers.
Netfilter is first who sees output packets and it may rewrite them
mostly arbitraririly: packet sockets, devices see only rewritten copy.
When retransmitting tcp prepares new headers and, hence, checks
for clonin itself, if someone holds the packet it prepares copy.
But if a hook changes _data_ (f.e. ftp packets), it can break original
packet sitting in tcp queue and this is fatal. So, when mangling
data, packet must be always copied. When mangling header, no copy
is required.
> Does this work for everyone? Particularly while running tcpdump?
It does, but I cannot say "for everyone", it will work with current
protocol suite, but probably will break with out-of-tree stacks.
Anyway, if it will not work, it will be problem not of netfilter,
but of the guy who did not prepare right ownership. :-)
Alexey
In message <[email protected]> you write:
> But if a hook changes _data_ (f.e. ftp packets), it can break original
> packet sitting in tcp queue and this is fatal. So, when mangling
> data, packet must be always copied. When mangling header, no copy
> is required.
ACK! Then it will have to set skb->sk, or else ip_queue_xmit2 will
break on orphaned, copied packet.
OK, this covers that case, too (ftp and IRC), and fixes the skb leak
in the last patch.
Thanks!
Rusty.
--
Premature optmztion is rt of all evl. --DK
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ip_fw_compat.c working-2.4.14-nfunshare/net/ipv4/netfilter/ip_fw_compat.c
--- linux-2.4.14-official/net/ipv4/netfilter/ip_fw_compat.c Sat Apr 28 07:15:01 2001
+++ working-2.4.14-nfunshare/net/ipv4/netfilter/ip_fw_compat.c Wed Nov 14 02:53:03 2001
@@ -84,6 +84,16 @@
if ((*pskb)->ip_summed == CHECKSUM_HW)
(*pskb)->ip_summed = CHECKSUM_NONE;
+ /* Firewall rules can alter TOS: raw socket (tcpdump) may have
+ clone of incoming skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ }
+
switch (hooknum) {
case NF_IP_PRE_ROUTING:
if (fwops->fw_acct_in)
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ip_nat_core.c working-2.4.14-nfunshare/net/ipv4/netfilter/ip_nat_core.c
--- linux-2.4.14-official/net/ipv4/netfilter/ip_nat_core.c Thu May 17 03:31:27 2001
+++ working-2.4.14-nfunshare/net/ipv4/netfilter/ip_nat_core.c Wed Nov 14 02:21:14 2001
@@ -734,6 +734,18 @@
synchronize_bh()) can vanish. */
READ_LOCK(&ip_nat_lock);
for (i = 0; i < info->num_manips; i++) {
+ /* raw socket (tcpdump) may have clone of incoming
+ skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb) {
+ READ_UNLOCK(&ip_nat_lock);
+ return NF_DROP;
+ }
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ }
+
if (info->manips[i].direction == dir
&& info->manips[i].hooknum == hooknum) {
DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ip_nat_helper.c working-2.4.14-nfunshare/net/ipv4/netfilter/ip_nat_helper.c
--- linux-2.4.14-official/net/ipv4/netfilter/ip_nat_helper.c Wed Aug 29 00:09:44 2001
+++ working-2.4.14-nfunshare/net/ipv4/netfilter/ip_nat_helper.c Wed Nov 14 02:46:26 2001
@@ -143,6 +143,23 @@
}
}
+ /* Alexey says: if a hook changes _data_ ... it can break
+ original packet sitting in tcp queue and this is fatal */
+ if (skb_cloned(*skb)) {
+ struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
+ if (!nskb) {
+ if (net_ratelimit())
+ printk("Out of memory cloning TCP packet\n");
+ return 0;
+ }
+ /* Rest of kernel will get very unhappy if we pass it
+ a suddenly-orphaned skbuff */
+ if ((*skb)->sk)
+ skb_set_owner_w(nskb, (*skb)->sk);
+ kfree_skb(*skb);
+ *skb = nskb;
+ }
+
/* skb may be copied !! */
iph = (*skb)->nh.iph;
tcph = (void *)iph + iph->ihl*4;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ipt_TCPMSS.c working-2.4.14-nfunshare/net/ipv4/netfilter/ipt_TCPMSS.c
--- linux-2.4.14-official/net/ipv4/netfilter/ipt_TCPMSS.c Mon Oct 1 05:26:08 2001
+++ working-2.4.14-nfunshare/net/ipv4/netfilter/ipt_TCPMSS.c Wed Nov 14 02:49:46 2001
@@ -44,11 +44,22 @@
{
const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
struct tcphdr *tcph;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph;
u_int16_t tcplen, newtotlen, oldval, newmss;
unsigned int i;
u_int8_t *opt;
+ /* raw socket (tcpdump) may have clone of incoming skb: don't
+ disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ kfree_skb(*pskb);
+ *pskb = nskb;
+ }
+
+ iph = (*pskb)->nh.iph;
tcplen = (*pskb)->len - iph->ihl*4;
tcph = (void *)iph + iph->ihl*4;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.4.14-official/net/ipv4/netfilter/ipt_TOS.c working-2.4.14-nfunshare/net/ipv4/netfilter/ipt_TOS.c
--- linux-2.4.14-official/net/ipv4/netfilter/ipt_TOS.c Mon Oct 1 05:26:08 2001
+++ working-2.4.14-nfunshare/net/ipv4/netfilter/ipt_TOS.c Wed Nov 14 02:49:37 2001
@@ -21,6 +21,17 @@
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
u_int16_t diffs[2];
+ /* raw socket (tcpdump) may have clone of incoming
+ skb: don't disturb it --RR */
+ if (skb_cloned(*pskb) && !(*pskb)->sk) {
+ struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
+ if (!nskb)
+ return NF_DROP;
+ *pskb = nskb;
+ kfree_skb(*pskb);
+ iph = (*pskb)->nh.iph;
+ }
+
diffs[0] = htons(iph->tos) ^ 0xFFFF;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
diffs[1] = htons(iph->tos);