SKBs are assigned a conntrack entry before being passed to any NFQUEUEs,
and if no entry is found then a new one is created. This behavior causes
problems for some traffic patterns. For example, if two UDP packets
to/from the same host (using the same ports) arrive at the "same" time,
both are assigned a new conntrack entry. After the first packet have
traversed all chains, the conntrack entry will be inserted into the
global table. The second packet will then be dropped during the
insertion step, as an entry for the same flow already exists. One type
of application that frequently generates this traffic pattern, is DNS
resolvers.
This commit introduces a new function that checks, and potentially
replaces, the conntrack entry for any additional "new" SKBs mapping to
an existing flow. While not a perfect solution, there are still
situations where to-be-dropped SKBs can slip through, the situations is
improved considerably. On the routers I have used for testing, packets
belonging to the same UDP flow are let through (when generating the
traffic pattern described above). Without the change in this commit, all
packets except the first one was dropped.
With the change in this commit, a user can implement "perfect" solutions
in user-space. An application can for example keep track of seen UDP
flows, and then only release packets belonging to one flow when the
entry has been created. Without the change, and SKB is stuck with the
original conntrack entry.
Signed-off-by: Kristian Evensen <[email protected]>
---
net/netfilter/nfnetlink_queue.c | 68 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c97966298..150c11ff4 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -43,6 +43,9 @@
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#endif
#define NFQNL_QMAX_DEFAULT 1024
@@ -1046,6 +1049,53 @@ static int nfq_id_after(unsigned int id, unsigned int max)
return (int)(id - max) > 0;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+static void nfqnl_update_ct(struct net *net, struct sk_buff *skb)
+{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = NULL;
+ unsigned int dataoff;
+ u16 l3num;
+ u8 l4num;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ l3num = nf_ct_l3num(ct);
+ l3proto = nf_ct_l3proto_find_get(l3num);
+
+ if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
+ &l4num) <= 0) {
+ return;
+ }
+
+ l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+ if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
+ l4num, net, &tuple, l3proto, l4proto)) {
+ return;
+ }
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
+ h = nf_conntrack_find_get(net, &ct->zone, &tuple);
+#else
+ h = nf_conntrack_find_get(net, NULL, &tuple);
+#endif
+
+ if (h) {
+ pr_debug("%s: tuple %u %pI4:%hu -> %pI4:%hu\n", __func__,
+ tuple.dst.protonum, &tuple.src.u3.ip,
+ ntohs(tuple.src.u.all), &tuple.dst.u3.ip,
+ ntohs(tuple.dst.u.all));
+ nf_ct_put(ct);
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+ }
+}
+#endif
+
static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -1060,6 +1110,7 @@ static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ enum ip_conntrack_info ctinfo;
queue = verdict_instance_lookup(q, queue_num,
NETLINK_CB(skb).portid);
@@ -1090,6 +1141,16 @@ static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
list_for_each_entry_safe(entry, tmp, &batch_list, list) {
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_ct_get(entry->skb, &ctinfo);
+
+ if (ctinfo == IP_CT_NEW && verdict != NF_STOLEN &&
+ verdict != NF_DROP) {
+ nfqnl_update_ct(net, entry->skb);
+ }
+#endif
+
nf_reinject(entry, verdict);
}
return 0;
@@ -1213,6 +1274,13 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ nf_ct_get(entry->skb, &ctinfo);
+
+ if (ctinfo == IP_CT_NEW && verdict != NF_STOLEN && verdict != NF_DROP)
+ nfqnl_update_ct(net, entry->skb);
+#endif
+
nf_reinject(entry, verdict);
return 0;
}
--
2.14.1
Hi Kristian,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on nf-next/master]
[also build test ERROR on v4.17-rc3 next-20180503]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Kristian-Evensen/netfilter-nf_queue-Replace-conntrack-entry/20180504-051218
base: https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: x86_64-randconfig-s5-05041850 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
net/netfilter/nfnetlink_queue.o: In function `nfqnl_update_ct':
>> net/netfilter/nfnetlink_queue.c:1062: undefined reference to `nf_ct_l3proto_find_get'
>> net/netfilter/nfnetlink_queue.c:1069: undefined reference to `nf_ct_l4proto_find_get'
>> net/netfilter/nfnetlink_queue.c:1071: undefined reference to `nf_ct_get_tuple'
>> net/netfilter/nfnetlink_queue.c:1079: undefined reference to `nf_conntrack_find_get'
vim +1062 net/netfilter/nfnetlink_queue.c
1046
1047 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1048 static void nfqnl_update_ct(struct net *net, struct sk_buff *skb)
1049 {
1050 const struct nf_conntrack_l3proto *l3proto;
1051 const struct nf_conntrack_l4proto *l4proto;
1052 struct nf_conntrack_tuple_hash *h;
1053 struct nf_conntrack_tuple tuple;
1054 enum ip_conntrack_info ctinfo;
1055 struct nf_conn *ct = NULL;
1056 unsigned int dataoff;
1057 u16 l3num;
1058 u8 l4num;
1059
1060 ct = nf_ct_get(skb, &ctinfo);
1061 l3num = nf_ct_l3num(ct);
> 1062 l3proto = nf_ct_l3proto_find_get(l3num);
1063
1064 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
1065 &l4num) <= 0) {
1066 return;
1067 }
1068
> 1069 l4proto = nf_ct_l4proto_find_get(l3num, l4num);
1070
> 1071 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
1072 l4num, net, &tuple, l3proto, l4proto)) {
1073 return;
1074 }
1075
1076 #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
1077 h = nf_conntrack_find_get(net, &ct->zone, &tuple);
1078 #else
> 1079 h = nf_conntrack_find_get(net, NULL, &tuple);
1080 #endif
1081
1082 if (h) {
1083 pr_debug("%s: tuple %u %pI4:%hu -> %pI4:%hu\n", __func__,
1084 tuple.dst.protonum, &tuple.src.u3.ip,
1085 ntohs(tuple.src.u.all), &tuple.dst.u3.ip,
1086 ntohs(tuple.dst.u.all));
1087 nf_ct_put(ct);
1088 ct = nf_ct_tuplehash_to_ctrack(h);
1089 nf_ct_set(skb, ct, IP_CT_NEW);
1090 }
1091 }
1092 #endif
1093
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Kristian,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on nf-next/master]
[also build test WARNING on v4.17-rc3 next-20180504]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Kristian-Evensen/netfilter-nf_queue-Replace-conntrack-entry/20180504-051218
base: https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
smatch warnings:
net/netfilter/nfnetlink_queue.c:1141 nfqnl_recv_verdict_batch() warn: curly braces intended?
# https://github.com/0day-ci/linux/commit/8776e32a6c6e2ba0c6c8ce85e227672b81a1649d
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 8776e32a6c6e2ba0c6c8ce85e227672b81a1649d
vim +1141 net/netfilter/nfnetlink_queue.c
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1093
7b8002a1 net/netfilter/nfnetlink_queue.c Pablo Neira Ayuso 2015-12-15 1094 static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
7b8002a1 net/netfilter/nfnetlink_queue.c Pablo Neira Ayuso 2015-12-15 1095 struct sk_buff *skb,
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1096 const struct nlmsghdr *nlh,
04ba724b net/netfilter/nfnetlink_queue.c Pablo Neira Ayuso 2017-06-19 1097 const struct nlattr * const nfqa[],
04ba724b net/netfilter/nfnetlink_queue.c Pablo Neira Ayuso 2017-06-19 1098 struct netlink_ext_ack *extack)
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1099 {
3da07c0c net/netfilter/nfnetlink_queue_core.c David S. Miller 2012-06-26 1100 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1101 struct nf_queue_entry *entry, *tmp;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1102 unsigned int verdict, maxid;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1103 struct nfqnl_msg_verdict_hdr *vhdr;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1104 struct nfqnl_instance *queue;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1105 LIST_HEAD(batch_list);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1106 u16 queue_num = ntohs(nfmsg->res_id);
e8179610 net/netfilter/nfnetlink_queue_core.c Gao feng 2013-03-24 1107 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1108 enum ip_conntrack_info ctinfo;
e8179610 net/netfilter/nfnetlink_queue_core.c Gao feng 2013-03-24 1109
e8179610 net/netfilter/nfnetlink_queue_core.c Gao feng 2013-03-24 1110 queue = verdict_instance_lookup(q, queue_num,
e8179610 net/netfilter/nfnetlink_queue_core.c Gao feng 2013-03-24 1111 NETLINK_CB(skb).portid);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1112 if (IS_ERR(queue))
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1113 return PTR_ERR(queue);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1114
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1115 vhdr = verdicthdr_get(nfqa);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1116 if (!vhdr)
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1117 return -EINVAL;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1118
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1119 verdict = ntohl(vhdr->verdict);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1120 maxid = ntohl(vhdr->id);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1121
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1122 spin_lock_bh(&queue->lock);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1123
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1124 list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1125 if (nfq_id_after(entry->id, maxid))
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1126 break;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1127 __dequeue_entry(queue, entry);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1128 list_add_tail(&entry->list, &batch_list);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1129 }
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1130
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1131 spin_unlock_bh(&queue->lock);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1132
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1133 if (list_empty(&batch_list))
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1134 return -ENOENT;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1135
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1136 list_for_each_entry_safe(entry, tmp, &batch_list, list) {
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1137 if (nfqa[NFQA_MARK])
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1138 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1139
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1140 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 @1141 nf_ct_get(entry->skb, &ctinfo);
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1142
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1143 if (ctinfo == IP_CT_NEW && verdict != NF_STOLEN &&
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1144 verdict != NF_DROP) {
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1145 nfqnl_update_ct(net, entry->skb);
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1146 }
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1147 #endif
8776e32a net/netfilter/nfnetlink_queue.c Kristian Evensen 2018-05-03 1148
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1149 nf_reinject(entry, verdict);
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1150 }
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1151 return 0;
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1152 }
97d32cf9 net/netfilter/nfnetlink_queue.c Florian Westphal 2011-07-19 1153
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
In gmane.comp.security.firewalls.netfilter.devel Kristian Evensen <[email protected]> wrote:
> SKBs are assigned a conntrack entry before being passed to any NFQUEUEs,
> and if no entry is found then a new one is created. This behavior causes
> problems for some traffic patterns. For example, if two UDP packets
> to/from the same host (using the same ports) arrive at the "same" time,
> both are assigned a new conntrack entry. After the first packet have
> traversed all chains, the conntrack entry will be inserted into the
> global table. The second packet will then be dropped during the
> insertion step, as an entry for the same flow already exists. One type
> of application that frequently generates this traffic pattern, is DNS
> resolvers.
> This commit introduces a new function that checks, and potentially
> replaces, the conntrack entry for any additional "new" SKBs mapping to
> an existing flow. While not a perfect solution, there are still
> situations where to-be-dropped SKBs can slip through, the situations is
> improved considerably. On the routers I have used for testing, packets
> belonging to the same UDP flow are let through (when generating the
> traffic pattern described above). Without the change in this commit, all
> packets except the first one was dropped.
> With the change in this commit, a user can implement "perfect" solutions
> in user-space. An application can for example keep track of seen UDP
> flows, and then only release packets belonging to one flow when the
> entry has been created. Without the change, and SKB is stuck with the
> original conntrack entry.
PING
Any progress on this patch?
> Signed-off-by: Kristian Evensen <[email protected]>
> ---
> net/netfilter/nfnetlink_queue.c | 68 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 68 insertions(+)
[...]