2009-07-27 15:31:28

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 0/5] IPVS full NAT support + netfilter 'ipvs' match support

The following series is aiming at adding full NAT support to IPVS. The
approach is via a minimal change to IPVS (make friends with nf_conntrack)
and adding a netfilter matcher (xt_ipvs + libxt_ipvs).

Example usage:

% ipvsadm -A -t 192.168.100.30:8080 -s rr
% ipvsadm -a -t 192.168.100.30:8080 -r 192.168.10.20:8080 -m
# ...

# SNAT for VIP 192.168.100.30:8080
% iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 --vport 8080 \
> -j SNAT --to-source 192.168.10.10

Comments?

Changes to the linux kernel:

Hannes Eder (4):
IPVS: debugging output for ip_vs_update_conntrack
IPVS: make friends with nf_conntrack
netfilter: xt_ipvs (netfilter matcher for ipvs)
IPVS: prefix EnterFunction and LeaveFunction msg with "IPVS:"

include/linux/netfilter/xt_ipvs.h | 32 +++++++
include/net/ip_vs.h | 24 +++--
net/netfilter/Kconfig | 8 ++
net/netfilter/Makefile | 1
net/netfilter/ipvs/ip_vs_core.c | 36 --------
net/netfilter/ipvs/ip_vs_proto.c | 1
net/netfilter/ipvs/ip_vs_xmit.c | 54 ++++++++++++
net/netfilter/xt_ipvs.c | 171 +++++++++++++++++++++++++++++++++++++
8 files changed, 279 insertions(+), 48 deletions(-)
create mode 100644 include/linux/netfilter/xt_ipvs.h
create mode 100644 net/netfilter/xt_ipvs.c


Changes to iptables:

Hannes Eder (1):
libxt_ipvs: user space lib for netfilter matcher xt_ipvs

extensions/libxt_ipvs.c | 350 +++++++++++++++++++++++++++++++++++++
extensions/libxt_ipvs.man | 7 +
include/linux/netfilter/xt_ipvs.h | 32 +++
3 files changed, 389 insertions(+), 0 deletions(-)
create mode 100644 extensions/libxt_ipvs.c
create mode 100644 extensions/libxt_ipvs.man
create mode 100644 include/linux/netfilter/xt_ipvs.h


Cheers,
-Hannes


2009-07-27 15:31:30

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 1/5] IPVS: prefix EnterFunction and LeaveFunction msg with "IPVS:"

From: Hannes Eder <[email protected]>

Now all printk messages from IPVS are prefixed with "IPVS:".

Signed-off-by: Hannes Eder <[email protected]>

include/net/ip_vs.h | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e01cc2d..e03524e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -161,18 +161,18 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
} while (0)

#ifdef CONFIG_IP_VS_DEBUG
-#define EnterFunction(level) \
- do { \
- if (level <= ip_vs_get_debug_level()) \
- printk(KERN_DEBUG "Enter: %s, %s line %i\n", \
- __func__, __FILE__, __LINE__); \
- } while (0)
-#define LeaveFunction(level) \
- do { \
- if (level <= ip_vs_get_debug_level()) \
- printk(KERN_DEBUG "Leave: %s, %s line %i\n", \
- __func__, __FILE__, __LINE__); \
- } while (0)
+#define EnterFunction(level) \
+ do { \
+ if (level <= ip_vs_get_debug_level()) \
+ printk(KERN_DEBUG "IPVS: Enter: %s, %s line %i\n", \
+ __func__, __FILE__, __LINE__); \
+ } while (0)
+#define LeaveFunction(level) \
+ do { \
+ if (level <= ip_vs_get_debug_level()) \
+ printk(KERN_DEBUG "IPVS: Leave: %s, %s line %i\n", \
+ __func__, __FILE__, __LINE__); \
+ } while (0)
#else
#define EnterFunction(level) do {} while (0)
#define LeaveFunction(level) do {} while (0)

2009-07-27 15:32:10

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 5/5] libxt_ipvs: user space lib for netfilter matcher xt_ipvs

Signed-off-by: Hannes Eder <[email protected]>

extensions/libxt_ipvs.c | 350 +++++++++++++++++++++++++++++++++++++
extensions/libxt_ipvs.man | 7 +
include/linux/netfilter/xt_ipvs.h | 32 +++
3 files changed, 389 insertions(+), 0 deletions(-)
create mode 100644 extensions/libxt_ipvs.c
create mode 100644 extensions/libxt_ipvs.man
create mode 100644 include/linux/netfilter/xt_ipvs.h

diff --git a/extensions/libxt_ipvs.c b/extensions/libxt_ipvs.c
new file mode 100644
index 0000000..dc48aee
--- /dev/null
+++ b/extensions/libxt_ipvs.c
@@ -0,0 +1,350 @@
+/* Shared library add-on to iptables to add ipvs matching.
+ *
+ * Detailed doc is in the kernel module source
+ * net/netfilter/xt_ipvs.c
+ *
+ * Author: Hannes Eder <[email protected]>
+ */
+#include <sys/types.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <xtables.h>
+#include <linux/netfilter/xt_ipvs.h>
+
+
+static const struct option ipvs_mt_opts[] = {
+ { .name = "ipvs", .has_arg = false, .val = '0' },
+ { .name = "vproto", .has_arg = true, .val = '1' },
+ { .name = "vaddr", .has_arg = true, .val = '2' },
+ { .name = "vport", .has_arg = true, .val = '3' },
+ { .name = "vdir", .has_arg = true, .val = '4' },
+ { .name = "vmethod", .has_arg = true, .val = '5' },
+ { .name = NULL }
+};
+
+static void ipvs_mt_help(void)
+{
+ printf(
+"ipvs match options:\n"
+"[!] --ipvs\n"
+"\n"
+"Any of the following options implies --ipvs (even negated)\n"
+"[!] --vproto protocol\n"
+"[!] --vaddr address[/mask]\n"
+"[!] --vport port\n"
+" --vdir {ORIGINAL|REPLY}\n"
+"[!] --vmethod {GATE|IPIP|MASQ}\n"
+ );
+}
+
+static void ipvs_mt_parse_addr_and_mask(const char *arg,
+ union nf_inet_addr *address,
+ union nf_inet_addr *mask,
+ unsigned int family)
+{
+ struct in_addr *addr = NULL;
+ struct in6_addr *addr6 = NULL;
+ unsigned int naddrs = 0;
+
+ if (family == NFPROTO_IPV4) {
+ xtables_ipparse_any(arg, &addr, &mask->in, &naddrs);
+ if (naddrs > 1)
+ xtables_error(PARAMETER_PROBLEM,
+ "multiple IP addresses not allowed");
+ if (naddrs == 1)
+ memcpy(&address->in, addr, sizeof(*addr));
+ } else if (family == NFPROTO_IPV6) {
+ xtables_ip6parse_any(arg, &addr6, &mask->in6, &naddrs);
+ if (naddrs > 1)
+ xtables_error(PARAMETER_PROBLEM,
+ "multiple IP addresses not allowed");
+ if (naddrs == 1)
+ memcpy(&address->in6, addr6, sizeof(*addr6));
+ } else {
+ /* Hu? */
+ assert(false);
+ }
+}
+
+/* Function which parses command options; returns true if it ate an option */
+static int ipvs_mt_parse(int c, char **argv, int invert, unsigned int *flags,
+ const void *entry, struct xt_entry_match **match,
+ unsigned int family)
+{
+ struct xt_ipvs *data = (void *)(*match)->data;
+ char *p = NULL;
+ u_int8_t op = 0;
+
+ if ('0' <= c && c <= '5') {
+ int ops[] = {
+ XT_IPVS_IPVS,
+ XT_IPVS_PROTO,
+ XT_IPVS_VADDR,
+ XT_IPVS_VPORT,
+ XT_IPVS_DIR,
+ XT_IPVS_METHOD
+ };
+ op = ops[c - '0'];
+ } else
+ return 0;
+
+ if (*flags & op & XT_IPVS_ONCE_MASK)
+ goto multiple_use;
+
+ switch (c) {
+ case '0': /* --ipvs */
+ /* Nothing to do here. */
+ break;
+
+ case '1': /* --vproto */
+ /* Canonicalize into lower case */
+ for (p = optarg; *p != '\0'; ++p)
+ *p = tolower(*p);
+
+ data->l4proto = xtables_parse_protocol(optarg);
+ break;
+
+ case '2': /* --vaddr */
+ ipvs_mt_parse_addr_and_mask(optarg, &data->vaddr,
+ &data->vmask, family);
+ break;
+
+ case '3': /* --vport */
+ data->vport = htons(xtables_parse_port(optarg, "tcp"));
+ break;
+
+ case '4': /* --vdir */
+ xtables_param_act(XTF_NO_INVERT, "ipvs", "--vdir", invert);
+ if (strcasecmp(optarg, "ORIGINAL") == 0) {
+ data->bitmask |= XT_IPVS_DIR;
+ data->invert &= ~XT_IPVS_DIR;
+ } else if (strcasecmp(optarg, "REPLY") == 0) {
+ data->bitmask |= XT_IPVS_DIR;
+ data->invert |= XT_IPVS_DIR;
+ } else {
+ xtables_param_act(XTF_BAD_VALUE,
+ "ipvs", "--vdir", optarg);
+ }
+ break;
+
+ case '5': /* --vmethod */
+ if (strcasecmp(optarg, "GATE") == 0)
+ data->fwd_method = IP_VS_CONN_F_DROUTE;
+ else if (strcasecmp(optarg, "IPIP") == 0)
+ data->fwd_method = IP_VS_CONN_F_TUNNEL;
+ else if (strcasecmp(optarg, "MASQ") == 0)
+ data->fwd_method = IP_VS_CONN_F_MASQ;
+ else
+ xtables_param_act(XTF_BAD_VALUE,
+ "ipvs", "--vmethod", optarg);
+ break;
+
+ default:
+ /* Hu? How did we came here? */
+ assert(false);
+ return 0;
+ }
+
+ if (op & XT_IPVS_ONCE_MASK) {
+ if (data->invert & XT_IPVS_IPVS)
+ xtables_error(PARAMETER_PROBLEM,
+ "! --ipvs can not be together with"
+ " other options");
+ data->bitmask |= XT_IPVS_IPVS;
+ }
+
+ data->bitmask |= op;
+ if (invert)
+ data->invert |= op;
+ *flags |= op;
+ return 1;
+
+multiple_use:
+ xtables_error(PARAMETER_PROBLEM,
+ "multiple use of the same ipvs option is not allowed");
+}
+
+static int ipvs_mt4_parse(int c, char **argv, int invert, unsigned int *flags,
+ const void *entry, struct xt_entry_match **match)
+{
+ return ipvs_mt_parse(c, argv, invert, flags, entry, match,
+ NFPROTO_IPV4);
+}
+
+static int ipvs_mt6_parse(int c, char **argv, int invert, unsigned int *flags,
+ const void *entry, struct xt_entry_match **match)
+{
+ return ipvs_mt_parse(c, argv, invert, flags, entry, match,
+ NFPROTO_IPV6);
+}
+
+static void ipvs_mt_check(unsigned int flags)
+{
+ if (flags == 0)
+ xtables_error(PARAMETER_PROBLEM,
+ "ipvs: At least one option is required");
+}
+
+/* Shamelessly copied from libxt_conntrack.c */
+static void
+ipvs_mt_dump_addr(const union nf_inet_addr *addr,
+ const union nf_inet_addr *mask,
+ unsigned int family, bool numeric)
+{
+ char buf[BUFSIZ];
+
+ if (family == NFPROTO_IPV4) {
+ if (!numeric && addr->ip == 0) {
+ printf("anywhere ");
+ return;
+ }
+ if (numeric)
+ strcpy(buf, xtables_ipaddr_to_numeric(&addr->in));
+ else
+ strcpy(buf, xtables_ipaddr_to_anyname(&addr->in));
+ strcat(buf, xtables_ipmask_to_numeric(&mask->in));
+ printf("%s ", buf);
+ } else if (family == NFPROTO_IPV6) {
+ if (!numeric && addr->ip6[0] == 0 && addr->ip6[1] == 0 &&
+ addr->ip6[2] == 0 && addr->ip6[3] == 0) {
+ printf("anywhere ");
+ return;
+ }
+ if (numeric)
+ strcpy(buf, xtables_ip6addr_to_numeric(&addr->in6));
+ else
+ strcpy(buf, xtables_ip6addr_to_anyname(&addr->in6));
+ strcat(buf, xtables_ip6mask_to_numeric(&mask->in6));
+ printf("%s ", buf);
+ }
+}
+
+static void ipvs_mt_dump(const void *ip, const struct xt_ipvs *data,
+ unsigned int family, bool numeric, const char *prefix)
+{
+ if (data->bitmask == XT_IPVS_IPVS) {
+ if (data->invert & XT_IPVS_IPVS)
+ printf("! ");
+ printf("%sipvs ", prefix);
+ }
+
+ if (data->bitmask & XT_IPVS_PROTO) {
+ if (data->invert & XT_IPVS_PROTO)
+ printf("! ");
+ printf("%sproto %u ", prefix, data->l4proto);
+ }
+
+ if (data->bitmask & XT_IPVS_VADDR) {
+ if (data->invert & XT_IPVS_VADDR)
+ printf("! ");
+
+ printf("%svaddr ", prefix);
+ ipvs_mt_dump_addr(&data->vaddr, &data->vmask, family, numeric);
+ }
+
+ if (data->bitmask & XT_IPVS_VPORT) {
+ if (data->invert & XT_IPVS_VPORT)
+ printf("! ");
+
+ printf("%svport %u ", prefix, ntohs(data->vport));
+ }
+
+ if (data->bitmask & XT_IPVS_DIR) {
+ if (data->invert & XT_IPVS_DIR)
+ printf("%svdir REPLY ", prefix);
+ else
+ printf("%svdir ORIGINAL ", prefix);
+ }
+
+ if (data->bitmask & XT_IPVS_METHOD) {
+ if (data->invert & XT_IPVS_METHOD)
+ printf("! ");
+
+ printf("%svmethod ", prefix);
+ switch (data->fwd_method) {
+ case IP_VS_CONN_F_DROUTE:
+ printf("GATE ");
+ break;
+ case IP_VS_CONN_F_TUNNEL:
+ printf("IPIP ");
+ break;
+ case IP_VS_CONN_F_MASQ:
+ printf("MASQ ");
+ break;
+ default:
+ /* Hu? */
+ printf("UNKNOWN ");
+ break;
+ }
+ }
+
+}
+
+static void ipvs_mt4_print(const void *ip, const struct xt_entry_match *match,
+ int numeric)
+{
+ const struct xt_ipvs *data = (const void *)match->data;
+ ipvs_mt_dump(ip, data, NFPROTO_IPV4, numeric, "");
+}
+
+static void ipvs_mt6_print(const void *ip, const struct xt_entry_match *match,
+ int numeric)
+{
+ const struct xt_ipvs *data = (const void *)match->data;
+ ipvs_mt_dump(ip, data, NFPROTO_IPV6, numeric, "");
+}
+
+static void ipvs_mt4_save(const void *ip, const struct xt_entry_match *match)
+{
+ const struct xt_ipvs *data = (const void *)match->data;
+ ipvs_mt_dump(ip, data, NFPROTO_IPV4, true, "--");
+}
+
+static void ipvs_mt6_save(const void *ip, const struct xt_entry_match *match)
+{
+ const struct xt_ipvs *data = (const void *)match->data;
+ ipvs_mt_dump(ip, data, NFPROTO_IPV6, true, "--");
+}
+
+static struct xtables_match ipvs_matches_reg[] = {
+ {
+ .version = XTABLES_VERSION,
+ .name = "ipvs",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .size = XT_ALIGN(sizeof(struct xt_ipvs)),
+ .userspacesize = XT_ALIGN(sizeof(struct xt_ipvs)),
+ .help = ipvs_mt_help,
+ .parse = ipvs_mt4_parse,
+ .final_check = ipvs_mt_check,
+ .print = ipvs_mt4_print,
+ .save = ipvs_mt4_save,
+ .extra_opts = ipvs_mt_opts,
+ },
+ {
+ .version = XTABLES_VERSION,
+ .name = "ipvs",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .size = XT_ALIGN(sizeof(struct xt_ipvs)),
+ .userspacesize = XT_ALIGN(sizeof(struct xt_ipvs)),
+ .help = ipvs_mt_help,
+ .parse = ipvs_mt6_parse,
+ .final_check = ipvs_mt_check,
+ .print = ipvs_mt6_print,
+ .save = ipvs_mt6_save,
+ .extra_opts = ipvs_mt_opts,
+ },
+};
+
+void _init(void)
+{
+ xtables_register_matches(ipvs_matches_reg,
+ ARRAY_SIZE(ipvs_matches_reg));
+}
diff --git a/extensions/libxt_ipvs.man b/extensions/libxt_ipvs.man
new file mode 100644
index 0000000..2c842d6
--- /dev/null
+++ b/extensions/libxt_ipvs.man
@@ -0,0 +1,7 @@
+ipvs tests where the packet was modified by IPVS, i.e. is the
+skb_buff->ipvs_property set.
+.TP
+[\fB!\fP] \fB--ipvs
+Does the packet have to IPVS property?
+
+TODO: Write proper documentation.
diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h
new file mode 100644
index 0000000..3a70289
--- /dev/null
+++ b/include/linux/netfilter/xt_ipvs.h
@@ -0,0 +1,32 @@
+#ifndef _XT_IPVS_H
+#define _XT_IPVS_H 1
+
+#ifndef _IP_VS_H
+#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */
+#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */
+#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */
+#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */
+#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */
+#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */
+#endif
+
+#define XT_IPVS_IPVS 0x01 /* this is implied by all other options */
+#define XT_IPVS_PROTO 0x02
+#define XT_IPVS_VADDR 0x04
+#define XT_IPVS_VPORT 0x08
+#define XT_IPVS_DIR 0x10
+#define XT_IPVS_METHOD 0x20
+#define XT_IPVS_MASK (0x40 - 1)
+#define XT_IPVS_ONCE_MASK (XT_IPVS_MASK & ~XT_IPVS_IPVS)
+
+struct xt_ipvs {
+ union nf_inet_addr vaddr, vmask;
+ __be16 vport;
+ u_int16_t l4proto;
+ u_int16_t fwd_method;
+
+ u_int8_t invert;
+ u_int8_t bitmask;
+};
+
+#endif /* _XT_IPVS_H */

2009-07-27 15:31:43

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 3/5] IPVS: make friends with nf_conntrack

From: Hannes Eder <[email protected]>

We aim at adding full NAT support to IPVS. With this patch it is
possible to use netfilters SNAT in POSTROUTING, especially with
xt_ipvs, e.g.:

iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 --vport 8080 \
-j SNAT --to-source 192.168.10.10

There might be other use cases.

Current Status:
- NAT works
- DR works
- IPIP not tested
- overall: needs more testing
- Performance impact?

Signed-off-by: Hannes Eder <[email protected]>

net/netfilter/ipvs/ip_vs_core.c | 36 ------------------------------------
net/netfilter/ipvs/ip_vs_xmit.c | 28 ++++++++++++++++++++++++++++
2 files changed, 28 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 8dddb17..b021464 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -518,26 +518,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}

-
-/*
- * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
- * chain, and is used for VS/NAT.
- * It detects packets for VS/NAT connections and sends the packets
- * immediately. This can avoid that iptable_nat mangles the packets
- * for VS/NAT.
- */
-static unsigned int ip_vs_post_routing(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- if (!skb->ipvs_property)
- return NF_ACCEPT;
- /* The packet was sent from IPVS, exit this chain */
- return NF_STOP;
-}
-
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -1428,14 +1408,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
- /* Before the netfilter connection tracking, exit from POST_ROUTING */
- {
- .hook = ip_vs_post_routing,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC-1,
- },
#ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
@@ -1464,14 +1436,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
- /* Before the netfilter connection tracking, exit from POST_ROUTING */
- {
- .hook = ip_vs_post_routing,
- .owner = THIS_MODULE,
- .pf = PF_INET6,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP6_PRI_NAT_SRC-1,
- },
#endif
};

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 425ab14..f3b6810 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -24,6 +24,7 @@
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>
@@ -344,6 +345,29 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
#endif

+static void
+ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+ if (skb->nfct) {
+ struct nf_conn *ct = (struct nf_conn *)skb->nfct;
+
+ if (ct != &nf_conntrack_untracked && !nf_ct_is_confirmed(ct)) {
+ /*
+ * The connection is not yet in the hashtable, so we
+ * update it. CIP->VIP will remain the same, so leave
+ * the tuple in IP_CT_DIR_ORIGINAL untouched. When the
+ * reply comes back from the real-server we will see
+ * RIP->DIP.
+ */
+
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3 = cp->daddr;
+ /* this will also take care for UDP and */
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port =
+ cp->dport;
+ }
+ }
+}
+
/*
* NAT transmitter (only for outside-to-inside nat forwarding)
* Not used for related ICMP
@@ -399,6 +423,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

+ ip_vs_update_conntrack(skb, cp);
+
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
MTU problem. */
@@ -475,6 +501,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,

IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

+ ip_vs_update_conntrack(skb, cp);
+
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
MTU problem. */

2009-07-27 15:31:24

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 2/5] netfilter: xt_ipvs (netfilter matcher for ipvs)

From: Hannes Eder <[email protected]>

This implements the kernel-space side of the iptables matcher xt_ipvs.

Signed-off-by: Hannes Eder <[email protected]>

include/linux/netfilter/xt_ipvs.h | 32 +++++++
net/netfilter/Kconfig | 8 ++
net/netfilter/Makefile | 1
net/netfilter/ipvs/ip_vs_proto.c | 1
net/netfilter/xt_ipvs.c | 171 +++++++++++++++++++++++++++++++++++++
5 files changed, 213 insertions(+), 0 deletions(-)
create mode 100644 include/linux/netfilter/xt_ipvs.h
create mode 100644 net/netfilter/xt_ipvs.c

diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h
new file mode 100644
index 0000000..3a70289
--- /dev/null
+++ b/include/linux/netfilter/xt_ipvs.h
@@ -0,0 +1,32 @@
+#ifndef _XT_IPVS_H
+#define _XT_IPVS_H 1
+
+#ifndef _IP_VS_H
+#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */
+#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */
+#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */
+#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */
+#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */
+#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */
+#endif
+
+#define XT_IPVS_IPVS 0x01 /* this is implied by all other options */
+#define XT_IPVS_PROTO 0x02
+#define XT_IPVS_VADDR 0x04
+#define XT_IPVS_VPORT 0x08
+#define XT_IPVS_DIR 0x10
+#define XT_IPVS_METHOD 0x20
+#define XT_IPVS_MASK (0x40 - 1)
+#define XT_IPVS_ONCE_MASK (XT_IPVS_MASK & ~XT_IPVS_IPVS)
+
+struct xt_ipvs {
+ union nf_inet_addr vaddr, vmask;
+ __be16 vport;
+ u_int16_t l4proto;
+ u_int16_t fwd_method;
+
+ u_int8_t invert;
+ u_int8_t bitmask;
+};
+
+#endif /* _XT_IPVS_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cb3ad74..677a880 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -678,6 +678,14 @@ config NETFILTER_XT_MATCH_IPRANGE

If unsure, say M.

+config NETFILTER_XT_MATCH_IPVS
+ tristate '"ipvs" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option allows you to match against ipvs properties of a packet.
+
+ To compile it as a module, choos M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_LENGTH
tristate '"length" match support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6282060..1c18e80 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index a01520e..84bf4e6 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -94,6 +94,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)

return NULL;
}
+EXPORT_SYMBOL(ip_vs_proto_get);


/*
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
new file mode 100644
index 0000000..00373d9
--- /dev/null
+++ b/net/netfilter/xt_ipvs.c
@@ -0,0 +1,171 @@
+/*
+ * xt_ipvs - kernel module to match ipvs properties of a packet
+ *
+ * Author: Hannes Eder <[email protected]>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#ifdef CONFIG_IP_VS_IPV6
+#include <net/ipv6.h>
+#endif
+#include <linux/types.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ipvs.h>
+
+#include <net/ip_vs.h>
+
+MODULE_AUTHOR("Hannes Eder <[email protected]>");
+MODULE_DESCRIPTION("Xtables: match ipvs");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ipvs");
+MODULE_ALIAS("ip6t_ipvs");
+
+/* borrowed from xt_conntrack */
+static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
+ const union nf_inet_addr *uaddr,
+ const union nf_inet_addr *umask,
+ unsigned int l3proto)
+{
+ if (l3proto == NFPROTO_IPV4)
+ return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
+#ifdef CONFIG_IP_VS_IPV6
+ else if (l3proto == NFPROTO_IPV6)
+ return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
+ &uaddr->in6) == 0;
+#endif
+ else
+ return false;
+}
+
+bool ipvs_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ const struct xt_ipvs *data = par->matchinfo;
+ const u_int8_t family = par->family;
+ struct ip_vs_iphdr iph;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn *cp;
+ int af;
+ bool match = true;
+
+ if (data->bitmask == XT_IPVS_IPVS) {
+ match = !!(skb->ipvs_property)
+ ^ !!(data->invert & XT_IPVS_IPVS);
+ goto out;
+ }
+
+ /* other flags than XT_IPVS_IPVS are set */
+ if (!skb->ipvs_property) {
+ match = false;
+ goto out;
+ }
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ af = AF_INET;
+ break;
+#ifdef CONFIG_IP_VS_IPV6
+ case htons(ETH_P_IPV6):
+ af = AF_INET6;
+ break;
+#endif
+ default:
+ match = false;
+ goto out;
+ }
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+ if (data->bitmask & XT_IPVS_PROTO)
+ if ((iph.protocol == data->l4proto)
+ ^ !(data->invert & XT_IPVS_PROTO)) {
+ match = false;
+ goto out;
+ }
+
+ pp = ip_vs_proto_get(iph.protocol);
+ if (unlikely(!pp)) {
+ match = false;
+ goto out;
+ }
+
+ /*
+ * Check if the packet belongs to an existing entry
+ */
+ /* TODO: why does it only match in inverse? */
+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 1 /* inverse */);
+ if (unlikely(!cp)) {
+ match = false;
+ goto out;
+ }
+
+ /*
+ * We found a connection, i.e. ct != 0, make sure to call
+ * __ip_vs_conn_put before returning. In our case jump to out_put_con.
+ */
+
+ if (data->bitmask & XT_IPVS_VPORT)
+ if ((cp->vport == data->vport)
+ ^ !(data->invert & XT_IPVS_VPORT)) {
+ match = false;
+ goto out_put_ct;
+ }
+
+ if (data->bitmask & XT_IPVS_DIR) {
+ /* TODO: implement */
+ }
+
+ if (data->bitmask & XT_IPVS_METHOD)
+ if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method)
+ ^ !(data->invert & XT_IPVS_METHOD)) {
+ match = false;
+ goto out_put_ct;
+ }
+
+ if (data->bitmask & XT_IPVS_VADDR) {
+ if (af != family) {
+ match = false;
+ goto out_put_ct;
+ }
+
+ if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, &data->vmask, af)
+ ^ !(data->invert & XT_IPVS_VADDR)) {
+ match = false;
+ goto out_put_ct;
+ }
+ }
+
+out_put_ct:
+ __ip_vs_conn_put(cp);
+out:
+ return match;
+}
+EXPORT_SYMBOL(ipvs_mt);
+
+static struct xt_match xt_ipvs_mt_reg[] __read_mostly = {
+ {
+ .name = "ipvs",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .match = ipvs_mt,
+ .matchsize = sizeof(struct xt_ipvs),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init ipvs_mt_init(void)
+{
+ return xt_register_matches(xt_ipvs_mt_reg,
+ ARRAY_SIZE(xt_ipvs_mt_reg));
+}
+
+static void __exit ipvs_mt_exit(void)
+{
+ xt_unregister_matches(xt_ipvs_mt_reg,
+ ARRAY_SIZE(xt_ipvs_mt_reg));
+}
+
+module_init(ipvs_mt_init);
+module_exit(ipvs_mt_exit);

2009-07-27 15:32:34

by Hannes Eder

[permalink] [raw]
Subject: [RFC][PATCH 4/5] IPVS: debugging output for ip_vs_update_conntrack

This patch is not ment to be merged, its mere for debugging during
development.

Signed-off-by: Hannes Eder <[email protected]>

net/netfilter/ipvs/ip_vs_xmit.c | 26 ++++++++++++++++++++++++++
1 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index f3b6810..ed6b811 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -24,6 +24,7 @@
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
+#define DEBUG
#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter_ipv4.h>

@@ -345,12 +346,31 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
#endif

+#ifdef DEBUG
+static void
+ip_vs_dump_ct_tuple(const struct nf_conntrack_tuple *t)
+{
+ /*
+ * We ignore the fact that this is not SMP-safe. Otherwise we would
+ * have to duplicate code and this code is not ment to stay here anyway.
+ */
+ printk(KERN_DEBUG "IPVS: ");
+ nf_ct_dump_tuple(t);
+}
+#endif
+
static void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
{
if (skb->nfct) {
struct nf_conn *ct = (struct nf_conn *)skb->nfct;

+#ifdef DEBUG
+ ip_vs_dump_ct_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ ip_vs_dump_ct_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ printk("IPVS: nfct confirmed=%d\n", nf_ct_is_confirmed(ct));
+#endif
+
if (ct != &nf_conntrack_untracked && !nf_ct_is_confirmed(ct)) {
/*
* The connection is not yet in the hashtable, so we
@@ -365,6 +385,12 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port =
cp->dport;
}
+
+#ifdef DEBUG
+ ip_vs_dump_ct_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ ip_vs_dump_ct_tuple(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+#endif
+
}
}

2009-07-27 18:14:33

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [RFC][PATCH 1/5] IPVS: prefix EnterFunction and LeaveFunction msg with "IPVS:"


On Monday 2009-07-27 15:46, Hannes Eder wrote:
>
>Now all printk messages from IPVS are prefixed with "IPVS:".
>
>+#define EnterFunction(level) \
>+ do { \
>+ if (level <= ip_vs_get_debug_level()) \
>+ printk(KERN_DEBUG "IPVS: Enter: %s, %s line %i\n", \
>+ __func__, __FILE__, __LINE__); \
>+ } while (0)
>+#define LeaveFunction(level) \
>+ do { \
>+ if (level <= ip_vs_get_debug_level()) \
>+ printk(KERN_DEBUG "IPVS: Leave: %s, %s line %i\n", \
>+ __func__, __FILE__, __LINE__); \
>+ } while (0)

I think you should rather make use of pr_fmt:

<before any #includes>
#define pr_fmt(x) "IPVS: " x

And then use pr_<level>("Elvis has left the building") in code. This
will add IPVS: automatically to all pr_* calls, alleviating the need
to manually type it into all printks.
Of course, if you only want it for the two defines here, scrap
my idea :)

2009-07-27 18:35:42

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/5] netfilter: xt_ipvs (netfilter matcher for ipvs)


On Monday 2009-07-27 15:46, Hannes Eder wrote:
>--- /dev/null
>+++ b/include/linux/netfilter/xt_ipvs.h
>@@ -0,0 +1,32 @@
>+#ifndef _XT_IPVS_H
>+#define _XT_IPVS_H 1
>+
>+#ifndef _IP_VS_H

Do the definitions (should probably be enums) exist in
very old <linux/ip_vs.h>? Then maybe you can get rid of them
from xt_ipvs.h.

>+#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */
>+#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */
>+#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */
>+#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */
>+#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */
>+#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */
>+#endif
>+
>+#define XT_IPVS_IPVS 0x01 /* this is implied by all other options */
>+#define XT_IPVS_PROTO 0x02
>+#define XT_IPVS_VADDR 0x04
>+#define XT_IPVS_VPORT 0x08
>+#define XT_IPVS_DIR 0x10
>+#define XT_IPVS_METHOD 0x20
>+#define XT_IPVS_MASK (0x40 - 1)
>+#define XT_IPVS_ONCE_MASK (XT_IPVS_MASK & ~XT_IPVS_IPVS)
>+
>+struct xt_ipvs {
>+ union nf_inet_addr vaddr, vmask;
>+ __be16 vport;
>+ u_int16_t l4proto;
>+ u_int16_t fwd_method;
>+
>+ u_int8_t invert;
>+ u_int8_t bitmask;
>+};

As per the "Writing Netfilter modules" e-book, __u16/__u8 is required.

>+config NETFILTER_XT_MATCH_IPVS
>+ tristate '"ipvs" match support'
>+ depends on NETFILTER_ADVANCED
>+ help
>+ This option allows you to match against ipvs properties of a packet.
>+
>+ To compile it as a module, choos M here. If unsure, say N.
>+

IMHO the "to compile it as a module, choos[e] M" is a relict from
old times and should just be dropped. These days, I stupilate,
everyone knows that M makes modules. And if it doesnot, then
it's not allowed by Kconfig :>

>+MODULE_AUTHOR("Hannes Eder <[email protected]>");
>+MODULE_DESCRIPTION("Xtables: match ipvs");

"Match IPVS connection properties" is what you previously stated,
and which I think is good. Use it here, too.

>+bool ipvs_mt(const struct sk_buff *skb, const struct xt_match_param *par)
>+{
>+ const struct xt_ipvs *data = par->matchinfo;
>+ const u_int8_t family = par->family;
>+ struct ip_vs_iphdr iph;
>+ struct ip_vs_protocol *pp;
>+ struct ip_vs_conn *cp;
>+ int af;
>+ bool match = true;
>+
>+ if (data->bitmask == XT_IPVS_IPVS) {
>+ match = !!(skb->ipvs_property)
>+ ^ !!(data->invert & XT_IPVS_IPVS);
>+ goto out;
>+ }

XT_IPVS_IPVS? What's that supposed to tell me...
Anyway, this can be made much shorter, given that all following
the "out" label is a return:

if (data->bitmask == XT_IPVS_IPVS)
return skb->ipvs_property ^ !!(data->invert & XT_IPVS_IPVS);

>+ /* other flags than XT_IPVS_IPVS are set */
>+ if (!skb->ipvs_property) {
>+ match = false;
>+ goto out;
>+ }

if (!skb->ipvs_property)
return false;

>+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
>+
>+ if (data->bitmask & XT_IPVS_PROTO)
>+ if ((iph.protocol == data->l4proto)
>+ ^ !(data->invert & XT_IPVS_PROTO)) {
>+ match = false;
>+ goto out;
>+ }

if (iph.protocol == data->l4proto ^
!(data->invert & XT_IPVS_PROTO))
return false;

>+ pp = ip_vs_proto_get(iph.protocol);
>+ if (unlikely(!pp)) {
>+ match = false;
>+ goto out;
>+ }

if (unlikely(pp == NULL))
return false;

Only after here we really need goto (to put pp).

>+ /*
>+ * Check if the packet belongs to an existing entry
>+ */
>+ /* TODO: why does it only match in inverse? */

FIXME: Figure it out :)

>+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 1 /* inverse */);
>+ if (unlikely(!cp)) {
>+ match = false;
>+ goto out;
>+ }
>+
>+ /*
>+ * We found a connection, i.e. ct != 0, make sure to call
>+ * __ip_vs_conn_put before returning. In our case jump to out_put_con.
>+ */
>+
>+ if (data->bitmask & XT_IPVS_VPORT)
>+ if ((cp->vport == data->vport)
>+ ^ !(data->invert & XT_IPVS_VPORT)) {
>+ match = false;
>+ goto out_put_ct;
>+ }
>+
>+ if (data->bitmask & XT_IPVS_DIR) {
>+ /* TODO: implement */

/* Yes please */

>+ }
>+
>+ if (data->bitmask & XT_IPVS_METHOD)
>+ if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method)
>+ ^ !(data->invert & XT_IPVS_METHOD)) {
>+ match = false;
>+ goto out_put_ct;
>+ }
>+
>+ if (data->bitmask & XT_IPVS_VADDR) {
>+ if (af != family) {
>+ match = false;
>+ goto out_put_ct;
>+ }
>+
>+ if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, &data->vmask, af)
>+ ^ !(data->invert & XT_IPVS_VADDR)) {

I think the operator (^ in this case) always goes onto the same line as the ')'
((a) ^\n(b), not ((a)\n ^(b)), though some legacy code seems to do it
random so-so.)

>+ return match;
>+}
>+EXPORT_SYMBOL(ipvs_mt);

What will be using ipvs_mt?

>+static struct xt_match xt_ipvs_mt_reg[] __read_mostly = {
>+ {
>+ .name = "ipvs",
>+ .revision = 0,
>+ .family = NFPROTO_UNSPEC,
>+ .match = ipvs_mt,
>+ .matchsize = sizeof(struct xt_ipvs),
>+ .me = THIS_MODULE,
>+ },
>+};

There is just one element, so no strict need for the [] array.

2009-07-27 18:40:26

by Jan Engelhardt

[permalink] [raw]
Subject: Re: [RFC][PATCH 5/5] libxt_ipvs: user space lib for netfilter matcher xt_ipvs


On Monday 2009-07-27 15:48, Hannes Eder wrote:
>+
>+ switch (c) {
>+ case '0': /* --ipvs */
>+ /* Nothing to do here. */

Then why add it?

>+ char buf[BUFSIZ];
>+
>+ if (family == NFPROTO_IPV4) {
>+ if (!numeric && addr->ip == 0) {
>+ printf("anywhere ");
>+ return;
>+ }
>+ if (numeric)
>+ strcpy(buf, xtables_ipaddr_to_numeric(&addr->in));
>+ else
>+ strcpy(buf, xtables_ipaddr_to_anyname(&addr->in));
>+ strcat(buf, xtables_ipmask_to_numeric(&mask->in));
>+ printf("%s ", buf);

There is no need to use the strcpy/strcat hacks. Just directly printf it.

>--- /dev/null
>+++ b/extensions/libxt_ipvs.man
>@@ -0,0 +1,7 @@
>+ipvs tests where the packet was modified by IPVS, i.e. is the
>+skb_buff->ipvs_property set.
>+.TP
>+[\fB!\fP] \fB--ipvs
>+Does the packet have to IPVS property?
>+
>+TODO: Write proper documentation.

Yes.

2009-07-28 11:15:12

by Hannes Eder

[permalink] [raw]
Subject: Re: [RFC][PATCH 1/5] IPVS: prefix EnterFunction and LeaveFunction msg with "IPVS:"

On Mon, Jul 27, 2009 at 20:14, Jan Engelhardt<[email protected]> wrote:
>
> On Monday 2009-07-27 15:46, Hannes Eder wrote:
>>
>>Now all printk messages from IPVS are prefixed with "IPVS:".
>>
>>+#define EnterFunction(level) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>>+ ? ? ?do { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>>+ ? ? ? ? ? ? ?if (level <= ip_vs_get_debug_level()) ? ? ? ? ? ? ? ? ? ? ? ? ?\
>>+ ? ? ? ? ? ? ? ? ? ? ?printk(KERN_DEBUG "IPVS: Enter: %s, %s line %i\n", ? ? \
>>+ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?__func__, __FILE__, __LINE__); ? ? ? ? ? ? ? ? \
>>+ ? ? ?} while (0)
>>+#define LeaveFunction(level) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>>+ ? ? ?do { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>>+ ? ? ? ? ? ? ?if (level <= ip_vs_get_debug_level()) ? ? ? ? ? ? ? ? ? ? ? ? ?\
>>+ ? ? ? ? ? ? ? ? ? ? ?printk(KERN_DEBUG "IPVS: Leave: %s, %s line %i\n", ? ? \
>>+ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?__func__, __FILE__, __LINE__); ? ? ? ? ? ? ? ? \
>>+ ? ? ?} while (0)
>
> I think you should rather make use of pr_fmt:
>
> <before any #includes>
> #define pr_fmt(x) "IPVS: " x
>
> And then use pr_<level>("Elvis has left the building") in code. This
> will add IPVS: automatically to all pr_* calls, alleviating the need
> to manually type it into all printks.

I like this idea. I'll come up with an extra patch, it does not fit
into this series anyway.

> Of course, if you only want it for the two defines here, scrap
> my idea :)
>

Cheers,
-Hannes

2009-07-28 12:34:34

by Hannes Eder

[permalink] [raw]
Subject: Re: [RFC][PATCH 5/5] libxt_ipvs: user space lib for netfilter matcher xt_ipvs

On Mon, Jul 27, 2009 at 20:40, Jan Engelhardt<[email protected]> wrote:
>
> On Monday 2009-07-27 15:48, Hannes Eder wrote:
>>+
>>+ ? ? ?switch (c) {
>>+ ? ? ?case '0': /* --ipvs */
>>+ ? ? ? ? ? ? ?/* Nothing to do here. */
>
> ? ? ? ? ? ? ? ?Then why add it?

In the 'default' branch is an assert(false); Call it defensive programming.

>>+ ? ? ?char buf[BUFSIZ];
>>+
>>+ ? ? ?if (family == NFPROTO_IPV4) {
>>+ ? ? ? ? ? ? ?if (!numeric && addr->ip == 0) {
>>+ ? ? ? ? ? ? ? ? ? ? ?printf("anywhere ");
>>+ ? ? ? ? ? ? ? ? ? ? ?return;
>>+ ? ? ? ? ? ? ?}
>>+ ? ? ? ? ? ? ?if (numeric)
>>+ ? ? ? ? ? ? ? ? ? ? ?strcpy(buf, xtables_ipaddr_to_numeric(&addr->in));
>>+ ? ? ? ? ? ? ?else
>>+ ? ? ? ? ? ? ? ? ? ? ?strcpy(buf, xtables_ipaddr_to_anyname(&addr->in));
>>+ ? ? ? ? ? ? ?strcat(buf, xtables_ipmask_to_numeric(&mask->in));
>>+ ? ? ? ? ? ? ?printf("%s ", buf);
>
> There is no need to use the strcpy/strcat hacks. Just directly printf it.

As the comment says: "Shamelessly copied from libxt_conntrack.c". ;)

Furthermore I think it is good that way, because
xtables_ipaddr_to_numeric writes to a local static buffer, and
xtables_ipaddr_to_numeric might get called by
xtables_ipmask_to_numeric.

>>--- /dev/null
>>+++ b/extensions/libxt_ipvs.man
>>@@ -0,0 +1,7 @@
>>+ipvs tests where the packet was modified by IPVS, i.e. is the
>>+skb_buff->ipvs_property set.
>>+.TP
>>+[\fB!\fP] \fB--ipvs
>>+Does the packet have to IPVS property?
>>+
>>+TODO: Write proper documentation.
>
> Yes.

Sir, yes, sir ;) I am working on that.

Thanks,
-Hannes

2009-07-28 14:55:11

by Hannes Eder

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/5] netfilter: xt_ipvs (netfilter matcher for ipvs)

On Mon, Jul 27, 2009 at 20:35, Jan Engelhardt<[email protected]> wrote:
>
> On Monday 2009-07-27 15:46, Hannes Eder wrote:
>>--- /dev/null
>>+++ b/include/linux/netfilter/xt_ipvs.h
>>@@ -0,0 +1,32 @@
>>+#ifndef _XT_IPVS_H
>>+#define _XT_IPVS_H 1
>>+
>>+#ifndef _IP_VS_H
>
> Do the definitions (should probably be enums) exist in
> very old <linux/ip_vs.h>? Then maybe you can get rid of them
> from xt_ipvs.h.

Definitions removed from xt_ipvs.h. For xt_ipvs.c I just included
linux/ip_vs.h. For libxt_ipvs (user space library) I added the entire
linux/ip_vs.h. Do you think this is better?

>>+#define IP_VS_CONN_F_FWD_MASK 0x0007 ? ? ? ? ?/* mask for the fwd methods */
>>+#define IP_VS_CONN_F_MASQ ? ? 0x0000 ? ? ? ? ?/* masquerading/NAT */
>>+#define IP_VS_CONN_F_LOCALNODE ? ? ? ?0x0001 ? ? ? ? ?/* local node */
>>+#define IP_VS_CONN_F_TUNNEL ? 0x0002 ? ? ? ? ?/* tunneling */
>>+#define IP_VS_CONN_F_DROUTE ? 0x0003 ? ? ? ? ?/* direct routing */
>>+#define IP_VS_CONN_F_BYPASS ? 0x0004 ? ? ? ? ?/* cache bypass */
>>+#endif
>>+
>>+#define XT_IPVS_IPVS ? ? ? ? ?0x01 /* this is implied by all other options */
>>+#define XT_IPVS_PROTO ? ? ? ? 0x02
>>+#define XT_IPVS_VADDR ? ? ? ? 0x04
>>+#define XT_IPVS_VPORT ? ? ? ? 0x08
>>+#define XT_IPVS_DIR ? ? ? ? ? 0x10
>>+#define XT_IPVS_METHOD ? ? ? ? ? ? ? ?0x20
>>+#define XT_IPVS_MASK ? ? ? ? ?(0x40 - 1)
>>+#define XT_IPVS_ONCE_MASK ? ? (XT_IPVS_MASK & ~XT_IPVS_IPVS)
>>+
>>+struct xt_ipvs {
>>+ ? ? ?union nf_inet_addr ? ? ?vaddr, vmask;
>>+ ? ? ?__be16 ? ? ? ? ? ? ? ? ?vport;
>>+ ? ? ?u_int16_t ? ? ? ? ? ? ? l4proto;
>>+ ? ? ?u_int16_t ? ? ? ? ? ? ? fwd_method;
>>+
>>+ ? ? ?u_int8_t invert;
>>+ ? ? ?u_int8_t bitmask;
>>+};
>
> As per the "Writing Netfilter modules" e-book, __u16/__u8 is required.

Done.

>>+config NETFILTER_XT_MATCH_IPVS
>>+ ? ? ?tristate '"ipvs" match support'
>>+ ? ? ?depends on NETFILTER_ADVANCED
>>+ ? ? ?help
>>+ ? ? ? ?This option allows you to match against ipvs properties of a packet.
>>+
>>+ ? ? ? ? ?To compile it as a module, choos M here. ?If unsure, say N.
>>+
>IMHO the "to compile it as a module, choos[e] M" is a relict from
> old times and should just be dropped. These days, I stupilate,
> everyone knows that M makes modules. And if it doesnot, then
> it's not allowed by Kconfig :>

Done.

>>+MODULE_AUTHOR("Hannes Eder <[email protected]>");
>>+MODULE_DESCRIPTION("Xtables: match ipvs");
>
> "Match IPVS connection properties" is what you previously stated,
> and which I think is good. Use it here, too.

Done.

>>+bool ipvs_mt(const struct sk_buff *skb, const struct xt_match_param *par)
>>+{
>>+ ? ? ?const struct xt_ipvs *data = par->matchinfo;
>>+ ? ? ?const u_int8_t family = par->family;
>>+ ? ? ?struct ip_vs_iphdr iph;
>>+ ? ? ?struct ip_vs_protocol *pp;
>>+ ? ? ?struct ip_vs_conn *cp;
>>+ ? ? ?int af;
>>+ ? ? ?bool match = true;
>>+
>>+ ? ? ?if (data->bitmask == XT_IPVS_IPVS) {
>>+ ? ? ? ? ? ? ?match = !!(skb->ipvs_property)
>>+ ? ? ? ? ? ? ? ? ? ? ?^ !!(data->invert & XT_IPVS_IPVS);
>>+ ? ? ? ? ? ? ?goto out;
>>+ ? ? ?}
>
> XT_IPVS_IPVS? What's that supposed to tell me...

Just matching aginst the skb->ipvs_property, I changed to name to
XT_IPVS_IPVS_PROPERTY.

> Anyway, this can be made much shorter, given that all following
> the "out" label is a return:

You are right. For the moment, I'll keep it as is. Having a single
entry/exit point makes it easier to instrument the function for
debugging.

>
> ? ? ? ?if (data->bitmask == XT_IPVS_IPVS)
> ? ? ? ? ? ? ? ?return skb->ipvs_property ^ !!(data->invert & XT_IPVS_IPVS);
>
>>+ ? ? ?/* other flags than XT_IPVS_IPVS are set */
>>+ ? ? ?if (!skb->ipvs_property) {
>>+ ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ?goto out;
>>+ ? ? ?}
>
> ? ? ? ?if (!skb->ipvs_property)
> ? ? ? ? ? ? ? ?return false;
>
>>+ ? ? ?ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
>>+
>>+ ? ? ?if (data->bitmask & XT_IPVS_PROTO)
>>+ ? ? ? ? ? ? ?if ((iph.protocol == data->l4proto)
>>+ ? ? ? ? ? ? ? ? ?^ !(data->invert & XT_IPVS_PROTO)) {
>>+ ? ? ? ? ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ? ? ? ? ?goto out;
>>+ ? ? ? ? ? ? ?}
>
> ? ? ? ? ? ? ? ?if (iph.protocol == data->l4proto ^
> ? ? ? ? ? ? ? ? ? ?!(data->invert & XT_IPVS_PROTO))
> ? ? ? ? ? ? ? ? ? ? ? ?return false;
>
>>+ ? ? ?pp = ip_vs_proto_get(iph.protocol);
>>+ ? ? ?if (unlikely(!pp)) {
>>+ ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ?goto out;
>>+ ? ? ?}
>
> ? ? ? ?if (unlikely(pp == NULL))
> ? ? ? ? ? ? ? ?return false;
>
> Only after here we really need goto (to put pp).
>
>>+ ? ? ?/*
>>+ ? ? ? * Check if the packet belongs to an existing entry
>>+ ? ? ? */
>>+ ? ? ?/* TODO: why does it only match in inverse? */
>
> FIXME: Figure it out :)

Still working on that ;)

>>+ ? ? ?cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 1 /* inverse */);
>>+ ? ? ?if (unlikely(!cp)) {
>>+ ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ?goto out;
>>+ ? ? ?}
>>+
>>+ ? ? ?/*
>>+ ? ? ? * We found a connection, i.e. ct != 0, make sure to call
>>+ ? ? ? * __ip_vs_conn_put before returning. ?In our case jump to out_put_con.
>>+ ? ? ? */
>>+
>>+ ? ? ?if (data->bitmask & XT_IPVS_VPORT)
>>+ ? ? ? ? ? ? ?if ((cp->vport == data->vport)
>>+ ? ? ? ? ? ? ? ? ?^ !(data->invert & XT_IPVS_VPORT)) {
>>+ ? ? ? ? ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ? ? ? ? ?goto out_put_ct;
>>+ ? ? ? ? ? ? ?}
>>+
>>+ ? ? ?if (data->bitmask & XT_IPVS_DIR) {
>>+ ? ? ? ? ? ? ?/* TODO: implement */
>
> ? ? ? ? ? ? ? ?/* Yes please */

Here we go:

if (data->bitmask & XT_IPVS_DIR) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

if (ct == NULL || ct == &nf_conntrack_untracked) {
match = false;
goto out_put_cp;
}

if ((ctinfo >= IP_CT_IS_REPLY) ^
!!(data->invert & XT_IPVS_DIR)) {
match = false;
goto out_put_cp;
}
}

>>+ ? ? ?}
>>+
>>+ ? ? ?if (data->bitmask & XT_IPVS_METHOD)
>>+ ? ? ? ? ? ? ?if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method)
>>+ ? ? ? ? ? ? ? ? ?^ !(data->invert & XT_IPVS_METHOD)) {
>>+ ? ? ? ? ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ? ? ? ? ?goto out_put_ct;
>>+ ? ? ? ? ? ? ?}
>>+
>>+ ? ? ?if (data->bitmask & XT_IPVS_VADDR) {
>>+ ? ? ? ? ? ? ?if (af != family) {
>>+ ? ? ? ? ? ? ? ? ? ? ?match = false;
>>+ ? ? ? ? ? ? ? ? ? ? ?goto out_put_ct;
>>+ ? ? ? ? ? ? ?}
>>+
>>+ ? ? ? ? ? ? ?if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, &data->vmask, af)
>>+ ? ? ? ? ? ? ? ? ?^ !(data->invert & XT_IPVS_VADDR)) {
>
> I think the operator (^ in this case) always goes onto the same line as the ')'
> ((a) ^\n(b), not ((a)\n ^(b)), though some legacy code seems to do it
> random so-so.)

I changed it as suggested, though I could not find anything in
Documentation/CodingStyle about that.

>>+ ? ? ?return match;
>>+}
>>+EXPORT_SYMBOL(ipvs_mt);
>
> What will be using ipvs_mt?

Nobody, EXPORT_SYMBOL removed.

>>+static struct xt_match xt_ipvs_mt_reg[] __read_mostly = {
>>+ ? ? ?{
>>+ ? ? ? ? ? ? ?.name ? ? ? = "ipvs",
>>+ ? ? ? ? ? ? ?.revision ? = 0,
>>+ ? ? ? ? ? ? ?.family ? ? = NFPROTO_UNSPEC,
>>+ ? ? ? ? ? ? ?.match ? ? ?= ipvs_mt,
>>+ ? ? ? ? ? ? ?.matchsize ?= sizeof(struct xt_ipvs),
>>+ ? ? ? ? ? ? ?.me ? ? ? ? = THIS_MODULE,
>>+ ? ? ?},
>>+};
>
> There is just one element, so no strict need for the [] array.

You are right. Done.

Thanks for the review.

Cheers,
-Hannes