2021-04-23 13:57:26

by Pavel Balaev

[permalink] [raw]
Subject: [PATCH v4 net-next] net: multipath routing: configurable seed

Add the ability for a user to assign a seed value to multipath route hashes.
Currently the kernel uses a random seed value to prevent hash-flooding DoS
attacks; however, this rules out some use cases, e.g.:

+-------+        +------+        +--------+
|       |-eth0---| FW0  |---eth0-|        |
|       |        +------+        |        |
|  GW0  |ECMP                ECMP|  GW1   |
|       |        +------+        |        |
|       |-eth1---| FW1  |---eth1-|        |
+-------+        +------+        +--------+

In this use case, two ECMP routers balance traffic between two firewalls.
If the response of a flow is sent over a different channel than the request,
the flow will be dropped, because the keep-state rules were created on
the other firewall.

This patch adds a sysctl variable: net.ipv4|ipv6.fib_multipath_hash_seed.
A user can set the same seed value on GW0 and GW1 so that traffic is
mirror-balanced. By default, a random value is used.

Signed-off-by: Balaev Pavel <[email protected]>
---
Documentation/networking/ip-sysctl.rst | 14 +
include/net/flow_dissector.h | 4 +
include/net/netns/ipv4.h | 2 +
include/net/netns/ipv6.h | 3 +
net/core/flow_dissector.c | 9 +
net/ipv4/route.c | 10 +-
net/ipv4/sysctl_net_ipv4.c | 97 +++++
net/ipv6/route.c | 10 +-
net/ipv6/sysctl_net_ipv6.c | 96 +++++
.../testing/selftests/net/forwarding/Makefile | 1 +
tools/testing/selftests/net/forwarding/lib.sh | 41 +++
.../net/forwarding/router_mpath_seed.sh | 347 ++++++++++++++++++
12 files changed, 632 insertions(+), 2 deletions(-)
create mode 100755 tools/testing/selftests/net/forwarding/router_mpath_seed.sh

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 9701906f6..d1a67e6fe 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -100,6 +100,20 @@ fib_multipath_hash_policy - INTEGER
- 1 - Layer 4
- 2 - Layer 3 or inner Layer 3 if present

+fib_multipath_hash_seed - STRING
+ Controls the seed value for multipath route hashes. By default
+ a random value is used. Only valid for kernels built with
+ CONFIG_IP_ROUTE_MULTIPATH enabled.
+
+ Valid format: two hex values separated by a comma, or the
+ keyword "random".
+
+ Example to generate the seed value::
+
+ RAND=$(openssl rand -hex 16) && echo "${RAND:0:16},${RAND:16:16}"
+
+ Default: "random"
+
fib_sync_mem - UNSIGNED INTEGER
Amount of dirty memory from fib entries that can be backlogged before
synchronize_rcu is forced.
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index ffd386ea0..2bd4e28de 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -348,6 +348,10 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
}

u32 flow_hash_from_keys(struct flow_keys *keys);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+u32 flow_multipath_hash_from_keys(struct flow_keys *keys,
+ const siphash_key_t *seed);
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp,
const void *data, int thoff, int hlen);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 87e161249..cb2830432 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -222,6 +222,8 @@ struct netns_ipv4 {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
u8 sysctl_fib_multipath_use_neigh;
u8 sysctl_fib_multipath_hash_policy;
+ int sysctl_fib_multipath_hash_seed;
+ siphash_key_t __rcu *fib_multipath_hash_seed_ctx;
#endif

struct fib_notifier_ops *notifier_ops;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 808f0f79e..6bb383b0a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -8,6 +8,7 @@
#ifndef __NETNS_IPV6_H__
#define __NETNS_IPV6_H__
#include <net/dst_ops.h>
+#include <linux/siphash.h>
#include <uapi/linux/icmpv6.h>

struct ctl_table_header;
@@ -30,6 +31,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_min_advmss;
u8 bindv6only;
u8 multipath_hash_policy;
+ u8 multipath_hash_seed;
u8 flowlabel_consistency;
u8 auto_flowlabels;
int icmpv6_time;
@@ -107,6 +109,7 @@ struct netns_ipv6 {
struct fib_rules_ops *mr6_rules_ops;
#endif
#endif
+ siphash_key_t __rcu *multipath_hash_seed_ctx;
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5985029e4..c9b53cb2b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1560,6 +1560,15 @@ u32 flow_hash_from_keys(struct flow_keys *keys)
}
EXPORT_SYMBOL(flow_hash_from_keys);

+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+u32 flow_multipath_hash_from_keys(struct flow_keys *keys,
+ const siphash_key_t *seed)
+{
+ return __flow_hash_from_keys(keys, seed);
+}
+EXPORT_SYMBOL(flow_multipath_hash_from_keys);
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
struct flow_keys *keys,
const siphash_key_t *keyval)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6787c55f..79866b429 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1912,6 +1912,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
{
u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
struct flow_keys hash_keys;
+ siphash_key_t *seed_ctx;
u32 mhash;

switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
@@ -1989,7 +1990,14 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
break;
}
- mhash = flow_hash_from_keys(&hash_keys);
+
+ rcu_read_lock();
+ seed_ctx = rcu_dereference(net->ipv4.fib_multipath_hash_seed_ctx);
+ if (seed_ctx)
+ mhash = flow_multipath_hash_from_keys(&hash_keys, seed_ctx);
+ else
+ mhash = flow_hash_from_keys(&hash_keys);
+ rcu_read_unlock();

if (multipath_hash)
mhash = jhash_2words(mhash, multipath_hash, 0);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a09e466ce..5dff59733 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -447,6 +447,8 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
+#define FIB_MULTIPATH_SEED_KEY_LENGTH sizeof(siphash_key_t)
+#define FIB_MULTIPATH_SEED_RANDOM "random"
static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
@@ -461,6 +463,93 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,

return ret;
}
+
+static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_fib_multipath_hash_seed);
+ /* maxlen to print the keys in hex (*2) and a comma in between keys. */
+ struct ctl_table tbl = {
+ .maxlen = ((FIB_MULTIPATH_SEED_KEY_LENGTH * 2) + 2)
+ };
+ siphash_key_t user_key, *ctx;
+ __le64 key[2];
+ int ret;
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
+
+ if (!tbl.data)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(net->ipv4.fib_multipath_hash_seed_ctx);
+ if (ctx) {
+ put_unaligned_le64(ctx->key[0], &key[0]);
+ put_unaligned_le64(ctx->key[1], &key[1]);
+ user_key.key[0] = le64_to_cpu(key[0]);
+ user_key.key[1] = le64_to_cpu(key[1]);
+
+ snprintf(tbl.data, tbl.maxlen, "%016llx,%016llx",
+ user_key.key[0], user_key.key[1]);
+ } else {
+ snprintf(tbl.data, tbl.maxlen, "%s", FIB_MULTIPATH_SEED_RANDOM);
+ }
+ rcu_read_unlock();
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ siphash_key_t *new_ctx, *old_ctx;
+
+ if (!strcmp(tbl.data, FIB_MULTIPATH_SEED_RANDOM)) {
+ rtnl_lock();
+ old_ctx = rtnl_dereference(net->ipv4.fib_multipath_hash_seed_ctx);
+ RCU_INIT_POINTER(net->ipv4.fib_multipath_hash_seed_ctx, NULL);
+ rtnl_unlock();
+ if (old_ctx) {
+ synchronize_net();
+ kfree_sensitive(old_ctx);
+ }
+
+ pr_debug("multipath hash seed set to random value\n");
+ goto out;
+ }
+
+ if (sscanf(tbl.data, "%llx,%llx", user_key.key, user_key.key + 1) != 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ key[0] = cpu_to_le64(user_key.key[0]);
+ key[1] = cpu_to_le64(user_key.key[1]);
+ pr_debug("multipath hash seed set to 0x%llx,0x%llx\n",
+ user_key.key[0], user_key.key[1]);
+
+ new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
+ if (!new_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ new_ctx->key[0] = get_unaligned_le64(&key[0]);
+ new_ctx->key[1] = get_unaligned_le64(&key[1]);
+
+ rtnl_lock();
+ old_ctx = rtnl_dereference(net->ipv4.fib_multipath_hash_seed_ctx);
+ rcu_assign_pointer(net->ipv4.fib_multipath_hash_seed_ctx, new_ctx);
+ rtnl_unlock();
+ if (old_ctx) {
+ synchronize_net();
+ kfree_sensitive(old_ctx);
+ }
+ }
+
+out:
+ kfree(tbl.data);
+ return ret;
+}
#endif

static struct ctl_table ipv4_table[] = {
@@ -1052,6 +1141,14 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "fib_multipath_hash_seed",
+ .data = &init_net.ipv4.sysctl_fib_multipath_hash_seed,
+ /* maxlen to print the keys in hex (*2) and a comma in between keys. */
+ .maxlen = (FIB_MULTIPATH_SEED_KEY_LENGTH * 2) + 2,
+ .mode = 0600,
+ .proc_handler = proc_fib_multipath_hash_seed,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 28801ae80..70c488812 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2331,6 +2331,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ siphash_key_t *seed_ctx;
u32 mhash;

switch (ip6_multipath_hash_policy(net)) {
@@ -2414,7 +2415,14 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
}
break;
}
- mhash = flow_hash_from_keys(&hash_keys);
+
+ rcu_read_lock();
+ seed_ctx = rcu_dereference(net->ipv6.multipath_hash_seed_ctx);
+ if (seed_ctx)
+ mhash = flow_multipath_hash_from_keys(&hash_keys, seed_ctx);
+ else
+ mhash = flow_hash_from_keys(&hash_keys);
+ rcu_read_unlock();

return mhash >> 1;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 27102c3d6..349251cb7 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -40,6 +40,94 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}

+#define RT6_MULTIPATH_SEED_KEY_LENGTH sizeof(siphash_key_t)
+#define RT6_MULTIPATH_SEED_RANDOM "random"
+static int proc_rt6_multipath_hash_seed(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_seed);
+ /* maxlen to print the keys in hex (*2) and a comma in between keys. */
+ struct ctl_table tbl = {
+ .maxlen = ((RT6_MULTIPATH_SEED_KEY_LENGTH * 2) + 2)
+ };
+ siphash_key_t user_key, *ctx;
+ __le64 key[2];
+ int ret;
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
+
+ if (!tbl.data)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(net->ipv6.multipath_hash_seed_ctx);
+ if (ctx) {
+ put_unaligned_le64(ctx->key[0], &key[0]);
+ put_unaligned_le64(ctx->key[1], &key[1]);
+ user_key.key[0] = le64_to_cpu(key[0]);
+ user_key.key[1] = le64_to_cpu(key[1]);
+
+ snprintf(tbl.data, tbl.maxlen, "%016llx,%016llx",
+ user_key.key[0], user_key.key[1]);
+ } else {
+ snprintf(tbl.data, tbl.maxlen, "%s", RT6_MULTIPATH_SEED_RANDOM);
+ }
+ rcu_read_unlock();
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ siphash_key_t *new_ctx, *old_ctx;
+
+ if (!strcmp(tbl.data, RT6_MULTIPATH_SEED_RANDOM)) {
+ rtnl_lock();
+ old_ctx = rtnl_dereference(net->ipv6.multipath_hash_seed_ctx);
+ RCU_INIT_POINTER(net->ipv6.multipath_hash_seed_ctx, NULL);
+ rtnl_unlock();
+ if (old_ctx) {
+ synchronize_net();
+ kfree_sensitive(old_ctx);
+ }
+
+ pr_debug("multipath hash seed set to random value\n");
+ goto out;
+ }
+
+ if (sscanf(tbl.data, "%llx,%llx", user_key.key, user_key.key + 1) != 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ key[0] = cpu_to_le64(user_key.key[0]);
+ key[1] = cpu_to_le64(user_key.key[1]);
+ pr_debug("multipath hash seed set to 0x%llx,0x%llx\n",
+ user_key.key[0], user_key.key[1]);
+
+ new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
+ if (!new_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ new_ctx->key[0] = get_unaligned_le64(&key[0]);
+ new_ctx->key[1] = get_unaligned_le64(&key[1]);
+
+ rtnl_lock();
+ old_ctx = rtnl_dereference(net->ipv6.multipath_hash_seed_ctx);
+ rcu_assign_pointer(net->ipv6.multipath_hash_seed_ctx, new_ctx);
+ rtnl_unlock();
+ if (old_ctx) {
+ synchronize_net();
+ kfree_sensitive(old_ctx);
+ }
+ }
+
+out:
+ kfree(tbl.data);
+ return ret;
+}
+
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
@@ -151,6 +239,14 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "fib_multipath_hash_seed",
+ .data = &init_net.ipv6.sysctl.multipath_hash_seed,
+ /* maxlen to print the keys in hex (*2) and a comma in between keys. */
+ .maxlen = (RT6_MULTIPATH_SEED_KEY_LENGTH * 2) + 2,
+ .mode = 0600,
+ .proc_handler = proc_rt6_multipath_hash_seed,
+ },
{
.procname = "seg6_flowlabel",
.data = &init_net.ipv6.sysctl.seg6_flowlabel,
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index d97bd6889..080af970c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -38,6 +38,7 @@ TEST_PROGS = bridge_igmp.sh \
router_mpath_nh.sh \
router_multicast.sh \
router_multipath.sh \
+ router_mpath_seed.sh \
router.sh \
router_vid_1.sh \
sch_ets.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983..15d5b8bfa 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -10,6 +10,7 @@ PING6=${PING6:=ping6}
MZ=${MZ:=mausezahn}
ARPING=${ARPING:=arping}
TEAMD=${TEAMD:=teamd}
+OPENSSL=${OPENSSL:=openssl}
WAIT_TIME=${WAIT_TIME:=5}
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
@@ -693,11 +694,51 @@ link_stats_tx_packets_get()
link_stats_get $1 tx packets
}

+link_stats_tx_packets_get()
+{
+ link_stats_get $1 tx packets
+}
+
link_stats_rx_errors_get()
{
link_stats_get $1 rx errors
}

+ns_link_stats_get()
+{
+ local netns=$1; shift
+ local if_name=$1; shift
+ local dir=$1; shift
+ local stat=$1; shift
+
+ ip netns exec $netns ip -j -s link show dev $if_name \
+ | jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+ns_link_stats_tx_packets_get()
+{
+ local netns=$1; shift
+ local if_name=$1; shift
+
+ ns_link_stats_get $netns $if_name tx packets
+}
+
+ns_link_stats_tx_packets_get()
+{
+ local netns=$1; shift
+ local if_name=$1; shift
+
+ ns_link_stats_get $netns $if_name tx packets
+}
+
+ns_link_stats_rx_errors_get()
+{
+ local netns=$1; shift
+ local if_name=$1; shift
+
+ ns_link_stats_get $netns $if_name rx errors
+}
+
tc_rule_stats_get()
{
local dev=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_seed.sh b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh
new file mode 100755
index 000000000..b2f99f428
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="multipath_seed_test"
+NUM_NETIFS=8
+source lib.sh
+
+veth_prepare()
+{
+ ip link add ecmp1l type veth peer name ecmp1r
+ ip link add ecmp2l type veth peer name ecmp2r
+ ip link add ecmphost1l type veth peer name ecmphost1r
+ ip link add ecmphost2l type veth peer name ecmphost2r
+}
+
+cl1_create()
+{
+ local ns_exec="ip netns exec ecmp_cl1"
+
+ ip netns add ecmp_cl1
+ ip l set dev ecmphost1l netns ecmp_cl1
+ $ns_exec ip l set dev ecmphost1l up
+ $ns_exec ip a a 10.100.0.2/30 dev ecmphost1l
+ $ns_exec ip a a 2001:db8:3::2/64 dev ecmphost1l
+ $ns_exec ip r add default via 10.100.0.1
+ $ns_exec ip r add default via 2001:db8:3::1
+}
+
+cl2_create()
+{
+ local ns_exec="ip netns exec ecmp_cl2"
+
+ ip netns add ecmp_cl2
+ ip l set dev ecmphost2l netns ecmp_cl2
+ $ns_exec ip l set dev ecmphost2l up
+ $ns_exec ip a a 10.200.0.2/30 dev ecmphost2l
+ $ns_exec ip a a 2001:db8:4::2/64 dev ecmphost2l
+ $ns_exec ip r add default via 10.200.0.1
+ $ns_exec ip r add default via 2001:db8:4::1
+}
+
+r1_create()
+{
+ local ns_exec="ip netns exec ecmp1"
+
+ ip netns add ecmp1
+ ip l set dev ecmp1l netns ecmp1
+ ip l set dev ecmp2l netns ecmp1
+ ip l set dev ecmphost1r netns ecmp1
+ $ns_exec ip l set dev ecmphost1r up
+ $ns_exec ip l set dev ecmp1l up
+ $ns_exec ip l set dev ecmp2l up
+ $ns_exec ip a a 10.100.0.1/30 dev ecmphost1r
+ $ns_exec ip a a 10.10.0.1/30 dev ecmp1l
+ $ns_exec ip a a 10.20.0.1/30 dev ecmp2l
+ $ns_exec ip a a 2001:db8:3::1/64 dev ecmphost1r
+ $ns_exec ip a a 2001:db8:1::1/64 dev ecmp1l
+ $ns_exec ip a a 2001:db8:2::1/64 dev ecmp2l
+ $ns_exec sysctl -q net.ipv4.ip_forward=1
+ $ns_exec sysctl -q net.ipv6.conf.all.forwarding=1
+ $ns_exec sysctl -q net.ipv4.fib_multipath_hash_policy=1
+ $ns_exec sysctl -q net.ipv6.fib_multipath_hash_policy=1
+ $ns_exec ip route add 10.200.0.0/30 nexthop via 10.10.0.2 \
+ weight 1 nexthop via 10.20.0.2 weight 1
+ $ns_exec ip route add 2001:db8:4::/64 nexthop via 2001:db8:1::2 \
+ weight 1 nexthop via 2001:db8:2::2 weight 1
+}
+
+r2_create()
+{
+ local ns_exec="ip netns exec ecmp2"
+
+ ip netns add ecmp2
+ ip l set dev ecmp1r netns ecmp2
+ ip l set dev ecmp2r netns ecmp2
+ ip l set dev ecmphost2r netns ecmp2
+ $ns_exec ip l set dev ecmphost2r up
+ $ns_exec ip l set dev ecmp1r up
+ $ns_exec ip l set dev ecmp2r up
+ $ns_exec ip a a 10.200.0.1/30 dev ecmphost2r
+ $ns_exec ip a a 10.10.0.2/30 dev ecmp1r
+ $ns_exec ip a a 10.20.0.2/30 dev ecmp2r
+ $ns_exec ip a a 2001:db8:4::1/64 dev ecmphost2r
+ $ns_exec ip a a 2001:db8:1::2/64 dev ecmp1r
+ $ns_exec ip a a 2001:db8:2::2/64 dev ecmp2r
+ $ns_exec sysctl -q net.ipv4.ip_forward=1
+ $ns_exec sysctl -q net.ipv6.conf.all.forwarding=1
+ $ns_exec sysctl -q net.ipv4.fib_multipath_hash_policy=1
+ $ns_exec sysctl -q net.ipv6.fib_multipath_hash_policy=1
+ $ns_exec ip route add 10.100.0.0/30 nexthop via 10.10.0.1 \
+ weight 1 nexthop via 10.20.0.1 weight 1
+ $ns_exec ip route add 2001:db8:3::/64 nexthop via 2001:db8:1::1 \
+ weight 1 nexthop via 2001:db8:2::1 weight 1
+}
+
+cl1_destroy()
+{
+ ip netns del ecmp_cl1
+}
+
+cl2_destroy()
+{
+ ip netns del ecmp_cl2
+}
+
+r1_destroy()
+{
+ ip netns del ecmp1
+}
+
+r2_destroy()
+{
+ ip netns del ecmp2
+}
+
+gen_udp4()
+{
+ local sp=$1; shift
+ local dp=$1; shift
+ local tx1_1_start tx1_2_start tx2_1_start tx2_2_start
+ local tx1_1_end tx1_2_end tx2_1_end tx2_2_end
+ local tx1_1 tx1_2 tx2_1 tx2_2
+ local tx1_1_res tx1_2_res tx2_1_res tx2_2_res
+ local chan1 chan2
+ local cl1_exec="ip netns exec ecmp_cl1"
+ local cl2_exec="ip netns exec ecmp_cl2"
+
+ tx1_1_start=$(ns_link_stats_tx_packets_get ecmp1 ecmp1l)
+ tx1_2_start=$(ns_link_stats_tx_packets_get ecmp1 ecmp2l)
+ tx2_1_start=$(ns_link_stats_tx_packets_get ecmp2 ecmp1r)
+ tx2_2_start=$(ns_link_stats_tx_packets_get ecmp2 ecmp2r)
+
+ $cl1_exec $MZ ecmphost1l -q -c 20 -p 64 -A 10.100.0.2 -B 10.200.0.2 \
+ -t udp "sp=${sp},dp=${dp}"
+
+ $cl2_exec $MZ ecmphost2l -q -c 20 -p 64 -A 10.200.0.2 -B 10.100.0.2 \
+ -t udp "sp=${dp},dp=${sp}"
+
+ tx1_1_end=$(ns_link_stats_tx_packets_get ecmp1 ecmp1l)
+ tx1_2_end=$(ns_link_stats_tx_packets_get ecmp1 ecmp2l)
+ tx2_1_end=$(ns_link_stats_tx_packets_get ecmp2 ecmp1r)
+ tx2_2_end=$(ns_link_stats_tx_packets_get ecmp2 ecmp2r)
+
+ let "tx1_1 = $tx1_1_end - $tx1_1_start"
+ let "tx1_2 = $tx1_2_end - $tx1_2_start"
+ let "tx2_1 = $tx2_1_end - $tx2_1_start"
+ let "tx2_2 = $tx2_2_end - $tx2_2_start"
+
+ [ "$tx1_1" -ge 20 ] && tx1_1_res=1 || tx1_1_res=0
+ [ "$tx1_2" -ge 20 ] && tx1_2_res=1 || tx1_2_res=0
+ [ "$tx2_1" -ge 20 ] && tx2_1_res=1 || tx2_1_res=0
+ [ "$tx2_2" -ge 20 ] && tx2_2_res=1 || tx2_2_res=0
+
+ let "chan1 = $tx1_1_res + $tx2_1_res"
+ let "chan2 = $tx1_2_res + $tx2_2_res"
+
+ if [ $chan1 -eq 2 ] || [ $chan2 -eq 2 ]; then
+ return 0
+ fi
+
+ return 1;
+}
+
+gen_udp6()
+{
+ local sp=$1; shift
+ local dp=$1; shift
+ local tx1_1_start tx1_2_start tx2_1_start tx2_2_start
+ local tx1_1_end tx1_2_end tx2_1_end tx2_2_end
+ local tx1_1 tx1_2 tx2_1 tx2_2
+ local tx1_1_res tx1_2_res tx2_1_res tx2_2_res
+ local chan1 chan2
+ local cl1_exec="ip netns exec ecmp_cl1"
+ local cl2_exec="ip netns exec ecmp_cl2"
+
+ tx1_1_start=$(ns_link_stats_tx_packets_get ecmp1 ecmp1l)
+ tx1_2_start=$(ns_link_stats_tx_packets_get ecmp1 ecmp2l)
+ tx2_1_start=$(ns_link_stats_tx_packets_get ecmp2 ecmp1r)
+ tx2_2_start=$(ns_link_stats_tx_packets_get ecmp2 ecmp2r)
+
+ $cl1_exec $MZ ecmphost1l -6 -q -c 20 -p 64 -A 2001:db8:3::2 -B 2001:db8:4::2 \
+ -t udp "sp=${sp},dp=${dp}"
+
+ $cl2_exec $MZ ecmphost2l -6 -q -c 20 -p 64 -A 2001:db8:4::2 -B 2001:db8:3::2 \
+ -t udp "sp=${dp},dp=${sp}"
+
+ tx1_1_end=$(ns_link_stats_tx_packets_get ecmp1 ecmp1l)
+ tx1_2_end=$(ns_link_stats_tx_packets_get ecmp1 ecmp2l)
+ tx2_1_end=$(ns_link_stats_tx_packets_get ecmp2 ecmp1r)
+ tx2_2_end=$(ns_link_stats_tx_packets_get ecmp2 ecmp2r)
+
+ let "tx1_1 = $tx1_1_end - $tx1_1_start"
+ let "tx1_2 = $tx1_2_end - $tx1_2_start"
+ let "tx2_1 = $tx2_1_end - $tx2_1_start"
+ let "tx2_2 = $tx2_2_end - $tx2_2_start"
+
+ [ "$tx1_1" -ge 20 ] && tx1_1_res=1 || tx1_1_res=0
+ [ "$tx1_2" -ge 20 ] && tx1_2_res=1 || tx1_2_res=0
+ [ "$tx2_1" -ge 20 ] && tx2_1_res=1 || tx2_1_res=0
+ [ "$tx2_2" -ge 20 ] && tx2_2_res=1 || tx2_2_res=0
+
+ let "chan1 = $tx1_1_res + $tx2_1_res"
+ let "chan2 = $tx1_2_res + $tx2_2_res"
+
+ if [ $chan1 -eq 2 ] || [ $chan2 -eq 2 ]; then
+ return 0
+ fi
+
+ return 1;
+}
+
+
+seed4_test_equal()
+{
+ RET=0
+ local sp
+ local dp
+ local i
+ local res=0
+ local seed=$(${OPENSSL} rand -hex 16)
+
+ seed=${seed:0:16},${seed:16:16}
+
+ ip netns exec ecmp1 sysctl -q \
+ net.ipv4.fib_multipath_hash_seed=${seed}
+ ip netns exec ecmp2 sysctl -q \
+ net.ipv4.fib_multipath_hash_seed=${seed}
+
+ for i in {1..30}; do
+ sp=$(shuf -i 1024-65000 -n 1)
+ dp=$(shuf -i 1024-65000 -n 1)
+ gen_udp4 $sp $dp && let res++
+ done
+
+ [ $res != 30 ] && RET=1
+ log_test "IPv4 multipath seed tests [equal seed]"
+}
+
+seed4_test_diff()
+{
+ RET=0
+ local sp
+ local dp
+ local i
+ local res=0
+ local seed1=$(${OPENSSL} rand -hex 16)
+ local seed2=$(${OPENSSL} rand -hex 16)
+
+ seed1=${seed1:0:16},${seed1:16:16}
+ seed2=${seed2:0:16},${seed2:16:16}
+
+ ip netns exec ecmp1 sysctl -q \
+ net.ipv4.fib_multipath_hash_seed=${seed1}
+ ip netns exec ecmp2 sysctl -q \
+ net.ipv4.fib_multipath_hash_seed=${seed2}
+
+ for i in {1..30}; do
+ sp=$(shuf -i 1024-65000 -n 1)
+ dp=$(shuf -i 1024-65000 -n 1)
+ gen_udp4 $sp $dp && let res++
+ done
+
+ [ $res -eq 30 ] && RET=1
+ log_test "IPv4 multipath seed tests [different seed]"
+}
+
+seed6_test_equal()
+{
+ RET=0
+ local sp
+ local dp
+ local i
+ local res=0
+ local seed=$(${OPENSSL} rand -hex 16)
+
+ seed=${seed:0:16},${seed:16:16}
+
+ ip netns exec ecmp1 sysctl -q \
+ net.ipv6.fib_multipath_hash_seed=${seed}
+ ip netns exec ecmp2 sysctl -q \
+ net.ipv6.fib_multipath_hash_seed=${seed}
+
+ for i in {1..30}; do
+ sp=$(shuf -i 1024-65000 -n 1)
+ dp=$(shuf -i 1024-65000 -n 1)
+ gen_udp6 $sp $dp && let res++
+ done
+
+ [ $res != 30 ] && RET=1
+ log_test "IPv6 multipath seed tests [equal seed]"
+}
+
+seed6_test_diff()
+{
+ RET=0
+ local sp
+ local dp
+ local i
+ local res=0
+ local seed1=$(${OPENSSL} rand -hex 16)
+ local seed2=$(${OPENSSL} rand -hex 16)
+
+ seed1=${seed1:0:16},${seed1:16:16}
+ seed2=${seed2:0:16},${seed2:16:16}
+
+ ip netns exec ecmp1 sysctl -q \
+ net.ipv6.fib_multipath_hash_seed=${seed1}
+ ip netns exec ecmp2 sysctl -q \
+ net.ipv6.fib_multipath_hash_seed=${seed2}
+
+ for i in {1..30}; do
+ sp=$(shuf -i 1024-65000 -n 1)
+ dp=$(shuf -i 1024-65000 -n 1)
+ gen_udp4 $sp $dp && let res++
+ done
+
+ [ $res -eq 30 ] && RET=1
+ log_test "IPv6 multipath seed tests [different seed]"
+}
+
+multipath_seed_test()
+{
+ require_command $OPENSSL
+ veth_prepare
+ cl1_create
+ cl2_create
+ r1_create
+ r2_create
+
+ log_info "Running IPv4 multipath seed tests [equal seed]"
+ seed4_test_equal
+ log_info "Running IPv4 multipath seed tests [different seed]"
+ seed4_test_diff
+ log_info "Running IPv6 multipath seed tests [equal seed]"
+ seed6_test_equal
+ log_info "Running IPv6 multipath seed tests [different seed]"
+ seed6_test_diff
+
+ cl1_destroy
+ cl2_destroy
+ r1_destroy
+ r2_destroy
+}
+
+tests_run
+
+exit $EXIT_STATUS
--
2.31.1


2021-04-27 03:23:53

by David Ahern

[permalink] [raw]
Subject: Re: [PATCH v4 net-next] net: multipath routing: configurable seed

On 4/23/21 6:44 AM, Balaev Pavel wrote:
> Ability for a user to assign seed value to multipath route hashes.
> Now kernel uses random seed value to prevent hash-flooding DoS attacks;
> however, it disables some use cases, f.e:
>
> +-------+ +------+ +--------+
> | |-eth0---| FW0 |---eth0-| |
> | | +------+ | |
> | GW0 |ECMP ECMP| GW1 |
> | | +------+ | |
> | |-eth1---| FW1 |---eth1-| |
> +-------+ +------+ +--------+
>
> In this use case, two ECMP routers balance traffic between two firewalls.
> If some flow transmits a response over a different channel than request,
> such flow will be dropped, because keep-state rules are created on
> the other firewall.
>
> This patch adds sysctl variable: net.ipv4|ipv6.fib_multipath_hash_seed.
> User can set the same seed value on GW0 and GW1 for traffic to be
> mirror-balanced. By default, random value is used.
>
> Signed-off-by: Balaev Pavel <[email protected]>
> ---
> Documentation/networking/ip-sysctl.rst | 14 +
> include/net/flow_dissector.h | 4 +
> include/net/netns/ipv4.h | 2 +
> include/net/netns/ipv6.h | 3 +
> net/core/flow_dissector.c | 9 +
> net/ipv4/route.c | 10 +-
> net/ipv4/sysctl_net_ipv4.c | 97 +++++
> net/ipv6/route.c | 10 +-
> net/ipv6/sysctl_net_ipv6.c | 96 +++++
> .../testing/selftests/net/forwarding/Makefile | 1 +
> tools/testing/selftests/net/forwarding/lib.sh | 41 +++
> .../net/forwarding/router_mpath_seed.sh | 347 ++++++++++++++++++
> 12 files changed, 632 insertions(+), 2 deletions(-)
> create mode 100755 tools/testing/selftests/net/forwarding/router_mpath_seed.sh

This really needs to be split into multiple patches: at a minimum, one for
IPv4, one for IPv6, and one for the test script (thank you for adding that).

[ cc'ed Ido since most of the tests under
tools/testing/selftests/net/forwarding come from him and team ]

>
> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
> index 9701906f6..d1a67e6fe 100644
> --- a/Documentation/networking/ip-sysctl.rst
> +++ b/Documentation/networking/ip-sysctl.rst
> @@ -100,6 +100,20 @@ fib_multipath_hash_policy - INTEGER
> - 1 - Layer 4
> - 2 - Layer 3 or inner Layer 3 if present
>
> +fib_multipath_hash_seed - STRING
> + Controls seed value for multipath route hashes. By default
> + random value is used. Only valid for kernels built with
> + CONFIG_IP_ROUTE_MULTIPATH enabled.
> +
> + Valid format: two hex values set off with comma or "random"
> + keyword.
> +
> + Example to generate the seed value::
> +
> + RAND=$(openssl rand -hex 16) && echo "${RAND:0:16},${RAND:16:16}"
> +
> + Default: "random"
> +
> fib_sync_mem - UNSIGNED INTEGER
> Amount of dirty memory from fib entries that can be backlogged before
> synchronize_rcu is forced.
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index ffd386ea0..2bd4e28de 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -348,6 +348,10 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
> }
>
> u32 flow_hash_from_keys(struct flow_keys *keys);
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> +u32 flow_multipath_hash_from_keys(struct flow_keys *keys,
> + const siphash_key_t *seed);

column alignment looks off here ^^^^ and a few other places; please
correct in the next version.


2021-04-27 14:29:18

by David Ahern

[permalink] [raw]
Subject: Re: [PATCH v4 net-next] net: multipath routing: configurable seed

On 4/27/21 3:42 AM, Pavel Balaev wrote:
> After running "scripts/checkpatch.pl" I got warnings about alignment.
> So I ran checkpatch.pl --fix and fixed the alignment the way the script did,
> and the warnings went away. I don't understand the alignment rules; can you
> tell me the right way?

I don't see any statements under Documentation/process; not sure where
it is explicitly stated. You can get the general idea by following the
surrounding code and then let checkpatch correct from there.

2021-04-29 18:53:58

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4 net-next] net: multipath routing: configurable seed

Hi Balaev,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on net-next/master]

url: https://github.com/0day-ci/linux/commits/Balaev-Pavel/net-multipath-routing-configurable-seed/20210423-214755
base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git cad4162a90aeff737a16c0286987f51e927f003a
config: riscv-rv32_defconfig (attached as .config)
compiler: riscv32-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/d2127c4161e4482ac75072cfdbb27781d2a9be30
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Balaev-Pavel/net-multipath-routing-configurable-seed/20210423-214755
git checkout d2127c4161e4482ac75072cfdbb27781d2a9be30
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=riscv

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

net/ipv6/route.c: In function 'rt6_multipath_hash':
>> net/ipv6/route.c:2422:11: error: implicit declaration of function 'flow_multipath_hash_from_keys'; did you mean 'flow_hash_from_keys'? [-Werror=implicit-function-declaration]
2422 | mhash = flow_multipath_hash_from_keys(&hash_keys, seed_ctx);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| flow_hash_from_keys
cc1: some warnings being treated as errors


vim +2422 net/ipv6/route.c

2328
2329 /* if skb is set it will be used and fl6 can be NULL */
2330 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2331 const struct sk_buff *skb, struct flow_keys *flkeys)
2332 {
2333 struct flow_keys hash_keys;
2334 siphash_key_t *seed_ctx;
2335 u32 mhash;
2336
2337 switch (ip6_multipath_hash_policy(net)) {
2338 case 0:
2339 memset(&hash_keys, 0, sizeof(hash_keys));
2340 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2341 if (skb) {
2342 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2343 } else {
2344 hash_keys.addrs.v6addrs.src = fl6->saddr;
2345 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2346 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2347 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2348 }
2349 break;
2350 case 1:
2351 if (skb) {
2352 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2353 struct flow_keys keys;
2354
2355 /* short-circuit if we already have L4 hash present */
2356 if (skb->l4_hash)
2357 return skb_get_hash_raw(skb) >> 1;
2358
2359 memset(&hash_keys, 0, sizeof(hash_keys));
2360
2361 if (!flkeys) {
2362 skb_flow_dissect_flow_keys(skb, &keys, flag);
2363 flkeys = &keys;
2364 }
2365 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2366 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2367 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2368 hash_keys.ports.src = flkeys->ports.src;
2369 hash_keys.ports.dst = flkeys->ports.dst;
2370 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2371 } else {
2372 memset(&hash_keys, 0, sizeof(hash_keys));
2373 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2374 hash_keys.addrs.v6addrs.src = fl6->saddr;
2375 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2376 hash_keys.ports.src = fl6->fl6_sport;
2377 hash_keys.ports.dst = fl6->fl6_dport;
2378 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2379 }
2380 break;
2381 case 2:
2382 memset(&hash_keys, 0, sizeof(hash_keys));
2383 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2384 if (skb) {
2385 struct flow_keys keys;
2386
2387 if (!flkeys) {
2388 skb_flow_dissect_flow_keys(skb, &keys, 0);
2389 flkeys = &keys;
2390 }
2391
2392 /* Inner can be v4 or v6 */
2393 if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2394 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2395 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2396 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2397 } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2398 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2399 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2400 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2401 hash_keys.tags.flow_label = flkeys->tags.flow_label;
2402 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2403 } else {
2404 /* Same as case 0 */
2405 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2406 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2407 }
2408 } else {
2409 /* Same as case 0 */
2410 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2411 hash_keys.addrs.v6addrs.src = fl6->saddr;
2412 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2413 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2414 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2415 }
2416 break;
2417 }
2418
2419 rcu_read_lock();
2420 seed_ctx = rcu_dereference(net->ipv6.multipath_hash_seed_ctx);
2421 if (seed_ctx)
> 2422 mhash = flow_multipath_hash_from_keys(&hash_keys, seed_ctx);
2423 else
2424 mhash = flow_hash_from_keys(&hash_keys);
2425 rcu_read_unlock();
2426
2427 return mhash >> 1;
2428 }
2429

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (5.94 kB)
.config.gz (20.07 kB)
Download all attachments