This patchset adds support for the SRv6 End.DT4 behavior.
The SRv6 End.DT4 behavior is used to implement multi-tenant IPv4 L3VPNs. It
decapsulates the received packets and performs an IPv4 routing lookup in the
routing table of the tenant. The Linux implementation of SRv6 End.DT4
leverages a VRF device. SRv6 End.DT4 is defined in the SRv6 Network
Programming draft [1].
- Patch 1/4 is needed to solve a pre-existing issue with tunneled packets
when a sniffer is attached;
- Patch 2/4 introduces two callbacks used for customizing the
creation/destruction of a SRv6 behavior;
- Patch 3/4 is the core patch that adds support for the SRv6 End.DT4 behavior;
- Patch 4/4 adds the selftest for SRv6 End.DT4.
I would like to thank David Ahern for his support during the development of
this patch set.
Comments, suggestions and improvements are very welcome!
Thanks,
Andrea Mayer
[1] https://tools.ietf.org/html/draft-ietf-spring-srv6-network-programming
Andrea Mayer (4):
vrf: push mac header for tunneled packets when sniffer is attached
seg6: add callbacks for customizing the creation/destruction of a
behavior
seg6: add support for the SRv6 End.DT4 behavior
add selftest for the SRv6 End.DT4 behavior
drivers/net/vrf.c | 78 ++-
net/ipv6/seg6_local.c | 261 ++++++++++
.../selftests/net/srv6_end_dt4_l3vpn_test.sh | 490 ++++++++++++++++++
3 files changed, 823 insertions(+), 6 deletions(-)
create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
--
2.20.1
We introduce two callbacks used for customizing the creation/destruction of
an SRv6 behavior. Such callbacks are defined in the new struct
seg6_local_lwtunnel_ops and are briefly described below:
- build_state(...): the custom constructor of the behavior, called during
its initialization phase once all the attributes have been successfully
parsed;
- destroy_state(...): the custom destructor of the behavior, called just
before the behavior is completely destroyed.
Signed-off-by: Andrea Mayer <[email protected]>
---
net/ipv6/seg6_local.c | 57 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index eba23279912d..f70687e1b8a9 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -33,11 +33,23 @@
struct seg6_local_lwt;
+typedef int (*slwt_build_state_t)(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack);
+typedef void (*slwt_destroy_state_t)(struct seg6_local_lwt *slwt);
+
+/* callbacks used for customizing the creation and destruction of a behavior */
+struct seg6_local_lwtunnel_ops {
+ slwt_build_state_t build_state;
+ slwt_destroy_state_t destroy_state;
+};
+
struct seg6_action_desc {
int action;
unsigned long attrs;
int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int static_headroom;
+
+ struct seg6_local_lwtunnel_ops slwt_ops;
};
struct bpf_lwt_prog {
@@ -938,6 +950,45 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
};
+/* call the custom constructor of the behavior during its initialization
+ * phase, after all its attributes have been successfully parsed.
+ */
+static int
+seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ slwt_build_state_t build_func;
+ struct seg6_action_desc *desc;
+ int err = 0;
+
+ desc = slwt->desc;
+ if (!desc)
+ return -EINVAL;
+
+ build_func = desc->slwt_ops.build_state;
+ if (build_func)
+ err = build_func(slwt, cfg, extack);
+
+ return err;
+}
+
+/* call the custom destructor of the behavior; it is invoked just before the
+ * tunnel is destroyed.
+ */
+static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
+{
+ slwt_destroy_state_t destroy_func;
+ struct seg6_action_desc *desc;
+
+ desc = slwt->desc;
+ if (!desc)
+ return;
+
+ destroy_func = desc->slwt_ops.destroy_state;
+ if (destroy_func)
+ destroy_func(slwt);
+}
+
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct seg6_action_param *param;
@@ -1003,6 +1054,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
goto out_free;
+ err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
+ if (err < 0)
+ goto out_free;
+
newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
newts->headroom = slwt->headroom;
@@ -1021,6 +1076,8 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+ seg6_local_lwtunnel_destroy_state(slwt);
+
kfree(slwt->srh);
if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
--
2.20.1
Before this patch, a sniffer attached to a VRF used as the receiving
interface of L3 tunneled packets detects them as malformed and complains
about it (e.g. tcpdump shows bogus packets).
The reason is that a tunneled L3 packet does not carry any L2 information
and, when the VRF is set as the receiving interface of a decapsulated L3
packet, no MAC header is currently set or valid.
Therefore, this patch adds a MAC header to any packet which is received
directly on the VRF interface ONLY IF: i) a sniffer is attached to the VRF
and ii) the MAC header is not set.
In this case, the MAC address of the VRF is copied into both the destination
and the source addresses of the Ethernet header. The protocol type is set to
either IPv4 or IPv6, depending on the received L3 packet.
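As a quick illustration of the effect, a capture taken directly on a VRF
device used as the receiving interface of decapsulated packets (the device
name below is purely an example) now shows well-formed packets instead of
bogus ones:

$ tcpdump -ni vrf-100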
Signed-off-by: Andrea Mayer <[email protected]>
---
drivers/net/vrf.c | 78 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 72 insertions(+), 6 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 60c1aadece89..abb09c6036a6 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1263,6 +1263,61 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
skb_dst_set(skb, &rt6->dst);
}
+static int vrf_prepare_mac_header(struct sk_buff *skb,
+ struct net_device *vrf_dev, u16 proto)
+{
+ struct ethhdr *eth;
+ int err;
+
+ /* in general, we do not know if there is enough space in the head of
+ * the packet for hosting the mac header.
+ */
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev));
+ if (unlikely(err))
+ /* no space in the skb head */
+ return -ENOBUFS;
+
+ __skb_push(skb, ETH_HLEN);
+ eth = (struct ethhdr *)skb->data;
+
+ skb_reset_mac_header(skb);
+
+ /* we set the ethernet destination and the source addresses to the
+ * address of the VRF device.
+ */
+ ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);
+ ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
+ eth->h_proto = htons(proto);
+
+ /* the destination address of the Ethernet frame corresponds to the
+ * address set on the VRF interface; therefore, the packet is intended
+ * to be processed locally.
+ */
+ skb->protocol = eth->h_proto;
+ skb->pkt_type = PACKET_HOST;
+
+ skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+ skb_pull_inline(skb, ETH_HLEN);
+
+ return 0;
+}
+
+/* prepare and add the mac header to the packet if it was not previously set.
+ * In this way, packet sniffers such as tcpdump can parse the packet correctly.
+ * If the mac header was previously set, the original mac header is left
+ * untouched and the function returns immediately.
+ */
+static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
+ struct net_device *vrf_dev,
+ u16 proto)
+{
+ if (skb_mac_header_was_set(skb))
+ return 0;
+
+ return vrf_prepare_mac_header(skb, vrf_dev, proto);
+}
+
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
struct sk_buff *skb)
{
@@ -1289,9 +1344,15 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
skb->skb_iif = vrf_dev->ifindex;
if (!list_empty(&vrf_dev->ptype_all)) {
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ int err;
+
+ err = vrf_add_mac_header_if_unset(skb, vrf_dev,
+ ETH_P_IPV6);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
}
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
@@ -1334,9 +1395,14 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
vrf_rx_stats(vrf_dev, skb->len);
if (!list_empty(&vrf_dev->ptype_all)) {
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ int err;
+
+ err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
}
skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
--
2.20.1
SRv6 End.DT4 is defined in the SRv6 Network Programming draft [1].
The SRv6 End.DT4 behavior is used to implement IPv4 L3VPN use cases in
multi-tenant environments. It decapsulates the received packets and
performs an IPv4 routing lookup in the routing table of the tenant.
The SRv6 End.DT4 Linux implementation leverages a VRF device in order to
force the routing lookup into the associated routing table.
To make End.DT4 work properly, it must be guaranteed that the routing
table used for the routing lookup is bound to one and only one VRF at
tunnel creation time. Such a constraint is enforced by enabling the VRF
strict_mode sysctl parameter, i.e.:
$ sysctl -wq net.vrf.strict_mode=1
At JANOG44, LINE Corporation presented their multi-tenant DC architecture
based on SRv6 [2]. In the slides, they reported that the Linux kernel is
missing support for the SRv6 End.DT4 behavior.
The iproute2 counterpart required for configuring the SRv6 End.DT4
behavior is already implemented along with the other supported SRv6
behaviors [3].
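As a rough configuration sketch (with strict_mode enabled as shown above;
device, table and SID values are only examples borrowed from the selftest in
patch 4/4), a single End.DT4 instance can be set up as follows:

$ ip link add vrf-100 type vrf table 100
$ ip link set vrf-100 up
$ ip -6 route add fc00:12:100::6004/128 \
      encap seg6local action End.DT4 table 100 dev vrf-100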
[1] https://tools.ietf.org/html/draft-ietf-spring-srv6-network-programming
[2] https://speakerdeck.com/line_developers/line-data-center-networking-with-srv6
[3] https://patchwork.ozlabs.org/patch/799837/
Signed-off-by: Andrea Mayer <[email protected]>
---
net/ipv6/seg6_local.c | 204 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 204 insertions(+)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index f70687e1b8a9..d47b76581dfa 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -57,6 +57,12 @@ struct bpf_lwt_prog {
char *name;
};
+struct seg6_end_dt4_info {
+ struct net *net;
+ /* VRF device to which the End.DT4 is associated to */
+ int vrf_ifindex;
+};
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@@ -66,6 +72,7 @@ struct seg6_local_lwt {
int iif;
int oif;
struct bpf_lwt_prog bpf;
+ struct seg6_end_dt4_info dt4_info;
int headroom;
struct seg6_action_desc *desc;
@@ -413,6 +420,195 @@ static int input_action_end_dx4(struct sk_buff *skb,
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+
+static inline struct net *
+fib6_config_get_net(const struct fib6_config *fib6_cfg)
+{
+ const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
+
+ return nli->nl_net;
+}
+
+static int srv6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct seg6_end_dt4_info *info = &slwt->dt4_info;
+ int vrf_ifindex;
+ struct net *net;
+
+ net = fib6_config_get_net(cfg);
+
+ vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
+ slwt->table);
+ if (vrf_ifindex < 0) {
+ if (vrf_ifindex == -EPERM) {
+ NL_SET_ERR_MSG(extack, "Strict Mode is disabled");
+ } else if (vrf_ifindex == -ENODEV) {
+ NL_SET_ERR_MSG(extack, "No such device");
+ } else {
+ NL_SET_ERR_MSG(extack, "Unknown error");
+
+ pr_debug("SRv6 End.DT4 creation error=%d\n",
+ vrf_ifindex);
+ }
+
+ return vrf_ifindex;
+ }
+
+ info->net = net;
+ info->vrf_ifindex = vrf_ifindex;
+
+ return 0;
+}
+
+/* In the SRv6 End.DT4 use case, we can receive traffic (IPv6+Segment Routing
+ * Header packets) from several interfaces and the IPv6 destination address (DA)
+ * is used for retrieving the specific instance of the End.DT4 behavior that
+ * should process the packets.
+ *
+ * The End.DT4 behavior extracts the inner (IPv4) packet and, on the basis of
+ * the associated VRF, it routes and forwards the IPv4 packet by looking at the
+ * specific table.
+ *
+ * However, the inner IPv4 packet is not really bound to any receiving interface
+ * and thus the End.DT4 sets the VRF as the *receiving* interface. In other
+ * words, the End.DT4 processes a packet as if it had been received directly
+ * by the VRF (and not by one of its slave devices, if any).
+ * In this way, the VRF interface is used for routing the IPv4 packet
+ * according to the specific routing table.
+ *
+ * This design has allowed us to get some interesting features like:
+ * 1) the statistics on rx packets;
+ * 2) the possibility to install a packet sniffer on the receiving interface
+ * (the VRF one) to see the incoming packets;
+ * 3) the possibility to leverage the netfilter prerouting hook for the inner
+ * IPv4 packet.
+ *
+ * This function returns:
+ * - the skb buffer when the VRF rcv handler has correctly processed the
+ * packet;
+ * - NULL when the skb is consumed by the VRF rcv handler;
+ * - a pointer which encodes a negative error number in case of error.
+ * Note that in this case, the function takes care of freeing the skb.
+ */
+static struct sk_buff *end_dt4_vrf_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* based on l3mdev_ip_rcv; we are only interested in the master */
+ if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
+ goto drop;
+
+ if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
+ goto drop;
+
+ /* the decap packet (IPv4) does not come with any mac header info.
+ * We must unset the mac header to allow the VRF device to rebuild it,
+ * just in case there is a sniffer attached on the device.
+ */
+ skb_unset_mac_header(skb);
+
+ skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, AF_INET);
+ if (!skb)
+ /* the skb buffer was consumed by the handler */
+ return NULL;
+
+ /* when a packet is received by a VRF or by one of its slaves, the
+ * master device reference is set into the skb.
+ */
+ if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
+ goto drop;
+
+ return skb;
+
+drop:
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct net_device *end_dt4_get_vrf_rcu(struct sk_buff *skb,
+ struct seg6_end_dt4_info *info)
+{
+ int vrf_ifindex = info->vrf_ifindex;
+ struct net *net = info->net;
+
+ if (unlikely(vrf_ifindex < 0))
+ goto error;
+
+ if (unlikely(!net_eq(dev_net(skb->dev), net)))
+ goto error;
+
+ return dev_get_by_index_rcu(net, vrf_ifindex);
+
+error:
+ return NULL;
+}
+
+static int input_action_end_dt4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct net_device *vrf;
+ struct iphdr *iph;
+ int err;
+
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ vrf = end_dt4_get_vrf_rcu(skb, &slwt->dt4_info);
+ if (unlikely(!vrf))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+
+ skb_dst_drop(skb);
+
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+
+ skb = end_dt4_vrf_rcv(skb, vrf);
+ if (!skb)
+ /* packet has been processed and consumed by the VRF */
+ return 0;
+
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ return err;
+ }
+
+ iph = ip_hdr(skb);
+
+ err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (err)
+ goto drop;
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+#else
+
+static int srv6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "Operation is not supported");
+
+ return -EOPNOTSUPP;
+}
+
+static int input_action_end_dt4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ kfree_skb(skb);
+ return -EOPNOTSUPP;
+}
+
+#endif
+
static int input_action_end_dt6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@@ -601,6 +797,14 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_NH4),
.input = input_action_end_dx4,
},
+ {
+ .action = SEG6_LOCAL_ACTION_END_DT4,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_dt4,
+ .slwt_ops = {
+ .build_state = srv6_end_dt4_build,
+ },
+ },
{
.action = SEG6_LOCAL_ACTION_END_DT6,
.attrs = (1 << SEG6_LOCAL_TABLE),
--
2.20.1
This selftest is designed for evaluating the new SRv6 End.DT4 behavior
used, in this example, for implementing IPv4 L3 VPN use cases.
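As with the other net selftests, the script can be run directly; root
privileges are required, otherwise the test is skipped:

$ ./srv6_end_dt4_l3vpn_test.sh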
Signed-off-by: Andrea Mayer <[email protected]>
---
.../selftests/net/srv6_end_dt4_l3vpn_test.sh | 490 ++++++++++++++++++
1 file changed, 490 insertions(+)
create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..c04437667399
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,490 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <[email protected]>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Communication among hosts of different tenants is
+# forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement the IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such L3 VPNs are: a) SRv6 Encap
+# behavior, b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how those L3 VPNs work, let us briefly consider an example where
+# the host hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is handled by the
+# router rt-1, which acts as an ARP proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such an
+# interface is enslaved to the VRF vrf-100, whose associated table contains the
+# SRv6 Encap route that encapsulates the IPv4 packet in an IPv6 packet carrying
+# the Segment Routing Header (SRH). The packet is then sent through the (IPv6)
+# core network up to the router rt-2, which receives it on the veth0 interface.
+#
+# The router rt-2 makes use of the routing table 'localsid' for processing
+# IPv6+SRH packets. In this IPv4 L3 VPN scenario, the SRv6 End.DT4 behavior is
+# used for decapsulating the inner (IPv4) packets and for forwarding them to
+# the hosts.
+# The forwarding operation is carried out by using the routing table which has
+# been set during the configuration of a specific SRv6 End.DT4 behavior
+# instance.
+# In this example, an IPv4 packet destined to hs-t100-2 is forwarded to
+# the host using the routing table 100.
+#
+# The ping response follows the same processing but this time the roles of
+# rt-1 and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +---------------+ | | +---------------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-----------------------------------+
+# |SID |Action |
+# +-----------------------------------+
+# |fc00:21:100::6004|End.DT4 table 100|
+# +-----------------------------------+
+# |fc00:21:200::6004|End.DT4 table 200|
+# +-----------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +--------------------------------------------------+
+# |host |Action |
+# +--------------------------------------------------+
+# |10.0.0.2 |seg6 encap fc00:12:100::6004 dev veth0|
+# +--------------------------------------------------+
+# |10.0.0.0/24|forward dev veth_t100 |
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +--------------------------------------------------+
+# |host |Action |
+# +--------------------------------------------------+
+# |10.0.0.4 |seg6 encap fc00:12:200::6004 dev veth0|
+# +--------------------------------------------------+
+# |10.0.0.0/24|forward dev veth_t200 |
+# +--------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-----------------------------------+
+# |SID |Action |
+# +-----------------------------------+
+# |fc00:12:100::6004|End.DT4 table 100|
+# +-----------------------------------+
+# |fc00:12:200::6004|End.DT4 table 200|
+# +-----------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +--------------------------------------------------+
+# |host |Action |
+# +--------------------------------------------------+
+# |10.0.0.1 |seg6 encap fc00:21:100::6004 dev veth0|
+# +--------------------------------------------------+
+# |10.0.0.0/24|forward dev veth_t100 |
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +--------------------------------------------------+
+# |host |Action |
+# +--------------------------------------------------+
+# |10.0.0.3 |seg6 encap fc00:21:200::6004 dev veth0|
+# +--------------------------------------------------+
+# |10.0.0.0/24|forward dev veth_t200 |
+# +--------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route on the access router rtsrc, for encapsulating
+ # packets arriving from the host hssrc and destined to the host hsdst.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route on the rtdst router, for decapsulating packets
+ # destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT4 table ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT4 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tenants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+cleanup >/dev/null 2>&1
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup >/dev/null 2>&1
+
+exit ${ret}
--
2.20.1