This patch introduces a frer action to implement frame replication and
elimination for reliability, which is defined in IEEE 802.1CB-2017.
There are two modes for frer action: generate and push the tag, recover
and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
patch only supports RTAG now.
User can push the tag on egress port of the talker device, recover and
pop the tag on ingress port of the listener device. When it's a relay
system, push the tag on ingress port, or set individual recover on
ingress port. Set the sequence recover on egress port.
Use action "mirred" to do split function, and use "vlan-modify" to do
active stream identification function on relay system.
Below is the setting example in user space:
push rtag on relay system:
> tc qdisc add dev swp0 clsact
> tc filter add dev swp0 ingress protocol 802.1Q flower \
skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
action frer rtag tag-action tag-push
split stream:
> tc filter add dev swp0 ingress protocol 802.1Q flower \
skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
action mirred egress mirror dev swp1
individual recover:
> tc filter add dev swp0 ingress protocol 802.1Q flower \
skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
action frer rtag recover \
alg vector history-length 32 reset-time 10000
recover and pop rtag:
> tc filter add dev swp0 egress protocol 802.1Q flower \
skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
action frer rtag recover \
alg vector history-length 32 reset-time 10000 \
tag-action tag-pop
Signed-off-by: Xiaoliang Yang <[email protected]>
---
include/net/flow_offload.h | 9 +
include/net/tc_act/tc_frer.h | 52 +++
include/uapi/linux/if_ether.h | 1 +
include/uapi/linux/pkt_cls.h | 1 +
include/uapi/linux/tc_act/tc_frer.h | 50 ++
net/sched/Kconfig | 13 +
net/sched/Makefile | 1 +
net/sched/act_frer.c | 695 ++++++++++++++++++++++++++++
net/sched/cls_api.c | 11 +
9 files changed, 833 insertions(+)
create mode 100644 include/net/tc_act/tc_frer.h
create mode 100644 include/uapi/linux/tc_act/tc_frer.h
create mode 100644 net/sched/act_frer.c
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 3961461d9c8b..cfa9b69cec69 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -148,6 +148,7 @@ enum flow_action_id {
FLOW_ACTION_MPLS_MANGLE,
FLOW_ACTION_GATE,
FLOW_ACTION_PPPOE_PUSH,
+ FLOW_ACTION_FRER,
NUM_FLOW_ACTIONS,
};
@@ -278,6 +279,14 @@ struct flow_action_entry {
struct { /* FLOW_ACTION_PPPOE_PUSH */
u16 sid;
} pppoe;
+		struct {				/* FLOW_ACTION_FRER */
+			u8		tag_type;	/* enum tc_frer_tag_type */
+			u8		tag_action;	/* enum tc_frer_tag_action */
+			u8		recover;	/* non-zero: sequence recovery enabled */
+			u8		rcvy_alg;	/* enum tc_frer_rcvy_alg */
+			u8		rcvy_history_len;
+			u64		rcvy_reset_msec; /* u64 like struct tcf_frer; u8 truncates */
+		} frer;
};
struct flow_action_cookie *cookie; /* user defined action cookie */
};
diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h
new file mode 100644
index 000000000000..b2ad2b2a3fe1
--- /dev/null
+++ b/include/net/tc_act/tc_frer.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2021 NXP */
+
+#ifndef __NET_TC_FRER_H
+#define __NET_TC_FRER_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_frer.h>
+
+struct tcf_frer;
+
+/*
+ * Per-tag-format callbacks used by the frer action.
+ *
+ * @encode:  insert a tag into @skb using the state in @frer_act; returns 0
+ *           on success or a negative errno.
+ * @decode:  return the sequence number carried by the tag in @skb, or a
+ *           marker value when the frame carries no tag.
+ * @tag_pop: remove the tag from @skb if one is present.
+ */
+struct tcf_frer_proto_ops {
+ int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act);
+ int (*decode)(struct sk_buff *skb);
+ void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act);
+};
+
+/*
+ * Private state of one frer action instance.
+ *
+ * @common:           generic tc action state; must stay first, to_frer()
+ *                    casts a struct tc_action pointer to this type.
+ * @tag_type:         tag format (enum tc_frer_tag_type).
+ * @tag_action:       push/pop behaviour (enum tc_frer_tag_action).
+ * @recover:          non-zero enables the sequence recovery path.
+ * @rcvy_alg:         recovery algorithm (enum tc_frer_rcvy_alg).
+ * @rcvy_history_len: window length used by the vector algorithm.
+ * @rcvy_reset_msec:  recovery reset timeout in milliseconds; 0 disables
+ *                    the timer.
+ * @gen_seq_num:      sequence generation counter.
+ * @rcvy_seq_num:     sequence number the recovery window is anchored at.
+ * @seq_space:        size of the sequence number space.
+ * @seq_history:      bitmap of recently accepted sequence numbers.
+ * @take_any:         set by a recovery reset; the next tagged frame is
+ *                    accepted unconditionally.
+ * @rcvy_take_noseq:  accept untagged frames in the vector algorithm.
+ * @cps_seq_rcvy_*:   recovery counters reported by the dump callback.
+ * @hrtimer:          recovery reset timer.
+ * @proto_ops:        callbacks for the configured tag format.
+ */
+struct tcf_frer {
+ struct tc_action common;
+ u8 tag_type;
+ u8 tag_action;
+ u8 recover;
+ u8 rcvy_alg;
+ u8 rcvy_history_len;
+ u64 rcvy_reset_msec;
+ u32 gen_seq_num;
+ u32 rcvy_seq_num;
+ u64 seq_space;
+ u32 seq_history;
+ bool take_any;
+ bool rcvy_take_noseq;
+ u32 cps_seq_rcvy_lost_pkts;
+ u32 cps_seq_rcvy_tagless_pkts;
+ u32 cps_seq_rcvy_out_of_order_pkts;
+ u32 cps_seq_rcvy_rogue_pkts;
+ u32 cps_seq_rcvy_resets;
+ struct hrtimer hrtimer;
+ const struct tcf_frer_proto_ops *proto_ops;
+};
+
+#define to_frer(a) ((struct tcf_frer *)(a))	/* parenthesized: safe for expression arguments */
+
+/* Return true when @a is a frer action; always false without CONFIG_NET_CLS_ACT. */
+static inline bool is_tcf_frer(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return a->ops && a->ops->id == TCA_ID_FRER;
+#else
+	return false;
+#endif
+}
+
+#endif
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5f589c7a8382..812aa75f7f23 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -114,6 +114,7 @@
#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
+#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */
#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 6836ccb9c45d..a3fc0c478a65 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -136,6 +136,7 @@ enum tca_id {
TCA_ID_MPLS,
TCA_ID_CT,
TCA_ID_GATE,
+ TCA_ID_FRER,
/* other actions go here */
__TCA_ID_MAX = 255
};
diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h
new file mode 100644
index 000000000000..cd86274483e7
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_frer.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2021 NXP */
+
+#ifndef __LINUX_TC_FRER_H
+#define __LINUX_TC_FRER_H
+
+#include <linux/pkt_cls.h>
+
+/* Payload of TCA_FRER_PARMS: only the generic tc action fields. */
+struct tc_frer {
+ tc_gen;
+};
+
+/* Netlink attributes of the frer action. */
+enum {
+ TCA_FRER_UNSPEC,
+ TCA_FRER_TM, /* struct tcf_t, dump only */
+ TCA_FRER_PARMS, /* struct tc_frer */
+ TCA_FRER_PAD, /* padding for 64-bit attributes */
+ TCA_FRER_TAG_TYPE, /* u8, enum tc_frer_tag_type */
+ TCA_FRER_TAG_ACTION, /* u8, enum tc_frer_tag_action */
+ TCA_FRER_RECOVER, /* u8, non-zero enables recovery */
+ TCA_FRER_RECOVER_ALG, /* u8, enum tc_frer_rcvy_alg */
+ TCA_FRER_RECOVER_HISTORY_LEN, /* u8 */
+ TCA_FRER_RECOVER_RESET_TM, /* u64, milliseconds */
+ TCA_FRER_RECOVER_TAGLESS_PKTS, /* u32 counter, dump only */
+ TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS, /* u32 counter, dump only */
+ TCA_FRER_RECOVER_ROGUE_PKTS, /* u32 counter, dump only */
+ TCA_FRER_RECOVER_LOST_PKTS, /* u32 counter, dump only */
+ TCA_FRER_RECOVER_RESETS, /* u32 counter, dump only */
+ __TCA_FRER_MAX,
+};
+#define TCA_FRER_MAX (__TCA_FRER_MAX - 1)
+
+/* Values of TCA_FRER_TAG_ACTION. */
+enum tc_frer_tag_action {
+ TCA_FRER_TAG_NULL, /* neither push nor pop */
+ TCA_FRER_TAG_PUSH, /* insert a tag into untagged frames */
+ TCA_FRER_TAG_POP, /* remove the tag after recovery */
+};
+
+/* Values of TCA_FRER_TAG_TYPE; the action currently accepts only RTAG. */
+enum tc_frer_tag_type {
+ TCA_FRER_TAG_RTAG,
+ TCA_FRER_TAG_HSR,
+ TCA_FRER_TAG_PRP,
+};
+
+/* Values of TCA_FRER_RECOVER_ALG. */
+enum tc_frer_rcvy_alg {
+ TCA_FRER_RCVY_VECTOR_ALG, /* history-bitmap based recovery */
+ TCA_FRER_RCVY_MATCH_ALG, /* compare against the last accepted number only */
+};
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1e8ab4749c6c..93e2687042c2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -997,6 +997,19 @@ config NET_ACT_GATE
To compile this code as a module, choose M here: the
module will be called act_gate.
+config NET_ACT_FRER
+ tristate "Frame frer tc action"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to support frame replication and elimination for
+ reliability, which is defined by IEEE 802.1CB.
+ This action allows adding a frer tag. It also allows removing
+ the frer tag and dropping duplicate frames.
+
+ If unsure, say N.
+ To compile this code as a module, choose M here: the
+ module will be called act_frer.
+
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dd14ef413fda..69e7e94be567 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
+obj-$(CONFIG_NET_ACT_FRER) += act_frer.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c
new file mode 100644
index 000000000000..6f8ec5782d3d
--- /dev/null
+++ b/net/sched/act_frer.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2021 NXP */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_frer.h>
+
+#define FRER_SEQ_SPACE		16	/* sequence numbers are 16 bits wide */
+#define FRER_RCVY_RESET_MSEC	100
+#define FRER_RCVY_INVALID_SEQ	0x10000	/* "no tag": outside the 16-bit range */
+#define FRER_RCVY_PASSED	0
+#define FRER_RCVY_DISCARDED	(-1)
+
+static unsigned int frer_net_id;
+static struct tc_action_ops act_frer_ops;
+
+/* Tag body that follows the ETH_P_RTAG EtherType in a tagged frame. */
+struct r_tag {
+ __be16 reserved;
+ __be16 sequence_nr; /* sequence number, network byte order */
+ __be16 encap_proto; /* EtherType of the frame before tagging */
+} __packed;
+
+/* Layout of a tagged frame without an in-band VLAN header. */
+struct rtag_ethhdr {
+ struct ethhdr ethhdr;
+ struct r_tag h_rtag;
+} __packed;
+
+/* Layout of a tagged frame whose tag follows an in-band 802.1Q header. */
+struct rtag_vlan_ethhdr {
+ struct vlan_ethhdr vlanhdr;
+ struct r_tag h_rtag;
+} __packed;
+
+/* Netlink validation policy for the attributes accepted by tcf_frer_init(). */
+static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = {
+	[TCA_FRER_PARMS]		=
+		NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)),
+	[TCA_FRER_TAG_TYPE]		= { .type = NLA_U8 },
+	[TCA_FRER_TAG_ACTION]		=
+		NLA_POLICY_MAX(NLA_U8, TCA_FRER_TAG_POP),
+	[TCA_FRER_RECOVER]		= { .type = NLA_U8 },
+	[TCA_FRER_RECOVER_ALG]		=
+		NLA_POLICY_MAX(NLA_U8, TCA_FRER_RCVY_MATCH_ALG),
+	/* seq_history is a u32 bitmap, so the window cannot exceed 32 bits */
+	[TCA_FRER_RECOVER_HISTORY_LEN]	= NLA_POLICY_MAX(NLA_U8, 32),
+	[TCA_FRER_RECOVER_RESET_TM]	= { .type = NLA_U64 },
+};
+
+static void frer_seq_recovery_reset(struct tcf_frer *frer_act);
+
+/*
+ * Recovery reset timer callback: reset the recovery state of the owning
+ * action and re-arm the timer one reset period after the current time.
+ */
+static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer)
+{
+	struct tcf_frer *frer_act;
+	ktime_t period;
+
+	frer_act = container_of(timer, struct tcf_frer, hrtimer);
+	frer_seq_recovery_reset(frer_act);
+
+	/* rcvy_reset_msec is in milliseconds, ktime_t in nanoseconds */
+	period = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
+	hrtimer_forward(timer, timer->base->get_time(), period);
+
+	return HRTIMER_RESTART;
+}
+
+/*
+ * Extract the R-TAG sequence number from @skb.
+ *
+ * The tag is looked for directly after the Ethernet header, or after an
+ * in-band 802.1Q header when one is present. Every header is checked to
+ * lie inside the linear data area before it is read.
+ *
+ * Returns the sequence number (0..0xFFFF), or FRER_RCVY_INVALID_SEQ when the
+ * frame carries no readable R-TAG.
+ */
+static int frer_rtag_decode(struct sk_buff *skb)
+{
+	unsigned char *mac = skb_mac_header(skb);
+	unsigned int linear = skb_tail_pointer(skb) - mac;
+	unsigned int tag_off = sizeof(struct ethhdr);
+	struct r_tag *rtag;
+	__be16 proto;
+
+	if (linear < tag_off)
+		return FRER_RCVY_INVALID_SEQ;
+
+	proto = ((struct ethhdr *)mac)->h_proto;
+	if (proto == htons(ETH_P_8021Q)) {
+		tag_off = sizeof(struct vlan_ethhdr);
+		if (linear < tag_off)
+			return FRER_RCVY_INVALID_SEQ;
+		proto = ((struct vlan_ethhdr *)mac)->h_vlan_encapsulated_proto;
+	}
+
+	if (proto != htons(ETH_P_RTAG) || linear < tag_off + sizeof(*rtag))
+		return FRER_RCVY_INVALID_SEQ;
+
+	rtag = (struct r_tag *)(mac + tag_off);
+
+	return ntohs(rtag->sequence_nr);
+}
+
+/*
+ * Advance the generation counter of @frer_act, wrapping to 0 once the last
+ * value of the sequence space has been used.
+ *
+ * Returns the counter value from before the advance.
+ */
+static int frer_seq_generation_alg(struct tcf_frer *frer_act)
+{
+	u32 last = frer_act->seq_space - 1;
+	u32 cur = frer_act->gen_seq_num;
+
+	frer_act->gen_seq_num = (cur >= last) ? 0 : cur + 1;
+
+	return cur;
+}
+
+/*
+ * Insert an R-TAG carrying frer_act->gen_seq_num into @skb.
+ *
+ * The MAC header (including an in-band 802.1Q header, if any) is moved
+ * sizeof(struct r_tag) bytes towards the head and the tag is written into
+ * the gap. The original EtherType is saved in the tag and replaced with
+ * ETH_P_RTAG.
+ *
+ * Returns 0 on success or -ENOMEM when no writable headroom is available.
+ */
+static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act)
+{
+	const int rtag_len = sizeof(struct r_tag);
+	unsigned char *old_mac, *new_mac;
+	struct r_tag *rtag;
+	__be16 *proto;
+	int head_len;
+	int pulled;
+
+	/*
+	 * skb->data may already sit past the MAC header, so the room needed
+	 * in front of skb->data is the tag plus the part already pulled.
+	 * Do this before touching the header: it may reallocate the head and
+	 * it gives us a private copy when the header is shared.
+	 */
+	pulled = skb->data - skb_mac_header(skb);
+	if (pulled < 0)
+		pulled = 0;
+	if (skb_cow_head(skb, rtag_len + pulled) < 0)
+		return -ENOMEM;
+
+	old_mac = skb_mac_header(skb);
+	if (((struct ethhdr *)old_mac)->h_proto == htons(ETH_P_8021Q))
+		head_len = sizeof(struct vlan_ethhdr);
+	else
+		head_len = sizeof(struct ethhdr);
+
+	skb_push(skb, rtag_len);
+	skb->mac_header -= rtag_len;
+
+	new_mac = skb_mac_header(skb);
+	memmove(new_mac, old_mac, head_len);
+
+	if (head_len == sizeof(struct vlan_ethhdr))
+		proto = &((struct vlan_ethhdr *)new_mac)->h_vlan_encapsulated_proto;
+	else
+		proto = &((struct ethhdr *)new_mac)->h_proto;
+
+	rtag = (struct r_tag *)(new_mac + head_len);
+	rtag->encap_proto = *proto;
+	rtag->sequence_nr = htons(frer_act->gen_seq_num);
+	rtag->reserved = 0;
+	*proto = htons(ETH_P_RTAG);
+
+	return 0;
+}
+
+/*
+ * Remove the R-TAG from @skb, if it carries one.
+ *
+ * The EtherType saved in the tag is restored and the MAC header (including
+ * an in-band 802.1Q header, if any) is moved forward over the tag. The
+ * payload itself is not moved, so offsets that are relative to skb->head
+ * (such as csum_start) stay valid and are left alone.
+ *
+ * The frame is left untouched when it has no tag, when the tag is not in
+ * the linear data area, or when the header cannot be made writable.
+ *
+ * NOTE(review): skb->csum is not adjusted for CHECKSUM_COMPLETE frames.
+ */
+static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act)
+{
+	const int rtag_len = sizeof(struct r_tag);
+	unsigned char *old_mac;
+	struct r_tag *rtag;
+	__be16 *proto;
+	int head_len;
+
+	/* The header is rewritten below: get a private copy if it is shared */
+	if (skb_cow_head(skb, 0) < 0)
+		return;
+
+	old_mac = skb_mac_header(skb);
+	if (((struct ethhdr *)old_mac)->h_proto == htons(ETH_P_8021Q)) {
+		head_len = sizeof(struct vlan_ethhdr);
+		proto = &((struct vlan_ethhdr *)old_mac)->h_vlan_encapsulated_proto;
+	} else {
+		head_len = sizeof(struct ethhdr);
+		proto = &((struct ethhdr *)old_mac)->h_proto;
+	}
+
+	if (*proto != htons(ETH_P_RTAG))
+		return;
+
+	if (skb_tail_pointer(skb) - old_mac < head_len + rtag_len)
+		return;
+
+	rtag = (struct r_tag *)(old_mac + head_len);
+	*proto = rtag->encap_proto;
+
+	/* Slide the header forward so that it overwrites the tag */
+	memmove(old_mac + rtag_len, old_mac, head_len);
+
+	/*
+	 * Mirror of the push in frer_rtag_encode(): data and the MAC header
+	 * advance together, which keeps skb->len and the distance between
+	 * skb->data and the MAC header consistent.
+	 */
+	skb_pull(skb, rtag_len);
+	skb->mac_header += rtag_len;
+}
+
+/* Callbacks for the R-TAG format; selected for TCA_FRER_TAG_RTAG. */
+static const struct tcf_frer_proto_ops rtag_ops = {
+ .encode = frer_rtag_encode,
+ .decode = frer_rtag_decode,
+ .tag_pop = frer_rtag_pop,
+};
+
+/*
+ * Create a frer action or replace the configuration of an existing one.
+ *
+ * All of TCA_FRER_PARMS, TAG_TYPE, TAG_ACTION, RECOVER, RECOVER_ALG,
+ * RECOVER_HISTORY_LEN and RECOVER_RESET_TM must be supplied, except when
+ * merely binding to an existing action. Only the R-TAG format is
+ * supported.
+ *
+ * Returns ACT_P_CREATED for a new action, 0 when an existing action was
+ * bound or replaced, or a negative errno.
+ */
+static int tcf_frer_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action **a,
+			 int ovr, int bind, bool rtnl_held,
+			 struct tcf_proto *tp, u32 flags,
+			 struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, frer_net_id);
+	struct nlattr *tb[TCA_FRER_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
+	struct tcf_frer *frer_act;
+	struct tc_frer *parm;
+	int ret = 0, err, index;
+	u8 tag_type, recover, rcvy_alg, history_len;
+	bool exists, arm_timer;
+	u64 reset_msec;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_FRER_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_FRER_PARMS]);
+	index = parm->index;
+
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+
+	exists = err;
+	if (exists && bind)
+		return 0;
+
+	if (!exists) {
+		ret = tcf_idr_create(tn, index, est, a,
+				     &act_frer_ops, bind, false, 0);
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
+			return ret;
+		}
+		ret = ACT_P_CREATED;
+
+		/*
+		 * Initialise the timer exactly once per action: a replace
+		 * must never re-initialise a timer that may be queued.
+		 */
+		frer_act = to_frer(*a);
+		hrtimer_init(&frer_act->hrtimer, CLOCK_TAI,
+			     HRTIMER_MODE_REL_SOFT);
+		frer_act->hrtimer.function = frer_hrtimer_func;
+	} else if (!ovr) {
+		tcf_idr_release(*a, bind);
+		return -EEXIST;
+	}
+
+	if (!tb[TCA_FRER_TAG_TYPE] || !tb[TCA_FRER_TAG_ACTION] ||
+	    !tb[TCA_FRER_RECOVER] || !tb[TCA_FRER_RECOVER_ALG] ||
+	    !tb[TCA_FRER_RECOVER_HISTORY_LEN] ||
+	    !tb[TCA_FRER_RECOVER_RESET_TM]) {
+		NL_SET_ERR_MSG(extack, "Missing mandatory frer attribute");
+		err = -EINVAL;
+		goto release_idr;
+	}
+
+	tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]);
+	recover = nla_get_u8(tb[TCA_FRER_RECOVER]);
+	rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]);
+	history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]);
+	reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]);
+
+	/* Validate before anything is committed so the error path is simple */
+	if (tag_type != TCA_FRER_TAG_RTAG) {
+		NL_SET_ERR_MSG(extack, "Only the R-TAG tag type is supported");
+		err = -EOPNOTSUPP;
+		goto release_idr;
+	}
+
+	/* seq_history is a u32 bitmap: the window must fit into 32 bits */
+	if (recover && rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG &&
+	    (history_len < 1 || history_len > 32)) {
+		NL_SET_ERR_MSG(extack, "History length must be in range 1..32");
+		err = -EINVAL;
+		goto release_idr;
+	}
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	frer_act = to_frer(*a);
+	arm_timer = recover && reset_msec;
+
+	spin_lock_bh(&frer_act->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+
+	frer_act->tag_type = tag_type;
+	frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]);
+	frer_act->recover = recover;
+	frer_act->rcvy_alg = rcvy_alg;
+	frer_act->rcvy_history_len = history_len;
+	frer_act->rcvy_reset_msec = reset_msec;
+
+	frer_act->gen_seq_num = 0;
+	frer_act->seq_space = 1 << FRER_SEQ_SPACE;
+	frer_act->rcvy_seq_num = 0;
+	frer_act->seq_history = 0xFFFFFFFF;
+	frer_act->rcvy_take_noseq = true;
+	frer_act->proto_ops = &rtag_ops;
+
+	if (arm_timer)
+		hrtimer_start(&frer_act->hrtimer,
+			      (ktime_t)(reset_msec * 1000000),
+			      HRTIMER_MODE_REL_SOFT);
+
+	spin_unlock_bh(&frer_act->tcf_lock);
+
+	/*
+	 * A replaced action may still have its old timer queued. Cancel it
+	 * outside the lock, because the timer callback takes tcf_lock.
+	 */
+	if (!arm_timer)
+		hrtimer_cancel(&frer_act->hrtimer);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
+	return ret;
+
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+/*
+ * Reset the recovery state of @frer_act under tcf_lock: count the reset,
+ * make the next tagged frame acceptable unconditionally and, for the vector
+ * algorithm, clear the history and re-anchor it at the last sequence number.
+ */
+static void frer_seq_recovery_reset(struct tcf_frer *frer_act)
+{
+	spin_lock(&frer_act->tcf_lock);
+
+	frer_act->cps_seq_rcvy_resets++;
+	frer_act->take_any = true;
+
+	if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) {
+		frer_act->seq_history = 0;
+		frer_act->rcvy_seq_num = frer_act->seq_space - 1;
+	}
+
+	spin_unlock(&frer_act->tcf_lock);
+}
+
+/*
+ * Shift the history bitmap of @frer_act by one position and record @value
+ * (non-zero: frame seen) in bit 0.
+ *
+ * A clear bit that is shifted out of the configured window is counted as a
+ * lost frame. The window is clamped to the width of the bitmap so that the
+ * bit test is always a valid shift, and skipped for an empty window.
+ */
+static void frer_shift_seq_history(int value, struct tcf_frer *frer_act)
+{
+	unsigned int width = sizeof(frer_act->seq_history) * BITS_PER_BYTE;
+	unsigned int window = frer_act->rcvy_history_len;
+
+	if (window > width)
+		window = width;
+
+	if (window && !(frer_act->seq_history & BIT(window - 1)))
+		frer_act->cps_seq_rcvy_lost_pkts++;
+
+	frer_act->seq_history <<= 1;
+
+	if (value)
+		frer_act->seq_history |= BIT(0);
+}
+
+/*
+ * Vector recovery: decide whether the frame carrying @sequence is new.
+ *
+ * seq_history is a bitmap anchored at rcvy_seq_num: bit k set means that
+ * sequence number (rcvy_seq_num - k) has already been accepted. Frames
+ * ahead of the anchor shift the bitmap, frames behind it are looked up.
+ *
+ * @sequence is FRER_RCVY_INVALID_SEQ for untagged frames. The reset timer
+ * is re-armed for every accepted frame and, when @individual is true, also
+ * for discarded tagged frames.
+ *
+ * Called with tcf_lock held by tcf_frer_act().
+ *
+ * Returns FRER_RCVY_PASSED to forward the frame, FRER_RCVY_DISCARDED to
+ * drop it.
+ */
+static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence,
+ bool individual)
+{
+ struct hrtimer *timer = &frer_act->hrtimer;
+ bool reset_timer = false;
+ ktime_t remaining_tm;
+ int delta, ret;
+
+ /* Untagged frame: pass or drop depending on rcvy_take_noseq */
+ if (sequence == FRER_RCVY_INVALID_SEQ) {
+ frer_act->cps_seq_rcvy_tagless_pkts++;
+ if (frer_act->rcvy_take_noseq) {
+ reset_timer = true;
+ ret = FRER_RCVY_PASSED;
+ goto out;
+ } else {
+ return FRER_RCVY_DISCARDED;
+ }
+ }
+
+ /* Signed distance from the anchor, reduced modulo the sequence space */
+ delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1);
+ /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */
+ if (delta & (frer_act->seq_space / 2))
+ delta -= frer_act->seq_space;
+
+ /* First tagged frame after a reset: accept it and re-anchor on it */
+ if (frer_act->take_any) {
+ frer_act->take_any = false;
+ frer_act->seq_history |= BIT(0);
+ frer_act->rcvy_seq_num = sequence;
+
+ reset_timer = true;
+ ret = FRER_RCVY_PASSED;
+ goto out;
+ }
+
+ if (delta >= frer_act->rcvy_history_len ||
+ delta <= -frer_act->rcvy_history_len) {
+ /* Packet is out-of-range. */
+ frer_act->cps_seq_rcvy_rogue_pkts++;
+
+ if (individual)
+ reset_timer = true;
+
+ ret = FRER_RCVY_DISCARDED;
+ goto out;
+ } else if (delta <= 0) {
+ /* Packet is old and in SequenceHistory. */
+ if (frer_act->seq_history & BIT(-delta)) {
+ if (individual)
+ reset_timer = true;
+
+ /* Packet has been seen. */
+ ret = FRER_RCVY_DISCARDED;
+ goto out;
+ } else {
+ /* Packet has not been seen. */
+ frer_act->seq_history |= BIT(-delta);
+ frer_act->cps_seq_rcvy_out_of_order_pkts++;
+
+ reset_timer = true;
+ ret = FRER_RCVY_PASSED;
+ goto out;
+ }
+ } else {
+ /* Packet is not too far ahead of the one we want. */
+ if (delta != 1)
+ frer_act->cps_seq_rcvy_out_of_order_pkts++;
+
+ /* Record the skipped numbers as unseen, then this one as seen */
+ while (--delta)
+ frer_shift_seq_history(0, frer_act);
+ frer_shift_seq_history(1, frer_act);
+ frer_act->rcvy_seq_num = sequence;
+
+ reset_timer = true;
+ ret = FRER_RCVY_PASSED;
+ goto out;
+ }
+out:
+ /* rcvy_reset_msec is in milliseconds, ktime_t in nanoseconds */
+ if (reset_timer && frer_act->rcvy_reset_msec) {
+ remaining_tm =
+ (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
+ hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT);
+ }
+
+ return ret;
+}
+
+/*
+ * Match recovery: a frame is a duplicate only when it carries the same
+ * sequence number as the last accepted frame.
+ *
+ * @sequence is FRER_RCVY_INVALID_SEQ for untagged frames, which are always
+ * passed. The distance to the last accepted number is reduced modulo the
+ * sequence space so that the wrap from the last number to 0 counts as an
+ * in-order successor. The reset timer is re-armed for every accepted tagged
+ * frame and, when @individual is true, also for discarded ones.
+ *
+ * Called with tcf_lock held by tcf_frer_act().
+ *
+ * Returns FRER_RCVY_PASSED to forward the frame, FRER_RCVY_DISCARDED to
+ * drop it.
+ */
+static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence,
+			       bool individual)
+{
+	bool reset_timer;
+	u32 delta;
+	int ret;
+
+	if (sequence == FRER_RCVY_INVALID_SEQ) {
+		frer_act->cps_seq_rcvy_tagless_pkts++;
+
+		return FRER_RCVY_PASSED;
+	}
+
+	if (frer_act->take_any) {
+		/* First tagged frame after a reset: accept unconditionally */
+		frer_act->take_any = false;
+		frer_act->rcvy_seq_num = sequence;
+		reset_timer = true;
+		ret = FRER_RCVY_PASSED;
+	} else {
+		delta = (sequence - frer_act->rcvy_seq_num) &
+			(frer_act->seq_space - 1);
+		if (delta) {
+			/* Packet has not been seen, accept it. */
+			if (delta != 1)
+				frer_act->cps_seq_rcvy_out_of_order_pkts++;
+
+			frer_act->rcvy_seq_num = sequence;
+			reset_timer = true;
+			ret = FRER_RCVY_PASSED;
+		} else {
+			/* Packet has been seen. Do not forward. */
+			reset_timer = individual;
+			ret = FRER_RCVY_DISCARDED;
+		}
+	}
+
+	/* rcvy_reset_msec is in milliseconds, ktime_t in nanoseconds */
+	if (reset_timer && frer_act->rcvy_reset_msec)
+		hrtimer_start(&frer_act->hrtimer,
+			      (ktime_t)(frer_act->rcvy_reset_msec * 1000000),
+			      HRTIMER_MODE_REL_SOFT);
+
+	return ret;
+}
+
+/*
+ * Datapath entry point of the frer action.
+ *
+ * With recovery enabled, the frame is run through the configured recovery
+ * algorithm; duplicates are dropped (TC_ACT_SHOT) and the tag is popped
+ * when tag_action is TCA_FRER_TAG_POP. Otherwise, when tag_action is
+ * TCA_FRER_TAG_PUSH and the frame is still untagged, a tag is pushed; the
+ * frame is dropped if that fails.
+ *
+ * Returns the configured control action, or TC_ACT_SHOT for dropped frames.
+ */
+static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	struct tcf_frer *frer_act = to_frer(a);
+	int ret, retval, sequence;
+	bool individual;
+
+	tcf_lastuse_update(&frer_act->tcf_tm);
+	tcf_action_update_bstats(&frer_act->common, skb);
+
+	retval = READ_ONCE(frer_act->tcf_action);
+
+	sequence = frer_act->proto_ops->decode(skb);
+
+	/* Recovery on ingress is treated as individual recovery */
+	individual = skb_at_tc_ingress(skb);
+
+	if (frer_act->recover) {
+		spin_lock(&frer_act->tcf_lock);
+
+		if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG)
+			ret = frer_vector_rcvy_alg(frer_act, sequence,
+						   individual);
+		else
+			ret = frer_match_rcvy_alg(frer_act, sequence,
+						  individual);
+		if (ret) {
+			frer_act->tcf_qstats.drops++;
+			retval = TC_ACT_SHOT;
+		}
+
+		if (frer_act->tag_action == TCA_FRER_TAG_POP)
+			frer_act->proto_ops->tag_pop(skb, frer_act);
+
+		spin_unlock(&frer_act->tcf_lock);
+
+		return retval;
+	}
+
+	if (frer_act->tag_action == TCA_FRER_TAG_PUSH &&
+	    sequence == FRER_RCVY_INVALID_SEQ) {
+		spin_lock(&frer_act->tcf_lock);
+
+		/*
+		 * encode() stamps the current gen_seq_num, so the counter is
+		 * advanced only after a successful push. A frame that could
+		 * not be tagged must not be forwarded untagged.
+		 */
+		if (frer_act->proto_ops->encode(skb, frer_act) < 0) {
+			frer_act->tcf_qstats.drops++;
+			retval = TC_ACT_SHOT;
+		} else {
+			frer_seq_generation_alg(frer_act);
+		}
+
+		spin_unlock(&frer_act->tcf_lock);
+	}
+
+	return retval;
+}
+
+/*
+ * Dump the configuration, recovery counters and timestamps of action @a
+ * into @skb.
+ *
+ * Returns skb->len on success; on failure the message is trimmed back to
+ * where it started and -1 is returned.
+ */
+static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
+{
+	struct tcf_frer *frer_act = to_frer(a);
+	unsigned char *start = skb_tail_pointer(skb);
+	struct tc_frer opt = {
+		.index = frer_act->tcf_index,
+		.refcnt = refcount_read(&frer_act->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind,
+	};
+	struct tcf_t tm;
+	bool failed;
+
+	spin_lock_bh(&frer_act->tcf_lock);
+	opt.action = frer_act->tcf_action;
+
+	/* Evaluated left to right; stops at the first attribute that fails */
+	failed = nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt) ||
+		 nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type) ||
+		 nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action) ||
+		 nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover) ||
+		 nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg) ||
+		 nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN,
+			    frer_act->rcvy_history_len) ||
+		 nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM,
+				   frer_act->rcvy_reset_msec, TCA_FRER_PAD) ||
+		 nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS,
+			     frer_act->cps_seq_rcvy_tagless_pkts) ||
+		 nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS,
+			     frer_act->cps_seq_rcvy_out_of_order_pkts) ||
+		 nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS,
+			     frer_act->cps_seq_rcvy_rogue_pkts) ||
+		 nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS,
+			     frer_act->cps_seq_rcvy_lost_pkts) ||
+		 nla_put_u32(skb, TCA_FRER_RECOVER_RESETS,
+			     frer_act->cps_seq_rcvy_resets);
+
+	if (!failed) {
+		tcf_tm_dump(&tm, &frer_act->tcf_tm);
+		failed = nla_put_64bit(skb, TCA_FRER_TM, sizeof(tm),
+				       &tm, TCA_FRER_PAD);
+	}
+
+	spin_unlock_bh(&frer_act->tcf_lock);
+
+	if (failed) {
+		nlmsg_trim(skb, start);
+		return -1;
+	}
+
+	return skb->len;
+}
+
+/* Walk the frer actions of @net by delegating to the generic walker. */
+static int tcf_frer_walker(struct net *net, struct sk_buff *skb,
+			   struct netlink_callback *cb, int type,
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
+{
+	return tcf_generic_walker(net_generic(net, frer_net_id), skb, cb,
+				  type, ops, extack);
+}
+
+/* Look up the frer action with the given @index in @net. */
+static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index)
+{
+	return tcf_idr_search(net_generic(net, frer_net_id), a, index);
+}
+
+/*
+ * Fold externally reported statistics into action @a and move its last-use
+ * timestamp forward if @lastuse is more recent.
+ */
+static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+				  u64 drops, u64 lastuse, bool hw)
+{
+	struct tcf_t *tm = &to_frer(a)->tcf_tm;
+
+	tcf_action_update_stats(a, bytes, packets, drops, hw);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
+/*
+ * Release callback: stop the recovery reset timer before the action is
+ * freed.
+ *
+ * The timer's callback pointer is set right after hrtimer_init(), so a
+ * non-NULL pointer is used as the "timer was initialised" marker. That is
+ * independent of the current recover/reset-time configuration, which may
+ * have been replaced since the timer was armed.
+ * NOTE(review): relies on the action being zero-allocated; confirm against
+ * tcf_idr_create().
+ */
+static void tcf_frer_cleanup(struct tc_action *a)
+{
+	struct tcf_frer *frer_act = to_frer(a);
+
+	if (frer_act->hrtimer.function)
+		hrtimer_cancel(&frer_act->hrtimer);
+}
+
+/*
+ * Upper bound of the netlink space needed by tcf_frer_dump(): one entry per
+ * attribute that the dump callback emits.
+ */
+static size_t tcf_frer_get_fill_size(const struct tc_action *act)
+{
+	return nla_total_size(sizeof(struct tc_frer))	/* TCA_FRER_PARMS */
+		/* TAG_TYPE, TAG_ACTION, RECOVER, RECOVER_ALG, HISTORY_LEN */
+		+ 5 * nla_total_size(sizeof(u8))
+		/* TCA_FRER_RECOVER_RESET_TM */
+		+ nla_total_size_64bit(sizeof(u64))
+		/* tagless, out-of-order, rogue, lost and reset counters */
+		+ 5 * nla_total_size(sizeof(u32))
+		/* TCA_FRER_TM */
+		+ nla_total_size_64bit(sizeof(struct tcf_t));
+}
+
+/* tc action operations registered for the "frer" action kind. */
+static struct tc_action_ops act_frer_ops = {
+ .kind = "frer",
+ .id = TCA_ID_FRER,
+ .owner = THIS_MODULE,
+ .act = tcf_frer_act,
+ .init = tcf_frer_init,
+ .cleanup = tcf_frer_cleanup,
+ .dump = tcf_frer_dump,
+ .walk = tcf_frer_walker,
+ .stats_update = tcf_frer_stats_update,
+ .get_fill_size = tcf_frer_get_fill_size,
+ .lookup = tcf_frer_search,
+ .size = sizeof(struct tcf_frer),
+};
+
+/* Set up the per-namespace frer action table of @net. */
+static __net_init int frer_init_net(struct net *net)
+{
+	return tc_action_net_init(net, net_generic(net, frer_net_id),
+				  &act_frer_ops);
+}
+
+/* Tear down the frer action tables of every namespace on @net_list. */
+static void __net_exit frer_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, frer_net_id);
+}
+
+/* Per-network-namespace hooks; each namespace gets its own tc_action_net. */
+static struct pernet_operations frer_net_ops = {
+ .init = frer_init_net,
+ .exit_batch = frer_exit_net,
+ .id = &frer_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+/* Module entry point: register the frer action and its per-net hooks. */
+static int __init frer_init_module(void)
+{
+	int err;
+
+	err = tcf_register_action(&act_frer_ops, &frer_net_ops);
+
+	return err;
+}
+
+/* Module exit point: unregister the frer action and its per-net hooks. */
+static void __exit frer_cleanup_module(void)
+{
+ tcf_unregister_action(&act_frer_ops, &frer_net_ops);
+}
+
+module_init(frer_init_module);
+module_exit(frer_cleanup_module);
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2ef8f5a6205a..353184987427 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,6 +39,7 @@
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
#include <net/tc_act/tc_gate.h>
+#include <net/tc_act/tc_frer.h>
#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
err = tcf_gate_get_entries(entry, act);
if (err)
goto err_out_locked;
+ } else if (is_tcf_frer(act)) {
+ entry->id = FLOW_ACTION_FRER;
+ entry->frer.tag_type = to_frer(act)->tag_type;
+ entry->frer.tag_action = to_frer(act)->tag_action;
+ entry->frer.recover = to_frer(act)->recover;
+ entry->frer.rcvy_alg = to_frer(act)->rcvy_alg;
+ entry->frer.rcvy_history_len =
+ to_frer(act)->rcvy_history_len;
+ entry->frer.rcvy_reset_msec =
+ to_frer(act)->rcvy_reset_msec;
} else {
err = -EOPNOTSUPP;
goto err_out_locked;
--
2.17.1
Hi,
Xiaoliang Yang <[email protected]> writes:
> This patch introduce a frer action to implement frame replication and
> elimination for reliability, which is defined in IEEE P802.1CB.
>
An action seems, to me, a bit too limiting/fine grained for a frame
replication and elimination feature.
At least I want to hear the reasons that the current hsr/prp support
cannot be extended to support one more tag format/protocol.
And the current name for the spec is IEEE 802.1CB-2017.
> There are two modes for frer action: generate and push the tag, recover
> and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
> patch only supports RTAG now.
>
> User can push the tag on egress port of the talker device, recover and
> pop the tag on ingress port of the listener device. When it's a relay
> system, push the tag on ingress port, or set individual recover on
> ingress port. Set the sequence recover on egress port.
>
> Use action "mirred" to do split function, and use "vlan-modify" to do
> active stream identification function on relay system.
>
> Below is the setting example in user space:
> push rtag on relay system:
> > tc qdisc add dev swp0 clsact
> > tc filter add dev swp0 ingress protocol 802.1Q flower \
> skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
> action frer rtag tag-action tag-push
>
> split stream:
> > tc filter add dev swp0 ingress protocol 802.1Q flower \
> skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
> action mirred egress mirror dev swp1
>
> individual recover:
> > tc filter add dev swp0 ingress protocol 802.1Q flower
> skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
> action frer rtag recover \
> alg vector history-length 32 reset-time 10000
>
> recover and pop rtag:
> > tc filter add dev swp0 egress protocol 802.1Q flower
> skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
> action frer rtag recover \
> alg vector history-length 32 reset-time 10000 \
> tag-action tag-pop
>
> Signed-off-by: Xiaoliang Yang <[email protected]>
> ---
> include/net/flow_offload.h | 9 +
> include/net/tc_act/tc_frer.h | 52 +++
> include/uapi/linux/if_ether.h | 1 +
> include/uapi/linux/pkt_cls.h | 1 +
> include/uapi/linux/tc_act/tc_frer.h | 50 ++
> net/sched/Kconfig | 13 +
> net/sched/Makefile | 1 +
> net/sched/act_frer.c | 695 ++++++++++++++++++++++++++++
> net/sched/cls_api.c | 11 +
> 9 files changed, 833 insertions(+)
> create mode 100644 include/net/tc_act/tc_frer.h
> create mode 100644 include/uapi/linux/tc_act/tc_frer.h
> create mode 100644 net/sched/act_frer.c
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 3961461d9c8b..cfa9b69cec69 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -148,6 +148,7 @@ enum flow_action_id {
> FLOW_ACTION_MPLS_MANGLE,
> FLOW_ACTION_GATE,
> FLOW_ACTION_PPPOE_PUSH,
> + FLOW_ACTION_FRER,
> NUM_FLOW_ACTIONS,
> };
>
> @@ -278,6 +279,14 @@ struct flow_action_entry {
> struct { /* FLOW_ACTION_PPPOE_PUSH */
> u16 sid;
> } pppoe;
> + struct {
> + u8 tag_type;
> + u8 tag_action;
> + u8 recover;
> + u8 rcvy_alg;
> + u8 rcvy_history_len;
> + u8 rcvy_reset_msec;
> + } frer;
> };
> struct flow_action_cookie *cookie; /* user defined action cookie */
> };
> diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h
> new file mode 100644
> index 000000000000..b2ad2b2a3fe1
> --- /dev/null
> +++ b/include/net/tc_act/tc_frer.h
> @@ -0,0 +1,52 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/* Copyright 2021 NXP */
> +
> +#ifndef __NET_TC_FRER_H
> +#define __NET_TC_FRER_H
> +
> +#include <net/act_api.h>
> +#include <linux/tc_act/tc_frer.h>
> +
> +struct tcf_frer;
> +
> +struct tcf_frer_proto_ops {
> + int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act);
> + int (*decode)(struct sk_buff *skb);
> + void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act);
> +};
> +
> +struct tcf_frer {
> + struct tc_action common;
> + u8 tag_type;
> + u8 tag_action;
> + u8 recover;
> + u8 rcvy_alg;
> + u8 rcvy_history_len;
> + u64 rcvy_reset_msec;
> + u32 gen_seq_num;
> + u32 rcvy_seq_num;
> + u64 seq_space;
> + u32 seq_history;
> + bool take_any;
> + bool rcvy_take_noseq;
> + u32 cps_seq_rcvy_lost_pkts;
> + u32 cps_seq_rcvy_tagless_pkts;
> + u32 cps_seq_rcvy_out_of_order_pkts;
> + u32 cps_seq_rcvy_rogue_pkts;
> + u32 cps_seq_rcvy_resets;
> + struct hrtimer hrtimer;
> + const struct tcf_frer_proto_ops *proto_ops;
> +};
> +
> +#define to_frer(a) ((struct tcf_frer *)a)
> +
> +static inline bool is_tcf_frer(const struct tc_action *a)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> + if (a->ops && a->ops->id == TCA_ID_FRER)
> + return true;
> +#endif
> + return false;
> +}
> +
> +#endif
> diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
> index 5f589c7a8382..812aa75f7f23 100644
> --- a/include/uapi/linux/if_ether.h
> +++ b/include/uapi/linux/if_ether.h
> @@ -114,6 +114,7 @@
> #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
> #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
> #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
> +#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */
> #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
>
> #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value
> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
> index 6836ccb9c45d..a3fc0c478a65 100644
> --- a/include/uapi/linux/pkt_cls.h
> +++ b/include/uapi/linux/pkt_cls.h
> @@ -136,6 +136,7 @@ enum tca_id {
> TCA_ID_MPLS,
> TCA_ID_CT,
> TCA_ID_GATE,
> + TCA_ID_FRER,
> /* other actions go here */
> __TCA_ID_MAX = 255
> };
> diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h
> new file mode 100644
> index 000000000000..cd86274483e7
> --- /dev/null
> +++ b/include/uapi/linux/tc_act/tc_frer.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/* Copyright 2021 NXP */
> +
> +#ifndef __LINUX_TC_FRER_H
> +#define __LINUX_TC_FRER_H
> +
> +#include <linux/pkt_cls.h>
> +
> +struct tc_frer {
> + tc_gen;
> +};
> +
> +enum {
> + TCA_FRER_UNSPEC,
> + TCA_FRER_TM,
> + TCA_FRER_PARMS,
> + TCA_FRER_PAD,
> + TCA_FRER_TAG_TYPE,
> + TCA_FRER_TAG_ACTION,
> + TCA_FRER_RECOVER,
> + TCA_FRER_RECOVER_ALG,
> + TCA_FRER_RECOVER_HISTORY_LEN,
> + TCA_FRER_RECOVER_RESET_TM,
> + TCA_FRER_RECOVER_TAGLESS_PKTS,
> + TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS,
> + TCA_FRER_RECOVER_ROGUE_PKTS,
> + TCA_FRER_RECOVER_LOST_PKTS,
> + TCA_FRER_RECOVER_RESETS,
> + __TCA_FRER_MAX,
> +};
> +#define TCA_FRER_MAX (__TCA_FRER_MAX - 1)
> +
> +enum tc_frer_tag_action {
> + TCA_FRER_TAG_NULL,
> + TCA_FRER_TAG_PUSH,
> + TCA_FRER_TAG_POP,
> +};
> +
> +enum tc_frer_tag_type {
> + TCA_FRER_TAG_RTAG,
> + TCA_FRER_TAG_HSR,
> + TCA_FRER_TAG_PRP,
> +};
> +
> +enum tc_frer_rcvy_alg {
> + TCA_FRER_RCVY_VECTOR_ALG,
> + TCA_FRER_RCVY_MATCH_ALG,
> +};
> +
> +#endif
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index 1e8ab4749c6c..93e2687042c2 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -997,6 +997,19 @@ config NET_ACT_GATE
> To compile this code as a module, choose M here: the
> module will be called act_gate.
>
> +config NET_ACT_FRER
> + tristate "Frame frer tc action"
> + depends on NET_CLS_ACT
> + help
> + Say Y here to support frame replication and elimination for
> + reliability, which is defined by IEEE 802.1CB.
> + This action allow to add a frer tag. It also allow to remove
> + the frer tag and drop repeat frames.
> +
> + If unsure, say N.
> + To compile this code as a module, choose M here: the
> + module will be called act_frer.
> +
> config NET_IFE_SKBMARK
> tristate "Support to encoding decoding skb mark on IFE action"
> depends on NET_ACT_IFE
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index dd14ef413fda..69e7e94be567 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
> obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
> obj-$(CONFIG_NET_ACT_CT) += act_ct.o
> obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
> +obj-$(CONFIG_NET_ACT_FRER) += act_frer.o
> obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
> obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
> obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
> diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c
> new file mode 100644
> index 000000000000..6f8ec5782d3d
> --- /dev/null
> +++ b/net/sched/act_frer.c
> @@ -0,0 +1,695 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Copyright 2021 NXP */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <net/act_api.h>
> +#include <net/netlink.h>
> +#include <net/pkt_cls.h>
> +#include <net/tc_act/tc_frer.h>
> +
> +#define FRER_SEQ_SPACE 16
> +#define FRER_RCVY_RESET_MSEC 100
> +#define FRER_RCVY_INVALID_SEQ 0x100
> +#define FRER_RCVY_PASSED 0
> +#define FRER_RCVY_DISCARDED -1
> +
> +static unsigned int frer_net_id;
> +static struct tc_action_ops act_frer_ops;
> +
> +struct r_tag {
> + __be16 reserved;
> + __be16 sequence_nr;
> + __be16 encap_proto;
> +} __packed;
> +
> +struct rtag_ethhdr {
> + struct ethhdr ethhdr;
> + struct r_tag h_rtag;
> +} __packed;
> +
> +struct rtag_vlan_ethhdr {
> + struct vlan_ethhdr vlanhdr;
> + struct r_tag h_rtag;
> +} __packed;
> +
> +static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = {
> + [TCA_FRER_PARMS] =
> + NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)),
> + [TCA_FRER_TAG_TYPE] = { .type = NLA_U8 },
> + [TCA_FRER_TAG_ACTION] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_ALG] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_HISTORY_LEN] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_RESET_TM] = { .type = NLA_U64 },
> +};
> +
> +static void frer_seq_recovery_reset(struct tcf_frer *frer_act);
> +
> +static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer)
> +{
> + struct tcf_frer *frer_act = container_of(timer, struct tcf_frer,
> + hrtimer);
> + ktime_t remaining_tm;
> +
> + frer_seq_recovery_reset(frer_act);
> +
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> +
> + hrtimer_forward(timer, timer->base->get_time(), remaining_tm);
> +
> + return HRTIMER_RESTART;
> +}
> +
> +static int frer_rtag_decode(struct sk_buff *skb)
> +{
> + struct rtag_vlan_ethhdr *rtag_vlan_hdr;
> + struct rtag_ethhdr *rtag_hdr;
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + bool is_vlan;
> + u16 sequence;
> + u16 proto;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> + proto = ethhdr->h_proto;
> + is_vlan = false;
> +
> + if (proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + proto = vlanhdr->h_vlan_encapsulated_proto;
> + is_vlan = true;
> + }
> +
> + if (proto != htons(ETH_P_RTAG))
> + return FRER_RCVY_INVALID_SEQ;
> +
> + if (is_vlan) {
> + rtag_vlan_hdr = (struct rtag_vlan_ethhdr *)ethhdr;
> + rtag = &rtag_vlan_hdr->h_rtag;
> + } else {
> + rtag_hdr = (struct rtag_ethhdr *)ethhdr;
> + rtag = &rtag_hdr->h_rtag;
> + }
> +
> + sequence = ntohs(rtag->sequence_nr);
> +
> + return sequence;
> +}
> +
> +static int frer_seq_generation_alg(struct tcf_frer *frer_act)
> +{
> + u32 gen_seq_max = frer_act->seq_space - 1;
> + u32 gen_seq_num = frer_act->gen_seq_num;
> + int sequence_number;
> +
> + sequence_number = gen_seq_num;
> +
> + if (gen_seq_num >= gen_seq_max)
> + gen_seq_num = 0;
> + else
> + gen_seq_num++;
> +
> + frer_act->gen_seq_num = gen_seq_num;
> +
> + return sequence_number;
> +}
> +
> +static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act)
> +{
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + int rtag_len, head_len;
> + unsigned char *dst, *src, *p;
> + __be16 *proto, proto_val;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> + if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + p = (unsigned char *)(vlanhdr + 1);
> + proto = &vlanhdr->h_vlan_encapsulated_proto;
> + } else {
> + p = (unsigned char *)(ethhdr + 1);
> + proto = ðhdr->h_proto;
> + }
> +
> + proto_val = *proto;
> + *proto = htons(ETH_P_RTAG);
> +
> + src = skb_mac_header(skb);
> + head_len = p - src;
> +
> + rtag_len = sizeof(struct r_tag);
> + if (skb_cow_head(skb, rtag_len) < 0)
> + return -ENOMEM;
> +
> + skb_push(skb, rtag_len);
> + skb->mac_header -= rtag_len;
> +
> + dst = skb_mac_header(skb);
> + memmove(dst, src, head_len);
> +
> + rtag = (struct r_tag *)(dst + head_len);
> + rtag->encap_proto = proto_val;
> + rtag->sequence_nr = htons(frer_act->gen_seq_num);
> + rtag->reserved = 0;
> +
> + return 0;
> +}
> +
> +static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act)
> +{
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + int rtag_len, head_len;
> + unsigned char *dst, *src, *p;
> + __be16 *proto;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> +
> + if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + p = (unsigned char *)(vlanhdr + 1);
> + proto = &vlanhdr->h_vlan_encapsulated_proto;
> + } else {
> + p = (unsigned char *)(ethhdr + 1);
> + proto = ðhdr->h_proto;
> + }
> +
> + if (*proto != htons(ETH_P_RTAG))
> + return;
> +
> + rtag = (struct r_tag *)p;
> + rtag_len = sizeof(struct r_tag);
> + *proto = rtag->encap_proto;
> +
> + src = skb_mac_header(skb);
> + head_len = p - src;
> +
> + skb->data = skb_mac_header(skb);
> + skb_pull(skb, rtag_len);
> +
> + skb_reset_mac_header(skb);
> +
> + if (skb->ip_summed == CHECKSUM_PARTIAL)
> + skb->csum_start += rtag_len;
> +
> + dst = skb_mac_header(skb);
> + memmove(dst, src, head_len);
> +}
> +
> +static const struct tcf_frer_proto_ops rtag_ops = {
> + .encode = frer_rtag_encode,
> + .decode = frer_rtag_decode,
> + .tag_pop = frer_rtag_pop,
> +};
> +
> +static int tcf_frer_init(struct net *net, struct nlattr *nla,
> + struct nlattr *est, struct tc_action **a,
> + int ovr, int bind, bool rtnl_held,
> + struct tcf_proto *tp, u32 flags,
> + struct netlink_ext_ack *extack)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> + struct nlattr *tb[TCA_FRER_MAX + 1];
> + struct tcf_chain *goto_ch = NULL;
> + struct tcf_frer *frer_act;
> + struct tc_frer *parm;
> + int ret = 0, err, index;
> + ktime_t remaining_tm;
> +
> + if (!nla)
> + return -EINVAL;
> +
> + err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack);
> + if (err < 0)
> + return err;
> +
> + if (!tb[TCA_FRER_PARMS])
> + return -EINVAL;
> +
> + parm = nla_data(tb[TCA_FRER_PARMS]);
> + index = parm->index;
> +
> + err = tcf_idr_check_alloc(tn, &index, a, bind);
> + if (err < 0)
> + return err;
> +
> + if (err && bind)
> + return 0;
> +
> + if (!err) {
> + ret = tcf_idr_create(tn, index, est, a,
> + &act_frer_ops, bind, false, 0);
> +
> + if (ret) {
> + tcf_idr_cleanup(tn, index);
> + return ret;
> + }
> + } else if (!ovr) {
> + tcf_idr_release(*a, bind);
> + return -EEXIST;
> + }
> +
> + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
> + if (err < 0)
> + goto release_idr;
> +
> + frer_act = to_frer(*a);
> +
> + spin_lock_bh(&frer_act->tcf_lock);
> + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> +
> + frer_act->tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]);
> + frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]);
> + frer_act->recover = nla_get_u8(tb[TCA_FRER_RECOVER]);
> + frer_act->rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]);
> + frer_act->rcvy_history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]);
> + frer_act->rcvy_reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]);
> +
> + frer_act->gen_seq_num = 0;
> + frer_act->seq_space = 1 << FRER_SEQ_SPACE;
> + frer_act->rcvy_seq_num = 0;
> + frer_act->seq_history = 0xFFFFFFFF;
> + frer_act->rcvy_take_noseq = true;
> +
> + switch (frer_act->tag_type) {
> + case TCA_FRER_TAG_RTAG:
> + frer_act->proto_ops = &rtag_ops;
> + break;
> + case TCA_FRER_TAG_HSR:
> + case TCA_FRER_TAG_PRP:
> + default:
> + spin_unlock_bh(&frer_act->tcf_lock);
> + return -EOPNOTSUPP;
> + }
> +
> + if (frer_act->recover && frer_act->rcvy_reset_msec) {
> + hrtimer_init(&frer_act->hrtimer, CLOCK_TAI,
> + HRTIMER_MODE_REL_SOFT);
> + frer_act->hrtimer.function = frer_hrtimer_func;
> +
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(&frer_act->hrtimer, remaining_tm,
> + HRTIMER_MODE_REL_SOFT);
> + }
> +
> + spin_unlock_bh(&frer_act->tcf_lock);
> +
> + if (goto_ch)
> + tcf_chain_put_by_act(goto_ch);
> +
> + return ret;
> +
> +release_idr:
> + tcf_idr_release(*a, bind);
> + return err;
> +}
> +
> +static void frer_seq_recovery_reset(struct tcf_frer *frer_act)
> +{
> + spin_lock(&frer_act->tcf_lock);
> + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) {
> + frer_act->rcvy_seq_num = frer_act->seq_space - 1;
> + frer_act->seq_history = 0;
> + }
> + frer_act->cps_seq_rcvy_resets++;
> + frer_act->take_any = true;
> + spin_unlock(&frer_act->tcf_lock);
> +}
> +
> +static void frer_shift_seq_history(int value, struct tcf_frer *frer_act)
> +{
> + int history_len = frer_act->rcvy_history_len;
> +
> + if ((frer_act->seq_history & BIT(history_len - 1)) == 0)
> + frer_act->cps_seq_rcvy_lost_pkts++;
> +
> + frer_act->seq_history <<= 1;
> +
> + if (value)
> + frer_act->seq_history |= BIT(0);
> +}
> +
> +static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence,
> + bool individual)
> +{
> + struct hrtimer *timer = &frer_act->hrtimer;
> + bool reset_timer = false;
> + ktime_t remaining_tm;
> + int delta, ret;
> +
> + if (sequence == FRER_RCVY_INVALID_SEQ) {
> + frer_act->cps_seq_rcvy_tagless_pkts++;
> + if (frer_act->rcvy_take_noseq) {
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + } else {
> + return FRER_RCVY_DISCARDED;
> + }
> + }
> +
> + delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1);
> + /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */
> + if (delta & (frer_act->seq_space / 2))
> + delta -= frer_act->seq_space;
> +
> + if (frer_act->take_any) {
> + frer_act->take_any = false;
> + frer_act->seq_history |= BIT(0);
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +
> + if (delta >= frer_act->rcvy_history_len ||
> + delta <= -frer_act->rcvy_history_len) {
> + /* Packet is out-of-range. */
> + frer_act->cps_seq_rcvy_rogue_pkts++;
> +
> + if (individual)
> + reset_timer = true;
> +
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + } else if (delta <= 0) {
> + /* Packet is old and in SequenceHistory. */
> + if (frer_act->seq_history & BIT(-delta)) {
> + if (individual)
> + reset_timer = true;
> +
> + /* Packet has been seen. */
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + } else {
> + /* Packet has not been seen. */
> + frer_act->seq_history |= BIT(-delta);
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> + } else {
> + /* Packet is not too far ahead of the one we want. */
> + if (delta != 1)
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + while (--delta)
> + frer_shift_seq_history(0, frer_act);
> + frer_shift_seq_history(1, frer_act);
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +out:
> + if (reset_timer && frer_act->rcvy_reset_msec) {
> + remaining_tm =
> + (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT);
> + }
> +
> + return ret;
> +}
> +
> +static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence,
> + bool individual)
> +{
> + struct hrtimer *timer = &frer_act->hrtimer;
> + bool reset_timer = false;
> + ktime_t remaining_tm;
> + int delta, ret;
> +
> + if (sequence == FRER_RCVY_INVALID_SEQ) {
> + frer_act->cps_seq_rcvy_tagless_pkts++;
> +
> + return FRER_RCVY_PASSED;
> + }
> +
> + if (frer_act->take_any) {
> + frer_act->take_any = false;
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +
> + delta = sequence - frer_act->rcvy_seq_num;
> + if (delta) {
> + /* Packet has not been seen, accept it. */
> + if (delta != 1)
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + } else {
> + if (individual)
> + reset_timer = true;
> +
> + /* Packet has been seen. Do not forward. */
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + }
> +
> +out:
> + if (reset_timer && frer_act->rcvy_reset_msec) {
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT);
> + }
> +
> + return ret;
> +}
> +
> +static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a,
> + struct tcf_result *res)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> + bool ingress, individual;
> + int ret, retval;
> + int sequence;
> +
> + tcf_lastuse_update(&frer_act->tcf_tm);
> + tcf_action_update_bstats(&frer_act->common, skb);
> +
> + retval = READ_ONCE(frer_act->tcf_action);
> +
> + sequence = frer_act->proto_ops->decode(skb);
> +
> + ingress = skb_at_tc_ingress(skb);
> + individual = ingress;
> +
> + if (frer_act->recover) {
> + spin_lock(&frer_act->tcf_lock);
> +
> + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG)
> + ret = frer_vector_rcvy_alg(frer_act, sequence,
> + individual);
> + else
> + ret = frer_match_rcvy_alg(frer_act, sequence,
> + individual);
> + if (ret) {
> + frer_act->tcf_qstats.drops++;
> + retval = TC_ACT_SHOT;
> + }
> +
> + if (frer_act->tag_action == TCA_FRER_TAG_POP)
> + frer_act->proto_ops->tag_pop(skb, frer_act);
> +
> + spin_unlock(&frer_act->tcf_lock);
> +
> + return retval;
> + }
> +
> + if (frer_act->tag_action == TCA_FRER_TAG_PUSH &&
> + sequence == FRER_RCVY_INVALID_SEQ) {
> + spin_lock(&frer_act->tcf_lock);
> +
> + frer_seq_generation_alg(frer_act);
> +
> + frer_act->proto_ops->encode(skb, frer_act);
> +
> + spin_unlock(&frer_act->tcf_lock);
> + }
> +
> + return retval;
> +}
> +
> +static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a,
> + int bind, int ref)
> +{
> + unsigned char *b = skb_tail_pointer(skb);
> + struct tcf_frer *frer_act = to_frer(a);
> + struct tc_frer opt = {
> + .index = frer_act->tcf_index,
> + .refcnt = refcount_read(&frer_act->tcf_refcnt) - ref,
> + .bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind,
> + };
> + struct tcf_t t;
> +
> + spin_lock_bh(&frer_act->tcf_lock);
> + opt.action = frer_act->tcf_action;
> +
> + if (nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN,
> + frer_act->rcvy_history_len))
> + goto nla_put_failure;
> +
> + if (nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM,
> + frer_act->rcvy_reset_msec, TCA_FRER_PAD))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS,
> + frer_act->cps_seq_rcvy_tagless_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS,
> + frer_act->cps_seq_rcvy_out_of_order_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS,
> + frer_act->cps_seq_rcvy_rogue_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS,
> + frer_act->cps_seq_rcvy_lost_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_RESETS,
> + frer_act->cps_seq_rcvy_resets))
> + goto nla_put_failure;
> +
> + tcf_tm_dump(&t, &frer_act->tcf_tm);
> + if (nla_put_64bit(skb, TCA_FRER_TM, sizeof(t),
> + &t, TCA_FRER_PAD))
> + goto nla_put_failure;
> + spin_unlock_bh(&frer_act->tcf_lock);
> +
> + return skb->len;
> +
> +nla_put_failure:
> + spin_unlock_bh(&frer_act->tcf_lock);
> + nlmsg_trim(skb, b);
> +
> + return -1;
> +}
> +
> +static int tcf_frer_walker(struct net *net, struct sk_buff *skb,
> + struct netlink_callback *cb, int type,
> + const struct tc_action_ops *ops,
> + struct netlink_ext_ack *extack)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tcf_generic_walker(tn, skb, cb, type, ops, extack);
> +}
> +
> +static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tcf_idr_search(tn, a, index);
> +}
> +
> +static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets,
> + u64 drops, u64 lastuse, bool hw)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> + struct tcf_t *tm = &frer_act->tcf_tm;
> +
> + tcf_action_update_stats(a, bytes, packets, drops, hw);
> + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
> +}
> +
> +static void tcf_frer_cleanup(struct tc_action *a)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> +
> + if (frer_act->rcvy_reset_msec)
> + hrtimer_cancel(&frer_act->hrtimer);
> +}
> +
> +static size_t tcf_frer_get_fill_size(const struct tc_action *act)
> +{
> + return nla_total_size(sizeof(struct tc_frer));
> +}
> +
> +static struct tc_action_ops act_frer_ops = {
> + .kind = "frer",
> + .id = TCA_ID_FRER,
> + .owner = THIS_MODULE,
> + .act = tcf_frer_act,
> + .init = tcf_frer_init,
> + .cleanup = tcf_frer_cleanup,
> + .dump = tcf_frer_dump,
> + .walk = tcf_frer_walker,
> + .stats_update = tcf_frer_stats_update,
> + .get_fill_size = tcf_frer_get_fill_size,
> + .lookup = tcf_frer_search,
> + .size = sizeof(struct tcf_frer),
> +};
> +
> +static __net_init int frer_init_net(struct net *net)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tc_action_net_init(net, tn, &act_frer_ops);
> +}
> +
> +static void __net_exit frer_exit_net(struct list_head *net_list)
> +{
> + tc_action_net_exit(net_list, frer_net_id);
> +};
> +
> +static struct pernet_operations frer_net_ops = {
> + .init = frer_init_net,
> + .exit_batch = frer_exit_net,
> + .id = &frer_net_id,
> + .size = sizeof(struct tc_action_net),
> +};
> +
> +static int __init frer_init_module(void)
> +{
> + return tcf_register_action(&act_frer_ops, &frer_net_ops);
> +}
> +
> +static void __exit frer_cleanup_module(void)
> +{
> + tcf_unregister_action(&act_frer_ops, &frer_net_ops);
> +}
> +
> +module_init(frer_init_module);
> +module_exit(frer_cleanup_module);
> +MODULE_LICENSE("GPL v2");
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 2ef8f5a6205a..353184987427 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -39,6 +39,7 @@
> #include <net/tc_act/tc_ct.h>
> #include <net/tc_act/tc_mpls.h>
> #include <net/tc_act/tc_gate.h>
> +#include <net/tc_act/tc_frer.h>
> #include <net/flow_offload.h>
>
> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
> @@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> err = tcf_gate_get_entries(entry, act);
> if (err)
> goto err_out_locked;
> + } else if (is_tcf_frer(act)) {
> + entry->id = FLOW_ACTION_FRER;
> + entry->frer.tag_type = to_frer(act)->tag_type;
> + entry->frer.tag_action = to_frer(act)->tag_action;
> + entry->frer.recover = to_frer(act)->recover;
> + entry->frer.rcvy_alg = to_frer(act)->rcvy_alg;
> + entry->frer.rcvy_history_len =
> + to_frer(act)->rcvy_history_len;
> + entry->frer.rcvy_reset_msec =
> + to_frer(act)->rcvy_reset_msec;
> } else {
> err = -EOPNOTSUPP;
> goto err_out_locked;
> --
> 2.17.1
>
--
Vinicius
Hi Vinicius,
On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote:
> > This patch introduce a frer action to implement frame replication and
> > elimination for reliability, which is defined in IEEE P802.1CB.
> >
>
> An action seems, to me, a bit too limiting/fine grained for a frame replication
> and elimination feature.
>
> At least I want to hear the reasons that the current hsr/prp support cannot be
> extended to support one more tag format/protocol.
>
> And the current name for the spec is IEEE 802.1CB-2017.
>
802.1CB can be configured on bridge ports, and needs to use the bridge
forwarding function as a relay system. It only works on identified
streams; unrecognized flows still need to pass through the bridged network
normally.
But the current hsr/prp driver seems to support only two ports, and cannot
use ports that are in a bridge. It's hard to implement the FRER functions
on the current HSR driver.
See chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017.
The protocol stack for a relay system is as follows:
Stream Transfer Function
| |
| Sequence generation
| Sequence encode/decode
Stream identification Active Stream identification
| |
| Internal LAN---- Relay system forwarding
| | |
MAC MAC MAC
Using port actions makes it easy to implement the FRER tag add/delete,
split, and recover functions.
The current HSR/PRP driver can be used to set up HSR/PRP per port, and the
tc-frer action can be used to set up and recover RTAG/HSR/PRP per stream.
Thanks,
Xiaoliang
Hi Vinicius,
On Wed, Sep 29, 2021 at 10:25:58AM +0000, Xiaoliang Yang wrote:
> Hi Vinicius,
>
> On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote:
> > > This patch introduce a frer action to implement frame replication and
> > > elimination for reliability, which is defined in IEEE P802.1CB.
> > >
> >
> > An action seems, to me, a bit too limiting/fine grained for a frame replication
> > and elimination feature.
> >
> > At least I want to hear the reasons that the current hsr/prp support cannot be
> > extended to support one more tag format/protocol.
> >
> > And the current name for the spec is IEEE 802.1CB-2017.
> >
> 802.1CB can be set on bridge ports, and need to use bridge forward
> Function as a relay system. It only works on identified streams,
> unrecognized flows still need to pass through the bridged network
> normally.
>
> But current hsr/prp seems only support two ports, and cannot use the
> ports in bridge. It's hard to implement FRER functions on current HSR
> driver.
>
> You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017,
> Protocol stack for relay system is like follows:
>
> Stream Transfer Function
> | |
> | Sequence generation
> | Sequence encode/decode
> Stream identification Active Stream identification
> | |
> | Internal LAN---- Relay system forwarding
> | | |
> MAC MAC MAC
>
> Use port actions to easily implement FRER tag add/delete, split, and
> recover functions.
>
> Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer
> Action to be used for stream RTAG/HSR/PRP set and recover.
Did Xiaoliang answer your question satisfactorily? :)
Xiaoliang Yang <[email protected]> writes:
> Hi Vinicius,
>
> On Sep 29, 2021 at 6:35:59 +0000, Vinicius Costa Gomes wrote:
>> > This patch introduce a frer action to implement frame replication and
>> > elimination for reliability, which is defined in IEEE P802.1CB.
>> >
>>
>> An action seems, to me, a bit too limiting/fine grained for a frame replication
>> and elimination feature.
>>
>> At least I want to hear the reasons that the current hsr/prp support cannot be
>> extended to support one more tag format/protocol.
>>
>> And the current name for the spec is IEEE 802.1CB-2017.
>>
> 802.1CB can be set on bridge ports, and need to use bridge forward
> Function as a relay system. It only works on identified streams,
> unrecognized flows still need to pass through the bridged network
> normally.
This ("only on identified streams") is the strongest argument so far to
have FRER also as an action, in addition to the current hsr netdevice
approach.
>
> But current hsr/prp seems only support two ports, and cannot use the
> ports in bridge. It's hard to implement FRER functions on current HSR
> driver.
That the hsr netdevice only supports two ports is, I think, more a bug
than a design issue, and it will need to get fixed at some point.
Speaking of functions, one thing that might be interesting is trying to
see if it makes sense to make part of the current hsr functionality a
"library" so it can be used by tc-frer as well. (less duplication of
bugs).
>
> You can see chapter "D.2 Example 2: Various stack positions" in IEEE 802.1CB-2017,
> Protocol stack for relay system is like follows:
>
> Stream Transfer Function
> | |
> | Sequence generation
> | Sequence encode/decode
> Stream identification Active Stream identification
> | |
> | Internal LAN---- Relay system forwarding
> | | |
> MAC MAC MAC
>
> Use port actions to easily implement FRER tag add/delete, split, and
> recover functions.
>
> Current HSR/PRP driver can be used for port HSR/PRP set, and tc-frer
> Action to be used for stream RTAG/HSR/PRP set and recover.
I am still reading the spec and trying to imagine how things would fit
together:
- for which use cases tc-frer would be useful;
- for which use cases the hsr netdevice would be useful;
- would it make sense to have them in the same system?
>
> Thanks,
> Xiaoliang
Cheers,
--
Vinicius
Xiaoliang Yang <[email protected]> writes:
> This patch introduce a frer action to implement frame replication and
> elimination for reliability, which is defined in IEEE P802.1CB.
>
> There are two modes for frer action: generate and push the tag, recover
> and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
> patch only supports RTAG now.
>
> User can push the tag on egress port of the talker device, recover and
> pop the tag on ingress port of the listener device. When it's a relay
> system, push the tag on ingress port, or set individual recover on
> ingress port. Set the sequence recover on egress port.
>
> Use action "mirred" to do split function, and use "vlan-modify" to do
> active stream identification function on relay system.
>
> Below is the setting example in user space:
> push rtag on relay system:
> > tc qdisc add dev swp0 clsact
> > tc filter add dev swp0 ingress protocol 802.1Q flower \
> skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
> action frer rtag tag-action tag-push
>
> split stream:
> > tc filter add dev swp0 ingress protocol 802.1Q flower \
> skip_hw dst_mac 00:01:02:03:04:05 vlan_id 1 \
> action mirred egress mirror dev swp1
>
> individual recover:
> > tc filter add dev swp0 ingress protocol 802.1Q flower
> skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
> action frer rtag recover \
> alg vector history-length 32 reset-time 10000
>
> recover and pop rtag:
> > tc filter add dev swp0 egress protocol 802.1Q flower
> skip_hw dst_mac 00:01:02:03:04:06 vlan_id 1 \
> action frer rtag recover \
> alg vector history-length 32 reset-time 10000 \
> tag-action tag-pop
>
> Signed-off-by: Xiaoliang Yang <[email protected]>
> ---
> include/net/flow_offload.h | 9 +
> include/net/tc_act/tc_frer.h | 52 +++
> include/uapi/linux/if_ether.h | 1 +
> include/uapi/linux/pkt_cls.h | 1 +
> include/uapi/linux/tc_act/tc_frer.h | 50 ++
> net/sched/Kconfig | 13 +
> net/sched/Makefile | 1 +
> net/sched/act_frer.c | 695 ++++++++++++++++++++++++++++
> net/sched/cls_api.c | 11 +
> 9 files changed, 833 insertions(+)
> create mode 100644 include/net/tc_act/tc_frer.h
> create mode 100644 include/uapi/linux/tc_act/tc_frer.h
> create mode 100644 net/sched/act_frer.c
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index 3961461d9c8b..cfa9b69cec69 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -148,6 +148,7 @@ enum flow_action_id {
> FLOW_ACTION_MPLS_MANGLE,
> FLOW_ACTION_GATE,
> FLOW_ACTION_PPPOE_PUSH,
> + FLOW_ACTION_FRER,
> NUM_FLOW_ACTIONS,
> };
>
> @@ -278,6 +279,14 @@ struct flow_action_entry {
> struct { /* FLOW_ACTION_PPPOE_PUSH */
> u16 sid;
> } pppoe;
> + struct {
> + u8 tag_type;
> + u8 tag_action;
> + u8 recover;
> + u8 rcvy_alg;
> + u8 rcvy_history_len;
> + u8 rcvy_reset_msec;
Optional: it wasn't clear until I took a closer look at the code that
"rcvy" means "recovery" in this context. Perhaps writing "recovery" in
full would make it clearer?
> + } frer;
> };
> struct flow_action_cookie *cookie; /* user defined action cookie */
> };
> diff --git a/include/net/tc_act/tc_frer.h b/include/net/tc_act/tc_frer.h
> new file mode 100644
> index 000000000000..b2ad2b2a3fe1
> --- /dev/null
> +++ b/include/net/tc_act/tc_frer.h
> @@ -0,0 +1,52 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/* Copyright 2021 NXP */
> +
> +#ifndef __NET_TC_FRER_H
> +#define __NET_TC_FRER_H
> +
> +#include <net/act_api.h>
> +#include <linux/tc_act/tc_frer.h>
> +
> +struct tcf_frer;
> +
> +struct tcf_frer_proto_ops {
> + int (*encode)(struct sk_buff *skb, struct tcf_frer *frer_act);
> + int (*decode)(struct sk_buff *skb);
> + void (*tag_pop)(struct sk_buff *skb, struct tcf_frer *frer_act);
> +};
> +
> +struct tcf_frer {
> + struct tc_action common;
> + u8 tag_type;
> + u8 tag_action;
> + u8 recover;
> + u8 rcvy_alg;
> + u8 rcvy_history_len;
> + u64 rcvy_reset_msec;
> + u32 gen_seq_num;
> + u32 rcvy_seq_num;
> + u64 seq_space;
> + u32 seq_history;
> + bool take_any;
> + bool rcvy_take_noseq;
> + u32 cps_seq_rcvy_lost_pkts;
> + u32 cps_seq_rcvy_tagless_pkts;
> + u32 cps_seq_rcvy_out_of_order_pkts;
> + u32 cps_seq_rcvy_rogue_pkts;
> + u32 cps_seq_rcvy_resets;
> + struct hrtimer hrtimer;
> + const struct tcf_frer_proto_ops *proto_ops;
> +};
> +
> +#define to_frer(a) ((struct tcf_frer *)a)
> +
> +static inline bool is_tcf_frer(const struct tc_action *a)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> + if (a->ops && a->ops->id == TCA_ID_FRER)
> + return true;
> +#endif
> + return false;
> +}
> +
> +#endif
> diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
> index 5f589c7a8382..812aa75f7f23 100644
> --- a/include/uapi/linux/if_ether.h
> +++ b/include/uapi/linux/if_ether.h
> @@ -114,6 +114,7 @@
> #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
> #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
> #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
> +#define ETH_P_RTAG 0xF1C1 /* Redundancy Tag(IEEE 802.1CB) */
> #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
>
> #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value
> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
> index 6836ccb9c45d..a3fc0c478a65 100644
> --- a/include/uapi/linux/pkt_cls.h
> +++ b/include/uapi/linux/pkt_cls.h
> @@ -136,6 +136,7 @@ enum tca_id {
> TCA_ID_MPLS,
> TCA_ID_CT,
> TCA_ID_GATE,
> + TCA_ID_FRER,
> /* other actions go here */
> __TCA_ID_MAX = 255
> };
> diff --git a/include/uapi/linux/tc_act/tc_frer.h b/include/uapi/linux/tc_act/tc_frer.h
> new file mode 100644
> index 000000000000..cd86274483e7
> --- /dev/null
> +++ b/include/uapi/linux/tc_act/tc_frer.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/* Copyright 2021 NXP */
> +
> +#ifndef __LINUX_TC_FRER_H
> +#define __LINUX_TC_FRER_H
> +
> +#include <linux/pkt_cls.h>
> +
> +struct tc_frer {
> + tc_gen;
> +};
> +
> +enum {
> + TCA_FRER_UNSPEC,
> + TCA_FRER_TM,
> + TCA_FRER_PARMS,
> + TCA_FRER_PAD,
> + TCA_FRER_TAG_TYPE,
> + TCA_FRER_TAG_ACTION,
> + TCA_FRER_RECOVER,
> + TCA_FRER_RECOVER_ALG,
> + TCA_FRER_RECOVER_HISTORY_LEN,
> + TCA_FRER_RECOVER_RESET_TM,
> + TCA_FRER_RECOVER_TAGLESS_PKTS,
> + TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS,
> + TCA_FRER_RECOVER_ROGUE_PKTS,
> + TCA_FRER_RECOVER_LOST_PKTS,
> + TCA_FRER_RECOVER_RESETS,
> + __TCA_FRER_MAX,
> +};
> +#define TCA_FRER_MAX (__TCA_FRER_MAX - 1)
> +
> +enum tc_frer_tag_action {
> + TCA_FRER_TAG_NULL,
> + TCA_FRER_TAG_PUSH,
> + TCA_FRER_TAG_POP,
> +};
> +
> +enum tc_frer_tag_type {
> + TCA_FRER_TAG_RTAG,
> + TCA_FRER_TAG_HSR,
> + TCA_FRER_TAG_PRP,
> +};
> +
> +enum tc_frer_rcvy_alg {
> + TCA_FRER_RCVY_VECTOR_ALG,
> + TCA_FRER_RCVY_MATCH_ALG,
> +};
> +
> +#endif
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index 1e8ab4749c6c..93e2687042c2 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -997,6 +997,19 @@ config NET_ACT_GATE
> To compile this code as a module, choose M here: the
> module will be called act_gate.
>
> +config NET_ACT_FRER
> + tristate "Frame frer tc action"
> + depends on NET_CLS_ACT
> + help
> + Say Y here to support frame replication and elimination for
> + reliability, which is defined by IEEE 802.1CB.
> + This action allow to add a frer tag. It also allow to remove
> + the frer tag and drop repeat frames.
> +
> + If unsure, say N.
> + To compile this code as a module, choose M here: the
> + module will be called act_frer.
> +
> config NET_IFE_SKBMARK
> tristate "Support to encoding decoding skb mark on IFE action"
> depends on NET_ACT_IFE
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index dd14ef413fda..69e7e94be567 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
> obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
> obj-$(CONFIG_NET_ACT_CT) += act_ct.o
> obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
> +obj-$(CONFIG_NET_ACT_FRER) += act_frer.o
> obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
> obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
> obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
> diff --git a/net/sched/act_frer.c b/net/sched/act_frer.c
> new file mode 100644
> index 000000000000..6f8ec5782d3d
> --- /dev/null
> +++ b/net/sched/act_frer.c
> @@ -0,0 +1,695 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Copyright 2021 NXP */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <net/act_api.h>
> +#include <net/netlink.h>
> +#include <net/pkt_cls.h>
> +#include <net/tc_act/tc_frer.h>
> +
> +#define FRER_SEQ_SPACE 16
> +#define FRER_RCVY_RESET_MSEC 100
> +#define FRER_RCVY_INVALID_SEQ 0x100
> +#define FRER_RCVY_PASSED 0
> +#define FRER_RCVY_DISCARDED -1
> +
> +static unsigned int frer_net_id;
> +static struct tc_action_ops act_frer_ops;
> +
> +struct r_tag {
> + __be16 reserved;
> + __be16 sequence_nr;
> + __be16 encap_proto;
> +} __packed;
> +
> +struct rtag_ethhdr {
> + struct ethhdr ethhdr;
> + struct r_tag h_rtag;
> +} __packed;
> +
> +struct rtag_vlan_ethhdr {
> + struct vlan_ethhdr vlanhdr;
> + struct r_tag h_rtag;
> +} __packed;
> +
> +static const struct nla_policy frer_policy[TCA_FRER_MAX + 1] = {
> + [TCA_FRER_PARMS] =
> + NLA_POLICY_EXACT_LEN(sizeof(struct tc_frer)),
> + [TCA_FRER_TAG_TYPE] = { .type = NLA_U8 },
> + [TCA_FRER_TAG_ACTION] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_ALG] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_HISTORY_LEN] = { .type = NLA_U8 },
> + [TCA_FRER_RECOVER_RESET_TM] = { .type = NLA_U64 },
> +};
> +
> +static void frer_seq_recovery_reset(struct tcf_frer *frer_act);
> +
> +static enum hrtimer_restart frer_hrtimer_func(struct hrtimer *timer)
> +{
> + struct tcf_frer *frer_act = container_of(timer, struct tcf_frer,
> + hrtimer);
> + ktime_t remaining_tm;
> +
> + frer_seq_recovery_reset(frer_act);
> +
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
I think using ms_to_ktime() would be more readable. There are a few
other places where this suggestion applies.
> +
> + hrtimer_forward(timer, timer->base->get_time(), remaining_tm);
> +
> + return HRTIMER_RESTART;
> +}
> +
> +static int frer_rtag_decode(struct sk_buff *skb)
> +{
> + struct rtag_vlan_ethhdr *rtag_vlan_hdr;
> + struct rtag_ethhdr *rtag_hdr;
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + bool is_vlan;
> + u16 sequence;
> + u16 proto;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> + proto = ethhdr->h_proto;
> + is_vlan = false;
> +
> + if (proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + proto = vlanhdr->h_vlan_encapsulated_proto;
> + is_vlan = true;
> + }
> +
> + if (proto != htons(ETH_P_RTAG))
> + return FRER_RCVY_INVALID_SEQ;
> +
> + if (is_vlan) {
> + rtag_vlan_hdr = (struct rtag_vlan_ethhdr *)ethhdr;
> + rtag = &rtag_vlan_hdr->h_rtag;
> + } else {
> + rtag_hdr = (struct rtag_ethhdr *)ethhdr;
> + rtag = &rtag_hdr->h_rtag;
> + }
> +
> + sequence = ntohs(rtag->sequence_nr);
> +
> + return sequence;
> +}
> +
> +static int frer_seq_generation_alg(struct tcf_frer *frer_act)
> +{
> + u32 gen_seq_max = frer_act->seq_space - 1;
> + u32 gen_seq_num = frer_act->gen_seq_num;
> + int sequence_number;
> +
> + sequence_number = gen_seq_num;
> +
> + if (gen_seq_num >= gen_seq_max)
> + gen_seq_num = 0;
> + else
> + gen_seq_num++;
> +
> + frer_act->gen_seq_num = gen_seq_num;
> +
> + return sequence_number;
> +}
> +
> +static int frer_rtag_encode(struct sk_buff *skb, struct tcf_frer *frer_act)
> +{
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + int rtag_len, head_len;
> + unsigned char *dst, *src, *p;
> + __be16 *proto, proto_val;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> + if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + p = (unsigned char *)(vlanhdr + 1);
> + proto = &vlanhdr->h_vlan_encapsulated_proto;
> + } else {
> + p = (unsigned char *)(ethhdr + 1);
> + proto = &ethhdr->h_proto;
> + }
> +
> + proto_val = *proto;
> + *proto = htons(ETH_P_RTAG);
> +
> + src = skb_mac_header(skb);
> + head_len = p - src;
> +
> + rtag_len = sizeof(struct r_tag);
> + if (skb_cow_head(skb, rtag_len) < 0)
> + return -ENOMEM;
> +
> + skb_push(skb, rtag_len);
> + skb->mac_header -= rtag_len;
> +
> + dst = skb_mac_header(skb);
> + memmove(dst, src, head_len);
> +
> + rtag = (struct r_tag *)(dst + head_len);
> + rtag->encap_proto = proto_val;
> + rtag->sequence_nr = htons(frer_act->gen_seq_num);
> + rtag->reserved = 0;
> +
> + return 0;
> +}
> +
> +static void frer_rtag_pop(struct sk_buff *skb, struct tcf_frer *frer_act)
> +{
> + struct vlan_ethhdr *vlanhdr;
> + struct ethhdr *ethhdr;
> + struct r_tag *rtag;
> + int rtag_len, head_len;
> + unsigned char *dst, *src, *p;
> + __be16 *proto;
> +
> + ethhdr = (struct ethhdr *)skb_mac_header(skb);
> +
> + if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
> + vlanhdr = (struct vlan_ethhdr *)ethhdr;
> + p = (unsigned char *)(vlanhdr + 1);
> + proto = &vlanhdr->h_vlan_encapsulated_proto;
> + } else {
> + p = (unsigned char *)(ethhdr + 1);
> + proto = &ethhdr->h_proto;
> + }
> +
> + if (*proto != htons(ETH_P_RTAG))
> + return;
> +
> + rtag = (struct r_tag *)p;
> + rtag_len = sizeof(struct r_tag);
> + *proto = rtag->encap_proto;
> +
> + src = skb_mac_header(skb);
> + head_len = p - src;
> +
> + skb->data = skb_mac_header(skb);
> + skb_pull(skb, rtag_len);
> +
> + skb_reset_mac_header(skb);
> +
> + if (skb->ip_summed == CHECKSUM_PARTIAL)
> + skb->csum_start += rtag_len;
> +
> + dst = skb_mac_header(skb);
> + memmove(dst, src, head_len);
> +}
> +
> +static const struct tcf_frer_proto_ops rtag_ops = {
> + .encode = frer_rtag_encode,
> + .decode = frer_rtag_decode,
> + .tag_pop = frer_rtag_pop,
> +};
> +
> +static int tcf_frer_init(struct net *net, struct nlattr *nla,
> + struct nlattr *est, struct tc_action **a,
> + int ovr, int bind, bool rtnl_held,
> + struct tcf_proto *tp, u32 flags,
> + struct netlink_ext_ack *extack)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> + struct nlattr *tb[TCA_FRER_MAX + 1];
> + struct tcf_chain *goto_ch = NULL;
> + struct tcf_frer *frer_act;
> + struct tc_frer *parm;
> + int ret = 0, err, index;
> + ktime_t remaining_tm;
> +
> + if (!nla)
> + return -EINVAL;
> +
> + err = nla_parse_nested(tb, TCA_FRER_MAX, nla, frer_policy, extack);
> + if (err < 0)
> + return err;
> +
> + if (!tb[TCA_FRER_PARMS])
> + return -EINVAL;
> +
> + parm = nla_data(tb[TCA_FRER_PARMS]);
> + index = parm->index;
> +
> + err = tcf_idr_check_alloc(tn, &index, a, bind);
> + if (err < 0)
> + return err;
> +
> + if (err && bind)
> + return 0;
> +
> + if (!err) {
> + ret = tcf_idr_create(tn, index, est, a,
> + &act_frer_ops, bind, false, 0);
> +
> + if (ret) {
> + tcf_idr_cleanup(tn, index);
> + return ret;
> + }
> + } else if (!ovr) {
> + tcf_idr_release(*a, bind);
> + return -EEXIST;
> + }
> +
> + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
> + if (err < 0)
> + goto release_idr;
> +
> + frer_act = to_frer(*a);
> +
> + spin_lock_bh(&frer_act->tcf_lock);
> + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> +
> + frer_act->tag_type = nla_get_u8(tb[TCA_FRER_TAG_TYPE]);
> + frer_act->tag_action = nla_get_u8(tb[TCA_FRER_TAG_ACTION]);
> + frer_act->recover = nla_get_u8(tb[TCA_FRER_RECOVER]);
> + frer_act->rcvy_alg = nla_get_u8(tb[TCA_FRER_RECOVER_ALG]);
> + frer_act->rcvy_history_len = nla_get_u8(tb[TCA_FRER_RECOVER_HISTORY_LEN]);
> + frer_act->rcvy_reset_msec = nla_get_u64(tb[TCA_FRER_RECOVER_RESET_TM]);
> +
> + frer_act->gen_seq_num = 0;
> + frer_act->seq_space = 1 << FRER_SEQ_SPACE;
> + frer_act->rcvy_seq_num = 0;
> + frer_act->seq_history = 0xFFFFFFFF;
> + frer_act->rcvy_take_noseq = true;
> +
> + switch (frer_act->tag_type) {
> + case TCA_FRER_TAG_RTAG:
> + frer_act->proto_ops = &rtag_ops;
> + break;
> + case TCA_FRER_TAG_HSR:
> + case TCA_FRER_TAG_PRP:
> + default:
> + spin_unlock_bh(&frer_act->tcf_lock);
> + return -EOPNOTSUPP;
> + }
> +
> + if (frer_act->recover && frer_act->rcvy_reset_msec) {
> + hrtimer_init(&frer_act->hrtimer, CLOCK_TAI,
> + HRTIMER_MODE_REL_SOFT);
> + frer_act->hrtimer.function = frer_hrtimer_func;
> +
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(&frer_act->hrtimer, remaining_tm,
> + HRTIMER_MODE_REL_SOFT);
> + }
> +
> + spin_unlock_bh(&frer_act->tcf_lock);
> +
> + if (goto_ch)
> + tcf_chain_put_by_act(goto_ch);
> +
> + return ret;
> +
> +release_idr:
> + tcf_idr_release(*a, bind);
> + return err;
> +}
> +
> +static void frer_seq_recovery_reset(struct tcf_frer *frer_act)
> +{
> + spin_lock(&frer_act->tcf_lock);
> + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG) {
> + frer_act->rcvy_seq_num = frer_act->seq_space - 1;
> + frer_act->seq_history = 0;
> + }
> + frer_act->cps_seq_rcvy_resets++;
> + frer_act->take_any = true;
> + spin_unlock(&frer_act->tcf_lock);
> +}
> +
> +static void frer_shift_seq_history(int value, struct tcf_frer *frer_act)
> +{
> + int history_len = frer_act->rcvy_history_len;
> +
> + if ((frer_act->seq_history & BIT(history_len - 1)) == 0)
> + frer_act->cps_seq_rcvy_lost_pkts++;
> +
> + frer_act->seq_history <<= 1;
> +
> + if (value)
> + frer_act->seq_history |= BIT(0);
> +}
> +
> +static int frer_vector_rcvy_alg(struct tcf_frer *frer_act, int sequence,
> + bool individual)
> +{
> + struct hrtimer *timer = &frer_act->hrtimer;
> + bool reset_timer = false;
> + ktime_t remaining_tm;
> + int delta, ret;
> +
> + if (sequence == FRER_RCVY_INVALID_SEQ) {
> + frer_act->cps_seq_rcvy_tagless_pkts++;
> + if (frer_act->rcvy_take_noseq) {
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + } else {
> + return FRER_RCVY_DISCARDED;
> + }
> + }
> +
> + delta = (sequence - frer_act->rcvy_seq_num) & (frer_act->seq_space - 1);
> + /* -(RecovSeqSpace/2) <= delta <= ((RecovSeqSpace/2)-1) */
> + if (delta & (frer_act->seq_space / 2))
> + delta -= frer_act->seq_space;
> +
> + if (frer_act->take_any) {
> + frer_act->take_any = false;
> + frer_act->seq_history |= BIT(0);
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +
> + if (delta >= frer_act->rcvy_history_len ||
> + delta <= -frer_act->rcvy_history_len) {
> + /* Packet is out-of-range. */
> + frer_act->cps_seq_rcvy_rogue_pkts++;
> +
> + if (individual)
> + reset_timer = true;
> +
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + } else if (delta <= 0) {
> + /* Packet is old and in SequenceHistory. */
> + if (frer_act->seq_history & BIT(-delta)) {
> + if (individual)
> + reset_timer = true;
> +
> + /* Packet has been seen. */
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + } else {
> + /* Packet has not been seen. */
> + frer_act->seq_history |= BIT(-delta);
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> + } else {
> + /* Packet is not too far ahead of the one we want. */
> + if (delta != 1)
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + while (--delta)
> + frer_shift_seq_history(0, frer_act);
> + frer_shift_seq_history(1, frer_act);
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +out:
> + if (reset_timer && frer_act->rcvy_reset_msec) {
> + remaining_tm =
> + (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT);
> + }
> +
> + return ret;
> +}
> +
> +static int frer_match_rcvy_alg(struct tcf_frer *frer_act, int sequence,
> + bool individual)
> +{
> + struct hrtimer *timer = &frer_act->hrtimer;
> + bool reset_timer = false;
> + ktime_t remaining_tm;
> + int delta, ret;
> +
> + if (sequence == FRER_RCVY_INVALID_SEQ) {
> + frer_act->cps_seq_rcvy_tagless_pkts++;
> +
> + return FRER_RCVY_PASSED;
> + }
> +
> + if (frer_act->take_any) {
> + frer_act->take_any = false;
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + }
> +
> + delta = sequence - frer_act->rcvy_seq_num;
> + if (delta) {
> + /* Packet has not been seen, accept it. */
> + if (delta != 1)
> + frer_act->cps_seq_rcvy_out_of_order_pkts++;
> +
> + frer_act->rcvy_seq_num = sequence;
> +
> + reset_timer = true;
> + ret = FRER_RCVY_PASSED;
> + goto out;
> + } else {
> + if (individual)
> + reset_timer = true;
> +
> + /* Packet has been seen. Do not forward. */
> + ret = FRER_RCVY_DISCARDED;
> + goto out;
> + }
> +
> +out:
> + if (reset_timer && frer_act->rcvy_reset_msec) {
> + remaining_tm = (ktime_t)(frer_act->rcvy_reset_msec * 1000000);
> + hrtimer_start(timer, remaining_tm, HRTIMER_MODE_REL_SOFT);
> + }
> +
> + return ret;
> +}
> +
> +static int tcf_frer_act(struct sk_buff *skb, const struct tc_action *a,
> + struct tcf_result *res)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> + bool ingress, individual;
> + int ret, retval;
> + int sequence;
> +
> + tcf_lastuse_update(&frer_act->tcf_tm);
> + tcf_action_update_bstats(&frer_act->common, skb);
> +
> + retval = READ_ONCE(frer_act->tcf_action);
> +
> + sequence = frer_act->proto_ops->decode(skb);
> +
> + ingress = skb_at_tc_ingress(skb);
> + individual = ingress;
> +
> + if (frer_act->recover) {
> + spin_lock(&frer_act->tcf_lock);
> +
> + if (frer_act->rcvy_alg == TCA_FRER_RCVY_VECTOR_ALG)
> + ret = frer_vector_rcvy_alg(frer_act, sequence,
> + individual);
> + else
> + ret = frer_match_rcvy_alg(frer_act, sequence,
> + individual);
> + if (ret) {
> + frer_act->tcf_qstats.drops++;
> + retval = TC_ACT_SHOT;
> + }
> +
> + if (frer_act->tag_action == TCA_FRER_TAG_POP)
> + frer_act->proto_ops->tag_pop(skb, frer_act);
> +
> + spin_unlock(&frer_act->tcf_lock);
> +
> + return retval;
> + }
> +
> + if (frer_act->tag_action == TCA_FRER_TAG_PUSH &&
> + sequence == FRER_RCVY_INVALID_SEQ) {
> + spin_lock(&frer_act->tcf_lock);
> +
> + frer_seq_generation_alg(frer_act);
> +
> + frer_act->proto_ops->encode(skb, frer_act);
> +
> + spin_unlock(&frer_act->tcf_lock);
> + }
> +
> + return retval;
> +}
> +
> +static int tcf_frer_dump(struct sk_buff *skb, struct tc_action *a,
> + int bind, int ref)
> +{
> + unsigned char *b = skb_tail_pointer(skb);
> + struct tcf_frer *frer_act = to_frer(a);
> + struct tc_frer opt = {
> + .index = frer_act->tcf_index,
> + .refcnt = refcount_read(&frer_act->tcf_refcnt) - ref,
> + .bindcnt = atomic_read(&frer_act->tcf_bindcnt) - bind,
> + };
> + struct tcf_t t;
> +
> + spin_lock_bh(&frer_act->tcf_lock);
> + opt.action = frer_act->tcf_action;
> +
> + if (nla_put(skb, TCA_FRER_PARMS, sizeof(opt), &opt))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_TAG_TYPE, frer_act->tag_type))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_TAG_ACTION, frer_act->tag_action))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER, frer_act->recover))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER_ALG, frer_act->rcvy_alg))
> + goto nla_put_failure;
> +
> + if (nla_put_u8(skb, TCA_FRER_RECOVER_HISTORY_LEN,
> + frer_act->rcvy_history_len))
> + goto nla_put_failure;
> +
> + if (nla_put_u64_64bit(skb, TCA_FRER_RECOVER_RESET_TM,
> + frer_act->rcvy_reset_msec, TCA_FRER_PAD))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_TAGLESS_PKTS,
> + frer_act->cps_seq_rcvy_tagless_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_OUT_OF_ORDER_PKTS,
> + frer_act->cps_seq_rcvy_out_of_order_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_ROGUE_PKTS,
> + frer_act->cps_seq_rcvy_rogue_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_LOST_PKTS,
> + frer_act->cps_seq_rcvy_lost_pkts))
> + goto nla_put_failure;
> +
> + if (nla_put_u32(skb, TCA_FRER_RECOVER_RESETS,
> + frer_act->cps_seq_rcvy_resets))
> + goto nla_put_failure;
> +
> + tcf_tm_dump(&t, &frer_act->tcf_tm);
> + if (nla_put_64bit(skb, TCA_FRER_TM, sizeof(t),
> + &t, TCA_FRER_PAD))
> + goto nla_put_failure;
> + spin_unlock_bh(&frer_act->tcf_lock);
> +
> + return skb->len;
> +
> +nla_put_failure:
> + spin_unlock_bh(&frer_act->tcf_lock);
> + nlmsg_trim(skb, b);
> +
> + return -1;
> +}
> +
> +static int tcf_frer_walker(struct net *net, struct sk_buff *skb,
> + struct netlink_callback *cb, int type,
> + const struct tc_action_ops *ops,
> + struct netlink_ext_ack *extack)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tcf_generic_walker(tn, skb, cb, type, ops, extack);
> +}
> +
> +static int tcf_frer_search(struct net *net, struct tc_action **a, u32 index)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tcf_idr_search(tn, a, index);
> +}
> +
> +static void tcf_frer_stats_update(struct tc_action *a, u64 bytes, u64 packets,
> + u64 drops, u64 lastuse, bool hw)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> + struct tcf_t *tm = &frer_act->tcf_tm;
> +
> + tcf_action_update_stats(a, bytes, packets, drops, hw);
> + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
> +}
> +
> +static void tcf_frer_cleanup(struct tc_action *a)
> +{
> + struct tcf_frer *frer_act = to_frer(a);
> +
> + if (frer_act->rcvy_reset_msec)
> + hrtimer_cancel(&frer_act->hrtimer);
I could be missing something, but it seems that the hrtimer is only
initialized if both ->recover and ->rcvy_reset_msec are non-zero, while
it is cancelled here whenever ->rcvy_reset_msec is non-zero. I think this
can cause an uninitialized hrtimer to be cancelled if the user sets
->recover to zero and ->rcvy_reset_msec to a non-zero value.
Perhaps adding some policy checks for valid values of TCA_FRER_RECOVER
and friends would help?
Documenting what the different configuration parameters mean would be
nice as well.
> +}
> +
> +static size_t tcf_frer_get_fill_size(const struct tc_action *act)
> +{
> + return nla_total_size(sizeof(struct tc_frer));
> +}
> +
> +static struct tc_action_ops act_frer_ops = {
> + .kind = "frer",
> + .id = TCA_ID_FRER,
> + .owner = THIS_MODULE,
> + .act = tcf_frer_act,
> + .init = tcf_frer_init,
> + .cleanup = tcf_frer_cleanup,
> + .dump = tcf_frer_dump,
> + .walk = tcf_frer_walker,
> + .stats_update = tcf_frer_stats_update,
> + .get_fill_size = tcf_frer_get_fill_size,
> + .lookup = tcf_frer_search,
> + .size = sizeof(struct tcf_frer),
> +};
> +
> +static __net_init int frer_init_net(struct net *net)
> +{
> + struct tc_action_net *tn = net_generic(net, frer_net_id);
> +
> + return tc_action_net_init(net, tn, &act_frer_ops);
> +}
> +
> +static void __net_exit frer_exit_net(struct list_head *net_list)
> +{
> + tc_action_net_exit(net_list, frer_net_id);
> +};
> +
> +static struct pernet_operations frer_net_ops = {
> + .init = frer_init_net,
> + .exit_batch = frer_exit_net,
> + .id = &frer_net_id,
> + .size = sizeof(struct tc_action_net),
> +};
> +
> +static int __init frer_init_module(void)
> +{
> + return tcf_register_action(&act_frer_ops, &frer_net_ops);
> +}
> +
> +static void __exit frer_cleanup_module(void)
> +{
> + tcf_unregister_action(&act_frer_ops, &frer_net_ops);
> +}
> +
> +module_init(frer_init_module);
> +module_exit(frer_cleanup_module);
> +MODULE_LICENSE("GPL v2");
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 2ef8f5a6205a..353184987427 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -39,6 +39,7 @@
> #include <net/tc_act/tc_ct.h>
> #include <net/tc_act/tc_mpls.h>
> #include <net/tc_act/tc_gate.h>
> +#include <net/tc_act/tc_frer.h>
> #include <net/flow_offload.h>
>
> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
> @@ -3706,6 +3707,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> err = tcf_gate_get_entries(entry, act);
> if (err)
> goto err_out_locked;
> + } else if (is_tcf_frer(act)) {
> + entry->id = FLOW_ACTION_FRER;
> + entry->frer.tag_type = to_frer(act)->tag_type;
> + entry->frer.tag_action = to_frer(act)->tag_action;
> + entry->frer.recover = to_frer(act)->recover;
> + entry->frer.rcvy_alg = to_frer(act)->rcvy_alg;
> + entry->frer.rcvy_history_len =
> + to_frer(act)->rcvy_history_len;
> + entry->frer.rcvy_reset_msec =
> + to_frer(act)->rcvy_reset_msec;
> } else {
> err = -EOPNOTSUPP;
> goto err_out_locked;
> --
> 2.17.1
>
--
Vinicius
On Fri, May 06, 2022 at 02:44:17PM +0000, Ferenc Fejes wrote:
> > Glad to see someone familiar with 802.1CB. I have a few questions and
> > concerns if you don't mind.
>
> I CCd Balazs Varga? and Janos Farkas, experts of the TSN topics
> including 802.1CB as well. Istvan Moldovan can also give valuable
> feedback as the author of our in-house userspace FRER. I'll also try my
> best to answer but I'm the least competent in the topic.
>
Nope, that would probably be me ;)
I am commenting on Xiaoliang's patch without having even run it, and I
have only looked through the code diagonally, and I'm not exactly an
expert on the use cases that drove the standard either. So plenty of
chances to make mistakes. But nonetheless I hope that by explaining to
me where I'm wrong we'll be able to make progress with this.
> >
> > I think we are seeing a bit of a stall on the topic of FRER modeling in
> > the Linux networking stack, in no small part due to the fact that we are
> > working with pre-standard hardware.
> >
> > The limitation with Xiaoliang's proposal here (to model FRER stream
> > replication and recovery as a tc action) is that I don't think it works
> > well for traffic termination - it only covers properly the use case of a
> > switch. More precisely, there isn't a single convergent termination
> > point for either locally originating traffic, or locally received
> > traffic (i.e. you, as user, don't know on which interface of several
> > available to open a socket).
> >
> > In our hardware, this limitation isn't really visible because of the way
> > in which the Ethernet switch is connected inside the NXP LS1028A.
>
> We have some NXP LS1028As as well so at least I am familiar with the box :-)
Cool, this means we'll eventually reach a common understanding of the
topic.
> > It is something like this:
> >
> > +---------------------------------------+
> > | |
> > | +------+ +------+ |
> > | | eno2 | | eno3 | |
> > | +------+ +------+ |
> > | | | |
> > | +------+ +------+ |
> > | | swp4 | | swp5 | |
> > | +------+ +------+ |
> > | +------+ +------+ +------+ +------+ |
> > | | swp0 | | swp1 | | swp2 | | swp3 | |
> > +--+------+-+------+-+------+-+------+--+
> >
> > In the above picture, the switch ports swp0-swp3 have eno3 as a DSA
> > master (connected to the internal swp5, a CPU port). The other internal
> > port, swp4, is configured as a DSA user port, so it has a net device.
> > Analogously, while eno3 is a DSA master and receives DSA-tagged traffic
> > (so it is useless for direct IP termination), eno2 receives DSA untagged
> > traffic and is therefore an IP termination endpoint into a switched
> > network.
>
> Unfortunately I'm not familiar with the distributed switch architecture
> (I only read a netdev paper from that and thats all) but I try to grasp
> on the problem.
> In my understanding, the main issue is the distinction between the
> locally terminated and forwarded TSN streams, because currently the DSA
> metadata tags are required to do that? Can you explain the problem for
> one who not familiar with DSA?
Forget about DSA, what I'm trying to get at is that you might one day
read the release notes of the Linux kernel and see that it gained
support for FRER using tc, and get all excited, download and compile it,
set up 2 machines connected through 2 port pairs, and try to configure
the systems to ping each other redundantly, to become familiar with how
it works. Start with something simple, what can be so hard about a ping ;)
You'll say something along the lines of
1. ok, I have 2 IP addresses, so I need 2 streams, one A -> B and one B -> A
2. I want to use the null stream identification function (MAC DA, VLAN ID
for those following along) so I have to resolve each IP address to a
MAC address to use as a stream identifier, but how? since the 2
Ethernet cards on each system have different MAC addresses. Anyway,
pick one and put the other card in promisc for now.
3. I have the MACs now, I want to configure the streams. The stream "A -> B"
needs to be configured for splitting on the first system, and for
sequence recovery on the second system. The stream "B -> A" needs to
be configured for recovery on the first system and for splitting on
the second.
4. Let's start with splitting, this is just the "mirred egress mirror"
action, nothing FRER specific about it. There's also the "frer rtag
tag-action tag-push" action which adds the redundancy tag. Good thing
these actions can be chained. So let's put a filter on the egress
qdisc of eth0, that matches on the MAC address of B, and has a mirred
mirror action to eth1, and a "rtag tag-push" action. Notice how by
this time, eth0 becomes sort of a "primary" interface and eth1 sort
of a "secondary" interface. So if you ping, you need to use eth0.
What if the link goes down on eth0 you ask, how does the "redundancy"
in "frer" come into play, with the traffic still going through eth1?
No time to ask questions like that, let's move on.
5. Let's say that both links are up, and system B is receiving a
replicated stream with FRER tags on both eth0 and eth1. It wants to
eliminate the duplicates and see a continuous flow of ICMP requests
without the extra FRER tag. Back to the documentation. We see 2 kinds
of stream recovery, one is "individual" recovery which is a
"frer rtag recover" action put on the ingress qdisc of an interface,
and the other is just "recovery", which is the same action but put on
the egress qdisc. We don't want individual sequence recovery processes
on eth0 and eth1 of station B, since those won't consider the packets
as being members of the same stream, and they'll still be duplicated.
So we want the normal recovery. But on which netdev's egress qdisc do
we put the "rtag recover" action? Both eth0 and eth1 are receiving.
There is no central convergence point.
Now you're stumped and thinking, how is this supposed to be used?
What can you do with it? I mean, I can probably create a veth pair as
that aforementioned missing convergence point, and guide packets from
{eth0, eth1} towards the lefthand side of the veth pair, using mirred
redirect.
Then I can put the frer rules on the egress qdisc of the lefthand side
of the veth pair, and recover the plaintext traffic (no duplicates, no
RTAG) on the righthand side of the veth pair. But... seriously?
And there is not even one mention of this in the documentation?
And even so. You need to send the request through eth0 and expect to
receive the reply through a veth interface? How is any user space
application ever going to work?
Now comes the connection with DSA. Xiaoliang made tc-frer with LS1028A
offloading in mind. No criticism there, after all it is the hardware we
are working with.
The intended usage pattern is to put the FRER rules on the switch port
netdevices, and to do the termination on the switch-unaware netdevices.
In other words, it's as if eno2 is connected to a completely external
RedBox, and tc-frer only serves externally received traffic. Except that
those 2 isolated parts of the system are physically embedded in one.
So at step (1) you put the IP on eno2, at step (2) you choose the MAC
address for the stream to be that of eno2, at step (4) you configure the
split action (mirred towards the external ports, plus FRER tag push) on
the _ingress_ of swp4 (traffic sent by eno2 is received by swp4).
At step (5) you put the sequence recovery on the _egress_ of swp4
(traffic that egresses swp4 ingresses eno2).
So then you might ask, what would we do if we didn't have that eno2 <->
swp4 port pair? Is tc-frer useful for someone who doesn't, but is maybe
even able to offload 802.1CB streams, including termination, through
some other paradigm? The thing is that, as far as I can tell, Linux does
not really like to set up a network for the exclusive use of others
(pure forwarding), to which it has no local access. This is essentially
the design of tc-frer, and my issue with it.
> >
> > What we do in this case is put tc-frer rules for stream replication and
> > recovery on swp4 itself, and we use eno2 as the convergence point for
> > locally terminated streams.
> >
> > However, naturally, a hardware design that does not look like this can't
> > terminate traffic like this.
>
> Yes, this is my concern too. It would be nice if the user could
> configure the SW implementation and the HW offload with the same
> commands, and the original tc-frer approach fits this concept well.
> Anything in that direction is the way forward IMO, even if the
> underlying implementation changes.
> >
> > My idea was that it might be better if FRER was its own virtual network
> > interface (like a bridge), with multiple slave interfaces. The FRER net
> > device could keep its own database of streams and actions (completely
> > outside of tc) which would be managed similar to "bridge fdb add ...".
> > This way, the frer0 netdevice would be the local termination endpoint,
> > logically speaking.
>
> Interesting approach. To be honest I dont see the long term implications
> of this solution, others might have ideas about the pros and cons, but
> that looks like a solution where local stream termination is trivial.
The implication is that you can easily do stuff with FRER. Maybe I'm
relying too much on ping as an example, but I am really lacking real
life use cases. Feedback here would be extremely appreciated.
> > What I don't know for sure is if a FRER netdevice is supposed to forward
> > frames which aren't in its list of streams (and if so, by which rules).
>
> Yes this sounds correct, somehow non-local packets should be forwarded
> too with a bridge. Is it possible to the linux bridge recognize if one
> port is a frer0 port (or on the frer0 if that is enslaved) and do the
> forwarding of the streams? Re-implementing bridge functions just for the
> frer device would be redundant. Unfortunately I never dug myself deep
> enough into the linux bridge code, just when debugged VXLAN ARP
> suppression for EVPN, but I think it would be possible to exchange some
> metadatas between the bridge and the frer device to do the
> forwarding/terminating decision, something like here [0]
The other question if you're in favor of "FRER as net device" is whether
we should have a FRER interface per TSN stream (or per stream pair, RX
and TX, since streams are unidirectional), or a FRER interface for all
TSN streams. If the latter, we're moving more towards "FRER integrated
in bridge" territory. Or... maybe even resolve local termination through
some other mechanism, and still build on top of a tc-frer action.
The thing with "FRER as net device" on the other hand is that we've
already started modeling PSFP through tc. So if the FRER device has its
own rules, then "these" streams are not the same as "those" streams, and
a user would have to duplicate parts of the configuration. Whereas I
think the PSFP standard refers to stream identifiers directly from 802.1CB.
> > Because if a FRER netdevice is supposed to behave like a regular bridge
> > for non-streams, the implication is that the FRER logic should then be
> > integrated into the Linux bridge.
>
> This is (for me) more appealing. Also we can keep that in mind when
> Linux will support deterministic layer3 networking (IETF DetNet WG RFCs)
> it would be nice to have mapping between TSN and DetNet streams, then
> forward the packets on DetNet tunnels as well (with different
> endpoints). This is something our team researching so Balazs and Istvan
> might give you some info about that. But I admit that thinking about
> playing nicely with DetNet in regard of the current linux FRER
> implementation is more than overwhelming, but the Linux bridge would be
> a nice place to map TSN flows to DetNet flow like currently EVPN maps
> VLANs to VXLANs.
So what would be the use case for bridging packets belonging to
unrecognized TSN streams? In my toy setups I almost ran out of ideas how
to drop unwanted traffic and prevent it from being looped forever.
STP, MSTP, MRP are all out the window, this is active redundancy, you
need to embrace the loops, so it isn't as if you can pretend that
something sane is going to happen with a packet if it isn't part of a
stream that gets special handling from 802.1CB. No broadcast, no
multicast, and self address filtering on all switch ports.
> > Also, this new FRER software model complicates the offloading on NXP
> > LS1028A, but let's leave that aside, since it shouldn't really be the
> > decisive factor on what should the software model look like.
> >
> > Do you have any comments on this topic?
> I would like to see if others can join to the discussion as well, I will
> try to think about this problem more too.
>
> [0] https://lore.kernel.org/netdev/[email protected]/
>
> Best,
> Ferenc
Hi Ferenc,
(I adjusted the CC list)
On Fri, May 06, 2022 at 11:55:56AM +0000, Ferenc Fejes wrote:
> On 2021. 09. 28. 13:44, Xiaoliang Yang wrote:
> > This patch introduce a frer action to implement frame replication and
> > elimination for reliability, which is defined in IEEE P802.1CB.
>
> HiXiaoliang!
>
> thanks for your efforts to introduce afreraction to implement frame
> replication and elimination for reliability, which is defined in IEEE
> P802.1CB-2017. I would like to relay a small comment from our team,
> regarding to the FRER, not particularly to the code.
>
> Support of RTAG format is very straightforward.
>
> Since 2017, several maintenance items were opened regarding IEEE
> P802.1CB-2017 to fix some errors in the standard. Discussions results
> will be published soon e.g., in IEEE P802.1CBdb
> (https://1.ieee802.org/tsn/802-1cbdb/).
>
> One of the maintenance items impacts the vector recovery algorithm itself.
>
> Details on the problem and the solution are here:
>
> -https://www.802-1.org/items/370
>
> -https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf
> <https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf>
>
> It is a small but important fix. There is an incorrect reference to the
> size of the recovery window, when a received packet is checked to be
> out-of-range or not. Without this fix the vector recovery algorithm do
> not work properly in some scenarios.
>
> Please consider to update your patch to reflect the maintenance efforts
> of IEEE to correct .1CB-2017 related issues.
>
> > There are two modes for frer action: generate and push the tag, recover
> > and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
> > patch only supports RTAG now.
> >
> > User can push the tag on egress port of the talker device, recover and
> > pop the tag on ingress port of the listener device. When it's a relay
> > system, push the tag on ingress port, or set individual recover on
> > ingress port. Set the sequence recover on egress port.
> >
> > Use action "mirred" to do split function, and use "vlan-modify" to do
> > active stream identification function on relay system.
> >
> All of our research in the topic based on a in-house userspace FRER
> implementation but we are looking forward to test your work in the future.
>
> Thanks,
>
> Ferenc
Glad to see someone familiar with 802.1CB. I have a few questions and
concerns if you don't mind.
I think we are seeing a bit of a stall on the topic of FRER modeling in
the Linux networking stack, in no small part due to the fact that we are
working with pre-standard hardware.
The limitation with Xiaoliang's proposal here (to model FRER stream
replication and recovery as a tc action) is that I don't think it works
well for traffic termination - it only covers properly the use case of a
switch. More precisely, there isn't a single convergent termination
point for either locally originating traffic, or locally received
traffic (i.e. you, as user, don't know on which interface of several
available to open a socket).
In our hardware, this limitation isn't really visible because of the way
in which the Ethernet switch is connected inside the NXP LS1028A.
It is something like this:
+---------------------------------------+
| |
| +------+ +------+ |
| | eno2 | | eno3 | |
| +------+ +------+ |
| | | |
| +------+ +------+ |
| | swp4 | | swp5 | |
| +------+ +------+ |
| +------+ +------+ +------+ +------+ |
| | swp0 | | swp1 | | swp2 | | swp3 | |
+--+------+-+------+-+------+-+------+--+
In the above picture, the switch ports swp0-swp3 have eno3 as a DSA
master (connected to the internal swp5, a CPU port). The other internal
port, swp4, is configured as a DSA user port, so it has a net device.
Analogously, while eno3 is a DSA master and receives DSA-tagged traffic
(so it is useless for direct IP termination), eno2 receives DSA untagged
traffic and is therefore an IP termination endpoint into a switched
network.
What we do in this case is put tc-frer rules for stream replication and
recovery on swp4 itself, and we use eno2 as the convergence point for
locally terminated streams.
However, naturally, a hardware design that does not look like this can't
terminate traffic like this.
My idea was that it might be better if FRER was its own virtual network
interface (like a bridge), with multiple slave interfaces. The FRER net
device could keep its own database of streams and actions (completely
outside of tc) which would be managed similar to "bridge fdb add ...".
This way, the frer0 netdevice would be the local termination endpoint,
logically speaking.
What I don't know for sure is if a FRER netdevice is supposed to forward
frames which aren't in its list of streams (and if so, by which rules).
Because if a FRER netdevice is supposed to behave like a regular bridge
for non-streams, the implication is that the FRER logic should then be
integrated into the Linux bridge.
Also, this new FRER software model complicates the offloading on NXP
LS1028A, but let's leave that aside, since it shouldn't really be the
decisive factor on what should the software model look like.
Do you have any comments on this topic?
Hi Vladimir!
I adjusted the CC list too with my colleagues.
On 2022. 05. 06. 14:23, Vladimir Oltean wrote:
> Hi Ferenc,
>
> (I adjusted the CC list)
>
> On Fri, May 06, 2022 at 11:55:56AM +0000, Ferenc Fejes wrote:
>> On 2021. 09. 28. 13:44, Xiaoliang Yang wrote:
>>> This patch introduce a frer action to implement frame replication and
>>> elimination for reliability, which is defined in IEEE P802.1CB.
>> HiXiaoliang!
>>
>> thanks for your efforts to introduce afreraction to implement frame
>> replication and elimination for reliability, which is defined in IEEE
>> P802.1CB-2017. I would like to relay a small comment from our team,
>> regarding to the FRER, not particularly to the code.
>>
>> Support of RTAG format is very straightforward.
>>
>> Since 2017, several maintenance items were opened regarding IEEE
>> P802.1CB-2017 to fix some errors in the standard. Discussions results
>> will be published soon e.g., in IEEE P802.1CBdb
>> (https://1.ieee802.org/tsn/802-1cbdb/).
>>
>> One of the maintenance items impacts the vector recovery algorithm itself.
>>
>> Details on the problem and the solution are here:
>>
>> -https://www.802-1.org/items/370
>>
>> -https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf
>> <https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf>
>>
>> It is a small but important fix. There is an incorrect reference to the
>> size of the recovery window, when a received packet is checked to be
>> out-of-range or not. Without this fix the vector recovery algorithm do
>> not work properly in some scenarios.
>>
>> Please consider to update your patch to reflect the maintenance efforts
>> of IEEE to correct .1CB-2017 related issues.
>>
>>> There are two modes for frer action: generate and push the tag, recover
>>> and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
>>> patch only supports RTAG now.
>>>
>>> User can push the tag on egress port of the talker device, recover and
>>> pop the tag on ingress port of the listener device. When it's a relay
>>> system, push the tag on ingress port, or set individual recover on
>>> ingress port. Set the sequence recover on egress port.
>>>
>>> Use action "mirred" to do split function, and use "vlan-modify" to do
>>> active stream identification function on relay system.
>>>
>> All of our research in the topic based on a in-house userspace FRER
>> implementation but we are looking forward to test your work in the future.
>>
>> Thanks,
>>
>> Ferenc
> Glad to see someone familiar with 802.1CB. I have a few questions and
> concerns if you don't mind.
I CC'd Balazs Varga and Janos Farkas, experts on TSN topics,
including 802.1CB. Istvan Moldovan can also give valuable
feedback as the author of our in-house userspace FRER. I'll also try my
best to answer, but I'm the least competent in the topic.
>
> I think we are seeing a bit of a stall on the topic of FRER modeling in
> the Linux networking stack, in no small part due to the fact that we are
> working with pre-standard hardware.
>
> The limitation with Xiaoliang's proposal here (to model FRER stream
> replication and recovery as a tc action) is that I don't think it works
> well for traffic termination - it only covers properly the use case of a
> switch. More precisely, there isn't a single convergent termination
> point for either locally originating traffic, or locally received
> traffic (i.e. you, as user, don't know on which interface of several
> available to open a socket).
>
> In our hardware, this limitation isn't really visible because of the way
> in which the Ethernet switch is connected inside the NXP LS1028A.
We have some NXP LS1028As as well, so at least I am familiar with the box :-)
> It is something like this:
>
> +---------------------------------------+
> | |
> | +------+ +------+ |
> | | eno2 | | eno3 | |
> | +------+ +------+ |
> | | | |
> | +------+ +------+ |
> | | swp4 | | swp5 | |
> | +------+ +------+ |
> | +------+ +------+ +------+ +------+ |
> | | swp0 | | swp1 | | swp2 | | swp3 | |
> +--+------+-+------+-+------+-+------+--+
>
> In the above picture, the switch ports swp0-swp3 have eno3 as a DSA
> master (connected to the internal swp5, a CPU port). The other internal
> port, swp4, is configured as a DSA user port, so it has a net device.
> Analogously, while eno3 is a DSA master and receives DSA-tagged traffic
> (so it is useless for direct IP termination), eno2 receives DSA untagged
> traffic and is therefore an IP termination endpoint into a switched
> network.
Unfortunately I'm not familiar with the distributed switch architecture
(I have only read one netdev paper about it, and that's all), but I will try
to grasp the problem.
In my understanding, the main issue is the distinction between the
locally terminated and forwarded TSN streams, because currently the DSA
metadata tags are required to do that? Can you explain the problem for
someone who is not familiar with DSA?
>
> What we do in this case is put tc-frer rules for stream replication and
> recovery on swp4 itself, and we use eno2 as the convergence point for
> locally terminated streams.
>
> However, naturally, a hardware design that does not look like this can't
> terminate traffic like this.
Yes, this is my concern too. It would be nice if the
user could configure the SW implementation and the HW offload with the
same commands, and the original tc-frer approach fits this concept
well. Anything in that direction is the way forward IMO, even if
the underlying implementation changes.
>
> My idea was that it might be better if FRER was its own virtual network
> interface (like a bridge), with multiple slave interfaces. The FRER net
> device could keep its own database of streams and actions (completely
> outside of tc) which would be managed similar to "bridge fdb add ...".
> This way, the frer0 netdevice would be the local termination endpoint,
> logically speaking.
Interesting approach. To be honest, I don't see the long-term implications
of this solution; others might have ideas about the pros and cons, but
it looks like a solution where local stream termination is trivial.
> What I don't know for sure is if a FRER netdevice is supposed to forward
> frames which aren't in its list of streams (and if so, by which rules).
Yes, this sounds correct; somehow non-local packets should be forwarded
too, with a bridge. Is it possible for the Linux bridge to recognize if one
port is a frer0 port (or on the frer0 if that is enslaved) and do the
forwarding of the streams? Re-implementing bridge functions just for the
frer device would be redundant. Unfortunately I have never dug deep
enough into the Linux bridge code, except when I debugged VXLAN ARP
suppression for EVPN, but I think it would be possible to exchange some
metadata between the bridge and the frer device to make the
forwarding/terminating decision, something like here [0]
> Because if a FRER netdevice is supposed to behave like a regular bridge
> for non-streams, the implication is that the FRER logic should then be
> integrated into the Linux bridge.
This is (for me) more appealing. Also, we can keep in mind that when
Linux supports deterministic layer 3 networking (IETF DetNet WG RFCs),
it would be nice to have a mapping between TSN and DetNet streams, and then
forward the packets on DetNet tunnels as well (with different
endpoints). This is something our team is researching, so Balazs and Istvan
might give you some info about that. I admit that thinking about
playing nicely with DetNet in regard to the current Linux FRER
implementation is more than overwhelming, but the Linux bridge would be
a nice place to map TSN flows to DetNet flows, like EVPN currently maps
VLANs to VXLANs.
> Also, this new FRER software model complicates the offloading on NXP
> LS1028A, but let's leave that aside, since it shouldn't really be the
> decisive factor on what should the software model look like.
>
> Do you have any comments on this topic?
I would like to see if others can join the discussion as well; I will
try to think about this problem more too.
[0] https://lore.kernel.org/netdev/[email protected]/
Best,
Ferenc
On 2021. 09. 28. 13:44, Xiaoliang Yang wrote:
> This patch introduce a frer action to implement frame replication and
> elimination for reliability, which is defined in IEEE P802.1CB.
Hi Xiaoliang!
Thanks for your efforts to introduce a frer action to implement frame
replication and elimination for reliability, which is defined in IEEE
P802.1CB-2017. I would like to relay a small comment from our team
regarding FRER, not particularly the code.
Support of RTAG format is very straightforward.
Since 2017, several maintenance items have been opened regarding IEEE
P802.1CB-2017 to fix some errors in the standard. The results of the
discussions will be published soon, e.g., in IEEE P802.1CBdb
(https://1.ieee802.org/tsn/802-1cbdb/).
One of the maintenance items impacts the vector recovery algorithm itself.
Details on the problem and the solution are here:
-https://www.802-1.org/items/370
-https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf
<https://www.ieee802.org/1/files/public/docs2020/maint-varga-257-FRER-recovery-window-0320-v01.pdf>
It is a small but important fix. There is an incorrect reference to the
size of the recovery window when a received packet is checked to be
out-of-range or not. Without this fix, the vector recovery algorithm does
not work properly in some scenarios.
Please consider updating your patch to reflect the maintenance efforts
of IEEE to correct .1CB-2017 related issues.
> There are two modes for frer action: generate and push the tag, recover
> and pop the tag. frer tag has three types: RTAG, HSR, and PRP. This
> patch only supports RTAG now.
>
> User can push the tag on egress port of the talker device, recover and
> pop the tag on ingress port of the listener device. When it's a relay
> system, push the tag on ingress port, or set individual recover on
> ingress port. Set the sequence recover on egress port.
>
> Use action "mirred" to do split function, and use "vlan-modify" to do
> active stream identification function on relay system.
>
All of our research on the topic is based on an in-house userspace FRER
implementation, but we are looking forward to testing your work in the future.
Thanks,
Ferenc
Hi,
It is an interesting conversation, see my comments below.
> On Fri, May 06, 2022 at 02:44:17PM +0000, Ferenc Fejes wrote:
>> > Glad to see someone familiar with 802.1CB. I have a few questions and
>> > concerns if you don't mind.
>>
>> I CCd Balazs Varga and Janos Farkas, experts of the TSN topics
>> including 802.1CB as well. Istvan Moldovan's can also give valuable
>> feedback as the author of our in-house userspace FRER. I'll also try my
>> best to answer but I'm the least competent in the topic.
>>
> Nope, that would probably be me ;)
> I am commenting on Xiaoliang's patch without having even run it, and I
> have only looked through the code diagonally, and I'm not exactly an
> expert on the use cases that drove the standard either. So plenty of
> chances to make mistakes. But nonetheless I hope that by explaining to
> me where I'm wrong we'll be able to make progress with this.
>> >
>> > I think we are seeing a bit of a stall on the topic of FRER modeling in
>> > the Linux networking stack, in no small part due to the fact that we are
>> > working with pre-standard hardware.
>> >
>> > The limitation with Xiaoliang's proposal here (to model FRER stream
>> > replication and recovery as a tc action) is that I don't think it works
>> > well for traffic termination - it only covers properly the use case of a
>> > switch. More precisely, there isn't a single convergent termination
>> > point for either locally originating traffic, or locally received
>> > traffic (i.e. you, as user, don't know on which interface of several
>> > available to open a socket).
>> >
>> > In our hardware, this limitation isn't really visible because of the way
>> > in which the Ethernet switch is connected inside the NXP LS1028A.
>>
>> We have some NXP LS1028As as well so at least I familiar with the box :-)
> Cool, this means we'll eventually reach a common understanding of the
> topic.
>> > It is something like this:
>> >
>> > +---------------------------------------+
>> > | |
>> > | +------+ +------+ |
>> > | | eno2 | | eno3 | |
>> > | +------+ +------+ |
>> > | | | |
>> > | +------+ +------+ |
>> > | | swp4 | | swp5 | |
>> > | +------+ +------+ |
>> > | +------+ +------+ +------+ +------+ |
>> > | | swp0 | | swp1 | | swp2 | | swp3 | |
>> > +--+------+-+------+-+------+-+------+--+
>> >
>> > In the above picture, the switch ports swp0-swp3 have eno3 as a DSA
>> > master (connected to the internal swp5, a CPU port). The other internal
>> > port, swp4, is configured as a DSA user port, so it has a net device.
>> > Analogously, while eno3 is a DSA master and receives DSA-tagged traffic
>> > (so it is useless for direct IP termination), eno2 receives DSA untagged
>> > traffic and is therefore an IP termination endpoint into a switched
>> > network.
>>
>> Unfortunately I'm not familiar with the distributed switch architecture
>> (I only read a netdev paper from that and thats all) but I try to grasp
>> on the problem.
>> In my understanding, the main issue is the distinction between the
>> locally terminated and forwarded TSN streams, because currently the DSA
>> metadata tags are required to do that? Can you explain the problem for
>> one who not familiar with DSA?
> Forget about DSA, what I'm trying to get at is that you might one day
> read the release notes of the Linux kernel and see that it gained
> support for FRER using tc, and get all excited, download and compile it,
> set up 2 machines connected through 2 port pairs, and try to configure
> the systems to ping each other redundantly, to become familiar with how
> it works. Start with something simple, what can be so hard about a ping ;)
> You'll say something along the lines of
> 1. ok, I have 2 IP addresses, so I need 2 streams, one A -> B and one B -> A
Don't forget about the background traffic. Nothing will work if ARP is not working,
and ARP packets have broadcast destination (they will not be identified as part of
the streams). So besides the FRER forwarding, normal bridging should also be working!
> 2. I want to use the null stream identification function (MAC DA, VLAN ID
> for those following along) so I have to resolve each IP address to a
> MAC address to use as a stream identifier, but how? since the 2
> Ethernet cards on each system have different MAC addresses. Anyway,
> pick one and put the other card in promisc for now.
TSN streams by definition are Layer 2, so we suppose that MAC addresses (and VLANs) are known.
> 3. I have the MACs now, I want to configure the streams. The stream "A -> B"
> needs to be configured for splitting on the first system, and for
> sequence recovery on the second system. The stream "B -> A" needs to
> be configured for recovery on the first system and for splitting on
> the second.
> 4. Let's start with splitting, this is just the "mirred egress mirror"
> action, nothing FRER specific about it. There's also the "frer rtag
> tag-action tag-push" action which adds the redundancy tag. Good thing
> these actions can be chained. So let's put a filter on the egress
> qdisc of eth0, that matches on the MAC address of B, and has a mirred
> mirror action to eth1, and a "rtag tag-push" action. Notice how by
> this time, eth0 becomes sort of a "primary" interface and eth1 sort
> of a "secondary" interface. So if you ping, you need to use eth0.
> What if the link goes down on eth0 you ask, how does the "redundancy"
> in "frer" come into play, with the traffic still going through eth1?
> No time to ask questions like that, let's move on.
Well, there should be no "primary" or "secondary". The two interfaces should be equally
handled, otherwise a lot of other issues appear...
> 5. Let's say that both links are up, and system B is receiving a
> replicated stream with FRER tags on both eth0 and eth1. It wants to
> eliminate the duplicates and see a continuous flow of ICMP requests
> without the extra FRER tag. Back to the documentation. We see 2 kinds
> of stream recovery, one is "individual" recovery which is a
> "frer rtag recover" action put on the ingress qdisc of an interface,
> and the other is just "recovery", which is the same action but put on
> the egress qdisc. We don't want individual sequence recovery processes
> on eth0 and eth1 of station B, since those won't consider the packets
> as being members of the same stream, and they'll still be duplicated.
> So we want the normal recovery. But on whose netdev's egress qdisc do
> we put the "rtag recover" action? Both eth0 and eth1 are receiving.
> There is no central convergence point.
> Now you're stumped and thinking, how is this supposed to be used?
> What can you do with it? I mean, I can probably create a veth pair as
> that aforementioned missing convergence point, and guide packets from
> {eth0, eth1} towards the lefthand side of the veth pair, using mirred
> redirect.
> Then I can put the frer rules on the egress qdisc of the lefthand side
> of the veth pair, and recover the plaintext traffic (no duplicates, no
> RTAG) on the righthand side of the veth pair. But... seriously?
> And there is not even one mention of this in the documentation?
> And even so. You need to send the request through eno0 and expect to
> receive the reply through a veth interface? How is any user space
> application ever going to work?
We definitely need a convergence point for the elimination part. Frames from both
redundant paths should be received by the elimination function.
A virtual interface (like tap0) or a special netdevice could be the convergence point.
> Now comes the connection with DSA. Xiaoliang made tc-frer with LS1028A
> offloading in mind. No criticism there, after all it is the hardware we
> are working with.
> The intended usage pattern is to put the FRER rules on the switch port
> netdevices, and to do the termination on the switch-unaware netdevices.
> In other words, it's as if eno2 is connected to a completely external
> RedBox, and tc-frer only serves externally received traffic. Except that
> those 2 isolated parts of the system are physically embedded in one.
> So at step (1) you put the IP on eno2, at step (2) you choose the MAC
> address for the stream to be that of eno2, at step (4) you configure the
> split action (mirred towards the external ports, plus FRER tag push) on
> the _ingress_ of swp4 (traffic sent by eno2 is received by swp4).
> At step (5) you put the sequence recovery on the _egress_ of swp4
> (traffic that egresses swp4 ingresses eno2).
> So then you might ask, what would we do if we didn't have that eno2 <->
> swp4 port pair? Is tc-frer useful for someone who doesn't, but is maybe
> even able to offload 802.1CB streams, including termination, through
> some other paradigm? The thing is that, as far as I can tell, Linux does
> not really like to set up a network for the exclusive use of others
> (pure forwarding), to which it has no local access. This is essentially
> the design of tc-frer, and my issue with it.
I think the DSA case is a special one, and the solution should also work with
just two NICs. Besides having a tap/veth port, we can also put the replication/
elimination point in the bridge.
Of course, in that case we don't need a virtual interface, but instead we
are tied to the Linux bridge. The HW offload could also work - but a bit differently.
I'm not saying this is a better solution, but it is another way to implement FRER,
and it has the advantage of handling the background traffic as well.
>> >
>> > What we do in this case is put tc-frer rules for stream replication and
>> > recovery on swp4 itself, and we use eno2 as the convergence point for
>> > locally terminated streams.
>> >
>> > However, naturally, a hardware design that does not look like this can't
>> > terminate traffic like this.
>>
>> Yes, this is my concern too. What would be a nice to have thing if the
>> user can configure the SW implementation and the HW offload with the
>> same commands and the original tc-frer approach fits well to this
>> concept. Anything towards that direction is the way forward IMO, even if
>> the underlying implementation will change.
>> >
>> > My idea was that it might be better if FRER was its own virtual network
>> > interface (like a bridge), with multiple slave interfaces. The FRER net
>> > device could keep its own database of streams and actions (completely
>> > outside of tc) which would be managed similar to "bridge fdb add ...".
>> > This way, the frer0 netdevice would be the local termination endpoint,
>> > logically speaking.
>>
>> Interesting approach. To be honest I dont see the long term implications
>> of this solution, others might have ideas about the pros and cons, but
>> that looks like a solution where local stream termination is trivial.
> The implication is that you can easily do stuff with FRER. Maybe I'm
> relying too much on ping as an example, but I am really lacking real
> life use cases. Feedback here would be extremely appreciated.
Ping is OK as test traffic, but probably in real life you can expect VLAN tagged
traffic. Also, as I mentioned ARP is not part of the TSN stream, but it should go through.
Just like for ping.
>> > What I don't know for sure is if a FRER netdevice is supposed to forward
>> > frames which aren't in its list of streams (and if so, by which rules).
>>
>> Yes this sounds correct, somehow non-local packets should be forwarded
>> too with a bridge. Is it possible to the linux bridge recognize if one
>> port is a frer0 port (or on the frer0 if that is enslaved) and do the
>> forwarding of the streams? Re-implementing bridge functions just for the
>> frer device would be redundant. Unfortunately I never dug myself deep
>> enough into the linux bridge code, just when debugged VXLAN ARP
>> suppression for EVPN, but I think it would be possible to exchange some
>> metadata between the bridge and the frer device to make the
>> forwarding/terminating decision, something like in [0].
I think FRER frames need to be handled before entering the bridge. Or if the
bridge does the FRER, then the first thing is to identify and handle the FRER streams...
> The other question if you're in favor of "FRER as net device" is whether
> we should have a FRER interface per TSN stream (or per stream pair, RX
> and TX, since streams are unidirectional), or a FRER interface for all
> TSN streams. If the latter, we're moving more towards "FRER integrated
> in bridge" territory. Or... maybe even resolve local termination through
> some other mechanism, and still build on top of a tc-frer action.
We don't need a FRER device per stream. However, per-stream state needs to
be maintained. If we have a FRER netdevice, we have to assign the related interfaces
as "slave" ports. These ports will have to identify the FRER streams and forward them
to the FRER netdevice, and the rest of the traffic needs to be handled normally.
So besides the FRER netdevice we can still have a bridge running to handle the other traffic.
On the other hand, we can still have multiple FRER netdevices, if needed, and we can assign
different slave interfaces to the different FRER netdevices.
> The thing with "FRER as net device" on the other hand is that we've
> already started modeling PSFP through tc. So if the FRER device has its
> own rules, then "these" streams are not the same as "those" streams, and
> a user would have to duplicate parts of the configuration. Whereas I
> think the PSFP standard refers to stream identifiers directly from 802.1CB.
Unfortunately this is true, we have to configure the filtering at the ingress interface,
and further configuration is needed at the FRER netdevice.
>> > Because if a FRER netdevice is supposed to behave like a regular bridge
>> > for non-streams, the implication is that the FRER logic should then be
>> > integrated into the Linux bridge.
>>
>> This is (for me) more appealing. Also we can keep that in mind when
>> Linux will support deterministic layer3 networking (IETF DetNet WG RFCs)
>> it would be nice to have mapping between TSN and DetNet streams, then
>> forward the packets on DetNet tunnels as well (with different
>> endpoints). This is something our team is researching, so Balazs and Istvan
>> might give you some info about that. But I admit that thinking about
>> playing nicely with DetNet in regard of the current linux FRER
>> implementation is more than overwhelming, but the Linux bridge would be
>> a nice place to map TSN flows to DetNet flow like currently EVPN maps
>> VLANs to VXLANs.
Having FRER in the bridge also has some disadvantages. What if I want to use
openvswitch? Netdevice based FRER can work with Linux bridge and openvswitch too.
> So what would be the use case for bridging packets belonging to
> unrecognized TSN streams? In my toy setups I almost ran out of ideas how
> to drop unwanted traffic and prevent it from being looped forever.
> STP, MSTP, MRP are all out the window, this is active redundancy, you
> need to embrace the loops, so it isn't as if you can pretend that
> something sane is going to happen with a packet if it isn't part of a
> stream that gets special handling from 802.1CB. No broadcast, no
> multicast, and self address filtering on all switch ports.
It is really important that FRER streams need to be identified immediately
and not handled as normal traffic. If we identify the FRER streams first,
and handle them, then we can avoid the loops. So it is not enough to use a hook,
we need to prevent further processing of FRER frames.
The rest of the traffic can be handled normally, even broadcast/multicast.
The unrecognized TSN streams may cause loops, that's true, but I think that is a
misconfiguration, and cannot be avoided.
>> > Also, this new FRER software model complicates the offloading on NXP
>> > LS1028A, but let's leave that aside, since it shouldn't really be the
>> > decisive factor in what the software model should look like.
>> >
>> > Do you have any comments on this topic?
>> I would like to see if others can join the discussion as well; I will
>> try to think about this problem more too.
>>
>> [0] https://lore.kernel.org/netdev/[email protected]/
>>
>> Best,
>> Ferenc
--
Üdvözlettel
István Moldován
mailto:[email protected]