This patch implements IPv6 RLB for balance-alb mode.
Suggested-by: Hu Yadi <[email protected]>
Signed-off-by: Sun Shouxin <[email protected]>
---
changelog:
v1-->v2:
-Remove ndisc_bond_send_na and refactor ndisc_send_na.
-In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
-Don't send neighbor advertisement message when receiving
neighbor advertisement message in rlb6_update_entry_from_na.
v2-->v3:
-Don't export ndisc_send_na.
-Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
in rlb6_update_client.
v3-->v4:
-Submit all code at a whole patch.
---
drivers/net/bonding/bond_3ad.c | 2 +-
drivers/net/bonding/bond_alb.c | 592 ++++++++++++++++++++++++++++-
drivers/net/bonding/bond_debugfs.c | 14 +
drivers/net/bonding/bond_main.c | 6 +-
drivers/net/usb/cdc_mbim.c | 2 +-
include/net/bond_3ad.h | 2 +-
include/net/bond_alb.h | 7 +
include/net/bonding.h | 6 +-
include/net/ipv6_stubs.h | 3 +-
include/net/ndisc.h | 9 +-
net/ipv6/addrconf.c | 4 +-
net/ipv6/ndisc.c | 64 +++-
12 files changed, 675 insertions(+), 36 deletions(-)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index a86b1f71762e..3cba269f12e2 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2682,7 +2682,7 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
return ret;
}
-int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct bonding *bond,
struct slave *slave)
{
struct lacpdu *lacpdu, _lacpdu;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 303c8d32d451..06a4557e00e3 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -23,6 +23,9 @@
#include <asm/byteorder.h>
#include <net/bonding.h>
#include <net/bond_alb.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+#include <net/ipv6_stubs.h>
static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
0x33, 0x33, 0x00, 0x00, 0x00, 0x01
@@ -57,6 +60,13 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
static void rlb_src_unlink(struct bonding *bond, u32 index);
static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
u32 ip_dst_hash);
+static void rlb6_delete_table_entry(struct bonding *bond, u32 index);
+static u8 *alb_get_lladdr(struct sk_buff *skb);
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+ struct slave *tx_slave);
+static bool alb_determine_ipv6_nd(struct sk_buff *skb, struct bonding *bond);
+static int rlb_recv(struct sk_buff *skb, struct bonding *bond,
+ struct slave *slave);
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
@@ -269,7 +279,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
spin_unlock_bh(&bond->mode_lock);
}
-static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
+static int rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
struct slave *slave)
{
struct arp_pkt *arp, _arp;
@@ -415,6 +425,31 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
}
}
+ rx_hash_table = bond_info->rx6_hashtbl;
+ index = bond_info->rx6_hashtbl_used_head;
+ for (; index != RLB_NULL_INDEX; index = next_index) {
+ next_index = rx_hash_table[index].used_next;
+ if (rx_hash_table[index].slave == slave) {
+ struct slave *assigned_slave = rlb_next_rx_slave(bond);
+
+ if (assigned_slave) {
+ u8 mac_dst[ETH_ALEN];
+
+ rx_hash_table[index].slave = assigned_slave;
+ memcpy(mac_dst, rx_hash_table[index].mac_dst,
+ sizeof(mac_dst));
+ if (is_valid_ether_addr(mac_dst)) {
+ bond_info->rx6_hashtbl[index].ntt = 1;
+ bond_info->rx6_ntt = 1;
+ bond_info->rlb6_update_retry_counter =
+ RLB_UPDATE_RETRY;
+ }
+ } else { /* there is no active slave */
+ rx_hash_table[index].slave = NULL;
+ }
+ }
+ }
+
spin_unlock_bh(&bond->mode_lock);
if (slave != rtnl_dereference(bond->curr_active_slave))
@@ -704,7 +739,7 @@ static void rlb_rebalance(struct bonding *bond)
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *assigned_slave;
struct rlb_client_info *client_info;
- int ntt;
+ int ntt, ntt_ip6;
u32 hash_index;
spin_lock_bh(&bond->mode_lock);
@@ -724,9 +759,27 @@ static void rlb_rebalance(struct bonding *bond)
}
}
+ ntt_ip6 = 0;
+ hash_index = bond_info->rx6_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX;
+ hash_index = client_info->used_next) {
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+ assigned_slave = __rlb_next_rx_slave(bond);
+ if (assigned_slave && client_info->slave != assigned_slave) {
+ client_info->slave = assigned_slave;
+ if (!is_zero_ether_addr(client_info->mac_dst)) {
+ client_info->ntt = 1;
+ ntt_ip6 = 1;
+ }
+ }
+ }
+
/* update the team's flag only after the whole iteration */
if (ntt)
bond_info->rx_ntt = 1;
+
+ if (ntt_ip6)
+ bond_info->rx6_ntt = 1;
spin_unlock_bh(&bond->mode_lock);
}
@@ -846,6 +899,7 @@ static int rlb_initialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct rlb_client_info *new_hashtbl;
+ struct rlb_client_info *new6_hashtbl;
int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
int i;
@@ -853,19 +907,29 @@ static int rlb_initialize(struct bonding *bond)
if (!new_hashtbl)
return -1;
+ new6_hashtbl = kmalloc(size, GFP_KERNEL);
+ if (!new6_hashtbl) {
+ kfree(new_hashtbl);
+ return -1;
+ }
+
spin_lock_bh(&bond->mode_lock);
bond_info->rx_hashtbl = new_hashtbl;
+ bond_info->rx6_hashtbl = new6_hashtbl;
bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
+ bond_info->rx6_hashtbl_used_head = RLB_NULL_INDEX;
- for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
+ for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
rlb_init_table_entry(bond_info->rx_hashtbl + i);
+ rlb_init_table_entry(bond_info->rx6_hashtbl + i);
+ }
spin_unlock_bh(&bond->mode_lock);
/* register to receive ARPs */
- bond->recv_probe = rlb_arp_recv;
+ bond->recv_probe = rlb_recv;
return 0;
}
@@ -880,6 +944,10 @@ static void rlb_deinitialize(struct bonding *bond)
bond_info->rx_hashtbl = NULL;
bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
+ kfree(bond_info->rx6_hashtbl);
+ bond_info->rx6_hashtbl = NULL;
+ bond_info->rx6_hashtbl_used_head = RLB_NULL_INDEX;
+
spin_unlock_bh(&bond->mode_lock);
}
@@ -901,9 +969,397 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
curr_index = next_index;
}
+ curr_index = bond_info->rx6_hashtbl_used_head;
+ while (curr_index != RLB_NULL_INDEX) {
+ struct rlb_client_info *curr = &bond_info->rx6_hashtbl[curr_index];
+ u32 next_index = bond_info->rx6_hashtbl[curr_index].used_next;
+
+ if (curr->vlan_id == vlan_id)
+ rlb6_delete_table_entry(bond, curr_index);
+
+ curr_index = next_index;
+ }
+
+ spin_unlock_bh(&bond->mode_lock);
+}
+
+/*********************** ipv6 rlb specific functions ***************************/
+static void rlb6_update_client(struct rlb_client_info *client_info)
+{
+ struct nd_sendinfo sendinfo;
+ int i;
+
+ if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
+ return;
+
+ sendinfo.vlanid = client_info->vlan_id;
+ sendinfo.mac_dst = client_info->mac_dst;
+ sendinfo.mac_src = client_info->slave->dev->dev_addr;
+
+ for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
+ ipv6_stub->ndisc_send_na(client_info->slave->dev,
+ &client_info->ip6_dst,
+ &client_info->ip6_src,
+ false, false, true, true,
+ &sendinfo);
+ }
+}
+
+static void rlb6_update_rx_clients(struct bonding *bond)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *client_info;
+ u32 hash_index;
+
+ spin_lock_bh(&bond->mode_lock);
+
+ hash_index = bond_info->rx6_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX;
+ hash_index = client_info->used_next) {
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+ if (client_info->ntt) {
+ rlb6_update_client(client_info);
+ if (bond_info->rlb6_update_retry_counter == 0)
+ client_info->ntt = 0;
+ }
+ }
+
+ bond_info->rlb6_update_delay_counter = RLB_UPDATE_DELAY;
+
+ spin_unlock_bh(&bond->mode_lock);
+}
+
+static void rlb6_delete_table_entry_dst(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx6_hashtbl[index].used_next;
+ u32 prev_index = bond_info->rx6_hashtbl[index].used_prev;
+
+ if (index == bond_info->rx6_hashtbl_used_head)
+ bond_info->rx6_hashtbl_used_head = next_index;
+
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx6_hashtbl[next_index].used_prev = prev_index;
+
+ if (prev_index != RLB_NULL_INDEX)
+ bond_info->rx6_hashtbl[prev_index].used_next = next_index;
+}
+
+static void rlb6_src_link(struct bonding *bond, u32 ip_src_hash,
+ u32 ip_dst_hash)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next;
+
+ bond_info->rx6_hashtbl[ip_dst_hash].src_prev = ip_src_hash;
+ next = bond_info->rx6_hashtbl[ip_src_hash].src_first;
+ bond_info->rx6_hashtbl[ip_dst_hash].src_next = next;
+ if (next != RLB_NULL_INDEX)
+ bond_info->rx6_hashtbl[next].src_prev = ip_dst_hash;
+ bond_info->rx6_hashtbl[ip_src_hash].src_first = ip_dst_hash;
+}
+
+static void rlb6_src_unlink(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ u32 next_index = bond_info->rx6_hashtbl[index].src_next;
+ u32 prev_index = bond_info->rx6_hashtbl[index].src_prev;
+
+ bond_info->rx6_hashtbl[index].src_next = RLB_NULL_INDEX;
+ bond_info->rx6_hashtbl[index].src_prev = RLB_NULL_INDEX;
+
+ if (next_index != RLB_NULL_INDEX)
+ bond_info->rx6_hashtbl[next_index].src_prev = prev_index;
+
+ if (prev_index == RLB_NULL_INDEX)
+ return;
+
+ if (bond_info->rx6_hashtbl[prev_index].src_first == index)
+ bond_info->rx6_hashtbl[prev_index].src_first = next_index;
+ else
+ bond_info->rx6_hashtbl[prev_index].src_next = next_index;
+}
+
+static void rlb6_req_update_slave_clients(struct bonding *bond,
+ struct slave *slave)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *client_info;
+ u32 hash_index;
+ int ntt = 0;
+
+ spin_lock_bh(&bond->mode_lock);
+
+ hash_index = bond_info->rx6_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX;
+ hash_index = client_info->used_next) {
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+ if (client_info->slave == slave &&
+ is_valid_ether_addr(client_info->mac_dst)) {
+ client_info->ntt = 1;
+ ntt = 1;
+ }
+ }
+
+ if (ntt) {
+ bond_info->rx6_ntt = 1;
+ bond_info->rlb6_update_retry_counter =
+ RLB_UPDATE_RETRY;
+ }
+ spin_unlock_bh(&bond->mode_lock);
+}
+
+static struct slave *rlb6_nd_choose_channel(struct sk_buff *skb,
+ struct bonding *bond,
+ struct ipv6hdr *ip6hdr,
+ u8 type)
+{
+ struct nd_msg *msg;
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct slave *assigned_slave, *curr_active_slave;
+ struct rlb_client_info *client_info;
+ struct ethhdr *eth_data;
+ u8 *dst_ip;
+ u32 hash_index = 0;
+
+ spin_lock(&bond->mode_lock);
+
+ msg = (struct nd_msg *)skb_transport_header(skb);
+ eth_data = eth_hdr(skb);
+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
+ if (type == NDISC_NEIGHBOUR_SOLICITATION)
+ dst_ip = (u8 *)msg->target.s6_addr;
+ else
+ dst_ip = (u8 *)ip6hdr->daddr.s6_addr;
+
+ hash_index = _simple_hash(dst_ip,
+ sizeof(struct in6_addr));
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+
+ if (client_info->assigned) {
+ if (!memcmp(client_info->ip6_dst.s6_addr, dst_ip,
+ sizeof(struct in6_addr)) &&
+ !memcmp(client_info->ip6_src.s6_addr,
+ ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr))) {
+ ether_addr_copy(client_info->mac_src,
+ eth_data->h_source);
+
+ assigned_slave = client_info->slave;
+ if (assigned_slave) {
+ spin_unlock(&bond->mode_lock);
+ return assigned_slave;
+ }
+ } else {
+ if (curr_active_slave &&
+ curr_active_slave != client_info->slave) {
+ client_info->slave = curr_active_slave;
+ rlb6_update_client(client_info);
+ }
+ }
+ }
+
+ /* assign a new slave */
+ assigned_slave = __rlb_next_rx_slave(bond);
+
+ if (assigned_slave) {
+ if (!(client_info->assigned &&
+ !memcmp(client_info->ip6_src.s6_addr,
+ ip6hdr->saddr.s6_addr, sizeof(ip6hdr->saddr.s6_addr)))) {
+ u32 hash_src = _simple_hash((u8 *)ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr));
+
+ rlb6_src_unlink(bond, hash_index);
+ rlb6_src_link(bond, hash_src, hash_index);
+ }
+
+ memcpy(client_info->ip6_src.s6_addr, ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr));
+ memcpy(client_info->ip6_dst.s6_addr, dst_ip,
+ sizeof(struct in6_addr));
+
+ ether_addr_copy(client_info->mac_dst, eth_data->h_dest);
+ ether_addr_copy(client_info->mac_src, eth_data->h_source);
+
+ client_info->slave = assigned_slave;
+
+ if (is_valid_ether_addr(client_info->mac_dst)) {
+ client_info->ntt = 1;
+ bond->alb_info.rx6_ntt = 1;
+ } else {
+ client_info->ntt = 0;
+ }
+
+ if (vlan_get_tag(skb, &client_info->vlan_id))
+ client_info->vlan_id = 0;
+
+ if (!client_info->assigned) {
+ u32 prev_tbl_head = bond_info->rx6_hashtbl_used_head;
+
+ bond_info->rx6_hashtbl_used_head = hash_index;
+ client_info->used_next = prev_tbl_head;
+ if (prev_tbl_head != RLB_NULL_INDEX)
+ bond_info->rx6_hashtbl[prev_tbl_head].used_prev = hash_index;
+ client_info->assigned = 1;
+ }
+ }
+
+ spin_unlock(&bond->mode_lock);
+
+ return assigned_slave;
+}
+
+static struct slave *rlb_nd_xmit(struct sk_buff *skb, struct bonding *bond)
+{
+ struct slave *tx_slave = NULL;
+ struct ipv6hdr *ip6hdr;
+ struct icmp6hdr *hdr;
+ u8 *lladdr;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+ return NULL;
+
+ ip6hdr = ipv6_hdr(skb);
+ if (ip6hdr->nexthdr != IPPROTO_ICMPV6)
+ return NULL;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr)))
+ return NULL;
+
+ hdr = icmp6_hdr(skb);
+
+ if (hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT &&
+ hdr->icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) {
+ return NULL;
+ }
+
+ lladdr = alb_get_lladdr(skb);
+ if (!lladdr)
+ return NULL;
+
+ if (!bond_slave_has_mac_rx(bond, lladdr)) {
+ tx_slave = rcu_dereference(bond->curr_active_slave);
+ return tx_slave;
+ }
+
+ tx_slave = rlb6_nd_choose_channel(skb, bond, ip6hdr, hdr->icmp6_type);
+ if (!tx_slave)
+ return NULL;
+
+ alb_set_nd_option(skb, bond, tx_slave);
+
+ return tx_slave;
+}
+
+static void rlb6_update_entry_from_na(struct bonding *bond,
+ struct ipv6hdr *ip6hdr, u8 *lladdr)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *client_info;
+ u32 hash_index;
+
+ spin_lock_bh(&bond->mode_lock);
+
+ hash_index = _simple_hash(ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr));
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+
+ if (client_info->assigned &&
+ !memcmp(ip6hdr->saddr.s6_addr, client_info->ip6_dst.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr)) && !memcmp(ip6hdr->daddr.s6_addr,
+ client_info->ip6_src.s6_addr, sizeof(ip6hdr->daddr.s6_addr)) &&
+ !ether_addr_equal_64bits(client_info->mac_dst, lladdr)) {
+ memcpy(client_info->mac_dst, lladdr,
+ sizeof(client_info->mac_dst));
+ }
spin_unlock_bh(&bond->mode_lock);
}
+static void rlb6_delete_table_entry(struct bonding *bond, u32 index)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *entry = &bond_info->rx6_hashtbl[index];
+
+ rlb6_delete_table_entry_dst(bond, index);
+ rlb_init_table_entry_dst(entry);
+ rlb6_src_unlink(bond, index);
+}
+
+static void rlb6_purge_src_ip(struct bonding *bond, struct ipv6hdr *ip6hdr,
+ u8 *lladdr)
+{
+ struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+ struct rlb_client_info *client_info;
+ u32 ip_src_hash = _simple_hash((u8 *)ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr));
+ u32 index, next_index;
+
+ spin_lock_bh(&bond->mode_lock);
+
+ index = bond_info->rx6_hashtbl[ip_src_hash].src_first;
+ while (index != RLB_NULL_INDEX) {
+ client_info = &bond_info->rx6_hashtbl[index];
+ next_index = client_info->src_next;
+
+ if (!memcmp(client_info->ip6_src.s6_addr,
+ ip6hdr->saddr.s6_addr,
+ sizeof(ip6hdr->saddr.s6_addr)) &&
+ !ether_addr_equal_64bits(lladdr,
+ client_info->mac_src))
+ rlb6_delete_table_entry(bond, index);
+ index = next_index;
+ }
+
+ spin_unlock_bh(&bond->mode_lock);
+}
+
+static int rlb_nd_recv(struct sk_buff *skb, struct bonding *bond)
+{
+ struct ipv6hdr *ip6hdr;
+ struct nd_msg *msg;
+ struct inet6_ifaddr *ifp;
+ u8 *lladdr;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+ return RX_HANDLER_ANOTHER;
+
+ ip6hdr = ipv6_hdr(skb);
+
+ ifp = ipv6_get_ifaddr(dev_net(skb->dev), &ip6hdr->saddr, NULL, 0);
+ if (ifp) {
+ in6_ifa_put(ifp);
+ return RX_HANDLER_ANOTHER;
+ }
+
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)))
+ return RX_HANDLER_ANOTHER;
+
+ msg = (struct nd_msg *)skb_transport_header(skb);
+ lladdr = alb_get_lladdr(skb);
+ if (!lladdr)
+ return RX_HANDLER_ANOTHER;
+
+ rlb6_purge_src_ip(bond, ip6hdr, lladdr);
+
+ if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)
+ rlb6_update_entry_from_na(bond, ip6hdr, lladdr);
+
+ return RX_HANDLER_ANOTHER;
+}
+
+static int rlb_recv(struct sk_buff *skb, struct bonding *bond,
+ struct slave *slave)
+{
+ if (skb->protocol == cpu_to_be16(ETH_P_ARP))
+ return rlb_arp_recv(skb, bond, slave);
+ else if (alb_determine_ipv6_nd(skb, bond))
+ return rlb_nd_recv(skb, bond);
+
+ return RX_HANDLER_ANOTHER;
+}
+
/*********************** tlb/rlb shared functions *********************/
static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[],
@@ -1068,6 +1524,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
* has changed
*/
rlb_req_update_slave_clients(bond, slave1);
+ rlb6_req_update_slave_clients(bond, slave1);
}
} else {
disabled_slave = slave1;
@@ -1080,6 +1537,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
* has changed
*/
rlb_req_update_slave_clients(bond, slave2);
+ rlb6_req_update_slave_clients(bond, slave2);
}
} else {
disabled_slave = slave2;
@@ -1291,6 +1749,111 @@ static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
}
+static bool alb_determine_ipv6_nd(struct sk_buff *skb, struct bonding *bond)
+{
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (skb_vlan_tag_present(skb))
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ return alb_determine_nd(skb, bond);
+ }
+
+ return false;
+}
+
+static void alb_change_nd_option(struct sk_buff *skb, const void *data)
+{
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+ struct net_device *dev = skb->dev;
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+ struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
+ u8 *lladdr = NULL;
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+ offsetof(struct nd_msg, opt));
+
+ while (ndoptlen) {
+ int l;
+
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ lladdr = ndisc_opt_addr_data(nd_opt, dev);
+ break;
+
+ default:
+ lladdr = NULL;
+ break;
+ }
+
+ l = nd_opt->nd_opt_len << 3;
+
+ if (ndoptlen < l || l == 0)
+ return;
+
+ if (lladdr) {
+ memcpy(lladdr, data, dev->addr_len);
+ icmp6h->icmp6_cksum = 0;
+
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
+ &ip6hdr->daddr,
+ ntohs(ip6hdr->payload_len),
+ IPPROTO_ICMPV6,
+ csum_partial(icmp6h,
+ ntohs(ip6hdr->payload_len),
+ 0));
+ return;
+ }
+ ndoptlen -= l;
+ nd_opt = ((void *)nd_opt) + l;
+ }
+}
+
+static u8 *alb_get_lladdr(struct sk_buff *skb)
+{
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+ struct net_device *dev = skb->dev;
+ u8 *lladdr = NULL;
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+ offsetof(struct nd_msg, opt));
+
+ while (ndoptlen) {
+ int l;
+
+ switch (nd_opt->nd_opt_type) {
+ case ND_OPT_SOURCE_LL_ADDR:
+ case ND_OPT_TARGET_LL_ADDR:
+ lladdr = ndisc_opt_addr_data(nd_opt, dev);
+ break;
+
+ default:
+ break;
+ }
+
+ l = nd_opt->nd_opt_len << 3;
+
+ if (ndoptlen < l || l == 0)
+ return NULL;
+
+ if (lladdr)
+ return lladdr;
+
+ ndoptlen -= l;
+ nd_opt = ((void *)nd_opt) + l;
+ }
+
+ return lladdr;
+}
+
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+ struct slave *tx_slave)
+{
+ if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
+ if (alb_determine_nd(skb, bond))
+ alb_change_nd_option(skb, tx_slave->dev->dev_addr);
+ }
+}
+
/************************ exported alb functions ************************/
int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1457,12 +2020,17 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
break;
}
- if (alb_determine_nd(skb, bond)) {
+ tx_slave = rlb_nd_xmit(skb, bond);
+ if (tx_slave) {
+ do_tx_balance = false;
+ break;
+ }
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
do_tx_balance = false;
break;
}
- /* The IPv6 header is pulled by alb_determine_nd */
/* Additionally, DAD probes should not be tx-balanced as that
* will lead to false positives for duplicate addresses and
* prevent address configuration from working.
@@ -1612,6 +2180,17 @@ void bond_alb_monitor(struct work_struct *work)
bond_info->rx_ntt = 0;
}
}
+ if (bond_info->rx6_ntt) {
+ if (bond_info->rlb6_update_delay_counter) {
+ --bond_info->rlb6_update_delay_counter;
+ } else {
+ rlb6_update_rx_clients(bond);
+ if (bond_info->rlb6_update_retry_counter)
+ --bond_info->rlb6_update_retry_counter;
+ else
+ bond_info->rx6_ntt = 0;
+ }
+ }
}
rcu_read_unlock();
re_arm:
@@ -1812,6 +2391,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
if (bond->alb_info.rlb_enabled) {
/* inform clients mac address has changed */
rlb_req_update_slave_clients(bond, curr_active);
+ rlb6_req_update_slave_clients(bond, curr_active);
}
}
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 4f9b4a18c74c..90e88ff9b2bf 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -41,6 +41,20 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
client_info->slave->dev->name);
}
+ seq_puts(m, "SourceIP DestinationIP Destination MAC Src MAC DEV\n");
+
+ hash_index = bond_info->rx6_hashtbl_used_head;
+ for (; hash_index != RLB_NULL_INDEX;
+ hash_index = client_info->used_next) {
+ client_info = &bond_info->rx6_hashtbl[hash_index];
+ seq_printf(m, "%-40pI6 %-40pI6 %-17pM %-17pM %s\n",
+ &client_info->ip6_src,
+ &client_info->ip6_dst,
+ &client_info->mac_dst,
+ &client_info->mac_src,
+ client_info->slave->dev->name);
+ }
+
spin_unlock_bh(&bond->mode_lock);
return 0;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 15eddca7b4b6..b6252b181940 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1510,8 +1510,8 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
struct sk_buff *skb = *pskb;
struct slave *slave;
struct bonding *bond;
- int (*recv_probe)(const struct sk_buff *, struct bonding *,
- struct slave *);
+ int (*recv_probe)(struct sk_buff *skb, struct bonding *bond,
+ struct slave *slave);
int ret = RX_HANDLER_ANOTHER;
skb = skb_share_check(skb, GFP_ATOMIC);
@@ -3228,7 +3228,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
}
#endif
-int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond,
+int bond_rcv_validate(struct sk_buff *skb, struct bonding *bond,
struct slave *slave)
{
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index c89639381eca..70f4327dbd2a 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -347,7 +347,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
is_router /* router */,
true /* solicited */,
false /* override */,
- true /* inc_opt */);
+ true /* inc_opt */, NULL);
out:
dev_put(netdev);
}
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 184105d68294..51886d9c928d 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -300,7 +300,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link);
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
int __bond_3ad_get_active_agg_info(struct bonding *bond,
struct ad_info *ad_info);
-int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
void bond_3ad_update_lacp_active(struct bonding *bond);
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index 191c36afa1f4..b1a572eead31 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -94,6 +94,8 @@ struct tlb_client_info {
struct rlb_client_info {
__be32 ip_src; /* the server IP address */
__be32 ip_dst; /* the client IP address */
+ struct in6_addr ip6_src;
+ struct in6_addr ip6_dst;
u8 mac_src[ETH_ALEN]; /* the server MAC address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
@@ -131,10 +133,13 @@ struct alb_bond_info {
/* -------- rlb parameters -------- */
int rlb_enabled;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
+ struct rlb_client_info *rx6_hashtbl; /* Receive hash table */
u32 rx_hashtbl_used_head;
+ u32 rx6_hashtbl_used_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
+ u8 rx6_ntt;
struct slave *rx_slave;/* last slave to xmit from */
u8 primary_is_promisc; /* boolean */
u32 rlb_promisc_timeout_counter;/* counts primary
@@ -144,6 +149,8 @@ struct alb_bond_info {
u32 rlb_update_retry_counter;/* counter of retries
* of client update
*/
+ u32 rlb6_update_delay_counter;
+ u32 rlb6_update_retry_counter;
u8 rlb_rebalance; /* flag - indicates that the
* rx traffic should be
* rebalanced
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b14f4c0b4e9e..552bce0168d1 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -220,8 +220,8 @@ struct bonding {
struct bond_up_slave __rcu *all_slaves;
bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
- int (*recv_probe)(const struct sk_buff *, struct bonding *,
- struct slave *);
+ int (*recv_probe)(struct sk_buff *skb, struct bonding *bond,
+ struct slave *slave);
/* mode_lock is used for mode-specific locking needs, currently used by:
* 3ad mode (4) - protect against running bond_3ad_unbind_slave() and
* bond_3ad_state_machine_handler() concurrently and also
@@ -639,7 +639,7 @@ struct bond_net {
struct class_attribute class_attr_bonding_masters;
};
-int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
+int bond_rcv_validate(struct sk_buff *skb, struct bonding *bond, struct slave *slave);
netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
int bond_create(struct net *net, const char *name);
int bond_create_sysfs(struct bond_net *net);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 45e0339be6fa..2b64ea6590b6 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -56,7 +56,8 @@ struct ipv6_stub {
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt);
+ bool router, bool solicited, bool override,
+ bool inc_opt, void *data);
#if IS_ENABLED(CONFIG_XFRM)
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index da7eec8669ec..e71702a44a3d 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -107,6 +107,12 @@ struct nd_opt_hdr {
__u8 nd_opt_len;
} __packed;
+struct nd_sendinfo {
+ __u16 vlanid;
+ void *mac_dst;
+ const void *mac_src;
+};
+
/* ND options */
struct ndisc_options {
struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
@@ -460,7 +466,8 @@ void ndisc_send_rs(struct net_device *dev,
const struct in6_addr *saddr, const struct in6_addr *daddr);
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt);
+ bool router, bool solicited, bool override, bool inc_opt,
+ void *data);
void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b22504176588..6825d70c34fb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -975,6 +975,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
kfree_rcu(ifp, rcu);
}
+EXPORT_SYMBOL(inet6_ifa_finish_destroy);
static void
ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
@@ -2037,6 +2038,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
return result;
}
+EXPORT_SYMBOL(ipv6_get_ifaddr);
/* Gets referenced address, destroys ifaddr */
@@ -4217,7 +4219,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
/*router=*/ !!ifp->idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
- /*inc_opt=*/ true);
+ /*inc_opt=*/ true, NULL);
}
if (send_rs) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fcb288b0ae13..47875aab86e5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -518,29 +518,37 @@ EXPORT_SYMBOL(ndisc_send_skb);
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt)
+ bool router, bool solicited, bool override, bool inc_opt,
+ void *data)
{
struct sk_buff *skb;
struct in6_addr tmpaddr;
struct inet6_ifaddr *ifp;
const struct in6_addr *src_addr;
struct nd_msg *msg;
+ struct nd_sendinfo *sendinfo = data;
+ struct net *net = dev_net(dev);
+ struct sock *sk = net->ipv6.ndisc_sk;
int optlen = 0;
- /* for anycast or proxy, solicited_addr != src_addr */
- ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
- if (ifp) {
- src_addr = solicited_addr;
- if (ifp->flags & IFA_F_OPTIMISTIC)
- override = false;
- inc_opt |= ifp->idev->cnf.force_tllao;
- in6_ifa_put(ifp);
+ if (!sendinfo) {
+ /* for anycast or proxy, solicited_addr != src_addr */
+ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
+ if (ifp) {
+ src_addr = solicited_addr;
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ override = false;
+ inc_opt |= ifp->idev->cnf.force_tllao;
+ in6_ifa_put(ifp);
+ } else {
+ if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
+ inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
+ &tmpaddr))
+ return;
+ src_addr = &tmpaddr;
+ }
} else {
- if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
- inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
- &tmpaddr))
- return;
- src_addr = &tmpaddr;
+ src_addr = solicited_addr;
}
if (!dev->addr_len)
@@ -568,8 +576,28 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
dev->dev_addr,
NDISC_NEIGHBOUR_ADVERTISEMENT);
+ if (!sendinfo) {
+ ndisc_send_skb(skb, daddr, src_addr);
+ } else {
+ if (sendinfo->vlanid)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ sendinfo->vlanid);
+
+ msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, skb->len,
+ IPPROTO_ICMPV6,
+ csum_partial(&msg->icmph,
+ skb->len, 0));
- ndisc_send_skb(skb, daddr, src_addr);
+ ip6_nd_hdr(skb, src_addr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb->dev = dev;
+ if (dev_hard_header(skb, dev, ETH_P_IPV6, sendinfo->mac_dst,
+ sendinfo->mac_src, skb->len) < 0)
+ return;
+
+ dev_queue_xmit(skb);
+ }
}
static void ndisc_send_unsol_na(struct net_device *dev)
@@ -591,7 +619,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
- /*inc_opt=*/ true);
+ /*inc_opt=*/ true, NULL);
}
read_unlock_bh(&idev->lock);
@@ -932,7 +960,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
if (dad) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
- !!is_router, false, (ifp != NULL), true);
+ !!is_router, false, ifp, true, NULL);
goto out;
}
@@ -954,7 +982,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
- true, (ifp != NULL && inc), inc);
+ true, (ifp && inc), inc, NULL);
if (neigh)
neigh_release(neigh);
}
base-commit: c84d86a0295c24487db5b7db1a61d9c0eddfbb66
--
2.27.0
Thu, Mar 17, 2022 at 07:15:21AM CET, [email protected] wrote:
>This patch is implementing IPV6 RLB for balance-alb mode.
>
>Suggested-by: Hu Yadi <[email protected]>
>Signed-off-by: Sun Shouxin <[email protected]>
Could you please reply to the question I asked on v1:
Out of curiosity, what exactly is your use case? I'm asking because
I don't see any good reason to use the RLB/ALB modes. I have to be missing
something.
This is adding a lot of code to bonding that needs to be maintained.
However, if there is no particular need to add it, why would we?
Could you please spell out why exactly you need this? I'm pretty sure
that in the end we'll find out that you really don't need this at all.
Thanks!
On 3/17/22 12:15 AM, Sun Shouxin wrote:
> This patch is implementing IPV6 RLB for balance-alb mode.
>
> Suggested-by: Hu Yadi <[email protected]>
> Signed-off-by: Sun Shouxin <[email protected]>
> ---
> changelog:
> v1-->v2:
> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
> -Don't send neighbor advertisement message when receiving
> neighbor advertisement message in rlb6_update_entry_from_na.
>
> v2-->v3:
> -Don't export ndisc_send_na.
> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
> in rlb6_update_client.
>
> v3-->v4:
> -Submit all code at a whole patch.
you misunderstood Jakub's comment. The code should evolve with small,
focused patches and each patch needs to compile and function correctly
(i.e., no breakage).
You need to respond to Jiri's question about why this feature is needed.
After that:
1. patch 1 adds void *data to the ndisc_send_na stub function and the
ndisc_send_na direct function, and updates all callers of either to pass
NULL as the data parameter (a sketch of this change follows the list below).
2. patch 2 refactors ndisc_send_na to handle the new data argument
3. patch 3 exports any IPv6 functions; explain why each needs to be
exported.
4. patch 4 .... bonding changes. (bonding folks can respond on how to
introduce that change).
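For reference, step 1 amounts to the mechanical signature change already
visible in the hunks above (the ipv6_stub->ndisc_send_na op gets the same
extra argument); a minimal sketch only, with the new argument unused at this
stage and every existing caller passing NULL:

  /* include/net/ndisc.h: the prototype gains a void *data argument */
  void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
                     const struct in6_addr *solicited_addr,
                     bool router, bool solicited, bool override, bool inc_opt,
                     void *data);

  /* e.g. the caller in addrconf_dad_completed(), net/ipv6/addrconf.c */
  ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
                /*router=*/ !!ifp->idev->cnf.forwarding,
                /*solicited=*/ false, /*override=*/ true,
                /*inc_opt=*/ true, NULL);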
David Ahern <[email protected]> wrote:
>On 3/17/22 12:15 AM, Sun Shouxin wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>>
>> Suggested-by: Hu Yadi <[email protected]>
>> Signed-off-by: Sun Shouxin <[email protected]>
>> ---
>> changelog:
>> v1-->v2:
>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>> -Don't send neighbor advertisement message when receiving
>> neighbor advertisement message in rlb6_update_entry_from_na.
>>
>> v2-->v3:
>> -Don't export ndisc_send_na.
>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>> in rlb6_update_client.
>>
>> v3-->v4:
>> -Submit all code at a whole patch.
>
>you misunderstood Jakub's comment. The code should evolve with small,
>focused patches and each patch needs to compile and function correctly
>(ie., no breakage).
Agreed; the split of the patches was not at issue, it was that
each patch in a series must compile and the built kernel must function
rationally.
>You need to respond to Jiri's question about why this feature is needed.
I'm not entirely sold on adding IPv6 RLB for balance-alb, but
the IPv4 version of it does see moderate levels of use, even now. It's
less common than LACP by far, though. I'd like to know why someone
would choose IPv6 RLB over LACP. I wonder if this is a checklist item
somewhere that something must have "complete support for IPv6" or words
to that effect, versus an actual functional need.
>After that:
>
>1. patch 1 adds void *data to ndisc_send_na stub function and
>ndisc_send_na direct function. Update all places that use both
>ndisc_send_na to pass NULL as the data parameter.
>
>2. patch 2 refactors ndisc_send_na to handle the new data argument
>
>3. patch 3 exports any IPv6 functions. explain why each needs to be
>exported.
>
>4. patch 4 .... bonding changes. (bonding folks can respond on how to
>introduce that change).
Looking at the previous patch for bonding, my two initial
requests are:
1) A more detailed commit message. The only way to understand
how any of this actually works is by reading the code; there is no
higher-level description.
2) How does this interact with the IPv4 RLB logic? Is it
possible for a given bond interface MAC to be "assigned" to two
different peers (one IPv4, one IPv6), and if so, does that behave in an
expected manner? I.e., two peers on the network could receive
contradictory information via ARP and ND for the MAC address of a given
peer. This is already possible with the IPv4 RLB, but with an
additional IPv6 RLB, a single peer could see two different MACs for a
given host (one via IPv4, one via IPv6), and another peer could see the
opposite, or even disjoint information across several peers.
-J
---
-Jay Vosburgh, [email protected]
On 2022/3/18 4:10, Jay Vosburgh wrote:
> David Ahern <[email protected]> wrote:
>
>> On 3/17/22 12:15 AM, Sun Shouxin wrote:
>>> This patch is implementing IPV6 RLB for balance-alb mode.
>>>
>>> Suggested-by: Hu Yadi <[email protected]>
>>> Signed-off-by: Sun Shouxin <[email protected]>
>>> ---
>>> changelog:
>>> v1-->v2:
>>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>>> -Don't send neighbor advertisement message when receiving
>>> neighbor advertisement message in rlb6_update_entry_from_na.
>>>
>>> v2-->v3:
>>> -Don't export ndisc_send_na.
>>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>>> in rlb6_update_client.
>>>
>>> v3-->v4:
>>> -Submit all code at a whole patch.
>> you misunderstood Jakub's comment. The code should evolve with small,
>> focused patches and each patch needs to compile and function correctly
>> (ie., no breakage).
> Agreed; the split of the patches was not at issue, it was that
> each patch in a series must compile and the built kernel must function
> rationally.
>
>> You need to respond to Jiri's question about why this feature is needed.
> I'm not entirely sold on adding IPv6 RLB for balance-alb, but
> the IPv4 version of it does see moderate levels of use, even now. It's
> less common than LACP by far, though. I'd like to know why someone
> would choose IPv6 RLB over LACP. I wonder if this is a checklist item
> somewhere that something must have "complete support for IPv6" or words
> to that effect, versus an actual functional need.
This patch aims to fix a real issue in our lab.
For historical reasons, bond mode 6 (balance-alb) with IPv4 is widely used
in our lab. We started to support IPv6 for all services last year; the
network operations and maintenance team assumed, based on the balance-alb
documentation, that the mode would provide ALB for IPv6 as well, but it
turned out not to work. As you know, it is impractical for us to change the
links to LACP because of the huge cost and the impact on online services.
I believe others will hit the same case as IPv6 adoption grows.
>> After that:
>>
>> 1. patch 1 adds void *data to ndisc_send_na stub function and
>> ndisc_send_na direct function. Update all places that use both
>> ndisc_send_na to pass NULL as the data parameter.
>>
>> 2. patch 2 refactors ndisc_send_na to handle the new data argument
>>
>> 3. patch 3 exports any IPv6 functions. explain why each needs to be
>> exported.
>>
>> 4. patch 4 .... bonding changes. (bonding folks can respond on how to
>> introduce that change).
> Looking at the previous patch for bonding, my two initial
> requests are:
>
> 1) A more detailed commit message. The only way to understand
> how any of this actually works is reading the code, there is no higher
> level description.
>
> 2) How does this interact with the IPv4 RLB logic? Is it
> possible for a given bond interface MAC to be "assigned" to two
> different peers (one IPv4, one IPv6), and if so, does that behave in an
> expected manner? I.e., two peers on the network could receive
> contradictory information via ARP and ND for the MAC address of a given
> peer. This is already possible with the IPv4 RLB, but with an
> additional IPv6 RLB, a single peer could see two different MACs for a
> given host (one via IPv4, one via IPv6), and another peer could see the
> opposite, or even disjoint information across several peers.
>
> -J
Sorry if I have not fully understood your question.
If I understand correctly, I don't think the IPv6 ALB can interact with the
IPv4 RLB logic: they use different neighbor tables when sending packets, and
within ALB the rx6_hashtbl is used for IPv6 while rx_hashtbl is used for IPv4.
Please correct me if I have missed your point.
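To make that concrete, the relevant fields from the bond_alb.h hunk of this
patch keep the two address families on disjoint state (sketch, other fields
omitted; comments added here for clarity):

  struct alb_bond_info {
          struct rlb_client_info *rx_hashtbl;   /* IPv4 RLB receive hash table */
          struct rlb_client_info *rx6_hashtbl;  /* IPv6 RLB receive hash table */
          u32 rx_hashtbl_used_head;
          u32 rx6_hashtbl_used_head;
          u8 rx_ntt;   /* need to transmit to all IPv4 rx clients */
          u8 rx6_ntt;  /* need to transmit to all IPv6 rx clients */
          /* ... */
  };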
>
> ---
> -Jay Vosburgh, [email protected]
On 2022/3/17 16:11, Jiri Pirko wrote:
> Thu, Mar 17, 2022 at 07:15:21AM CET, [email protected] wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>>
>> Suggested-by: Hu Yadi <[email protected]>
>> Signed-off-by: Sun Shouxin <[email protected]>
>
> Could you please reply to my question I asked for v1:
> Out of curiosity, what is exactly your usecase? I'm asking because
> I don't see any good reason to use RLB/ALB modes. I have to be missing
> something.
>
> This is adding a lot of code in bonding that needs to be maintained.
> However, if there is no particular need to add it, why would we?
>
> Could you please spell out why exactly do you need this? I'm pretty sure
> that in the end well find out, that you really don't need this at all.
>
> Thanks!
This patch aims to fix a real issue in our lab.
For historical reasons, bond mode 6 (balance-alb) with IPv4 is widely used
in our lab. We started to support IPv6 for all services last year; the
network operations and maintenance team assumed, based on the balance-alb
documentation, that the mode would provide ALB for IPv6 as well, but it
turned out not to work. As you know, it is impractical for us to change the
links to LACP because of the huge cost and the impact on online services.
On 2022/3/18 2:49, David Ahern wrote:
> On 3/17/22 12:15 AM, Sun Shouxin wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>>
>> Suggested-by: Hu Yadi <[email protected]>
>> Signed-off-by: Sun Shouxin <[email protected]>
>> ---
>> changelog:
>> v1-->v2:
>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>> -Don't send neighbor advertisement message when receiving
>> neighbor advertisement message in rlb6_update_entry_from_na.
>>
>> v2-->v3:
>> -Don't export ndisc_send_na.
>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>> in rlb6_update_client.
>>
>> v3-->v4:
>> -Submit all code at a whole patch.
> you misunderstood Jakub's comment. The code should evolve with small,
> focused patches and each patch needs to compile and function correctly
> (ie., no breakage).
>
> You need to respond to Jiri's question about why this feature is needed.
> After that:
>
> 1. patch 1 adds void *data to ndisc_send_na stub function and
> ndisc_send_na direct function. Update all places that use both
> ndisc_send_na to pass NULL as the data parameter.
>
> 2. patch 2 refactors ndisc_send_na to handle the new data argument
>
> 3. patch 3 exports any IPv6 functions. explain why each needs to be
> exported.
>
> 4. patch 4 .... bonding changes. (bonding folks can respond on how to
> introduce that change).
Thanks for your kind guidance to a newbie; I'll resend soon.
Thanks again.
Fri, Mar 18, 2022 at 10:49:02AM CET, [email protected] wrote:
>
>On 2022/3/17 16:11, Jiri Pirko wrote:
>> Thu, Mar 17, 2022 at 07:15:21AM CET, [email protected] wrote:
>> > This patch is implementing IPV6 RLB for balance-alb mode.
>> >
>> > Suggested-by: Hu Yadi <[email protected]>
>> > Signed-off-by: Sun Shouxin <[email protected]>
>>
>> Could you please reply to my question I asked for v1:
>> Out of curiosity, what is exactly your usecase? I'm asking because
>> I don't see any good reason to use RLB/ALB modes. I have to be missing
>> something.
>>
>> This is adding a lot of code in bonding that needs to be maintained.
>> However, if there is no particular need to add it, why would we?
>>
>> Could you please spell out why exactly do you need this? I'm pretty sure
>> that in the end well find out, that you really don't need this at all.
>>
>> Thanks!
>
>
>This patch is certainly aim fix one real issue in ou lab.
>For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>We started to support ipv6 for all service last year, networking operation
>and maintenance team
>think it does work with ipv6 ALB capacity take it for granted due to bond6's
>specification
>but it doesn't work in the end. as you know, it is impossible to change link
>neworking to LACP
>because of huge cost and effective to online server.
I don't follow. Why exactly can't you use LACP? Every switch supports
it.
On 2022/3/18 19:34, Jiri Pirko wrote:
> Fri, Mar 18, 2022 at 10:49:02AM CET, [email protected] wrote:
>> On 2022/3/17 16:11, Jiri Pirko wrote:
>>> Thu, Mar 17, 2022 at 07:15:21AM CET, [email protected] wrote:
>>>> This patch is implementing IPV6 RLB for balance-alb mode.
>>>>
>>>> Suggested-by: Hu Yadi <[email protected]>
>>>> Signed-off-by: Sun Shouxin <[email protected]>
>>> Could you please reply to my question I asked for v1:
>>> Out of curiosity, what is exactly your usecase? I'm asking because
>>> I don't see any good reason to use RLB/ALB modes. I have to be missing
>>> something.
>>>
>>> This is adding a lot of code in bonding that needs to be maintained.
>>> However, if there is no particular need to add it, why would we?
>>>
>>> Could you please spell out why exactly do you need this? I'm pretty sure
>>> that in the end well find out, that you really don't need this at all.
>>>
>>> Thanks!
>>
>> This patch is certainly aim fix one real issue in ou lab.
>> For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>> We started to support ipv6 for all service last year, networking operation
>> and maintenance team
>> think it does work with ipv6 ALB capacity take it for granted due to bond6's
>> specification
>> but it doesn't work in the end. as you know, it is impossible to change link
>> neworking to LACP
>> because of huge cost and effective to online server.
> I don't follow. Why exactly can't you use LACP? Every switch supports
> it.
Hi Jiri,
Changing to LACP means risk to our online services, which require high availability.
Also, we have multiple DCs running bond mode 6; it would be a huge cost to change them.
Mon, Mar 21, 2022 at 02:17:34AM CET, [email protected] wrote:
>
>On 2022/3/18 19:34, Jiri Pirko wrote:
>> Fri, Mar 18, 2022 at 10:49:02AM CET, [email protected] wrote:
>> > On 2022/3/17 16:11, Jiri Pirko wrote:
>> > > Thu, Mar 17, 2022 at 07:15:21AM CET, [email protected] wrote:
>> > > > This patch is implementing IPV6 RLB for balance-alb mode.
>> > > >
>> > > > Suggested-by: Hu Yadi <[email protected]>
>> > > > Signed-off-by: Sun Shouxin <[email protected]>
>> > > Could you please reply to my question I asked for v1:
>> > > Out of curiosity, what is exactly your usecase? I'm asking because
>> > > I don't see any good reason to use RLB/ALB modes. I have to be missing
>> > > something.
>> > >
>> > > This is adding a lot of code in bonding that needs to be maintained.
>> > > However, if there is no particular need to add it, why would we?
>> > >
>> > > Could you please spell out why exactly do you need this? I'm pretty sure
>> > > that in the end well find out, that you really don't need this at all.
>> > >
>> > > Thanks!
>> >
>> > This patch is certainly aim fix one real issue in ou lab.
>> > For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>> > We started to support ipv6 for all service last year, networking operation
>> > and maintenance team
>> > think it does work with ipv6 ALB capacity take it for granted due to bond6's
>> > specification
>> > but it doesn't work in the end. as you know, it is impossible to change link
>> > neworking to LACP
>> > because of huge cost and effective to online server.
>> I don't follow. Why exactly can't you use LACP? Every switch supports
>> it.
>
>
>Hi jiri
>
>
>Changing to Lacp means risk to our online service requring high available.
>
>Also,we have multiple DCs installed bond6,it is huge cost to change it.
So? That is no argument in this discussion. I believe that adding this
amount of code to bonding for a use case that could simply be handled by
LACP is wrong, and we should not do that. The original ALB/RLB
implementation was done when LACP was not that widely used. But now it
is 2022 - a different story.