Add lag support for lan966x.
First 4 patches don't do any changes to the current behaviour, they
just prepare for lag support. While the rest is to add the lag support.
v1->v2:
- fix the LAG PGIDs when ports go down, in this way is not
needed anymore the last patch of the series.
Horatiu Vultur (7):
net: lan966x: Add reqisters used to configure lag interfaces
net: lan966x: Split lan966x_fdb_event_work
net: lan966x: Expose lan966x_switchdev_nb and
lan966x_switchdev_blocking_nb
net: lan966x: Extend lan966x_foreign_bridging_check
net: lan966x: Add lag support for lan966x.
net: lan966x: Extend FDB to support also lag
net: lan966x: Extend MAC to support also lag interfaces.
.../net/ethernet/microchip/lan966x/Makefile | 2 +-
.../ethernet/microchip/lan966x/lan966x_fdb.c | 153 ++++++---
.../ethernet/microchip/lan966x/lan966x_lag.c | 322 ++++++++++++++++++
.../ethernet/microchip/lan966x/lan966x_mac.c | 66 +++-
.../ethernet/microchip/lan966x/lan966x_main.h | 41 +++
.../ethernet/microchip/lan966x/lan966x_regs.h | 45 +++
.../microchip/lan966x/lan966x_switchdev.c | 115 +++++--
7 files changed, 654 insertions(+), 90 deletions(-)
create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
--
2.33.0
Extend MAC support to support also lag interfaces:
1. In case an entry is learned on a port that is part of lag interface,
then notify the upper layers that the entry is learned on the bond
interface
2. If a port leaves the bond and the port is the first port in the lag
group, then it is required to update all MAC entries to change the
destination port.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_lag.c | 14 ++++
.../ethernet/microchip/lan966x/lan966x_mac.c | 66 ++++++++++++++++---
.../ethernet/microchip/lan966x/lan966x_main.h | 5 ++
3 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
index dd372a6497bb..6360fa3c5013 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -131,6 +131,20 @@ int lan966x_lag_port_join(struct lan966x_port *port,
void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond)
{
struct lan966x *lan966x = port->lan966x;
+ u32 bond_mask;
+ u32 lag_id;
+
+ if (lan966x_lag_first_port(port->bond, port->dev)) {
+ bond_mask = lan966x_lag_get_mask(lan966x, port->bond);
+ bond_mask &= ~BIT(port->chip_port);
+ if (bond_mask) {
+ lag_id = __ffs(bond_mask);
+ lan966x_mac_lag_replace_port_entry(lan966x, port,
+ lan966x->ports[lag_id]);
+ } else {
+ lan966x_mac_lag_remove_port_entry(lan966x, port);
+ }
+ }
port->bond = NULL;
lan966x_lag_set_port_ids(lan966x);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 005e56ea5da1..3348453d89df 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -22,6 +22,7 @@ struct lan966x_mac_entry {
u16 vid;
u16 port_index;
int row;
+ bool lag;
};
struct lan966x_mac_raw_entry {
@@ -156,8 +157,9 @@ void lan966x_mac_init(struct lan966x *lan966x)
INIT_LIST_HEAD(&lan966x->mac_entries);
}
-static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *mac,
- u16 vid, u16 port_index)
+static struct lan966x_mac_entry *lan966x_mac_alloc_entry(struct lan966x_port *port,
+ const unsigned char *mac,
+ u16 vid)
{
struct lan966x_mac_entry *mac_entry;
@@ -167,8 +169,9 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma
memcpy(mac_entry->mac, mac, ETH_ALEN);
mac_entry->vid = vid;
- mac_entry->port_index = port_index;
+ mac_entry->port_index = port->chip_port;
mac_entry->row = LAN966X_MAC_INVALID_ROW;
+ mac_entry->lag = port->bond ? true : false;
return mac_entry;
}
@@ -245,7 +248,7 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
return lan966x_mac_learn(lan966x, port->chip_port,
addr, vid, ENTRYTYPE_LOCKED);
- mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
+ mac_entry = lan966x_mac_alloc_entry(port, addr, vid);
if (!mac_entry)
return -ENOMEM;
@@ -254,7 +257,8 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port,
spin_unlock(&lan966x->mac_lock);
lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
- lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+ lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid,
+ port->bond ?: port->dev);
return 0;
}
@@ -281,6 +285,48 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr,
return 0;
}
+void lan966x_mac_lag_replace_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src,
+ struct lan966x_port *dst)
+{
+ struct lan966x_mac_entry *mac_entry;
+
+ spin_lock(&lan966x->mac_lock);
+ list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
+ if (mac_entry->port_index == src->chip_port &&
+ mac_entry->lag) {
+ lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+
+ lan966x_mac_learn(lan966x, dst->chip_port,
+ mac_entry->mac, mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+ mac_entry->port_index = dst->chip_port;
+ }
+ }
+ spin_unlock(&lan966x->mac_lock);
+}
+
+void lan966x_mac_lag_remove_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src)
+{
+ struct lan966x_mac_entry *mac_entry, *tmp;
+
+ spin_lock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
+ list) {
+ if (mac_entry->port_index == src->chip_port &&
+ mac_entry->lag) {
+ lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
+ ENTRYTYPE_LOCKED);
+
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+ }
+ spin_unlock(&lan966x->mac_lock);
+}
+
void lan966x_mac_purge_entries(struct lan966x *lan966x)
{
struct lan966x_mac_entry *mac_entry, *tmp;
@@ -325,6 +371,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
{
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
+ struct lan966x_port *port;
u32 dest_idx;
u32 column;
u16 vid;
@@ -366,9 +413,10 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
* anymore in the HW and remove the entry from the SW
* list
*/
+ port = lan966x->ports[mac_entry->port_index];
lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
+ port->bond ?: port->dev);
list_del(&mac_entry->list);
kfree(mac_entry);
@@ -396,7 +444,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
- mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
+ port = lan966x->ports[dest_idx];
+ mac_entry = lan966x_mac_alloc_entry(port, mac, vid);
if (!mac_entry)
return;
@@ -407,7 +456,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
spin_unlock(&lan966x->mac_lock);
lan966x_mac_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
- mac, vid, lan966x->ports[dest_idx]->dev);
+ mac, vid,
+ port->bond ?: port->dev);
}
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 1d7fe0e2a243..d361dd7c8eae 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -349,6 +349,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x,
struct lan966x_port *port,
const unsigned char *addr,
u16 vid);
+void lan966x_mac_lag_replace_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src,
+ struct lan966x_port *dst);
+void lan966x_mac_lag_remove_port_entry(struct lan966x *lan966x,
+ struct lan966x_port *src);
void lan966x_mac_purge_entries(struct lan966x *lan966x);
irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x);
--
2.33.0
Expose lan966x_switchdev_nb and lan966x_switchdev_blocking_nb to the
lan966x_main.h file because they will be needed by the lag driver.
Signed-off-by: Horatiu Vultur <[email protected]>
---
drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 2 ++
drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c | 6 ++----
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 3b86ddddc756..4701c20c8393 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -296,6 +296,8 @@ struct lan966x_port {
extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
extern const struct phylink_pcs_ops lan966x_phylink_pcs_ops;
extern const struct ethtool_ops lan966x_ethtool_ops;
+extern struct notifier_block lan966x_switchdev_nb __read_mostly;
+extern struct notifier_block lan966x_switchdev_blocking_nb __read_mostly;
bool lan966x_netdevice_check(const struct net_device *dev);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index df2bee678559..300a5850e812 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -6,8 +6,6 @@
#include "lan966x_main.h"
static struct notifier_block lan966x_netdevice_nb __read_mostly;
-static struct notifier_block lan966x_switchdev_nb __read_mostly;
-static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly;
static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port,
u32 pgid_ip)
@@ -571,11 +569,11 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly = {
.notifier_call = lan966x_netdevice_event,
};
-static struct notifier_block lan966x_switchdev_nb __read_mostly = {
+struct notifier_block lan966x_switchdev_nb __read_mostly = {
.notifier_call = lan966x_switchdev_event,
};
-static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly = {
+struct notifier_block lan966x_switchdev_blocking_nb __read_mostly = {
.notifier_call = lan966x_switchdev_blocking_event,
};
--
2.33.0
Add link aggregation hardware offload support for lan966x
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../net/ethernet/microchip/lan966x/Makefile | 2 +-
.../ethernet/microchip/lan966x/lan966x_lag.c | 292 ++++++++++++++++++
.../ethernet/microchip/lan966x/lan966x_main.h | 27 ++
.../microchip/lan966x/lan966x_switchdev.c | 77 ++++-
4 files changed, 382 insertions(+), 16 deletions(-)
create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index fd2e0ebb2427..0c22c86bdaa9 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \
lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
- lan966x_ptp.o lan966x_fdma.o
+ lan966x_ptp.o lan966x_fdma.o lan966x_lag.o
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
new file mode 100644
index 000000000000..0da398e51aa3
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "lan966x_main.h"
+
+static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x)
+{
+ u32 visited = GENMASK(lan966x->num_phys_ports - 1, 0);
+ int p, lag, i;
+
+ /* Reset destination and aggregation PGIDS */
+ for (p = 0; p < lan966x->num_phys_ports; ++p)
+ lan_wr(ANA_PGID_PGID_SET(BIT(p)),
+ lan966x, ANA_PGID(p));
+
+ for (p = PGID_AGGR; p < PGID_SRC; ++p)
+ lan_wr(ANA_PGID_PGID_SET(visited),
+ lan966x, ANA_PGID(p));
+
+ /* The visited ports bitmask holds the list of ports offloading any
+ * bonding interface. Initially we mark all these ports as unvisited,
+ * then every time we visit a port in this bitmask, we know that it is
+ * the lowest numbered port, i.e. the one whose logical ID == physical
+ * port ID == LAG ID. So we mark as visited all further ports in the
+ * bitmask that are offloading the same bonding interface. This way,
+ * we set up the aggregation PGIDs only once per bonding interface.
+ */
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ if (!port || !port->bond)
+ continue;
+
+ visited &= ~BIT(p);
+ }
+
+ /* Now, set PGIDs for each active LAG */
+ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) {
+ struct net_device *bond = lan966x->ports[lag]->bond;
+ int num_active_ports = 0;
+ unsigned long bond_mask;
+ u8 aggr_idx[16];
+
+ if (!bond || (visited & BIT(lag)))
+ continue;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, bond);
+
+ for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ lan_wr(ANA_PGID_PGID_SET(bond_mask),
+ lan966x, ANA_PGID(p));
+ if (port->lag_tx_active)
+ aggr_idx[num_active_ports++] = p;
+ }
+
+ for (i = PGID_AGGR; i < PGID_SRC; ++i) {
+ u32 ac;
+
+ ac = lan_rd(lan966x, ANA_PGID(i));
+ ac &= ~bond_mask;
+ /* Don't do division by zero if there was no active
+ * port. Just make all aggregation codes zero.
+ */
+ if (num_active_ports)
+ ac |= BIT(aggr_idx[i % num_active_ports]);
+ lan_wr(ANA_PGID_PGID_SET(ac),
+ lan966x, ANA_PGID(i));
+ }
+
+ /* Mark all ports in the same LAG as visited to avoid applying
+ * the same config again.
+ */
+ for (p = lag; p < lan966x->num_phys_ports; p++) {
+ struct lan966x_port *port = lan966x->ports[p];
+
+ if (!port)
+ continue;
+
+ if (port->bond == bond)
+ visited |= BIT(p);
+ }
+ }
+}
+
+static void lan966x_lag_set_port_ids(struct lan966x *lan966x)
+{
+ struct lan966x_port *port;
+ u32 bond_mask;
+ u32 lag_id;
+ int p;
+
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ port = lan966x->ports[p];
+ if (!port)
+ continue;
+
+ lag_id = port->chip_port;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, port->bond);
+ if (bond_mask)
+ lag_id = __ffs(bond_mask);
+
+ lan_rmw(ANA_PORT_CFG_PORTID_VAL_SET(lag_id),
+ ANA_PORT_CFG_PORTID_VAL,
+ lan966x, ANA_PORT_CFG(port->chip_port));
+ }
+}
+
+int lan966x_lag_port_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
+ struct net_device *bond,
+ struct netlink_ext_ack *extack)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct net_device *dev = port->dev;
+
+ port->bond = bond;
+ lan966x_lag_set_port_ids(lan966x);
+ lan966x_update_fwd_mask(lan966x);
+ lan966x_lag_set_aggr_pgids(lan966x);
+
+ switchdev_bridge_port_offload(brport_dev, dev, port,
+ &lan966x_switchdev_nb,
+ &lan966x_switchdev_blocking_nb,
+ false, extack);
+
+ return 0;
+}
+
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond)
+{
+ struct lan966x *lan966x = port->lan966x;
+
+ port->bond = NULL;
+ lan966x_lag_set_port_ids(lan966x);
+ lan966x_update_fwd_mask(lan966x);
+ lan966x_lag_set_aggr_pgids(lan966x);
+}
+
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ struct netdev_lag_upper_info *lui;
+ struct netlink_ext_ack *extack;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+ lui = info->upper_info;
+ if (!lui)
+ return NOTIFY_DONE;
+
+ if (lui->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG device using unsupported Tx type");
+ return notifier_from_errno(-EINVAL);
+ }
+
+ switch (lui->hash_type) {
+ case NETDEV_LAG_HASH_L2:
+ lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_SMAC_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ case NETDEV_LAG_HASH_L34:
+ lan_wr(ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ case NETDEV_LAG_HASH_L23:
+ lan_wr(ANA_AGGR_CFG_AC_DMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_SMAC_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA_SET(1) |
+ ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_SET(1),
+ lan966x, ANA_AGGR_CFG);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG device using unsupported hash type");
+ return notifier_from_errno(-EINVAL);
+ }
+
+ return NOTIFY_OK;
+}
+
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag = info->lower_state_info;
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ bool is_active;
+
+ if (!port->bond)
+ return NOTIFY_DONE;
+
+ is_active = lag->link_up && lag->tx_enabled;
+ if (port->lag_tx_active == is_active)
+ return NOTIFY_DONE;
+
+ port->lag_tx_active = is_active;
+ lan966x_lag_set_aggr_pgids(lan966x);
+
+ return NOTIFY_OK;
+}
+
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port;
+ struct net_device *lower;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!lan966x_netdevice_check(lower))
+ continue;
+
+ port = netdev_priv(lower);
+ if (port->bond != dev)
+ continue;
+
+ err = lan966x_port_prechangeupper(lower, dev, info);
+ if (err)
+ return err;
+ }
+
+ return NOTIFY_DONE;
+}
+
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct lan966x_port *port;
+ struct net_device *lower;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!lan966x_netdevice_check(lower))
+ continue;
+
+ port = netdev_priv(lower);
+ if (port->bond != dev)
+ continue;
+
+ err = lan966x_port_changeupper(lower, dev, info);
+ if (err)
+ return err;
+ }
+
+ return NOTIFY_DONE;
+}
+
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ unsigned long bond_mask;
+
+ if (port->bond != lag)
+ return false;
+
+ bond_mask = lan966x_lag_get_mask(lan966x, lag);
+ if (bond_mask && port->chip_port == __ffs(bond_mask))
+ return true;
+
+ return false;
+}
+
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond)
+{
+ struct lan966x_port *port;
+ u32 mask = 0;
+ int p;
+
+ if (!bond)
+ return mask;
+
+ for (p = 0; p < lan966x->num_phys_ports; p++) {
+ port = lan966x->ports[p];
+ if (!port)
+ continue;
+
+ if (port->bond == bond)
+ mask |= BIT(p);
+ }
+
+ return mask;
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 4701c20c8393..23ddabea8d70 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -291,6 +291,9 @@ struct lan966x_port {
u8 ptp_cmd;
u16 ts_id;
struct sk_buff_head tx_skbs;
+
+ struct net_device *bond;
+ bool lag_tx_active;
};
extern const struct phylink_mac_ops lan966x_phylink_mac_ops;
@@ -407,6 +410,30 @@ int lan966x_fdma_init(struct lan966x *lan966x);
void lan966x_fdma_deinit(struct lan966x *lan966x);
irqreturn_t lan966x_fdma_irq_handler(int irq, void *args);
+int lan966x_lag_port_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
+ struct net_device *bond,
+ struct netlink_ext_ack *extack);
+void lan966x_lag_port_leave(struct lan966x_port *port, struct net_device *bond);
+int lan966x_lag_port_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_port_changelowerstate(struct net_device *dev,
+ struct netdev_notifier_changelowerstate_info *info);
+int lan966x_lag_netdev_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_lag_netdev_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info);
+bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev);
+u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond);
+
+int lan966x_port_changeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info);
+int lan966x_port_prechangeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info);
+void lan966x_update_fwd_mask(struct lan966x *lan966x);
+
static inline void __iomem *lan_addr(void __iomem *base[],
int id, int tinst, int tcnt,
int gbase, int ginst,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 4bc626ce031a..84d54e528501 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -130,7 +130,7 @@ static int lan966x_port_pre_bridge_flags(struct lan966x_port *port,
return 0;
}
-static void lan966x_update_fwd_mask(struct lan966x *lan966x)
+void lan966x_update_fwd_mask(struct lan966x *lan966x)
{
int i;
@@ -138,9 +138,14 @@ static void lan966x_update_fwd_mask(struct lan966x *lan966x)
struct lan966x_port *port = lan966x->ports[i];
unsigned long mask = 0;
- if (port && lan966x->bridge_fwd_mask & BIT(i))
+ if (port && lan966x->bridge_fwd_mask & BIT(i)) {
mask = lan966x->bridge_fwd_mask & ~BIT(i);
+ if (port->bond)
+ mask &= ~lan966x_lag_get_mask(lan966x,
+ port->bond);
+ }
+
mask |= BIT(CPU_PORT);
lan_wr(ANA_PGID_PGID_SET(mask),
@@ -239,6 +244,7 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx,
}
static int lan966x_port_bridge_join(struct lan966x_port *port,
+ struct net_device *brport_dev,
struct net_device *bridge,
struct netlink_ext_ack *extack)
{
@@ -256,7 +262,7 @@ static int lan966x_port_bridge_join(struct lan966x_port *port,
}
}
- err = switchdev_bridge_port_offload(dev, dev, port,
+ err = switchdev_bridge_port_offload(brport_dev, dev, port,
&lan966x_switchdev_nb,
&lan966x_switchdev_blocking_nb,
false, extack);
@@ -293,8 +299,9 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port,
lan966x_vlan_port_apply(port);
}
-static int lan966x_port_changeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+int lan966x_port_changeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct lan966x_port *port = netdev_priv(dev);
struct netlink_ext_ack *extack;
@@ -304,25 +311,42 @@ static int lan966x_port_changeupper(struct net_device *dev,
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking)
- err = lan966x_port_bridge_join(port, info->upper_dev,
+ err = lan966x_port_bridge_join(port, brport_dev,
+ info->upper_dev,
extack);
else
lan966x_port_bridge_leave(port, info->upper_dev);
}
+ if (netif_is_lag_master(info->upper_dev)) {
+ if (info->linking)
+ err = lan966x_lag_port_join(port, info->upper_dev,
+ info->upper_dev,
+ extack);
+ else
+ lan966x_lag_port_leave(port, info->upper_dev);
+ }
+
return err;
}
-static int lan966x_port_prechangeupper(struct net_device *dev,
- struct netdev_notifier_changeupper_info *info)
+int lan966x_port_prechangeupper(struct net_device *dev,
+ struct net_device *brport_dev,
+ struct netdev_notifier_changeupper_info *info)
{
struct lan966x_port *port = netdev_priv(dev);
+ int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
- switchdev_bridge_port_unoffload(port->dev, port,
- NULL, NULL);
+ switchdev_bridge_port_unoffload(brport_dev, port, NULL, NULL);
- return NOTIFY_DONE;
+ if (netif_is_lag_master(info->upper_dev) && info->linking)
+ err = lan966x_lag_port_prechangeupper(dev, info);
+
+ if (netif_is_lag_master(info->upper_dev) && !info->linking)
+ switchdev_bridge_port_unoffload(brport_dev, port, NULL, NULL);
+
+ return err;
}
static int lan966x_foreign_bridging_check(struct net_device *upper,
@@ -400,21 +424,44 @@ static int lan966x_netdevice_port_event(struct net_device *dev,
int err = 0;
if (!lan966x_netdevice_check(dev)) {
- if (event == NETDEV_CHANGEUPPER)
- return lan966x_bridge_check(dev, ptr);
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_PRECHANGEUPPER:
+ err = lan966x_bridge_check(dev, ptr);
+ if (err)
+ return err;
+
+ if (netif_is_lag_master(dev)) {
+ if (event == NETDEV_CHANGEUPPER)
+ err = lan966x_lag_netdev_changeupper(dev,
+ ptr);
+ else
+ err = lan966x_lag_netdev_prechangeupper(dev,
+ ptr);
+
+ return err;
+ }
+ break;
+ default:
+ return 0;
+ }
+
return 0;
}
switch (event) {
case NETDEV_PRECHANGEUPPER:
- err = lan966x_port_prechangeupper(dev, ptr);
+ err = lan966x_port_prechangeupper(dev, dev, ptr);
break;
case NETDEV_CHANGEUPPER:
err = lan966x_bridge_check(dev, ptr);
if (err)
return err;
- err = lan966x_port_changeupper(dev, ptr);
+ err = lan966x_port_changeupper(dev, dev, ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ err = lan966x_lag_port_changelowerstate(dev, ptr);
break;
}
--
2.33.0
Offload FDB entries when the original device is a lag interface. Because
all the ports under the lag have the same chip id, which is the chip id
of first port, then add the entries only for the first port.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdb.c | 31 +++++++++++++++++++
.../ethernet/microchip/lan966x/lan966x_lag.c | 16 ++++++++++
.../ethernet/microchip/lan966x/lan966x_main.h | 7 +++++
3 files changed, 54 insertions(+)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
index 2e186e9d9893..154efbd0c319 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
@@ -194,6 +194,35 @@ static void lan966x_fdb_bridge_event_work(struct lan966x_fdb_event_work *fdb_wor
}
}
+static void lan966x_fdb_lag_event_work(struct lan966x_fdb_event_work *fdb_work)
+{
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct lan966x_port *port;
+ struct lan966x *lan966x;
+
+ if (!lan966x_lag_first_port(fdb_work->orig_dev, fdb_work->dev))
+ return;
+
+ lan966x = fdb_work->lan966x;
+ port = netdev_priv(fdb_work->dev);
+ fdb_info = &fdb_work->fdb_info;
+
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
+ lan966x_lag_mac_add_entry(lan966x, port, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
+ break;
+ lan966x_lag_mac_del_entry(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ }
+}
+
static void lan966x_fdb_event_work(struct work_struct *work)
{
struct lan966x_fdb_event_work *fdb_work =
@@ -203,6 +232,8 @@ static void lan966x_fdb_event_work(struct work_struct *work)
lan966x_fdb_port_event_work(fdb_work);
else if (netif_is_bridge_master(fdb_work->orig_dev))
lan966x_fdb_bridge_event_work(fdb_work);
+ else if (netif_is_lag_master(fdb_work->orig_dev))
+ lan966x_fdb_lag_event_work(fdb_work);
kfree(fdb_work->fdb_info.addr);
dev_put(fdb_work->orig_dev);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
index 0da398e51aa3..dd372a6497bb 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
@@ -290,3 +290,19 @@ u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond)
return mask;
}
+
+void lan966x_lag_mac_del_entry(struct lan966x *lan966x,
+ const unsigned char *addr,
+ u16 vid)
+{
+ lan966x_mac_del_entry(lan966x, addr, vid);
+}
+
+int lan966x_lag_mac_add_entry(struct lan966x *lan966x,
+ struct lan966x_port *port,
+ const unsigned char *addr,
+ u16 vid)
+{
+ lan966x_mac_del_entry(lan966x, addr, vid);
+ return lan966x_mac_add_entry(lan966x, port, addr, vid);
+}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 23ddabea8d70..1d7fe0e2a243 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -425,6 +425,13 @@ int lan966x_lag_netdev_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info);
bool lan966x_lag_first_port(struct net_device *lag, struct net_device *dev);
u32 lan966x_lag_get_mask(struct lan966x *lan966x, struct net_device *bond);
+void lan966x_lag_mac_del_entry(struct lan966x *lan966x,
+ const unsigned char *addr,
+ u16 vid);
+int lan966x_lag_mac_add_entry(struct lan966x *lan966x,
+ struct lan966x_port *port,
+ const unsigned char *addr,
+ u16 vid);
int lan966x_port_changeupper(struct net_device *dev,
struct net_device *brport_dev,
--
2.33.0
Split the function lan966x_fdb_event_work. One case for when the
orig_dev is a bridge and one case when orig_dev is lan966x port.
This is preparation for lag support. There is no functional change.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdb.c | 124 ++++++++++--------
1 file changed, 69 insertions(+), 55 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
index da5ca7188679..2e186e9d9893 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c
@@ -8,6 +8,7 @@ struct lan966x_fdb_event_work {
struct work_struct work;
struct switchdev_notifier_fdb_info fdb_info;
struct net_device *dev;
+ struct net_device *orig_dev;
struct lan966x *lan966x;
unsigned long event;
};
@@ -127,75 +128,86 @@ void lan966x_fdb_deinit(struct lan966x *lan966x)
lan966x_fdb_purge_entries(lan966x);
}
-static void lan966x_fdb_event_work(struct work_struct *work)
+static void lan966x_fdb_port_event_work(struct lan966x_fdb_event_work *fdb_work)
{
- struct lan966x_fdb_event_work *fdb_work =
- container_of(work, struct lan966x_fdb_event_work, work);
struct switchdev_notifier_fdb_info *fdb_info;
- struct net_device *dev = fdb_work->dev;
struct lan966x_port *port;
struct lan966x *lan966x;
- int ret;
- fdb_info = &fdb_work->fdb_info;
lan966x = fdb_work->lan966x;
+ port = netdev_priv(fdb_work->orig_dev);
+ fdb_info = &fdb_work->fdb_info;
- if (lan966x_netdevice_check(dev)) {
- port = netdev_priv(dev);
-
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
- fdb_info->vid);
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (!fdb_info->added_by_user)
- break;
- lan966x_mac_del_entry(lan966x, fdb_info->addr,
- fdb_info->vid);
+ lan966x_mac_add_entry(lan966x, port, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (!fdb_info->added_by_user)
break;
- }
- } else {
- if (!netif_is_bridge_master(dev))
- goto out;
-
- /* In case the bridge is called */
- switch (fdb_work->event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- /* If there is no front port in this vlan, there is no
- * point to copy the frame to CPU because it would be
- * just dropped at later point. So add it only if
- * there is a port but it is required to store the fdb
- * entry for later point when a port actually gets in
- * the vlan.
- */
- lan966x_fdb_add_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
- fdb_info->vid);
+ lan966x_mac_del_entry(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ }
+}
+
+static void lan966x_fdb_bridge_event_work(struct lan966x_fdb_event_work *fdb_work)
+{
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct lan966x *lan966x;
+ int ret;
+
+ lan966x = fdb_work->lan966x;
+ fdb_info = &fdb_work->fdb_info;
+
+ /* In case the bridge is called */
+ switch (fdb_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ /* If there is no front port in this vlan, there is no
+ * point to copy the frame to CPU because it would be
+ * just dropped at later point. So add it only if
+ * there is a port but it is required to store the fdb
+ * entry for later point when a port actually gets in
+ * the vlan.
+ */
+ lan966x_fdb_add_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- ret = lan966x_fdb_del_entry(lan966x, fdb_info);
- if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
- fdb_info->vid))
- break;
-
- if (ret)
- lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
- fdb_info->vid);
+
+ lan966x_mac_cpu_learn(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ ret = lan966x_fdb_del_entry(lan966x, fdb_info);
+ if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x,
+ fdb_info->vid))
break;
- }
+
+ if (ret)
+ lan966x_mac_cpu_forget(lan966x, fdb_info->addr,
+ fdb_info->vid);
+ break;
}
+}
+
+static void lan966x_fdb_event_work(struct work_struct *work)
+{
+ struct lan966x_fdb_event_work *fdb_work =
+ container_of(work, struct lan966x_fdb_event_work, work);
+
+ if (lan966x_netdevice_check(fdb_work->orig_dev))
+ lan966x_fdb_port_event_work(fdb_work);
+ else if (netif_is_bridge_master(fdb_work->orig_dev))
+ lan966x_fdb_bridge_event_work(fdb_work);
-out:
kfree(fdb_work->fdb_info.addr);
+ dev_put(fdb_work->orig_dev);
+ dev_put(fdb_work->dev);
kfree(fdb_work);
- dev_put(dev);
}
int lan966x_handle_fdb(struct net_device *dev,
@@ -221,7 +233,8 @@ int lan966x_handle_fdb(struct net_device *dev,
if (!fdb_work)
return -ENOMEM;
- fdb_work->dev = orig_dev;
+ fdb_work->dev = dev;
+ fdb_work->orig_dev = orig_dev;
fdb_work->lan966x = lan966x;
fdb_work->event = event;
INIT_WORK(&fdb_work->work, lan966x_fdb_event_work);
@@ -232,6 +245,7 @@ int lan966x_handle_fdb(struct net_device *dev,
ether_addr_copy((u8 *)fdb_work->fdb_info.addr, fdb_info->addr);
dev_hold(orig_dev);
+ dev_hold(dev);
queue_work(lan966x->fdb_work, &fdb_work->work);
break;
--
2.33.0
Extend lan966x_foreign_bridging_check to check also if the upper
interface is a lag device. Don't allow a lan966x port to be part of a
lag if it has foreign interfaces.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../microchip/lan966x/lan966x_switchdev.c | 32 ++++++++++++++-----
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
index 300a5850e812..4bc626ce031a 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c
@@ -325,23 +325,25 @@ static int lan966x_port_prechangeupper(struct net_device *dev,
return NOTIFY_DONE;
}
-static int lan966x_foreign_bridging_check(struct net_device *bridge,
+static int lan966x_foreign_bridging_check(struct net_device *upper,
+ bool *has_foreign,
+ bool *seen_lan966x,
struct netlink_ext_ack *extack)
{
struct lan966x *lan966x = NULL;
- bool has_foreign = false;
struct net_device *dev;
struct list_head *iter;
- if (!netif_is_bridge_master(bridge))
+ if (!netif_is_bridge_master(upper) &&
+ !netif_is_lag_master(upper))
return 0;
- netdev_for_each_lower_dev(bridge, dev, iter) {
+ netdev_for_each_lower_dev(upper, dev, iter) {
if (lan966x_netdevice_check(dev)) {
struct lan966x_port *port = netdev_priv(dev);
if (lan966x) {
- /* Bridge already has at least one port of a
+ /* Upper already has at least one port of a
* lan966x switch inside it, check that it's
* the same instance of the driver.
*/
@@ -352,15 +354,24 @@ static int lan966x_foreign_bridging_check(struct net_device *bridge,
}
} else {
/* This is the first lan966x port inside this
- * bridge
+ * upper device
*/
lan966x = port->lan966x;
+ *seen_lan966x = true;
}
+ } else if (netif_is_lag_master(dev)) {
+ /* Allow to have bond interfaces that have only lan966x
+ * devices
+ */
+ if (lan966x_foreign_bridging_check(dev, has_foreign,
+ seen_lan966x,
+ extack))
+ *has_foreign = true;
} else {
- has_foreign = true;
+ *has_foreign = true;
}
- if (lan966x && has_foreign) {
+ if (*seen_lan966x && *has_foreign) {
NL_SET_ERR_MSG_MOD(extack,
"Bridging lan966x ports with foreign interfaces disallowed");
return -EINVAL;
@@ -373,7 +384,12 @@ static int lan966x_foreign_bridging_check(struct net_device *bridge,
static int lan966x_bridge_check(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
+ bool has_foreign = false;
+ bool seen_lan966x = false;
+
return lan966x_foreign_bridging_check(info->upper_dev,
+ &has_foreign,
+ &seen_lan966x,
info->info.extack);
}
--
2.33.0
Hi Horatiu,
On Mon, Jun 27, 2022 at 10:13:23PM +0200, Horatiu Vultur wrote:
> Add lag support for lan966x.
> First 4 patches don't do any changes to the current behaviour, they
> just prepare for lag support. While the rest is to add the lag support.
>
> v1->v2:
> - fix the LAG PGIDs when ports go down, in this way is not
> needed anymore the last patch of the series.
>
> Horatiu Vultur (7):
> net: lan966x: Add reqisters used to configure lag interfaces
> net: lan966x: Split lan966x_fdb_event_work
> net: lan966x: Expose lan966x_switchdev_nb and
> lan966x_switchdev_blocking_nb
> net: lan966x: Extend lan966x_foreign_bridging_check
> net: lan966x: Add lag support for lan966x.
> net: lan966x: Extend FDB to support also lag
> net: lan966x: Extend MAC to support also lag interfaces.
>
> .../net/ethernet/microchip/lan966x/Makefile | 2 +-
> .../ethernet/microchip/lan966x/lan966x_fdb.c | 153 ++++++---
> .../ethernet/microchip/lan966x/lan966x_lag.c | 322 ++++++++++++++++++
> .../ethernet/microchip/lan966x/lan966x_mac.c | 66 +++-
> .../ethernet/microchip/lan966x/lan966x_main.h | 41 +++
> .../ethernet/microchip/lan966x/lan966x_regs.h | 45 +++
> .../microchip/lan966x/lan966x_switchdev.c | 115 +++++--
> 7 files changed, 654 insertions(+), 90 deletions(-)
> create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
>
> --
> 2.33.0
>
I've downloaded and applied your patches and I have some general feedback.
Some of it relates to changes which were not made and hence I couldn't
have commented on the patches themselves, so I'm posting it here.
1. switchdev_bridge_port_offload() returns an error code if object
replay failed, or if it couldn't get the port parent id, or if the user
tries to join a lan966x port and a port belonging to another switchdev
driver to the same LAG. It would be good to propagate this error and not
ignore it.
Side note: maybe this could help to eliminate the extra logic you need
to add to lan966x_foreign_bridging_check().
2. lan966x_foreign_dev_check() seems wrong/misunderstood. Currently it
reports that a LAG upper is a foreign interface (unoffloaded). In turn,
this makes switchdev_lower_dev_find() not find any lan966x interface
beneath a LAG, and hence, __switchdev_handle_fdb_event_to_device() would
not recurse to the lan966x "dev" below a LAG when the "orig_dev" of an
FDB event is the bridge itself. Otherwise said, if you have no direct
lan966x port under a bridge, but just bridge -> LAG -> lan966x, you will
miss all local (host-filtered) FDB event notifications that you should
otherwise learn towards the CPU.
3. The implementation of lan966x_lag_mac_add_entry(), with that first
call to lan966x_mac_del_entry(), seems a hack. Why do you need to do
that?
4. The handling of lan966x->mac_lock seems wrong in general, not just
particular to this patch set. In particular, it appears to protect too
little in lan966x_mac_add_entry(), i.e. just the list_add_tail.
This makes it possible for lan966x_mac_lookup and lan966x_mac_learn to
be concurrent with lan966x_mac_del_entry(). In turn, this appears bad
first and foremost for the hardware access interface, since the MAC
table access is indirect, and if you allow multiple threads to
concurrently call lan966x_mac_select(), change the command in
ANA_MACACCESS, and poll for command completion, things will go sideways
very quickly (one command will inadvertently poll for the completion of
another, which inadvertently operates on the row/column selected by yet
a third command, all that due to improper serialization).
5. There is a race between lan966x_fdb_lag_event_work() calling
lan966x_lag_first_port(), and lan966x_lag_port_leave() changing
port->bond = NULL. Specifically, when a lan966x port leaves a LAG, there
might still be deferred FDB events (add or del) which are still pending.
There exists a dead time during which you will ignore these, because you
think that the first lan966x LAG port isn't the first lan966x LAG port,
which will lead to a desynchronization between the bridge FDB and the
hardware FDB.
In DSA we solved this by flushing lan966x->fdb_work inside
lan966x_port_prechangeupper() on leave. This waits for the pending
events to finish, and the bridge will not emit further events.
It's important to do this in prechangeupper() rather than in
changeupper() because switchdev_handle_fdb_event_to_device() needs the
upper/lower relationship to still exist to function properly, and in
changeupper() it has already been destroyed.
Side note: if you flush lan966x->fdb_work, then you have an upper bound
for how long can lan966x_fdb_event_work be deferred. Specifically, you
can remove the dev_hold() and dev_put() calls, since it surely can't be
deferred until after the netdev is unregistered. The bounding event is
much quicker - the lan966x port leaves the LAG.
6. You are missing LAG FDB migration logic in lan966x_lag_port_join().
Specifically, you assume that the lan966x_lag_first_port() will never
change, probably because you just make the switch ports join the LAG in
the order 1, 2, 3. But they can also join in the order 3, 2, 1.
The 06/29/2022 12:26, Vladimir Oltean wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>
> Hi Horatiu,
Hi Vladimir,
Thanks for review this and for detail explanation.
>
> On Mon, Jun 27, 2022 at 10:13:23PM +0200, Horatiu Vultur wrote:
> > Add lag support for lan966x.
> > First 4 patches don't do any changes to the current behaviour, they
> > just prepare for lag support. While the rest is to add the lag support.
> >
> > v1->v2:
> > - fix the LAG PGIDs when ports go down, in this way is not
> > needed anymore the last patch of the series.
> >
> > Horatiu Vultur (7):
> > net: lan966x: Add reqisters used to configure lag interfaces
> > net: lan966x: Split lan966x_fdb_event_work
> > net: lan966x: Expose lan966x_switchdev_nb and
> > lan966x_switchdev_blocking_nb
> > net: lan966x: Extend lan966x_foreign_bridging_check
> > net: lan966x: Add lag support for lan966x.
> > net: lan966x: Extend FDB to support also lag
> > net: lan966x: Extend MAC to support also lag interfaces.
> >
> > .../net/ethernet/microchip/lan966x/Makefile | 2 +-
> > .../ethernet/microchip/lan966x/lan966x_fdb.c | 153 ++++++---
> > .../ethernet/microchip/lan966x/lan966x_lag.c | 322 ++++++++++++++++++
> > .../ethernet/microchip/lan966x/lan966x_mac.c | 66 +++-
> > .../ethernet/microchip/lan966x/lan966x_main.h | 41 +++
> > .../ethernet/microchip/lan966x/lan966x_regs.h | 45 +++
> > .../microchip/lan966x/lan966x_switchdev.c | 115 +++++--
> > 7 files changed, 654 insertions(+), 90 deletions(-)
> > create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_lag.c
> >
> > --
> > 2.33.0
> >
>
> I've downloaded and applied your patches and I have some general feedback.
> Some of it relates to changes which were not made and hence I couldn't
> have commented on the patches themselves, so I'm posting it here.
>
> 1. switchdev_bridge_port_offload() returns an error code if object
> replay failed, or if it couldn't get the port parent id, or if the user
> tries to join a lan966x port and a port belonging to another switchdev
> driver to the same LAG. It would be good to propagate this error and not
> ignore it.
Yes, I will do that.
What about the case when the other port is not a switchdev port. For
example:
ip link set dev eth0 master bond0
ip link set dev dummy master bond0
ip link set dev bond0 master br0
At the last line, I was expecting to get an error.
>
> Side note: maybe this could help to eliminate the extra logic you need
> to add to lan966x_foreign_bridging_check().
>
> 2. lan966x_foreign_dev_check() seems wrong/misunderstood. Currently it
> reports that a LAG upper is a foreign interface (unoffloaded). In turn,
> this makes switchdev_lower_dev_find() not find any lan966x interface
> beneath a LAG, and hence, __switchdev_handle_fdb_event_to_device() would
> not recurse to the lan966x "dev" below a LAG when the "orig_dev" of an
> FDB event is the bridge itself. Otherwise said, if you have no direct
> lan966x port under a bridge, but just bridge -> LAG -> lan966x, you will
> miss all local (host-filtered) FDB event notifications that you should
> otherwise learn towards the CPU.
Good observation. I have missed that case.
>
> 3. The implementation of lan966x_lag_mac_add_entry(), with that first
> call to lan966x_mac_del_entry(), seems a hack. Why do you need to do
> that?
Ah... that is not needed anymore. I forgot to remove it.
>
> 4. The handling of lan966x->mac_lock seems wrong in general, not just
> particular to this patch set. In particular, it appears to protect too
> little in lan966x_mac_add_entry(), i.e. just the list_add_tail.
> This makes it possible for lan966x_mac_lookup and lan966x_mac_learn to
> be concurrent with lan966x_mac_del_entry(). In turn, this appears bad
> first and foremost for the hardware access interface, since the MAC
> table access is indirect, and if you allow multiple threads to
> concurrently call lan966x_mac_select(), change the command in
> ANA_MACACCESS, and poll for command completion, things will go sideways
> very quickly (one command will inadvertently poll for the completion of
> another, which inadvertently operates on the row/column selected by yet
> a third command, all that due to improper serialization).
Now that you mention it, I can see it. The following functions need to
be updated: lan966x_mac_learn, lan966x_mac_ip_learn, lan966x_mac_forget,
lan966x_mac_add_entry, lan966x_mac_del_entry.
But I think this needs to be in a separate patch for net.
>
> 5. There is a race between lan966x_fdb_lag_event_work() calling
> lan966x_lag_first_port(), and lan966x_lag_port_leave() changing
> port->bond = NULL. Specifically, when a lan966x port leaves a LAG, there
> might still be deferred FDB events (add or del) which are still pending.
> There exists a dead time during which you will ignore these, because you
> think that the first lan966x LAG port isn't the first lan966x LAG port,
> which will lead to a desynchronization between the bridge FDB and the
> hardware FDB.
>
> In DSA we solved this by flushing lan966x->fdb_work inside
> lan966x_port_prechangeupper() on leave. This waits for the pending
> events to finish, and the bridge will not emit further events.
> It's important to do this in prechangeupper() rather than in
> changeupper() because switchdev_handle_fdb_event_to_device() needs the
> upper/lower relationship to still exist to function properly, and in
> changeupper() it has already been destroyed.
>
> Side note: if you flush lan966x->fdb_work, then you have an upper bound
> for how long can lan966x_fdb_event_work be deferred. Specifically, you
> can remove the dev_hold() and dev_put() calls, since it surely can't be
> deferred until after the netdev is unregistered. The bounding event is
> much quicker - the lan966x port leaves the LAG.
Thanks for the observation, it would have been taken a long time to see
this.
>
> 6. You are missing LAG FDB migration logic in lan966x_lag_port_join().
> Specifically, you assume that the lan966x_lag_first_port() will never
> change, probably because you just make the switch ports join the LAG in
> the order 1, 2, 3. But they can also join in the order 3, 2, 1.
It would work, but there will be problems when the ports start to leave
the LAG.
It would work because all the ports under the LAG will have the same
value in PGID_ID for DST_IDX. So if the MAC entry points to any of
this entries will be OK. The problem is when the port leaves the LAG, if
the MAC entry points to the port that left the LAG then is not working
anymore.
I will fix this in the next series.
--
/Horatiu
On Thu, Jun 30, 2022 at 09:08:46PM +0200, Horatiu Vultur wrote:
> > I've downloaded and applied your patches and I have some general feedback.
> > Some of it relates to changes which were not made and hence I couldn't
> > have commented on the patches themselves, so I'm posting it here.
> >
> > 1. switchdev_bridge_port_offload() returns an error code if object
> > replay failed, or if it couldn't get the port parent id, or if the user
> > tries to join a lan966x port and a port belonging to another switchdev
> > driver to the same LAG. It would be good to propagate this error and not
> > ignore it.
>
> Yes, I will do that.
>
> What about the case when the other port is not a switchdev port. For
> example:
> ip link set dev eth0 master bond0
> ip link set dev dummy master bond0
> ip link set dev bond0 master br0
>
> At the last line, I was expecting to get an error.
switchdev_bridge_port_offload() currently only detects mismatched port
parent IDs, so it will not detect this condition where one bond slave is
switchdev and the other isn't. This is because the non-switchdev bond
slave does not even call switchdev_bridge_port_offload(), it's completely
silent from the perspective of the bridge.
Fact is that we don't have a common layer that enforces all common sense
netdev adjacency restrictions with switchdev, and that is one of the big
problems of the system.
> > 6. You are missing LAG FDB migration logic in lan966x_lag_port_join().
> > Specifically, you assume that the lan966x_lag_first_port() will never
> > change, probably because you just make the switch ports join the LAG in
> > the order 1, 2, 3. But they can also join in the order 3, 2, 1.
>
> It would work, but there will be problems when the ports start to leave
> the LAG.
> It would work because all the ports under the LAG will have the same
> value in PGID_ID for DST_IDX. So if the MAC entry points to any of
> this entries will be OK.
OK, I forgot DEST_IDX selects PGID and not logical port ID directly.
> The problem is when the port leaves the LAG, if the MAC entry points
> to the port that left the LAG then is not working anymore.
> I will fix this in the next series.