This series adds support for the new firmware version 2.5 (a pull request
was recently sent to linux-firmware):
https://lore.kernel.org/linux-firmware/[email protected]/T/#u
It also adds support for LAG offloading and for the AC3X 98DX3265 device.
Serhiy Boiko (1):
net: marvell: prestera: add LAG support
Vadym Kochan (6):
net: marvell: prestera: bump supported firmware version to 2.5
net: marvell: prestera: disable events interrupt while handling
net: marvell: prestera: add support for AC3X 98DX3265 device
net: marvell: prestera: move netdev topology validation to
prestera_main
net: marvell: prestera: align flood setting according to latest
firmware version
net: marvell: prestera: fix port event handling on init
.../net/ethernet/marvell/prestera/prestera.h | 30 +-
.../ethernet/marvell/prestera/prestera_hw.c | 217 +++++++++++++-
.../ethernet/marvell/prestera/prestera_hw.h | 17 +-
.../ethernet/marvell/prestera/prestera_main.c | 275 +++++++++++++++++-
.../ethernet/marvell/prestera/prestera_pci.c | 22 +-
.../marvell/prestera/prestera_switchdev.c | 175 +++++++----
.../marvell/prestera/prestera_switchdev.h | 4 +-
7 files changed, 657 insertions(+), 83 deletions(-)
--
2.17.1
The new firmware version has some ABI and feature changes, such as:
- LAG support
- initial L3 support
- changed events handling logic
Signed-off-by: Vadym Kochan <[email protected]>
---
drivers/net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index be5677623455..b8a87d249647 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -14,7 +14,7 @@
#define PRESTERA_MSG_MAX_SIZE 1500
#define PRESTERA_SUPP_FW_MAJ_VER 2
-#define PRESTERA_SUPP_FW_MIN_VER 0
+#define PRESTERA_SUPP_FW_MIN_VER 5
#define PRESTERA_FW_PATH_FMT "mrvl/prestera/mvsw_prestera_fw-v%u.%u.img"
--
2.17.1
There is a change in the firmware which requires that the receiver
disable event interrupts before handling events and re-enable them
after handling has finished. Events may still arrive in the queue in
the meantime, but without interrupting the receiver.
Signed-off-by: Vadym Kochan <[email protected]>
---
.../ethernet/marvell/prestera/prestera_pci.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index b8a87d249647..f7b27ef02624 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */
+#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <linux/device.h>
#include <linux/firmware.h>
@@ -144,6 +145,11 @@ struct prestera_fw_regs {
/* PRESTERA_CMD_RCV_CTL_REG flags */
#define PRESTERA_CMD_F_REPL_SENT BIT(0)
+#define PRESTERA_FW_EVT_CTL_STATUS_MASK GENMASK(1, 0)
+
+#define PRESTERA_FW_EVT_CTL_STATUS_ON 0
+#define PRESTERA_FW_EVT_CTL_STATUS_OFF 1
+
#define PRESTERA_EVTQ_REG_OFFSET(q, f) \
(PRESTERA_FW_REG_OFFSET(evtq_list) + \
(q) * sizeof(struct prestera_fw_evtq_regs) + \
@@ -260,6 +266,15 @@ static u8 prestera_fw_evtq_pick(struct prestera_fw *fw)
return PRESTERA_EVT_QNUM_MAX;
}
+static void prestera_fw_evt_ctl_status_set(struct prestera_fw *fw, u32 val)
+{
+ u32 status = prestera_fw_read(fw, PRESTERA_FW_STATUS_REG);
+
+ u32p_replace_bits(&status, val, PRESTERA_FW_EVT_CTL_STATUS_MASK);
+
+ prestera_fw_write(fw, PRESTERA_FW_STATUS_REG, status);
+}
+
static void prestera_fw_evt_work_fn(struct work_struct *work)
{
struct prestera_fw *fw;
@@ -269,6 +284,8 @@ static void prestera_fw_evt_work_fn(struct work_struct *work)
fw = container_of(work, struct prestera_fw, evt_work);
msg = fw->evt_msg;
+ prestera_fw_evt_ctl_status_set(fw, PRESTERA_FW_EVT_CTL_STATUS_OFF);
+
while ((qid = prestera_fw_evtq_pick(fw)) < PRESTERA_EVT_QNUM_MAX) {
u32 idx;
u32 len;
@@ -288,6 +305,8 @@ static void prestera_fw_evt_work_fn(struct work_struct *work)
if (fw->dev.recv_msg)
fw->dev.recv_msg(&fw->dev, msg, len);
}
+
+ prestera_fw_evt_ctl_status_set(fw, PRESTERA_FW_EVT_CTL_STATUS_ON);
}
static int prestera_fw_wait_reg32(struct prestera_fw *fw, u32 reg, u32 cmp,
--
2.17.1
Move handling of the PRECHANGEUPPER event from prestera_switchdev to
prestera_main, which is responsible for basic netdev event handling
and for routing events to the related module.
Signed-off-by: Vadym Kochan <[email protected]>
---
.../ethernet/marvell/prestera/prestera_main.c | 29 +++++++++++++++++--
.../marvell/prestera/prestera_switchdev.c | 20 -------------
2 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 25dd903a3e92..53c7628a3938 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -510,13 +510,36 @@ struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
static int prestera_netdev_port_event(struct net_device *dev,
unsigned long event, void *ptr)
{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+ upper = info->upper_dev;
+
switch (event) {
case NETDEV_PRECHANGEUPPER:
+ if (!netif_is_bridge_master(upper)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+ return -EINVAL;
+ }
+
+ if (!info->linking)
+ break;
+
+ if (netdev_has_any_upper_dev(upper)) {
+ NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
+ return -EINVAL;
+ }
+ break;
+
case NETDEV_CHANGEUPPER:
- return prestera_bridge_port_event(dev, event, ptr);
- default:
- return 0;
+ if (netif_is_bridge_master(upper))
+ return prestera_bridge_port_event(dev, event, ptr);
+ break;
}
+
+ return 0;
}
static int prestera_netdev_event_handler(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 8c2b03151736..7736d5f498c9 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -537,35 +537,15 @@ int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
void *ptr)
{
struct netdev_notifier_changeupper_info *info = ptr;
- struct netlink_ext_ack *extack;
struct prestera_port *port;
struct net_device *upper;
int err;
- extack = netdev_notifier_info_to_extack(&info->info);
port = netdev_priv(dev);
upper = info->upper_dev;
switch (event) {
- case NETDEV_PRECHANGEUPPER:
- if (!netif_is_bridge_master(upper)) {
- NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
- return -EINVAL;
- }
-
- if (!info->linking)
- break;
-
- if (netdev_has_any_upper_dev(upper)) {
- NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
- return -EINVAL;
- }
- break;
-
case NETDEV_CHANGEUPPER:
- if (!netif_is_bridge_master(upper))
- break;
-
if (info->linking) {
err = prestera_port_bridge_join(port, upper);
if (err)
--
2.17.1
From: Serhiy Boiko <[email protected]>
The following features are supported:
- LAG basic operations
- create/delete LAG
- add/remove a member to LAG
- enable/disable member in LAG
- LAG Bridge support
- LAG VLAN support
- LAG FDB support
Limitations:
- Only the HASH LAG Tx type is supported
- The Hash parameters are not configurable. They are applied
during the LAG creation stage.
- Enslaving a port to the LAG device that already has an
upper device is not supported.
Co-developed-by: Andrii Savka <[email protected]>
Signed-off-by: Andrii Savka <[email protected]>
Signed-off-by: Serhiy Boiko <[email protected]>
Signed-off-by: Vadym Kochan <[email protected]>
---
.../net/ethernet/marvell/prestera/prestera.h | 30 ++-
.../ethernet/marvell/prestera/prestera_hw.c | 180 ++++++++++++-
.../ethernet/marvell/prestera/prestera_hw.h | 14 +
.../ethernet/marvell/prestera/prestera_main.c | 247 +++++++++++++++++-
.../marvell/prestera/prestera_switchdev.c | 109 ++++++--
.../marvell/prestera/prestera_switchdev.h | 4 +-
6 files changed, 538 insertions(+), 46 deletions(-)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 55aa4bf8a27c..ad0f33a7e517 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -60,10 +60,19 @@ struct prestera_port_caps {
u8 transceiver;
};
+struct prestera_lag {
+ struct net_device *dev;
+ struct list_head members;
+ u16 member_count;
+ u16 lag_id;
+};
+
struct prestera_port {
struct net_device *dev;
struct prestera_switch *sw;
struct devlink_port dl_port;
+ struct list_head lag_member;
+ struct prestera_lag *lag;
u32 id;
u32 hw_id;
u32 dev_id;
@@ -127,6 +136,12 @@ struct prestera_port_event {
} data;
};
+enum prestera_fdb_entry_type {
+ PRESTERA_FDB_ENTRY_TYPE_REG_PORT,
+ PRESTERA_FDB_ENTRY_TYPE_LAG,
+ PRESTERA_FDB_ENTRY_TYPE_MAX
+};
+
enum prestera_fdb_event_id {
PRESTERA_FDB_EVENT_UNSPEC,
PRESTERA_FDB_EVENT_LEARNED,
@@ -134,7 +149,11 @@ enum prestera_fdb_event_id {
};
struct prestera_fdb_event {
- u32 port_id;
+ enum prestera_fdb_entry_type type;
+ union {
+ u32 port_id;
+ u16 lag_id;
+ } dest;
u32 vid;
union {
u8 mac[ETH_ALEN];
@@ -165,6 +184,9 @@ struct prestera_switch {
u32 mtu_min;
u32 mtu_max;
u8 id;
+ struct prestera_lag *lags;
+ u8 lag_member_max;
+ u8 lag_max;
};
struct prestera_rxtx_params {
@@ -203,4 +225,10 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
bool prestera_netdev_check(const struct net_device *dev);
+bool prestera_port_is_lag_member(const struct prestera_port *port);
+
+struct prestera_lag *prestera_lag_by_id(struct prestera_switch *sw, u16 id);
+
+u16 prestera_port_lag_id(const struct prestera_port *port);
+
#endif /* _PRESTERA_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 0424718d5998..8afb45f66862 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -39,6 +39,11 @@ enum prestera_cmd_type_t {
PRESTERA_CMD_TYPE_RXTX_INIT = 0x800,
PRESTERA_CMD_TYPE_RXTX_PORT_INIT = 0x801,
+ PRESTERA_CMD_TYPE_LAG_MEMBER_ADD = 0x900,
+ PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE = 0x901,
+ PRESTERA_CMD_TYPE_LAG_MEMBER_ENABLE = 0x902,
+ PRESTERA_CMD_TYPE_LAG_MEMBER_DISABLE = 0x903,
+
PRESTERA_CMD_TYPE_STP_PORT_SET = 0x1000,
PRESTERA_CMD_TYPE_ACK = 0x10000,
@@ -127,6 +132,12 @@ enum {
PRESTERA_FC_SYMM_ASYMM,
};
+enum {
+ PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT = 0,
+ PRESTERA_HW_FDB_ENTRY_TYPE_LAG = 1,
+ PRESTERA_HW_FDB_ENTRY_TYPE_MAX = 2,
+};
+
struct prestera_fw_event_handler {
struct list_head list;
struct rcu_head rcu;
@@ -168,6 +179,8 @@ struct prestera_msg_switch_init_resp {
u32 port_count;
u32 mtu_max;
u8 switch_id;
+ u8 lag_max;
+ u8 lag_member_max;
};
struct prestera_msg_port_autoneg_param {
@@ -249,8 +262,13 @@ struct prestera_msg_vlan_req {
struct prestera_msg_fdb_req {
struct prestera_msg_cmd cmd;
u8 dest_type;
- u32 port;
- u32 dev;
+ union {
+ struct {
+ u32 port;
+ u32 dev;
+ };
+ u16 lag_id;
+ } dest;
u8 mac[ETH_ALEN];
u16 vid;
u8 dynamic;
@@ -293,6 +311,13 @@ struct prestera_msg_rxtx_port_req {
u32 dev;
};
+struct prestera_msg_lag_req {
+ struct prestera_msg_cmd cmd;
+ u32 port;
+ u32 dev;
+ u16 lag_id;
+};
+
struct prestera_msg_event {
u16 type;
u16 id;
@@ -315,7 +340,10 @@ union prestera_msg_event_fdb_param {
struct prestera_msg_event_fdb {
struct prestera_msg_event id;
u8 dest_type;
- u32 port_id;
+ union {
+ u32 port_id;
+ u16 lag_id;
+ } dest;
u32 vid;
union prestera_msg_event_fdb_param param;
};
@@ -386,7 +414,19 @@ static int prestera_fw_parse_fdb_evt(void *msg, struct prestera_event *evt)
{
struct prestera_msg_event_fdb *hw_evt = msg;
- evt->fdb_evt.port_id = hw_evt->port_id;
+ switch (hw_evt->dest_type) {
+ case PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT:
+ evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_REG_PORT;
+ evt->fdb_evt.dest.port_id = hw_evt->dest.port_id;
+ break;
+ case PRESTERA_HW_FDB_ENTRY_TYPE_LAG:
+ evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_LAG;
+ evt->fdb_evt.dest.lag_id = hw_evt->dest.lag_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+
evt->fdb_evt.vid = hw_evt->vid;
ether_addr_copy(evt->fdb_evt.data.mac, hw_evt->param.mac);
@@ -531,6 +571,8 @@ int prestera_hw_switch_init(struct prestera_switch *sw)
sw->mtu_min = PRESTERA_MIN_MTU;
sw->mtu_max = resp.mtu_max;
sw->id = resp.switch_id;
+ sw->lag_member_max = resp.lag_member_max;
+ sw->lag_max = resp.lag_max;
return 0;
}
@@ -1067,8 +1109,10 @@ int prestera_hw_fdb_add(struct prestera_port *port, const unsigned char *mac,
u16 vid, bool dynamic)
{
struct prestera_msg_fdb_req req = {
- .port = port->hw_id,
- .dev = port->dev_id,
+ .dest = {
+ .dev = port->dev_id,
+ .port = port->hw_id,
+ },
.vid = vid,
.dynamic = dynamic,
};
@@ -1083,8 +1127,10 @@ int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac,
u16 vid)
{
struct prestera_msg_fdb_req req = {
- .port = port->hw_id,
- .dev = port->dev_id,
+ .dest = {
+ .dev = port->dev_id,
+ .port = port->hw_id,
+ },
.vid = vid,
};
@@ -1094,11 +1140,48 @@ int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac,
&req.cmd, sizeof(req));
}
+int prestera_hw_lag_fdb_add(struct prestera_switch *sw, u16 lag_id,
+ const unsigned char *mac, u16 vid, bool dynamic)
+{
+ struct prestera_msg_fdb_req req = {
+ .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+ .dest = {
+ .lag_id = lag_id,
+ },
+ .vid = vid,
+ .dynamic = dynamic,
+ };
+
+ ether_addr_copy(req.mac, mac);
+
+ return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_ADD,
+ &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_fdb_del(struct prestera_switch *sw, u16 lag_id,
+ const unsigned char *mac, u16 vid)
+{
+ struct prestera_msg_fdb_req req = {
+ .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+ .dest = {
+ .lag_id = lag_id,
+ },
+ .vid = vid,
+ };
+
+ ether_addr_copy(req.mac, mac);
+
+ return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_DELETE,
+ &req.cmd, sizeof(req));
+}
+
int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode)
{
struct prestera_msg_fdb_req req = {
- .port = port->hw_id,
- .dev = port->dev_id,
+ .dest = {
+ .dev = port->dev_id,
+ .port = port->hw_id,
+ },
.flush_mode = mode,
};
@@ -1121,8 +1204,10 @@ int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
u32 mode)
{
struct prestera_msg_fdb_req req = {
- .port = port->hw_id,
- .dev = port->dev_id,
+ .dest = {
+ .dev = port->dev_id,
+ .port = port->hw_id,
+ },
.vid = vid,
.flush_mode = mode,
};
@@ -1131,6 +1216,37 @@ int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
&req.cmd, sizeof(req));
}
+int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
+ u32 mode)
+{
+ struct prestera_msg_fdb_req req = {
+ .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+ .dest = {
+ .lag_id = lag_id,
+ },
+ .flush_mode = mode,
+ };
+
+ return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT,
+ &req.cmd, sizeof(req));
+}
+
+int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
+ u16 lag_id, u16 vid, u32 mode)
+{
+ struct prestera_msg_fdb_req req = {
+ .dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+ .dest = {
+ .lag_id = lag_id,
+ },
+ .vid = vid,
+ .flush_mode = mode,
+ };
+
+ return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN,
+ &req.cmd, sizeof(req));
+}
+
int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id)
{
struct prestera_msg_bridge_resp resp;
@@ -1212,6 +1328,46 @@ int prestera_hw_rxtx_port_init(struct prestera_port *port)
&req.cmd, sizeof(req));
}
+int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id)
+{
+ struct prestera_msg_lag_req req = {
+ .port = port->hw_id,
+ .dev = port->dev_id,
+ .lag_id = lag_id,
+ };
+
+ return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_ADD,
+ &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_member_del(struct prestera_port *port, u16 lag_id)
+{
+ struct prestera_msg_lag_req req = {
+ .port = port->hw_id,
+ .dev = port->dev_id,
+ .lag_id = lag_id,
+ };
+
+ return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE,
+ &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
+ bool enable)
+{
+ struct prestera_msg_lag_req req = {
+ .port = port->hw_id,
+ .dev = port->dev_id,
+ .lag_id = lag_id,
+ };
+ u32 cmd;
+
+ cmd = enable ? PRESTERA_CMD_TYPE_LAG_MEMBER_ENABLE :
+ PRESTERA_CMD_TYPE_LAG_MEMBER_DISABLE;
+
+ return prestera_cmd(port->sw, cmd, &req.cmd, sizeof(req));
+}
+
int prestera_hw_event_handler_register(struct prestera_switch *sw,
enum prestera_event_type type,
prestera_event_cb_t fn,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index b2b5ac95b4e3..68ce41595349 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -179,4 +179,18 @@ int prestera_hw_rxtx_init(struct prestera_switch *sw,
struct prestera_rxtx_params *params);
int prestera_hw_rxtx_port_init(struct prestera_port *port);
+/* LAG API */
+int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id);
+int prestera_hw_lag_member_del(struct prestera_port *port, u16 lag_id);
+int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
+ bool enable);
+int prestera_hw_lag_fdb_add(struct prestera_switch *sw, u16 lag_id,
+ const unsigned char *mac, u16 vid, bool dynamic);
+int prestera_hw_lag_fdb_del(struct prestera_switch *sw, u16 lag_id,
+ const unsigned char *mac, u16 vid);
+int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
+ u32 mode);
+int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
+ u16 lag_id, u16 vid, u32 mode);
+
#endif /* _PRESTERA_HW_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 53c7628a3938..39465e65d09b 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -8,6 +8,7 @@
#include <linux/netdev_features.h>
#include <linux/of.h>
#include <linux/of_net.h>
+#include <linux/if_vlan.h>
#include "prestera.h"
#include "prestera_hw.h"
@@ -281,6 +282,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id)
INIT_LIST_HEAD(&port->vlans_list);
port->pvid = PRESTERA_DEFAULT_VID;
+ port->lag = NULL;
port->dev = dev;
port->id = id;
port->sw = sw;
@@ -474,6 +476,151 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
return prestera_hw_switch_mac_set(sw, sw->base_mac);
}
+struct prestera_lag *prestera_lag_by_id(struct prestera_switch *sw, u16 id)
+{
+ return id < sw->lag_max ? &sw->lags[id] : NULL;
+}
+
+static struct prestera_lag *prestera_lag_by_dev(struct prestera_switch *sw,
+ struct net_device *dev)
+{
+ struct prestera_lag *lag;
+ u16 id;
+
+ for (id = 0; id < sw->lag_max; id++) {
+ lag = &sw->lags[id];
+ if (lag->dev == dev)
+ return lag;
+ }
+
+ return NULL;
+}
+
+static struct prestera_lag *prestera_lag_create(struct prestera_switch *sw,
+ struct net_device *lag_dev)
+{
+ struct prestera_lag *lag;
+ u16 id;
+
+ for (id = 0; id < sw->lag_max; id++) {
+ lag = &sw->lags[id];
+ if (!lag->dev)
+ break;
+ }
+ if (lag) {
+ INIT_LIST_HEAD(&lag->members);
+ lag->dev = lag_dev;
+ }
+
+ return lag;
+}
+
+static void prestera_lag_destroy(struct prestera_switch *sw,
+ struct prestera_lag *lag)
+{
+ WARN_ON(!list_empty(&lag->members));
+ lag->member_count = 0;
+ lag->dev = NULL;
+}
+
+static int prestera_lag_port_add(struct prestera_port *port,
+ struct net_device *lag_dev)
+{
+ struct prestera_switch *sw = port->sw;
+ struct prestera_lag *lag;
+ int err;
+
+ lag = prestera_lag_by_dev(sw, lag_dev);
+ if (!lag) {
+ lag = prestera_lag_create(sw, lag_dev);
+ if (!lag)
+ return -ENOMEM;
+ }
+
+ if (lag->member_count >= sw->lag_member_max)
+ return -ENOMEM;
+
+ err = prestera_hw_lag_member_add(port, lag->lag_id);
+ if (err) {
+ if (!lag->member_count)
+ prestera_lag_destroy(sw, lag);
+ return err;
+ }
+
+ list_add(&port->lag_member, &lag->members);
+ lag->member_count++;
+ port->lag = lag;
+
+ return 0;
+}
+
+static int prestera_lag_port_del(struct prestera_port *port)
+{
+ struct prestera_switch *sw = port->sw;
+ struct prestera_lag *lag = port->lag;
+ int err;
+
+ if (!lag || !lag->member_count)
+ return -EINVAL;
+
+ err = prestera_hw_lag_member_del(port, lag->lag_id);
+ if (err)
+ return err;
+
+ list_del(&port->lag_member);
+ lag->member_count--;
+ port->lag = NULL;
+
+ if (netif_is_bridge_port(lag->dev)) {
+ struct netdev_notifier_changeupper_info br_info;
+
+ br_info.upper_dev = netdev_master_upper_dev_get(lag->dev);
+ br_info.linking = false;
+
+ prestera_bridge_port_event(lag->dev, port->dev,
+ NETDEV_CHANGEUPPER, &br_info);
+ }
+
+ if (!lag->member_count)
+ prestera_lag_destroy(sw, lag);
+
+ return 0;
+}
+
+bool prestera_port_is_lag_member(const struct prestera_port *port)
+{
+ return !!port->lag;
+}
+
+u16 prestera_port_lag_id(const struct prestera_port *port)
+{
+ return port->lag->lag_id;
+}
+
+static int prestera_lag_init(struct prestera_switch *sw)
+{
+ u16 id;
+
+ sw->lags = kcalloc(sw->lag_max, sizeof(*sw->lags), GFP_KERNEL);
+ if (!sw->lags)
+ return -ENOMEM;
+
+ for (id = 0; id < sw->lag_max; id++)
+ sw->lags[id].lag_id = id;
+
+ return 0;
+}
+
+static void prestera_lag_fini(struct prestera_switch *sw)
+{
+ u8 idx;
+
+ for (idx = 0; idx < sw->lag_max; idx++)
+ WARN_ON(sw->lags[idx].member_count);
+
+ kfree(sw->lags);
+}
+
bool prestera_netdev_check(const struct net_device *dev)
{
return dev->netdev_ops == &prestera_netdev_ops;
@@ -507,19 +654,54 @@ struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
return port;
}
-static int prestera_netdev_port_event(struct net_device *dev,
+static int prestera_netdev_port_lower_event(struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info = ptr;
+ struct netdev_lag_lower_state_info *lower_state_info;
+ struct prestera_port *port = netdev_priv(dev);
+ bool enabled;
+
+ if (!netif_is_lag_port(dev))
+ return 0;
+ if (!prestera_port_is_lag_member(port))
+ return 0;
+
+ lower_state_info = info->lower_state_info;
+ enabled = lower_state_info->tx_enabled;
+
+ return prestera_hw_lag_member_enable(port, port->lag->lag_id, enabled);
+}
+
+static bool prestera_lag_master_check(struct net_device *lag_dev,
+ struct netdev_lag_upper_info *info,
+ struct netlink_ext_ack *ext_ack)
+{
+ if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(ext_ack, "Unsupported LAG Tx type");
+ return false;
+ }
+
+ return true;
+}
+
+static int prestera_netdev_port_event(struct net_device *lower,
+ struct net_device *dev,
unsigned long event, void *ptr)
{
struct netdev_notifier_changeupper_info *info = ptr;
+ struct prestera_port *port = netdev_priv(dev);
struct netlink_ext_ack *extack;
struct net_device *upper;
+ int err;
extack = netdev_notifier_info_to_extack(&info->info);
upper = info->upper_dev;
switch (event) {
case NETDEV_PRECHANGEUPPER:
- if (!netif_is_bridge_master(upper)) {
+ if (!netif_is_bridge_master(upper) &&
+ !netif_is_lag_master(upper)) {
NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
return -EINVAL;
}
@@ -531,12 +713,60 @@ static int prestera_netdev_port_event(struct net_device *dev,
NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
return -EINVAL;
}
+
+ if (netif_is_lag_master(upper) &&
+ !prestera_lag_master_check(upper, info->upper_info, extack))
+ return -EINVAL;
+ if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Master device is a LAG master and port has a VLAN");
+ return -EINVAL;
+ }
+ if (netif_is_lag_port(dev) && is_vlan_dev(upper) &&
+ !netif_is_lag_master(vlan_dev_real_dev(upper))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can not put a VLAN on a LAG port");
+ return -EINVAL;
+ }
break;
case NETDEV_CHANGEUPPER:
if (netif_is_bridge_master(upper))
- return prestera_bridge_port_event(dev, event, ptr);
+ return prestera_bridge_port_event(lower, dev, event,
+ ptr);
+
+ if (netif_is_lag_master(upper)) {
+ if (info->linking) {
+ err = prestera_lag_port_add(port, upper);
+ if (err)
+ return err;
+ } else {
+ prestera_lag_port_del(port);
+ }
+ }
break;
+
+ case NETDEV_CHANGELOWERSTATE:
+ return prestera_netdev_port_lower_event(dev, event, ptr);
+ }
+
+ return 0;
+}
+
+static int prestera_netdevice_lag_event(struct net_device *lag_dev,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ if (prestera_netdev_check(dev)) {
+ err = prestera_netdev_port_event(lag_dev, dev, event,
+ ptr);
+ if (err)
+ return err;
+ }
}
return 0;
@@ -549,7 +779,9 @@ static int prestera_netdev_event_handler(struct notifier_block *nb,
int err = 0;
if (prestera_netdev_check(dev))
- err = prestera_netdev_port_event(dev, event, ptr);
+ err = prestera_netdev_port_event(dev, dev, event, ptr);
+ else if (netif_is_lag_master(dev))
+ err = prestera_netdevice_lag_event(dev, event, ptr);
return notifier_from_errno(err);
}
@@ -603,6 +835,10 @@ static int prestera_switch_init(struct prestera_switch *sw)
if (err)
goto err_dl_register;
+ err = prestera_lag_init(sw);
+ if (err)
+ goto err_lag_init;
+
err = prestera_create_ports(sw);
if (err)
goto err_ports_create;
@@ -610,6 +846,8 @@ static int prestera_switch_init(struct prestera_switch *sw)
return 0;
err_ports_create:
+ prestera_lag_fini(sw);
+err_lag_init:
prestera_devlink_unregister(sw);
err_dl_register:
prestera_event_handlers_unregister(sw);
@@ -627,6 +865,7 @@ static int prestera_switch_init(struct prestera_switch *sw)
static void prestera_switch_fini(struct prestera_switch *sw)
{
prestera_destroy_ports(sw);
+ prestera_lag_fini(sw);
prestera_devlink_unregister(sw);
prestera_event_handlers_unregister(sw);
prestera_rxtx_switch_fini(sw);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 7736d5f498c9..3750c66a550b 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -180,6 +180,45 @@ prestera_port_vlan_create(struct prestera_port *port, u16 vid, bool untagged)
return ERR_PTR(err);
}
+static int prestera_fdb_add(struct prestera_port *port,
+ const unsigned char *mac, u16 vid, bool dynamic)
+{
+ if (prestera_port_is_lag_member(port))
+ return prestera_hw_lag_fdb_add(port->sw, prestera_port_lag_id(port),
+ mac, vid, dynamic);
+ else
+ return prestera_hw_fdb_add(port, mac, vid, dynamic);
+}
+
+static int prestera_fdb_del(struct prestera_port *port,
+ const unsigned char *mac, u16 vid)
+{
+ if (prestera_port_is_lag_member(port))
+ return prestera_hw_lag_fdb_del(port->sw, prestera_port_lag_id(port),
+ mac, vid);
+ else
+ return prestera_hw_fdb_del(port, mac, vid);
+}
+
+static int prestera_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
+ u32 mode)
+{
+ if (prestera_port_is_lag_member(port))
+ return prestera_hw_fdb_flush_lag_vlan(port->sw, prestera_port_lag_id(port),
+ vid, mode);
+ else
+ return prestera_hw_fdb_flush_port_vlan(port, vid, mode);
+}
+
+static int prestera_fdb_flush_port(struct prestera_port *port, u32 mode)
+{
+ if (prestera_port_is_lag_member(port))
+ return prestera_hw_fdb_flush_lag(port->sw, prestera_port_lag_id(port),
+ mode);
+ else
+ return prestera_hw_fdb_flush_port(port, mode);
+}
+
static void
prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
{
@@ -199,11 +238,11 @@ prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
last_port = port_count == 1;
if (last_vlan)
- prestera_hw_fdb_flush_port(port, fdb_flush_mode);
+ prestera_fdb_flush_port(port, fdb_flush_mode);
else if (last_port)
prestera_hw_fdb_flush_vlan(port->sw, vid, fdb_flush_mode);
else
- prestera_hw_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
+ prestera_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
list_del(&port_vlan->br_vlan_head);
prestera_bridge_vlan_put(br_vlan);
@@ -394,9 +433,9 @@ prestera_bridge_port_add(struct prestera_bridge *bridge, struct net_device *dev)
}
static int
-prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
+prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port,
+ struct prestera_port *port)
{
- struct prestera_port *port = netdev_priv(br_port->dev);
struct prestera_bridge *bridge = br_port->bridge;
int err;
@@ -423,6 +462,7 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
}
static int prestera_port_bridge_join(struct prestera_port *port,
+ struct net_device *lower,
struct net_device *upper)
{
struct prestera_switchdev *swdev = port->sw->swdev;
@@ -437,7 +477,7 @@ static int prestera_port_bridge_join(struct prestera_port *port,
return PTR_ERR(bridge);
}
- br_port = prestera_bridge_port_add(bridge, port->dev);
+ br_port = prestera_bridge_port_add(bridge, lower);
if (IS_ERR(br_port)) {
err = PTR_ERR(br_port);
goto err_brport_create;
@@ -446,7 +486,7 @@ static int prestera_port_bridge_join(struct prestera_port *port,
if (bridge->vlan_enabled)
return 0;
- err = prestera_bridge_1d_port_join(br_port);
+ err = prestera_bridge_1d_port_join(br_port, port);
if (err)
goto err_port_join;
@@ -459,19 +499,17 @@ static int prestera_port_bridge_join(struct prestera_port *port,
return err;
}
-static void prestera_bridge_1q_port_leave(struct prestera_bridge_port *br_port)
+static void prestera_bridge_1q_port_leave(struct prestera_bridge_port *br_port,
+ struct prestera_port *port)
{
- struct prestera_port *port = netdev_priv(br_port->dev);
-
- prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
+ prestera_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
prestera_port_pvid_set(port, PRESTERA_DEFAULT_VID);
}
-static void prestera_bridge_1d_port_leave(struct prestera_bridge_port *br_port)
+static void prestera_bridge_1d_port_leave(struct prestera_bridge_port *br_port,
+ struct prestera_port *port)
{
- struct prestera_port *port = netdev_priv(br_port->dev);
-
- prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
+ prestera_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
prestera_hw_bridge_port_delete(port, br_port->bridge->bridge_id);
}
@@ -506,6 +544,7 @@ static int prestera_port_vid_stp_set(struct prestera_port *port, u16 vid,
}
static void prestera_port_bridge_leave(struct prestera_port *port,
+ struct net_device *lower,
struct net_device *upper)
{
struct prestera_switchdev *swdev = port->sw->swdev;
@@ -516,16 +555,16 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
if (!bridge)
return;
- br_port = __prestera_bridge_port_by_dev(bridge, port->dev);
+ br_port = __prestera_bridge_port_by_dev(bridge, lower);
if (!br_port)
return;
bridge = br_port->bridge;
if (bridge->vlan_enabled)
- prestera_bridge_1q_port_leave(br_port);
+ prestera_bridge_1q_port_leave(br_port, port);
else
- prestera_bridge_1d_port_leave(br_port);
+ prestera_bridge_1d_port_leave(br_port, port);
prestera_hw_port_learning_set(port, false);
prestera_hw_port_flood_set(port, false);
@@ -533,8 +572,8 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
prestera_bridge_port_put(br_port);
}
-int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
- void *ptr)
+int prestera_bridge_port_event(struct net_device *lower, struct net_device *dev,
+ unsigned long event, void *ptr)
{
struct netdev_notifier_changeupper_info *info = ptr;
struct prestera_port *port;
@@ -547,11 +586,11 @@ int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
switch (event) {
case NETDEV_CHANGEUPPER:
if (info->linking) {
- err = prestera_port_bridge_join(port, upper);
+ err = prestera_port_bridge_join(port, lower, upper);
if (err)
return err;
} else {
- prestera_port_bridge_leave(port, upper);
+ prestera_port_bridge_leave(port, lower, upper);
}
break;
}
@@ -745,9 +784,9 @@ static int prestera_port_fdb_set(struct prestera_port *port,
vid = bridge->bridge_id;
if (adding)
- err = prestera_hw_fdb_add(port, fdb_info->addr, vid, false);
+ err = prestera_fdb_add(port, fdb_info->addr, vid, false);
else
- err = prestera_hw_fdb_del(port, fdb_info->addr, vid);
+ err = prestera_fdb_del(port, fdb_info->addr, vid);
return err;
}
@@ -1088,10 +1127,26 @@ static void prestera_fdb_event(struct prestera_switch *sw,
struct prestera_event *evt, void *arg)
{
struct switchdev_notifier_fdb_info info;
+ struct net_device *dev = NULL;
struct prestera_port *port;
+ struct prestera_lag *lag;
- port = prestera_find_port(sw, evt->fdb_evt.port_id);
- if (!port)
+ switch (evt->fdb_evt.type) {
+ case PRESTERA_FDB_ENTRY_TYPE_REG_PORT:
+ port = prestera_find_port(sw, evt->fdb_evt.dest.port_id);
+ if (port)
+ dev = port->dev;
+ break;
+ case PRESTERA_FDB_ENTRY_TYPE_LAG:
+ lag = prestera_lag_by_id(sw, evt->fdb_evt.dest.lag_id);
+ if (lag)
+ dev = lag->dev;
+ break;
+ default:
+ return;
+ }
+
+ if (!dev)
return;
info.addr = evt->fdb_evt.data.mac;
@@ -1103,11 +1158,11 @@ static void prestera_fdb_event(struct prestera_switch *sw,
switch (evt->id) {
case PRESTERA_FDB_EVENT_LEARNED:
call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
- port->dev, &info.info, NULL);
+ dev, &info.info, NULL);
break;
case PRESTERA_FDB_EVENT_AGED:
call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
- port->dev, &info.info, NULL);
+ dev, &info.info, NULL);
break;
}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
index 606e21d2355b..70e9ed87e24a 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
@@ -7,7 +7,7 @@
int prestera_switchdev_init(struct prestera_switch *sw);
void prestera_switchdev_fini(struct prestera_switch *sw);
-int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
- void *ptr);
+int prestera_bridge_port_event(struct net_device *lower, struct net_device *dev,
+ unsigned long event, void *ptr);
#endif /* _PRESTERA_SWITCHDEV_H_ */
--
2.17.1
For some reason there might be a crash during ports creation if port
events are handled at the same time, because fw may send an initial
port event with down state.
The crash points to cancel_delayed_work() which is called when the port
goes down. Currently I did not find out the real cause of the issue, so
I fixed it by cancelling the port stats work only if the port's previous
state was up & running.
The following is the crash which can be triggered:
[ 28.311104] Unable to handle kernel paging request at virtual address
000071775f776600
[ 28.319097] Mem abort info:
[ 28.321914] ESR = 0x96000004
[ 28.324996] EC = 0x25: DABT (current EL), IL = 32 bits
[ 28.330350] SET = 0, FnV = 0
[ 28.333430] EA = 0, S1PTW = 0
[ 28.336597] Data abort info:
[ 28.339499] ISV = 0, ISS = 0x00000004
[ 28.343362] CM = 0, WnR = 0
[ 28.346354] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000100bf7000
[ 28.352842] [000071775f776600] pgd=0000000000000000,
p4d=0000000000000000
[ 28.359695] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 28.365310] Modules linked in: prestera_pci(+) prestera
uio_pdrv_genirq
[ 28.372005] CPU: 0 PID: 1291 Comm: kworker/0:1H Not tainted
5.11.0-rc4 #1
[ 28.378846] Hardware name: DNI AmazonGo1 A7040 board (DT)
[ 28.384283] Workqueue: prestera_fw_wq prestera_fw_evt_work_fn
[prestera_pci]
[ 28.391413] pstate: 60000085 (nZCv daIf -PAN -UAO -TCO BTYPE=--)
[ 28.397468] pc : get_work_pool+0x48/0x60
[ 28.401442] lr : try_to_grab_pending+0x6c/0x1b0
[ 28.406018] sp : ffff80001391bc60
[ 28.409358] x29: ffff80001391bc60 x28: 0000000000000000
[ 28.414725] x27: ffff000104fc8b40 x26: ffff80001127de88
[ 28.420089] x25: 0000000000000000 x24: ffff000106119760
[ 28.425452] x23: ffff00010775dd60 x22: ffff00010567e000
[ 28.430814] x21: 0000000000000000 x20: ffff80001391bcb0
[ 28.436175] x19: ffff00010775deb8 x18: 00000000000000c0
[ 28.441537] x17: 0000000000000000 x16: 000000008d9b0e88
[ 28.446898] x15: 0000000000000001 x14: 00000000000002ba
[ 28.452261] x13: 80a3002c00000002 x12: 00000000000005f4
[ 28.457622] x11: 0000000000000030 x10: 000000000000000c
[ 28.462985] x9 : 000000000000000c x8 : 0000000000000030
[ 28.468346] x7 : ffff800014400000 x6 : ffff000106119758
[ 28.473708] x5 : 0000000000000003 x4 : ffff00010775dc60
[ 28.479068] x3 : 0000000000000000 x2 : 0000000000000060
[ 28.484429] x1 : 000071775f776600 x0 : ffff00010775deb8
[ 28.489791] Call trace:
[ 28.492259] get_work_pool+0x48/0x60
[ 28.495874] cancel_delayed_work+0x38/0xb0
[ 28.500011] prestera_port_handle_event+0x90/0xa0 [prestera]
[ 28.505743] prestera_evt_recv+0x98/0xe0 [prestera]
[ 28.510683] prestera_fw_evt_work_fn+0x180/0x228 [prestera_pci]
[ 28.516660] process_one_work+0x1e8/0x360
[ 28.520710] worker_thread+0x44/0x480
[ 28.524412] kthread+0x154/0x160
[ 28.527670] ret_from_fork+0x10/0x38
[ 28.531290] Code: a8c17bfd d50323bf d65f03c0 9278dc21 (f9400020)
[ 28.537429] ---[ end trace 5eced933df3a080b ]---
Signed-off-by: Vadym Kochan <[email protected]>
---
drivers/net/ethernet/marvell/prestera/prestera_main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 39465e65d09b..122324dae47d 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -433,7 +433,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
netif_carrier_on(port->dev);
if (!delayed_work_pending(caching_dw))
queue_delayed_work(prestera_wq, caching_dw, 0);
- } else {
+ } else if (netif_running(port->dev) &&
+ netif_carrier_ok(port->dev)) {
netif_carrier_off(port->dev);
if (delayed_work_pending(caching_dw))
cancel_delayed_work(caching_dw);
--
2.17.1
The latest FW IPC flood message format was changed to configure uc/mc
flooding separately, so change the code accordingly.
Signed-off-by: Vadym Kochan <[email protected]>
---
.../ethernet/marvell/prestera/prestera_hw.c | 37 +++++++++++++--
.../ethernet/marvell/prestera/prestera_hw.h | 3 +-
.../marvell/prestera/prestera_switchdev.c | 46 +++++++++++++++----
3 files changed, 72 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 8afb45f66862..75034dcb3649 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -90,6 +90,11 @@ enum {
PRESTERA_PORT_TP_AUTO,
};
+enum {
+ PRESTERA_PORT_FLOOD_TYPE_UC = 0,
+ PRESTERA_PORT_FLOOD_TYPE_MC = 1,
+};
+
enum {
PRESTERA_PORT_GOOD_OCTETS_RCV_CNT,
PRESTERA_PORT_BAD_OCTETS_RCV_CNT,
@@ -201,6 +206,11 @@ struct prestera_msg_port_mdix_param {
u8 admin_mode;
};
+struct prestera_msg_port_flood_param {
+ u8 type;
+ u8 enable;
+};
+
union prestera_msg_port_param {
u8 admin_state;
u8 oper_state;
@@ -209,7 +219,6 @@ union prestera_msg_port_param {
u8 accept_frm_type;
u32 speed;
u8 learning;
- u8 flood;
u32 link_mode;
u8 type;
u8 duplex;
@@ -218,6 +227,7 @@ union prestera_msg_port_param {
struct prestera_msg_port_mdix_param mdix;
struct prestera_msg_port_autoneg_param autoneg;
struct prestera_msg_port_cap_param cap;
+ struct prestera_msg_port_flood_param flood;
};
struct prestera_msg_port_attr_req {
@@ -1030,14 +1040,35 @@ int prestera_hw_port_learning_set(struct prestera_port *port, bool enable)
&req.cmd, sizeof(req));
}
-int prestera_hw_port_flood_set(struct prestera_port *port, bool flood)
+int prestera_hw_port_uc_flood_set(struct prestera_port *port, bool flood)
+{
+ struct prestera_msg_port_attr_req req = {
+ .attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
+ .port = port->hw_id,
+ .dev = port->dev_id,
+ .param = {
+ .flood = {
+ .type = PRESTERA_PORT_FLOOD_TYPE_UC,
+ .enable = flood,
+ }
+ }
+ };
+
+ return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+ &req.cmd, sizeof(req));
+}
+
+int prestera_hw_port_mc_flood_set(struct prestera_port *port, bool flood)
{
struct prestera_msg_port_attr_req req = {
.attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
.port = port->hw_id,
.dev = port->dev_id,
.param = {
- .flood = flood,
+ .flood = {
+ .type = PRESTERA_PORT_FLOOD_TYPE_MC,
+ .enable = flood,
+ }
}
};
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 68ce41595349..03b52db6f359 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -138,7 +138,8 @@ int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status,
int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode);
int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
-int prestera_hw_port_flood_set(struct prestera_port *port, bool flood);
+int prestera_hw_port_uc_flood_set(struct prestera_port *port, bool flood);
+int prestera_hw_port_mc_flood_set(struct prestera_port *port, bool flood);
int prestera_hw_port_accept_frm_type(struct prestera_port *port,
enum prestera_accept_frm_type type);
/* Vlan API */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 3750c66a550b..8449539fe944 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -443,9 +443,13 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port,
if (err)
return err;
- err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+ err = prestera_hw_port_uc_flood_set(port, br_port->flags & BR_FLOOD);
if (err)
- goto err_port_flood_set;
+ goto err_port_uc_flood_set;
+
+ err = prestera_hw_port_mc_flood_set(port, br_port->flags & BR_MCAST_FLOOD);
+ if (err)
+ goto err_port_mc_flood_set;
err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING);
if (err)
@@ -454,8 +458,10 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port,
return 0;
err_port_learning_set:
- prestera_hw_port_flood_set(port, false);
-err_port_flood_set:
+ prestera_hw_port_mc_flood_set(port, false);
+err_port_mc_flood_set:
+ prestera_hw_port_uc_flood_set(port, false);
+err_port_uc_flood_set:
prestera_hw_bridge_port_delete(port, bridge->bridge_id);
return err;
@@ -567,7 +573,8 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
prestera_bridge_1d_port_leave(br_port, port);
prestera_hw_port_learning_set(port, false);
- prestera_hw_port_flood_set(port, false);
+ prestera_hw_port_uc_flood_set(port, false);
+ prestera_hw_port_mc_flood_set(port, false);
prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING);
prestera_bridge_port_put(br_port);
}
@@ -609,17 +616,28 @@ static int prestera_port_attr_br_flags_set(struct prestera_port *port,
if (!br_port)
return 0;
- err = prestera_hw_port_flood_set(port, flags & BR_FLOOD);
+ err = prestera_hw_port_uc_flood_set(port, flags & BR_FLOOD);
if (err)
- return err;
+ goto err_port_uc_flood_set;
+
+ err = prestera_hw_port_mc_flood_set(port, flags & BR_MCAST_FLOOD);
+ if (err)
+ goto err_port_mc_flood_set;
err = prestera_hw_port_learning_set(port, flags & BR_LEARNING);
if (err)
- return err;
+ goto err_port_learning_set;
memcpy(&br_port->flags, &flags, sizeof(flags));
return 0;
+
+err_port_learning_set:
+ prestera_hw_port_mc_flood_set(port, false);
+err_port_mc_flood_set:
+ prestera_hw_port_uc_flood_set(port, false);
+err_port_uc_flood_set:
+ return err;
}
static int prestera_port_attr_br_ageing_set(struct prestera_port *port,
@@ -914,9 +932,13 @@ prestera_port_vlan_bridge_join(struct prestera_port_vlan *port_vlan,
if (port_vlan->br_port)
return 0;
- err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+ err = prestera_hw_port_uc_flood_set(port, br_port->flags & BR_FLOOD);
if (err)
- return err;
+ goto err_port_uc_flood_set;
+
+ err = prestera_hw_port_mc_flood_set(port, br_port->flags & BR_MCAST_FLOOD);
+ if (err)
+ goto err_port_mc_flood_set;
err = prestera_hw_port_learning_set(port, br_port->flags & BR_LEARNING);
if (err)
@@ -947,6 +969,10 @@ prestera_port_vlan_bridge_join(struct prestera_port_vlan *port_vlan,
err_port_vid_stp_set:
prestera_hw_port_learning_set(port, false);
err_port_learning_set:
+ prestera_hw_port_mc_flood_set(port, false);
+err_port_mc_flood_set:
+ prestera_hw_port_uc_flood_set(port, false);
+err_port_uc_flood_set:
return err;
}
--
2.17.1
Add PCI match for AC3X 98DX3265 device which is supported by the current
driver and firmware.
Signed-off-by: Vadym Kochan <[email protected]>
---
drivers/net/ethernet/marvell/prestera/prestera_pci.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index f7b27ef02624..b698a6b4a985 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -775,6 +775,7 @@ static void prestera_pci_remove(struct pci_dev *pdev)
static const struct pci_device_id prestera_pci_devices[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) },
+ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC80C) },
{ }
};
MODULE_DEVICE_TABLE(pci, prestera_pci_devices);
--
2.17.1
On Wed, 3 Feb 2021 18:54:53 +0200 Vadym Kochan wrote:
> There are change in firmware which requires that receiver will
> disable event interrupts before handling them and enable them
> after finish with handling. Events still may come into the queue
> but without receiver interruption.
Sounds like you should do a major version bump for this.
Old driver will not work correctly with new FW.
On Wed, 3 Feb 2021 18:54:56 +0200 Vadym Kochan wrote:
> From: Serhiy Boiko <[email protected]>
>
> The following features are supported:
>
> - LAG basic operations
> - create/delete LAG
> - add/remove a member to LAG
> - enable/disable member in LAG
> - LAG Bridge support
> - LAG VLAN support
> - LAG FDB support
>
> Limitations:
>
> - Only HASH lag tx type is supported
> - The Hash parameters are not configurable. They are applied
> during the LAG creation stage.
> - Enslaving a port to the LAG device that already has an
> upper device is not supported.
Tobias, Vladimir, you worked on LAG support recently, would you mind
taking a look at this one?
On Wed, 3 Feb 2021 18:54:58 +0200 Vadym Kochan wrote:
> For some reason there might be a crash during ports creation if port
> events are handling at the same time because fw may send initial
> port event with down state.
>
> The crash points to cancel_delayed_work() which is called when port went
> is down. Currently I did not find out the real cause of the issue, so
> fixed it by cancel port stats work only if previous port's state was up
> & runnig.
Maybe you just need to move the DELAYED_WORK_INIT() earlier?
Not sure why it's at the end of prestera_port_create(), it
just initializes some fields.
> [ 28.489791] Call trace:
> [ 28.492259] get_work_pool+0x48/0x60
> [ 28.495874] cancel_delayed_work+0x38/0xb0
> [ 28.500011] prestera_port_handle_event+0x90/0xa0 [prestera]
> [ 28.505743] prestera_evt_recv+0x98/0xe0 [prestera]
> [ 28.510683] prestera_fw_evt_work_fn+0x180/0x228 [prestera_pci]
> [ 28.516660] process_one_work+0x1e8/0x360
> [ 28.520710] worker_thread+0x44/0x480
> [ 28.524412] kthread+0x154/0x160
> [ 28.527670] ret_from_fork+0x10/0x38
> [ 28.531290] Code: a8c17bfd d50323bf d65f03c0 9278dc21 (f9400020)
> [ 28.537429] ---[ end trace 5eced933df3a080b ]---
>
> Signed-off-by: Vadym Kochan <[email protected]>
> ---
> drivers/net/ethernet/marvell/prestera/prestera_main.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
> index 39465e65d09b..122324dae47d 100644
> --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
> +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
> @@ -433,7 +433,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
> netif_carrier_on(port->dev);
> if (!delayed_work_pending(caching_dw))
> queue_delayed_work(prestera_wq, caching_dw, 0);
> - } else {
> + } else if (netif_running(port->dev) &&
> + netif_carrier_ok(port->dev)) {
> netif_carrier_off(port->dev);
> if (delayed_work_pending(caching_dw))
> cancel_delayed_work(caching_dw);
Hi Jakub,
On Thu, Feb 04, 2021 at 09:10:12PM -0800, Jakub Kicinski wrote:
> On Wed, 3 Feb 2021 18:54:53 +0200 Vadym Kochan wrote:
> > There are change in firmware which requires that receiver will
> > disable event interrupts before handling them and enable them
> > after finish with handling. Events still may come into the queue
> > but without receiver interruption.
>
> Sounds like you should do a major version bump for this.
>
> Old driver will not work correctly with new FW.
Right, the old driver version should fail with new fw version.
Thanks, I will re-send new fw version.
On Wed, Feb 03, 2021 at 06:54:55PM +0200, Vadym Kochan wrote:
> Move handling of PRECHANGEUPPER event from prestera_switchdev to
> prestera_main which is responsible for basic netdev events handling
> and routing them to related module.
>
> Signed-off-by: Vadym Kochan <[email protected]>
> ---
Reviewed-by: Vladimir Oltean <[email protected]>
On Wed, Feb 03, 2021 at 06:54:56PM +0200, Vadym Kochan wrote:
> +static struct prestera_lag *prestera_lag_by_dev(struct prestera_switch *sw,
> + struct net_device *dev)
> +{
> + struct prestera_lag *lag;
> + u16 id;
> +
> + for (id = 0; id < sw->lag_max; id++) {
> + lag = &sw->lags[id];
> + if (lag->dev == dev)
> + return lag;
> + }
> +
> + return NULL;
> +}
> +
> +static struct prestera_lag *prestera_lag_create(struct prestera_switch *sw,
> + struct net_device *lag_dev)
> +{
> + struct prestera_lag *lag;
You should initialize with NULL.
> + u16 id;
> +
> + for (id = 0; id < sw->lag_max; id++) {
> + lag = &sw->lags[id];
> + if (!lag->dev)
> + break;
> + }
> + if (lag) {
> + INIT_LIST_HEAD(&lag->members);
> + lag->dev = lag_dev;
> + }
> +
> + return lag;
> +}
> +
> +static void prestera_lag_destroy(struct prestera_switch *sw,
> + struct prestera_lag *lag)
> +{
> + WARN_ON(!list_empty(&lag->members));
> + lag->member_count = 0;
> + lag->dev = NULL;
> +}
> +
> +static int prestera_lag_port_add(struct prestera_port *port,
> + struct net_device *lag_dev)
> +{
> + struct prestera_switch *sw = port->sw;
> + struct prestera_lag *lag;
> + int err;
> +
> + lag = prestera_lag_by_dev(sw, lag_dev);
> + if (!lag) {
> + lag = prestera_lag_create(sw, lag_dev);
> + if (!lag)
> + return -ENOMEM;
I think ENOMEM is reserved for dynamic memory allocation. I think
-ENOSPC may be a better error code (here and everywhere else).
Maybe you would also like to propagate the netlink extack from the
changeupper event and say what went wrong?
> + }
> +
> + if (lag->member_count >= sw->lag_member_max)
> + return -ENOMEM;
> +
> + err = prestera_hw_lag_member_add(port, lag->lag_id);
> + if (err) {
> + if (!lag->member_count)
> + prestera_lag_destroy(sw, lag);
> + return err;
> + }
> +
> + list_add(&port->lag_member, &lag->members);
> + lag->member_count++;
> + port->lag = lag;
> +
> + return 0;
> +}
> +
> +static int prestera_lag_port_del(struct prestera_port *port)
> +{
> + struct prestera_switch *sw = port->sw;
> + struct prestera_lag *lag = port->lag;
> + int err;
> +
> + if (!lag || !lag->member_count)
> + return -EINVAL;
> +
> + err = prestera_hw_lag_member_del(port, lag->lag_id);
> + if (err)
> + return err;
> +
> + list_del(&port->lag_member);
> + lag->member_count--;
> + port->lag = NULL;
> +
> + if (netif_is_bridge_port(lag->dev)) {
> + struct netdev_notifier_changeupper_info br_info;
> +
> + br_info.upper_dev = netdev_master_upper_dev_get(lag->dev);
> + br_info.linking = false;
> +
> + prestera_bridge_port_event(lag->dev, port->dev,
> + NETDEV_CHANGEUPPER, &br_info);
> + }
I think it might be more intuitive if you just call
prestera_port_bridge_leave() rather than simulating a notifier call.
> +
> + if (!lag->member_count)
> + prestera_lag_destroy(sw, lag);
> +
> + return 0;
> +}
> +
> +bool prestera_port_is_lag_member(const struct prestera_port *port)
> +{
> + return !!port->lag;
> +}
> +
> +u16 prestera_port_lag_id(const struct prestera_port *port)
> +{
> + return port->lag->lag_id;
> +}
> +
> +static int prestera_lag_init(struct prestera_switch *sw)
> +{
> + u16 id;
> +
> + sw->lags = kcalloc(sw->lag_max, sizeof(*sw->lags), GFP_KERNEL);
> + if (!sw->lags)
> + return -ENOMEM;
> +
> + for (id = 0; id < sw->lag_max; id++)
> + sw->lags[id].lag_id = id;
> +
> + return 0;
> +}
> +
> +static void prestera_lag_fini(struct prestera_switch *sw)
> +{
> + u8 idx;
> +
> + for (idx = 0; idx < sw->lag_max; idx++)
> + WARN_ON(sw->lags[idx].member_count);
> +
> + kfree(sw->lags);
> +}
> +
> bool prestera_netdev_check(const struct net_device *dev)
> {
> return dev->netdev_ops == &prestera_netdev_ops;
> @@ -507,19 +654,54 @@ struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
> return port;
> }
>
> -static int prestera_netdev_port_event(struct net_device *dev,
> +static int prestera_netdev_port_lower_event(struct net_device *dev,
> + unsigned long event, void *ptr)
> +{
> + struct netdev_notifier_changelowerstate_info *info = ptr;
> + struct netdev_lag_lower_state_info *lower_state_info;
> + struct prestera_port *port = netdev_priv(dev);
> + bool enabled;
> +
> + if (!netif_is_lag_port(dev))
> + return 0;
> + if (!prestera_port_is_lag_member(port))
> + return 0;
> +
> + lower_state_info = info->lower_state_info;
> + enabled = lower_state_info->tx_enabled;
You also need to check for info->link_up, otherwise the ports won't get
rebalanced for bonding interfaces with "mode balance-xor miimon 1" and such.
There is also a comment in net/dsa/port.c with more details.
> +
> + return prestera_hw_lag_member_enable(port, port->lag->lag_id, enabled);
> +}
> +
> +static bool prestera_lag_master_check(struct net_device *lag_dev,
> + struct netdev_lag_upper_info *info,
> + struct netlink_ext_ack *ext_ack)
> +{
> + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
> + NL_SET_ERR_MSG_MOD(ext_ack, "Unsupported LAG Tx type");
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static int prestera_netdev_port_event(struct net_device *lower,
> + struct net_device *dev,
> unsigned long event, void *ptr)
> {
> struct netdev_notifier_changeupper_info *info = ptr;
> + struct prestera_port *port = netdev_priv(dev);
> struct netlink_ext_ack *extack;
> struct net_device *upper;
> + int err;
>
> extack = netdev_notifier_info_to_extack(&info->info);
> upper = info->upper_dev;
>
> switch (event) {
> case NETDEV_PRECHANGEUPPER:
> - if (!netif_is_bridge_master(upper)) {
> + if (!netif_is_bridge_master(upper) &&
> + !netif_is_lag_master(upper)) {
No 8021q uppers allowed on Marvell Prestera switch ports?
> NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
> return -EINVAL;
> }
> @@ -531,12 +713,60 @@ static int prestera_netdev_port_event(struct net_device *dev,
> NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
> return -EINVAL;
> }
> +
> + if (netif_is_lag_master(upper) &&
> + !prestera_lag_master_check(upper, info->upper_info, extack))
> + return -EINVAL;
-EOPNOTSUPP maybe?
In DSA we had a discussion and agreed to do software fallback for
bonding modes that can't be offloaded, and just print an extack and
return 0. What is your take on that?
> + if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Master device is a LAG master and port has a VLAN");
> + return -EINVAL;
> + }
> + if (netif_is_lag_port(dev) && is_vlan_dev(upper) &&
> + !netif_is_lag_master(vlan_dev_real_dev(upper))) {
> + NL_SET_ERR_MSG_MOD(extack,
> + "Can not put a VLAN on a LAG port");
> + return -EINVAL;
> + }
> break;
>
> case NETDEV_CHANGEUPPER:
> if (netif_is_bridge_master(upper))
> - return prestera_bridge_port_event(dev, event, ptr);
> + return prestera_bridge_port_event(lower, dev, event,
> + ptr);
> +
> + if (netif_is_lag_master(upper)) {
> + if (info->linking) {
> + err = prestera_lag_port_add(port, upper);
> + if (err)
> + return err;
> + } else {
> + prestera_lag_port_del(port);
> + }
> + }
> break;
> +
> + case NETDEV_CHANGELOWERSTATE:
> + return prestera_netdev_port_lower_event(dev, event, ptr);
> + }
> +
> + return 0;
> +}
> +
> +static int prestera_netdevice_lag_event(struct net_device *lag_dev,
> + unsigned long event, void *ptr)
> +{
> + struct net_device *dev;
> + struct list_head *iter;
> + int err;
> +
> + netdev_for_each_lower_dev(lag_dev, dev, iter) {
> + if (prestera_netdev_check(dev)) {
> + err = prestera_netdev_port_event(lag_dev, dev, event,
> + ptr);
> + if (err)
> + return err;
> + }
> }
>
> return 0;
> @@ -549,7 +779,9 @@ static int prestera_netdev_event_handler(struct notifier_block *nb,
> int err = 0;
>
> if (prestera_netdev_check(dev))
> - err = prestera_netdev_port_event(dev, event, ptr);
> + err = prestera_netdev_port_event(dev, dev, event, ptr);
> + else if (netif_is_lag_master(dev))
> + err = prestera_netdevice_lag_event(dev, event, ptr);
>
> return notifier_from_errno(err);
> }
> @@ -603,6 +835,10 @@ static int prestera_switch_init(struct prestera_switch *sw)
> if (err)
> goto err_dl_register;
>
> + err = prestera_lag_init(sw);
> + if (err)
> + goto err_lag_init;
> +
> err = prestera_create_ports(sw);
> if (err)
> goto err_ports_create;
> @@ -610,6 +846,8 @@ static int prestera_switch_init(struct prestera_switch *sw)
> return 0;
>
> err_ports_create:
> + prestera_lag_fini(sw);
> +err_lag_init:
> prestera_devlink_unregister(sw);
> err_dl_register:
> prestera_event_handlers_unregister(sw);
> @@ -627,6 +865,7 @@ static int prestera_switch_init(struct prestera_switch *sw)
> static void prestera_switch_fini(struct prestera_switch *sw)
> {
> prestera_destroy_ports(sw);
> + prestera_lag_fini(sw);
> prestera_devlink_unregister(sw);
> prestera_event_handlers_unregister(sw);
> prestera_rxtx_switch_fini(sw);
> diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
> index 7736d5f498c9..3750c66a550b 100644
> --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
> +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
> @@ -180,6 +180,45 @@ prestera_port_vlan_create(struct prestera_port *port, u16 vid, bool untagged)
> return ERR_PTR(err);
> }
>
> +static int prestera_fdb_add(struct prestera_port *port,
> + const unsigned char *mac, u16 vid, bool dynamic)
> +{
> + if (prestera_port_is_lag_member(port))
> + return prestera_hw_lag_fdb_add(port->sw, prestera_port_lag_id(port),
> + mac, vid, dynamic);
> + else
> + return prestera_hw_fdb_add(port, mac, vid, dynamic);
> +}
I think checkpatch tells you that "else" after "return" is not really
necessary.
> +
> +static int prestera_fdb_del(struct prestera_port *port,
> + const unsigned char *mac, u16 vid)
> +{
> + if (prestera_port_is_lag_member(port))
> + return prestera_hw_lag_fdb_del(port->sw, prestera_port_lag_id(port),
> + mac, vid);
> + else
> + return prestera_hw_fdb_del(port, mac, vid);
> +}
> +
> +static int prestera_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
> + u32 mode)
> +{
> + if (prestera_port_is_lag_member(port))
> + return prestera_hw_fdb_flush_lag_vlan(port->sw, prestera_port_lag_id(port),
> + vid, mode);
> + else
> + return prestera_hw_fdb_flush_port_vlan(port, vid, mode);
> +}
> +
> +static int prestera_fdb_flush_port(struct prestera_port *port, u32 mode)
> +{
> + if (prestera_port_is_lag_member(port))
> + return prestera_hw_fdb_flush_lag(port->sw, prestera_port_lag_id(port),
> + mode);
> + else
> + return prestera_hw_fdb_flush_port(port, mode);
> +}
> +
> static void
> prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
> {
> @@ -199,11 +238,11 @@ prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
> last_port = port_count == 1;
>
> if (last_vlan)
> - prestera_hw_fdb_flush_port(port, fdb_flush_mode);
> + prestera_fdb_flush_port(port, fdb_flush_mode);
> else if (last_port)
> prestera_hw_fdb_flush_vlan(port->sw, vid, fdb_flush_mode);
> else
> - prestera_hw_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
> + prestera_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
>
> list_del(&port_vlan->br_vlan_head);
> prestera_bridge_vlan_put(br_vlan);
> @@ -394,9 +433,9 @@ prestera_bridge_port_add(struct prestera_bridge *bridge, struct net_device *dev)
> }
>
> static int
> -prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
> +prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port,
> + struct prestera_port *port)
> {
> - struct prestera_port *port = netdev_priv(br_port->dev);
> struct prestera_bridge *bridge = br_port->bridge;
> int err;
>
> @@ -423,6 +462,7 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
> }
>
> static int prestera_port_bridge_join(struct prestera_port *port,
> + struct net_device *lower,
> struct net_device *upper)
> {
> struct prestera_switchdev *swdev = port->sw->swdev;
> @@ -437,7 +477,7 @@ static int prestera_port_bridge_join(struct prestera_port *port,
> return PTR_ERR(bridge);
> }
>
> - br_port = prestera_bridge_port_add(bridge, port->dev);
> + br_port = prestera_bridge_port_add(bridge, lower);
> if (IS_ERR(br_port)) {
> err = PTR_ERR(br_port);
> goto err_brport_create;
> @@ -446,7 +486,7 @@ static int prestera_port_bridge_join(struct prestera_port *port,
> if (bridge->vlan_enabled)
> return 0;
>
> - err = prestera_bridge_1d_port_join(br_port);
> + err = prestera_bridge_1d_port_join(br_port, port);
> if (err)
> goto err_port_join;
>
> @@ -459,19 +499,17 @@ static int prestera_port_bridge_join(struct prestera_port *port,
> return err;
> }
>
> -static void prestera_bridge_1q_port_leave(struct prestera_bridge_port *br_port)
> +static void prestera_bridge_1q_port_leave(struct prestera_bridge_port *br_port,
> + struct prestera_port *port)
> {
> - struct prestera_port *port = netdev_priv(br_port->dev);
> -
> - prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
> + prestera_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
> prestera_port_pvid_set(port, PRESTERA_DEFAULT_VID);
> }
>
> -static void prestera_bridge_1d_port_leave(struct prestera_bridge_port *br_port)
> +static void prestera_bridge_1d_port_leave(struct prestera_bridge_port *br_port,
> + struct prestera_port *port)
> {
> - struct prestera_port *port = netdev_priv(br_port->dev);
> -
> - prestera_hw_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
> + prestera_fdb_flush_port(port, PRESTERA_FDB_FLUSH_MODE_ALL);
> prestera_hw_bridge_port_delete(port, br_port->bridge->bridge_id);
> }
>
> @@ -506,6 +544,7 @@ static int prestera_port_vid_stp_set(struct prestera_port *port, u16 vid,
> }
>
> static void prestera_port_bridge_leave(struct prestera_port *port,
> + struct net_device *lower,
> struct net_device *upper)
> {
> struct prestera_switchdev *swdev = port->sw->swdev;
> @@ -516,16 +555,16 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
> if (!bridge)
> return;
>
> - br_port = __prestera_bridge_port_by_dev(bridge, port->dev);
> + br_port = __prestera_bridge_port_by_dev(bridge, lower);
> if (!br_port)
> return;
>
> bridge = br_port->bridge;
>
> if (bridge->vlan_enabled)
> - prestera_bridge_1q_port_leave(br_port);
> + prestera_bridge_1q_port_leave(br_port, port);
> else
> - prestera_bridge_1d_port_leave(br_port);
> + prestera_bridge_1d_port_leave(br_port, port);
>
> prestera_hw_port_learning_set(port, false);
> prestera_hw_port_flood_set(port, false);
> @@ -533,8 +572,8 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
> prestera_bridge_port_put(br_port);
> }
>
> -int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
> - void *ptr)
> +int prestera_bridge_port_event(struct net_device *lower, struct net_device *dev,
> + unsigned long event, void *ptr)
It's odd that you have a net_device lower and a net_device dev.
You're only using "dev" to retrieve the struct prestera_port; can't you
just pass that as a parameter? It will also help avoid possible mix-ups
in the future between lower (which can be a LAG or a port, and which is
associated with a struct prestera_bridge_port) and dev (which is only a
port, and is associated with a struct prestera_port).
Hi Jakub,
On Thu, Feb 04, 2021 at 09:19:34PM -0800, Jakub Kicinski wrote:
> On Wed, 3 Feb 2021 18:54:58 +0200 Vadym Kochan wrote:
> > For some reason there might be a crash during ports creation if port
> > events are handling at the same time because fw may send initial
> > port event with down state.
> >
> > The crash points to cancel_delayed_work() which is called when port went
> > is down. Currently I did not find out the real cause of the issue, so
> > fixed it by cancel port stats work only if previous port's state was up
> > & runnig.
>
> Maybe you just need to move the DELAYED_WORK_INIT() earlier?
> Not sure why it's at the end of prestera_port_create(), it
> just initializes some fields.
>
Thanks for the suggestion, but it does not help. I will try to find out the
real root cause, but this is the only fix I've come up with.
> > [ 28.489791] Call trace:
> > [ 28.492259] get_work_pool+0x48/0x60
> > [ 28.495874] cancel_delayed_work+0x38/0xb0
> > [ 28.500011] prestera_port_handle_event+0x90/0xa0 [prestera]
> > [ 28.505743] prestera_evt_recv+0x98/0xe0 [prestera]
> > [ 28.510683] prestera_fw_evt_work_fn+0x180/0x228 [prestera_pci]
> > [ 28.516660] process_one_work+0x1e8/0x360
> > [ 28.520710] worker_thread+0x44/0x480
> > [ 28.524412] kthread+0x154/0x160
> > [ 28.527670] ret_from_fork+0x10/0x38
> > [ 28.531290] Code: a8c17bfd d50323bf d65f03c0 9278dc21 (f9400020)
> > [ 28.537429] ---[ end trace 5eced933df3a080b ]---
> >
> > Signed-off-by: Vadym Kochan <[email protected]>
> > ---
> > drivers/net/ethernet/marvell/prestera/prestera_main.c | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
> > index 39465e65d09b..122324dae47d 100644
> > --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
> > +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
> > @@ -433,7 +433,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
> > netif_carrier_on(port->dev);
> > if (!delayed_work_pending(caching_dw))
> > queue_delayed_work(prestera_wq, caching_dw, 0);
> > - } else {
> > + } else if (netif_running(port->dev) &&
> > + netif_carrier_ok(port->dev)) {
> > netif_carrier_off(port->dev);
> > if (delayed_work_pending(caching_dw))
> > cancel_delayed_work(caching_dw);
>
On Thu, Feb 04, 2021 at 21:16, Jakub Kicinski <[email protected]> wrote:
> On Wed, 3 Feb 2021 18:54:56 +0200 Vadym Kochan wrote:
>> From: Serhiy Boiko <[email protected]>
>>
>> The following features are supported:
>>
>> - LAG basic operations
>> - create/delete LAG
>> - add/remove a member to LAG
>> - enable/disable member in LAG
>> - LAG Bridge support
>> - LAG VLAN support
>> - LAG FDB support
>>
>> Limitations:
>>
>> - Only HASH lag tx type is supported
>> - The Hash parameters are not configurable. They are applied
>> during the LAG creation stage.
>> - Enslaving a port to the LAG device that already has an
>> upper device is not supported.
>
> Tobias, Vladimir, you worked on LAG support recently, would you mind
> taking a look at this one?
Hi Jakub,
I took a quick look at it, and what I found left me very puzzled. I hope
you do not mind me asking a generic question about the policy around
switchdev drivers. If someone published a driver using something similar
to the following configuration flow:
iproute2 daemon(SDK)
| ^ |
: : : user/kernel boundary
v | |
netlink | |
| | |
v | |
driver | |
| | |
'--------' |
: kernel/hardware boundary
v
ASIC
My guess is that they would be (rightly IMO) told something along the
lines of "we do not accept drivers that are just shims for proprietary
SDKs".
But it seems like if that same someone has enough area to spare in their
ASIC to embed a CPU, it is perfectly fine to run that same SDK on it,
call it "firmware", and then push a shim driver into the kernel tree.
iproute2
|
: user/kernel boundary
v
netlink
|
v
driver
|
|
: kernel/hardware boundary
'-------------.
v
daemon(SDK)
|
v
ASIC
What have we, the community, gained by this? In the old world, the
vendor usually at least had to ship me the SDK in source form. Having
seen the inside of some of those sausage factories, they are not the
kinds of code bases that I want at the bottom of my stack; even less so
in binary form where I am entirely at the vendor's mercy for bugfixes.
We are talking about a pure Ethernet fabric here, so there is no fig
leaf of "regulatory requirements" to hide behind, in contrast to WiFi
for example.
Is it the opinion of the netdev community that it is OK for vendors to
use this model?
On Mon, 08 Feb 2021 20:54:29 +0100 Tobias Waldekranz wrote:
> On Thu, Feb 04, 2021 at 21:16, Jakub Kicinski <[email protected]> wrote:
> > On Wed, 3 Feb 2021 18:54:56 +0200 Vadym Kochan wrote:
> >> From: Serhiy Boiko <[email protected]>
> >>
> >> The following features are supported:
> >>
> >> - LAG basic operations
> >> - create/delete LAG
> >> - add/remove a member to LAG
> >> - enable/disable member in LAG
> >> - LAG Bridge support
> >> - LAG VLAN support
> >> - LAG FDB support
> >>
> >> Limitations:
> >>
> >> - Only HASH lag tx type is supported
> >> - The Hash parameters are not configurable. They are applied
> >> during the LAG creation stage.
> >> - Enslaving a port to the LAG device that already has an
> >> upper device is not supported.
> >
> > Tobias, Vladimir, you worked on LAG support recently, would you mind
> > taking a look at this one?
>
> I took a quick look at it, and what I found left me very puzzled. I hope
> you do not mind me asking a generic question about the policy around
> switchdev drivers. If someone published a driver using something similar
> to the following configuration flow:
>
> iproute2 daemon(SDK)
> | ^ |
> : : : user/kernel boundary
> v | |
> netlink | |
> | | |
> v | |
> driver | |
> | | |
> '--------' |
> : kernel/hardware boundary
> v
> ASIC
>
> My guess is that they would be (rightly IMO) told something along the
> lines of "we do not accept drivers that are just shims for proprietary
> SDKs".
>
> But it seems like if that same someone has enough area to spare in their
> ASIC to embed a CPU, it is perfectly fine to run that same SDK on it,
> call it "firmware", and then push a shim driver into the kernel tree.
>
> iproute2
> |
> : user/kernel boundary
> v
> netlink
> |
> v
> driver
> |
> |
> : kernel/hardware boundary
> '-------------.
> v
> daemon(SDK)
> |
> v
> ASIC
>
> What have we, the community, gained by this? In the old world, the
> vendor usually at least had to ship me the SDK in source form. Having
> seen the inside of some of those sausage factories, they are not the
> kinds of code bases that I want at the bottom of my stack; even less so
> in binary form where I am entirely at the vendor's mercy for bugfixes.
>
> We are talking about a pure Ethernet fabric here, so there is no fig
> leaf of "regulatory requirements" to hide behind, in contrast to WiFi
> for example.
>
> Is it the opinion of the netdev community that it is OK for vendors to
> use this model?
I ask myself that question pretty much every day. Sadly I have no clear
answer.
Silicon is cheap, you can embed a reasonable ARM or Risc-V core in the
chip for the area and power draw comparable to one high speed serdes
lane.
The drivers landing in the kernel are increasingly meaningless. My day
job is working for a hyperscaler. Even though we have one of the most
capable kernel teams on the planet, most of the issues with HW we face
result in "something is wrong with the FW, let's call the vendor".
And even when I say "drivers landing" it is an overstatement.
If you look at high speed anything these days the drivers cover
multiple generations of hardware, seems like ~5 years ago most
NIC vendors reached sufficient FW saturation to cover up differences
between HW generations.
At the same time, some FW is necessary. Certain chip functions are
best driven by a micro-controller running a tight control loop.
The complexity of FW is a spectrum, from basic to Qualcomm.
The problem is there is no way for us to know what FW is hiding
by just looking at the driver.
Where do we draw the line?
Personally I'd really like to see us pushing back stronger.
> > I took a quick look at it, and what I found left me very puzzled. I hope
> > you do not mind me asking a generic question about the policy around
> > switchdev drivers. If someone published a driver using something similar
> > to the following configuration flow:
> >
> > iproute2 daemon(SDK)
> > | ^ |
> > : : : user/kernel boundary
> > v | |
> > netlink | |
> > | | |
> > v | |
> > driver | |
> > | | |
> > '--------' |
> > : kernel/hardware boundary
> > v
> > ASIC
> >
> > My guess is that they would be (rightly IMO) told something along the
> > lines of "we do not accept drivers that are just shims for proprietary
> > SDKs".
> >
> > But it seems like if that same someone has enough area to spare in their
> > ASIC to embed a CPU, it is perfectly fine to run that same SDK on it,
> > call it "firmware", and then push a shim driver into the kernel tree.
> >
> > iproute2
> > |
> > : user/kernel boundary
> > v
> > netlink
> > |
> > v
> > driver
> > |
> > |
> > : kernel/hardware boundary
> > '-------------.
> > v
> > daemon(SDK)
> > |
> > v
> > ASIC
> >
> > What have we, the community, gained by this? In the old world, the
> > vendor usually at least had to ship me the SDK in source form. Having
> > seen the inside of some of those sausage factories, they are not the
> > kinds of code bases that I want at the bottom of my stack; even less so
> > in binary form where I am entirely at the vendor's mercy for bugfixes.
> >
> > We are talking about a pure Ethernet fabric here, so there is no fig
> > leaf of "regulatory requirements" to hide behind, in contrast to WiFi
> > for example.
> >
> > Is it the opinion of the netdev community that it is OK for vendors to
> > use this model?
What I find interesting is the comparison between Microchip Sparx5 and
Marvell Prestera. They offer similar capabilities. Both have a CPU on
them. As you say Marvell is pushing their SDK into this CPU, black
box. Microchip decided to open everything, no firmware, the kernel
driver is directly accessing the hardware, the datasheet is available,
and microchip engineers are here on the list.
I really hope that Sparx5 takes off, and displaces Prestera. In terms
of being able to solve issues, we the community can work with
Sparx5. Prestera is too much a black box.
Andrew
On Mon, Feb 08, 2021 at 13:05, Jakub Kicinski <[email protected]> wrote:
> On Mon, 08 Feb 2021 20:54:29 +0100 Tobias Waldekranz wrote:
>> On Thu, Feb 04, 2021 at 21:16, Jakub Kicinski <[email protected]> wrote:
>> > On Wed, 3 Feb 2021 18:54:56 +0200 Vadym Kochan wrote:
>> >> From: Serhiy Boiko <[email protected]>
>> >>
>> >> The following features are supported:
>> >>
>> >> - LAG basic operations
>> >> - create/delete LAG
>> >> - add/remove a member to LAG
>> >> - enable/disable member in LAG
>> >> - LAG Bridge support
>> >> - LAG VLAN support
>> >> - LAG FDB support
>> >>
>> >> Limitations:
>> >>
>> >> - Only HASH lag tx type is supported
>> >> - The Hash parameters are not configurable. They are applied
>> >> during the LAG creation stage.
>> >> - Enslaving a port to the LAG device that already has an
>> >> upper device is not supported.
>> >
>> > Tobias, Vladimir, you worked on LAG support recently, would you mind
>> > taking a look at this one?
>>
>> I took a quick look at it, and what I found left me very puzzled. I hope
>> you do not mind me asking a generic question about the policy around
>> switchdev drivers. If someone published a driver using something similar
>> to the following configuration flow:
>>
>> iproute2 daemon(SDK)
>> | ^ |
>> : : : user/kernel boundary
>> v | |
>> netlink | |
>> | | |
>> v | |
>> driver | |
>> | | |
>> '--------' |
>> : kernel/hardware boundary
>> v
>> ASIC
>>
>> My guess is that they would be (rightly IMO) told something along the
>> lines of "we do not accept drivers that are just shims for proprietary
>> SDKs".
>>
>> But it seems like if that same someone has enough area to spare in their
>> ASIC to embed a CPU, it is perfectly fine to run that same SDK on it,
>> call it "firmware", and then push a shim driver into the kernel tree.
>>
>> iproute2
>> |
>> : user/kernel boundary
>> v
>> netlink
>> |
>> v
>> driver
>> |
>> |
>> : kernel/hardware boundary
>> '-------------.
>> v
>> daemon(SDK)
>> |
>> v
>> ASIC
>>
>> What have we, the community, gained by this? In the old world, the
>> vendor usually at least had to ship me the SDK in source form. Having
>> seen the inside of some of those sausage factories, they are not the
>> kinds of code bases that I want at the bottom of my stack; even less so
>> in binary form where I am entirely at the vendor's mercy for bugfixes.
>>
>> We are talking about a pure Ethernet fabric here, so there is no fig
>> leaf of "regulatory requirements" to hide behind, in contrast to WiFi
>> for example.
>>
>> Is it the opinion of the netdev community that it is OK for vendors to
>> use this model?
>
> I ask myself that question pretty much every day. Sadly I have no clear
> answer.
Thank you for your candid answer, really appreciate it. I do not envy
you one bit, making those decisions must be extremely hard.
> Silicon is cheap, you can embed a reasonable ARM or Risc-V core in the
> chip for the area and power draw comparable to one high speed serdes
> lane.
>
> The drivers landing in the kernel are increasingly meaningless. My day
> job is working for a hyperscaler. Even though we have one of the most
> capable kernel teams on the planet most of issues with HW we face
> result in "something is wrong with the FW, let's call the vendor".
Right, and being a hyperscaler probably at least gets you some attention
when you call your vendor. My day job is working for a nanoscaler, so my
experience is that we must be prepared to solve all issues in-house; if
we get any help from the vendor that is just a bonus.
> And even when I say "drivers landing" it is an overstatement.
> If you look at high speed anything these days the drivers cover
> multiple generations of hardware, seems like ~5 years ago most
> NIC vendors reached sufficient FW saturation to cover up differences
> between HW generations.
>
> At the same time some FW is necessary. Certain chip functions, are
> best driven by a micro-controller running a tight control loop.
I agree. But I still do not understand why vendors cling to the source
of these like it was their wallet. That is the beauty of selling
silicon; you can fully leverage OSS and still have a very straight
forward business model.
> The complexity of FW is a spectrum, from basic to Qualcomm.
> The problem is there is no way for us to know what FW is hiding
> by just looking at the driver.
>
> Where do we draw the line?
Yeah it is a very hard problem. In this particular case though, the
vendor explicitly said that what they have done is compiled their
existing SDK to run on the ASIC:
https://lore.kernel.org/netdev/BN6PR18MB1587EB225C6B80BF35A44EBFBA5A0@BN6PR18MB1587.namprd18.prod.outlook.com
So there is no reason that it could not be done as a proper driver.
> Personally I'd really like to see us pushing back stronger.
Hear, hear!
On Mon, Feb 08, 2021 at 23:30, Andrew Lunn <[email protected]> wrote:
>> > I took a quick look at it, and what I found left me very puzzled. I hope
>> > you do not mind me asking a generic question about the policy around
>> > switchdev drivers. If someone published a driver using something similar
>> > to the following configuration flow:
>> >
>> > iproute2 daemon(SDK)
>> > | ^ |
>> > : : : user/kernel boundary
>> > v | |
>> > netlink | |
>> > | | |
>> > v | |
>> > driver | |
>> > | | |
>> > '--------' |
>> > : kernel/hardware boundary
>> > v
>> > ASIC
>> >
>> > My guess is that they would be (rightly IMO) told something along the
>> > lines of "we do not accept drivers that are just shims for proprietary
>> > SDKs".
>> >
>> > But it seems like if that same someone has enough area to spare in their
>> > ASIC to embed a CPU, it is perfectly fine to run that same SDK on it,
>> > call it "firmware", and then push a shim driver into the kernel tree.
>> >
>> > iproute2
>> > |
>> > : user/kernel boundary
>> > v
>> > netlink
>> > |
>> > v
>> > driver
>> > |
>> > |
>> > : kernel/hardware boundary
>> > '-------------.
>> > v
>> > daemon(SDK)
>> > |
>> > v
>> > ASIC
>> >
>> > What have we, the community, gained by this? In the old world, the
>> > vendor usually at least had to ship me the SDK in source form. Having
>> > seen the inside of some of those sausage factories, they are not the
>> > kinds of code bases that I want at the bottom of my stack; even less so
>> > in binary form where I am entirely at the vendor's mercy for bugfixes.
>> >
>> > We are talking about a pure Ethernet fabric here, so there is no fig
>> > leaf of "regulatory requirements" to hide behind, in contrast to WiFi
>> > for example.
>> >
>> > Is it the opinion of the netdev community that it is OK for vendors to
>> > use this model?
>
> What i find interesting is the comparison between Microchip Sparx5 and
> Marvell Prestera. They offer similar capabilities. Both have a CPU on
> them. As you say Marvell is pushing their SDK into this CPU, black
> box. Microchip decided to open everything, no firmware, the kernel
> driver is directly accessing the hardware, the datasheet is available,
> and microchip engineers are here on the list.
Indeed, it is a very stark difference in approach. Perhaps a silly
example, but it speaks to their developer focus, just the fact that they
have an online register reference on GitHub[1] amazed me. What a breath
of fresh air! ...and speaks to the general state of things, I guess :)
Unsurprisingly the team behind it are also really great to work with!
> I really hope that Sparx5 takes off, and displaces Prestera. In terms
We are certainly keeping our eyes on it!
> of being able to solve issues, we the community can work with
> Sparx5. Prestera is too much a black box.
I would only add that I still, perhaps naively, hope Marvell will
eventually see the benefits of having a truly open driver.
> Andrew
[1]: https://microchip-ung.github.io/sparx-5_reginfo/reginfo_sparx-5.html
> At the same time some FW is necessary. Certain chip functions, are
> best driven by a micro-controller running a tight control loop.
For a smart NIC, I could agree. But a switch? The data path is in
hardware. The driver is all about configuring this hardware, and then
it is idle. Polls the PHYs once a second, maybe gather statistics,
allows the network stack to perform STP, but otherwise it does
nothing.
So for me, I don't see that being a valid argument for this driver.
By putting their SDK inside the CPU on the switch, and adding an RPC
interface, Marvell can quickly get some sort of support working in the
Linux ecosystem. But this solution has all the problems of a binary
blob in userspace.
I doubt there is going to be any community engagement with this
driver. Marvell is going to have to add all the features. If a user
wants a feature which is not currently supported, they have little
chance of being able to add it themselves. There is no documentation
of the RPC interface. So even if the firmware has support for more
than what the Linux driver implements, only Marvell knows about it.
Products based around this driver are going to find it hard to
differentiate on switch features. The switch can do what Marvell
allows you to do. All differentiation is going to be limited to above
that, the user interface.
For some market segments, that might be enough. You don't see
community based patches adding new features to the Mellanox/NVIDIA
hardware. But when you look at the DSA drivers, a lot of the features
there are from the community. There is probably space for both.
Looking into my crystal ball, Marvell will probably have the base
features of their switch implemented before Microchip does, simply
because they are reusing code hidden away in the CPU. But then
development will stagnate. Microchip will take a bit longer to get the
base features implemented. But then because of the openness, users
will start using the hardware in different ways, and implement
features which are important to them. And contribute bug fixes. The
driver will keep gaining new features and mature, and in the end, the
device built from it will be a lot more diverse and interesting.
What I'm not sure about is how we as a community push back. Marvell's whole
strategy is black box. I doubt we can make them open up the firmware.
Do we want to throw out the driver from the kernel? I don't think it
is that bad. We can point out the problems with Marvell's model. We
can put in review effort for Microchip, make their driver better. And
we can encourage the 3rd and 4th vendors in the enterprise switch
space to follow Microchip's lead.
Andrew
On Tue, 9 Feb 2021 14:58:26 +0100 Andrew Lunn wrote:
> > At the same time some FW is necessary. Certain chip functions, are
> > best driven by a micro-controller running a tight control loop.
>
> For a smart NIC, i could agree. But a switch? The data path is in
> hardware. The driver is all about configuring this hardware, and then
> it is idle. Polls the PHYs once a second, maybe gather statistics,
> allows the network stack to perform STP, but otherwise it does
> nothing.
>
> So for me, i don't see that being a valid argument for this driver.
>
> By putting their SDK inside the CPU on the switch, and adding an RPC
> interface, Marvell can quickly get some sort of support working in the
> Linux ecosystem. But this solution has all the problems of a binary
> blob in userspace.
>
> I doubt there is going to be any community engagement with this
> driver. Marvell is going to have to add all the features. If a user
> wants a feature which is not currently supported, they have little
> chance of being able to add it themselves. There is no documentation
> of the RPC interface. So even if the firmware has support for more
> than what the Linux driver implements, only Marvell knows about it.
>
> Products based around this driver are going to find it hard to
> differentiate on switch features. The switch can do what Marvell
> allows you to do. All differentiation is going to be limited to above
> that, the user interface.
>
> For some market segments, that might be enough. You don't see
> community based patches adding new features to the Mellanex/nvidia
> hardware. But when you look at the DSA drivers, a lot of the features
> there are from the community. There is probably space for both.
>
> Looking into my crystal ball, Marvell will probably have the base
> features of their switch implemented before Microchip does, simply
> because they are reusing code hidden away in the CPU. But then
> development will stagnate. Microchip will take a bit longer to get the
> base features implemented. But then because of the openness, users
> will start using the hardware in different ways, and implement
> features which are important to them. And contribute bug fixes. The
> driver will keep gaining new features and mature, and in the end, the
> device built from it will be a lot more divers and interesting.
>
> What i'm not sure is how we as a community push back. Marvells whole
> strategy is black box. I doubt we can make them open up the firmware.
> Do we want to throw out the driver from the kernel? I don't think it
> is that bad. We can point out the problems with Marvell's model. We
> can put in review effort for Microchip, make their driver better. And
> we can encourage the 3rd and 4th vendors in the enterprise switch
> space to follow Microchips lead.
Sounds like we have 3 people who don't like FW-heavy designs dominating
the kernel - this conversation can only go one way.
Marvell, Plvision, anything to share? AFAIU the values of the Linux kernel
are open source, healthy community, empowering users. With the SDK on
the embedded CPU your driver does not seem to tick any of these boxes.
On Tue, 09 Feb 2021 12:56:55 +0100 Tobias Waldekranz wrote:
> > I ask myself that question pretty much every day. Sadly I have no clear
> > answer.
>
> Thank you for your candid answer, really appreciate it. I do not envy
> you one bit, making those decisions must be extremely hard.
>
> > Silicon is cheap, you can embed a reasonable ARM or Risc-V core in the
> > chip for the area and power draw comparable to one high speed serdes
> > lane.
> >
> > The drivers landing in the kernel are increasingly meaningless. My day
> > job is working for a hyperscaler. Even though we have one of the most
> > capable kernel teams on the planet most of issues with HW we face
> > result in "something is wrong with the FW, let's call the vendor".
>
> Right, and being a hyperscaler probably at least gets you some attention
> when you call your vendor. My day job is working for a nanoscaler, so my
> experience is that we must be prepared to solve all issues in-house; if
> we get any help from the vendor that is just a bonus.
>
> > And even when I say "drivers landing" it is an overstatement.
> > If you look at high speed anything these days the drivers cover
> > multiple generations of hardware, seems like ~5 years ago most
> > NIC vendors reached sufficient FW saturation to cover up differences
> > between HW generations.
> >
> > At the same time some FW is necessary. Certain chip functions, are
> > best driven by a micro-controller running a tight control loop.
>
> I agree. But I still do not understand why vendors cling to the source
> of these like it was their wallet. That is the beauty of selling
> silicon; you can fully leverage OSS and still have a very straight
> forward business model.
Vendors want to be able to "add value", lock users in and sell support.
Users adding features themselves hurts their bottom line. Take a look
at income breakdown for publicly traded companies. There were also
rumors recently about a certain huge silicon vendor revoking the SDK
license from a NOS company after that company got bought.
Business people make rational choices, trust me. It's on us to make
rational choices in the interest of the community (incl. our users).
> > The complexity of FW is a spectrum, from basic to Qualcomm.
> > The problem is there is no way for us to know what FW is hiding
> > by just looking at the driver.
> >
> > Where do we draw the line?
>
> Yeah it is a very hard problem. In this particular case though, the
> vendor explicitly said that what they have done is compiled their
> existing SDK to run on the ASIC:
>
> https://lore.kernel.org/netdev/BN6PR18MB1587EB225C6B80BF35A44EBFBA5A0@BN6PR18MB1587.namprd18.prod.outlook.com
>
> So there is no reason that it could not be done as a proper driver.
I guess you meant "no _technical_ reason" ;)
> > Personally I'd really like to see us pushing back stronger.
>
> Hear, hear!
Hi Andrew, Jakub, Tobias,
On Tuesday, February 9, 2021 7:35 PM Jakub Kicinski wrote:
> Sounds like we have 3 people who don't like FW-heavy designs dominating the kernel - this conversation can only go one way.
> Marvell, Plvision anything to share? AFAIU the values of Linux kernel are open source, healthy community, empowering users. With the SDK on the embedded CPU your driver does not seem to tick any of these boxes.
I'll try to share Marvell's insight and plans regarding our Prestera drivers;
We do understand the importance and the vision behind the open-source community - while being committed to quality, functionality and the developers/end-users.
We started working on the Prestera driver in Q2 2019. It took us more than a year to get the first approved driver into 5.10, and we have only just started.
Right at the beginning, we implemented PP functions such as the SDMA operation (the RX/TX DMA engine) in the kernel driver.
Yet the FW itself is an SW package that supports many Marvell Prestera switching device families; it is a significant SW package that will take many years of work to adapt to the kernel environment.
We do plan to port more and more PP functions as Kernel drivers along the way.
We are also working with the community to extend kernel functionality with new features beneficial to all kernel users (e.g. Devlink changes), and we will continue to do so.
By extending the Prestera driver towards an in-kernel implementation with more PP features, we will simplify the FW logic and enable cost-effective solutions for the market/developers.
Regards,
Mickey.
On Tue, Feb 09, 2021 at 20:31, Mickey Rachamim <[email protected]> wrote:
> Hi Andrew, Jakub, Tobias,
>
> On Tuesday, February 9, 2021 7:35 PM Jakub Kicinski wrote:
>> Sounds like we have 3 people who don't like FW-heavy designs dominating the kernel - this conversation can only go one way.
>> Marvell, Plvision anything to share? AFAIU the values of Linux kernel are open source, healthy community, empowering users. With the SDK on the embedded CPU your driver does not seem to tick any of these boxes.
>
> I'll try to share Marvell's insight and plans regarding our Prestera drivers;
>
> We do understand the importance and the vision behind the open-source community - while being committed to quality, functionality and the developers/end-users.
>
> We started working on the Prestera driver in Q2 2019. it took us more than a year to get the first approved driver into 5.10, and we just started.
> Right at the beginning - we implemented PP function into the Kernel driver like the SDMA operation (This is the RX/TX DMA engine).
> Yet, the FW itself - is an SW package that supports many Marvell Prestera Switching families of devices - this is a significant SW package that will take many working years to adapt to the Kernel environment.
> We do plan to port more and more PP functions as Kernel drivers along the way.
This is very encouraging to hear. I understand that it is a massive
undertaking.
> We also are working with the community to extend Kernel functionality with a new feature beneficial to all Kernel users (e.g. Devlink changes) and we will continue to do it.
> By extending the Prestera driver to in-kernel implementation with more PP features - we will simplify the FW logic and enables cost-effective solutions to the market/developers.
Until that day arrives, are there any chances of Marvell opening up CPSS
in the same way DSDT was re-licensed some years back?
Being able to clone github.com/Marvell-switching/prestera-firmware (or
whatever) and build the firmware from source would go a long way to
alleviate my fears at least.
In such a world, I at least have a chance of debugging any issue all the
way to the bottom of the stack. It would also make it possible for the
community to help out with the porting effort.
> Regards,
> Mickey.
> Right at the beginning - we implemented PP function into the Kernel
> driver like the SDMA operation (This is the RX/TX DMA engine).
> We do plan to port more and more PP functions as Kernel drivers
> along the way.
It will be interesting to see how well you manage to handle the 'split
brain' problem.
DMA packets to/from the host is pretty isolated from the rest of the
driver. Look at DSA, it has completely separate drivers. But when you
start having parts of the control plane in the driver poking switch
registers, and parts of the control plane in the SDK poking registers,
you have an interesting synchronisation problem.
I guess stats would be a good place to start. Throw away the current
code making an RPC into the SDK, and just directly get the values from
the registers. No real synchronisation problems there. In fact, most
of the ethtool get API calls should be reasonably easy to do via
direct hardware access, rather than using the SDK RPC. Getting values
like that should be easy to synchronise.
Andrew
> Until that day arrives, are there any chances of Marvell opening up CPSS in the same way DSDT was re-licensed some years back?
The CPSS code is available to everyone on Marvell Extranet (Requires simple registration process)
Anyway, as the transition process will progress - it will be less required.
> Being able to clone github.com/Marvell-switching/prestera-firmware (or
> whatever) and build the firmware from source would go a long way to alleviate my fears at least.
I understand your concerns but at this stage - we also concerned about others that might build not reliable FW images.
I also agree that at some point we should ensure most of the concerns are being addressed.
Mickey.
> It will be interesting to see how well you manage to handle the 'split brain' problem.
Right - this is the challenge per each feature to ensure no "register" corruption.
The PP itself provides us the right facilities and, driver-wise, we are refactoring the driver almost from scratch.
> I guess stats would be a good place to start...
Right, mostly the MAC MIB counters.
Mickey
On Tue, 9 Feb 2021 20:31:32 +0000 Mickey Rachamim wrote:
> On Tuesday, February 9, 2021 7:35 PM Jakub Kicinski wrote:
> > Sounds like we have 3 people who don't like FW-heavy designs dominating the kernel - this conversation can only go one way.
> > Marvell, Plvision anything to share? AFAIU the values of Linux kernel are open source, healthy community, empowering users. With the SDK on the embedded CPU your driver does not seem to tick any of these boxes.
>
> I'll try to share Marvell's insight and plans regarding our Prestera drivers;
>
> We do understand the importance and the vision behind the open-source
> community - while being committed to quality, functionality and the
> developers/end-users.
>
> We started working on the Prestera driver in Q2 2019. it took us more
> than a year to get the first approved driver into 5.10, and we just
> started. Right at the beginning - we implemented PP function into the
> Kernel driver like the SDMA operation (This is the RX/TX DMA engine).
> Yet, the FW itself - is an SW package that supports many Marvell
> Prestera Switching families of devices - this is a significant SW
> package that will take many working years to adapt to the Kernel
> environment. We do plan to port more and more PP functions as Kernel
> drivers along the way.
Okay, so it sounds like there is no technical reason for you to keep
the SDK. My guess is also that you have a large customer who is
expecting you to provide upstream integration, hence the contractors
and taking the easiest way out.
> We also are working with the community to extend Kernel functionality
> with a new feature beneficial to all Kernel users (e.g. Devlink
> changes) and we will continue to do it.
Ah, devlink, every vendor's favorite interface. I keep my fingers
crossed that you're not just talking about exposing a bunch of
implementation-specific params, traps etc.
> By extending the Prestera driver to in-kernel implementation with
> more PP features - we will simplify the FW logic and enables
> cost-effective solutions to the market/developers.
On 09.02.2021 19:35, Jakub Kicinski wrote:
>
> Sounds like we have 3 people who don't like FW-heavy designs dominating
> the kernel - this conversation can only go one way.
>
> Marvell, Plvision anything to share? AFAIU the values of Linux kernel
> are open source, healthy community, empowering users. With the SDK on
> the embedded CPU your driver does not seem to tick any of these boxes.
>
I agree that FW-less solution has many advantages that enable the community
to engage in its development actively. We have ongoing discussions with
Marvell and as Mickey stated, more PP modules will be managed from in-kernel
code and not from FW allowing kernel developers to extend/improve it.
On Wed, Feb 10, 2021 at 10:41, Mickey Rachamim <[email protected]> wrote:
>> Until that day arrives, are there any chances of Marvell opening up CPSS in the same way DSDT was re-licensed some years back?
> The CPSS code is available to everyone on Marvell Extranet (Requires simple registration process)
Right, but "available" is not the same as "open" unfortunately. Being
able to study the source is better than nothing, but it is a far cry
from having the ability to modify it and, most importantly, publish
those changes.
So, to restate my question more precisely: Can we expect that Marvell
will provide CPSS under a license that is compatible with the Linux
kernel?
If that is not possible, will Marvell at least commit to allow the
publishing of drivers developed from functional specifications and other
chip documentation?
> Anyway, as the transition process will progress - it will be less required.
Yes, but it makes it hard for smaller players to get on the ride early.
>> Being able to clone github.com/Marvell-switching/prestera-firmware (or
>> whatever) and build the firmware from source would go a long way to alleviate my fears at least.
> I understand your concerns but at this stage - we also concerned about others that might build not reliable FW images.
Totally fair. That problem should be solvable by some kind of taint
concept though. Presumably you have this problem already with the
existing SDK model? Customers can build things on top of CPSS that are
broken in a million ways.
> I also agree that at some point we should ensure most of the concerns are being addressed.
>
> Mickey.