From: Vadim Lomovtsev <[email protected]>
By default CN88XX BGX accepts all incoming multicast and broadcast
packets and filtering is disabled. The nic driver doesn't provide
an ability to change such behaviour.
This series is to implement DMAC filtering management for CN88XX
nic driver allowing user to enable/disable filtering and configure
specific MAC addresses to filter traffic.
Vadim Lomovtsev (7):
net: thunderx: move filter register related macro into proper place
net: thunderx: add MAC address filter tracking for LMAC
net: thunderx: add multicast filter management support
net: thunderx: add new messages for handle ndo_set_rx_mode callback
net: thunderx: add XCAST messages handlers for PF
net: thunderx: add workqueue control structures for handle
ndo_set_rx_mode request
net: thunderx: add ndo_set_rx_mode callback implementation for VF
drivers/net/ethernet/cavium/thunder/nic.h | 29 ++++
drivers/net/ethernet/cavium/thunder/nic_main.c | 45 ++++-
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 108 +++++++++++-
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 196 ++++++++++++++++++++--
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 17 +-
5 files changed, 366 insertions(+), 29 deletions(-)
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The kernel calls ndo_set_rx_mode() callback supplying it will all necessary
info, such as device state flags, multicast mac addresses list and so on.
Since we have only 128 bits to communicate with PF we need to initiate
several requests to PF with small/short operation each based on input data.
So this commit implements following PF messages codes along with new
data structures for them:
NIC_MBOX_MSG_RESET_XCAST to flush all filters configured for this
particular network interface (VF)
NIC_MBOX_MSG_ADD_MCAST to add new MAC address to DMAC filter registers
for this particular network interface (VF)
NIC_MBOX_MSG_SET_XCAST to apply filtering configuration to filter control
register
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 4cacce5d2b16..069289b4f968 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -403,6 +403,9 @@ struct nicvf {
#define NIC_MBOX_MSG_PTP_CFG 0x19 /* HW packet timestamp */
#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */
+#define NIC_MBOX_MSG_RESET_XCAST 0xF2 /* Reset DCAM filtering mode */
+#define NIC_MBOX_MSG_ADD_MCAST 0xF3 /* Add MAC to DCAM filters */
+#define NIC_MBOX_MSG_SET_XCAST 0xF4 /* Set MCAST/BCAST RX mode */
struct nic_cfg_msg {
u8 msg;
@@ -556,6 +559,14 @@ struct set_ptp {
bool enable;
};
+struct xcast {
+ u8 msg;
+ union {
+ u8 mode;
+ u64 mac;
+ } data;
+};
+
/* 128 bit shared memory between PF and each VF */
union nic_mbx {
struct { u8 msg; } msg;
@@ -576,6 +587,7 @@ union nic_mbx {
struct reset_stat_cfg reset_stat;
struct pfc pfc;
struct set_ptp ptp;
+ struct xcast xcast;
};
#define NIC_NODE_ID_MASK 0x03
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
This commit is to add message handling for ndo_set_rx_mode()
callback at PF side.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic_main.c | 45 +++++++++++++++++++++++---
1 file changed, 41 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 7ff66a8194e2..55af04fa03a7 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -21,6 +21,8 @@
#define DRV_NAME "nicpf"
#define DRV_VERSION "1.0"
+#define NIC_VF_PER_MBX_REG 64
+
struct hw_info {
u8 bgx_cnt;
u8 chans_per_lmac;
@@ -1072,6 +1074,40 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_PTP_CFG:
nic_config_timestamp(nic, vf, &mbx.ptp);
break;
+ case NIC_MBOX_MSG_RESET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_reset_xcast_mode(nic->node, bgx, lmac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_ADD_MCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
+ mbx.xcast.data.mac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_SET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+ break;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1094,7 +1130,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
struct nicpf *nic = (struct nicpf *)nic_irq;
int mbx;
u64 intr;
- u8 vf, vf_per_mbx_reg = 64;
+ u8 vf;
if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
mbx = 0;
@@ -1103,12 +1139,13 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
- for (vf = 0; vf < vf_per_mbx_reg; vf++) {
+ for (vf = 0; vf < NIC_VF_PER_MBX_REG; vf++) {
if (intr & (1ULL << vf)) {
dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
- vf + (mbx * vf_per_mbx_reg));
+ vf + (mbx * NIC_VF_PER_MBX_REG));
- nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
+ nic_handle_mbx_intr(nic, vf +
+ (mbx * NIC_VF_PER_MBX_REG));
nic_clear_mbx_intr(nic, vf, mbx);
}
}
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC has set of registers which allows to configure
filter policy for ingress packets. There are three possible regimes
of filtering multicasts, broadcasts and unicasts: accept all, reject all
and accept filter allowed only.
Current implementation has enum with all of them and two generic macro
for enabling filtering et all (CAM_ACCEPT) and enabling/disabling
broadcast packets, which also should be corrected in order to represent
register bits properly. All these values are private for driver and
there is no need to ‘publish’ them via header file.
This commit is to move filtering register manipulation values from
header file into source with explicit assignment of exact register
values to them to be used while register configuring.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 13 +++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 11 -----------
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 91d34ea40e2c..0dd211605eb1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -24,6 +24,19 @@
#define DRV_NAME "thunder_bgx"
#define DRV_VERSION "1.0"
+/* RX_DMAC_CTL configuration */
+enum MCAST_MODE {
+ MCAST_MODE_REJECT = 0x0,
+ MCAST_MODE_ACCEPT = 0x1,
+ MCAST_MODE_CAM_FILTER = 0x2,
+ RSVD = 0x3
+};
+
+#define BCAST_ACCEPT BIT(0)
+#define CAM_ACCEPT BIT(3)
+#define MCAST_MODE_MASK 0x3
+#define BGX_MCAST_MODE(x) (x << 1)
+
struct lmac {
struct bgx *bgx;
int dmac;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 5a7567d31138..52439da62c97 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -205,17 +205,6 @@
#define LMAC_INTR_LINK_UP BIT(0)
#define LMAC_INTR_LINK_DOWN BIT(1)
-/* RX_DMAC_CTL configuration*/
-enum MCAST_MODE {
- MCAST_MODE_REJECT,
- MCAST_MODE_ACCEPT,
- MCAST_MODE_CAM_FILTER,
- RSVD
-};
-
-#define BCAST_ACCEPT 1
-#define CAM_ACCEPT 1
-
void octeon_mdiobus_force_mod_depencency(void);
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The kernel calls ndo_set_rx_mode() callback from atomic context which
causes messaging timeouts between VF and PF (as they’re implemented
via MSIx). So in order to handle ndo_set_rx_mode() we need to get rid of it.
This commit implements necessary workqueue related structures to let VF
queue kernel request processing in non-atomic context later.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 069289b4f968..5fc46c5a4f36 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -265,6 +265,22 @@ struct nicvf_drv_stats {
struct cavium_ptp;
+struct xcast_addr {
+ struct list_head list;
+ u64 addr;
+};
+
+struct xcast_addr_list {
+ struct list_head list;
+ int count;
+};
+
+struct nicvf_work {
+ struct delayed_work work;
+ u8 mode;
+ struct xcast_addr_list *mc;
+};
+
struct nicvf {
struct nicvf *pnicvf;
struct net_device *netdev;
@@ -313,6 +329,7 @@ struct nicvf {
struct nicvf_pfc pfc;
struct tasklet_struct qs_err_task;
struct work_struct reset_task;
+ struct nicvf_work rx_mode_work;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ndo_set_rx_mode() is called from atomic context which causes
messages response timeouts while VF to PF communication via MSIx.
To get rid of that we're copy passed mc list, parse flags and queue
handling of kernel request to ordered workqueue.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 108 ++++++++++++++++++++++-
1 file changed, 107 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7d9c5ffbd041..0d4bec927623 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -21,6 +21,7 @@
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
#include "nic_reg.h"
#include "nic.h"
@@ -67,6 +68,9 @@ module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
+/* workqueue for handling kernel ndo_set_rx_mode() calls */
+static struct workqueue_struct *nicvf_rx_mode_wq;
+
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -1919,6 +1923,98 @@ static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
}
}
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+ struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+ work.work);
+ struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+ union nic_mbx mbx = {};
+ struct xcast_addr *xaddr, *next;
+
+ if (!vf_work)
+ return;
+
+ /* From the inside of VM code flow we have only 128 bits memory
+ * available to send message to host's PF, so send all mc addrs
+ * one by one, starting from flush command in case if kernel
+ * requests to configure specific MAC filtering
+ */
+
+ /* flush DMAC filters and reset RX mode */
+ mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ if (vf_work->mode & BGX_XCAST_MCAST_FILTER) {
+ /* once enabling filtering, we need to signal to PF to add
+ * its' own LMAC to the filter to accept packets for it.
+ */
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = 0;
+ nicvf_send_msg_to_pf(nic, &mbx);
+ }
+
+ /* check if we have any specific MACs to be added to PF DMAC filter */
+ if (vf_work->mc) {
+ /* now go through kernel list of MACs and add them one by one */
+ list_for_each_entry_safe(xaddr, next, &vf_work->mc->list, list) {
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = xaddr->addr;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ /* after receiving ACK from PF release memory */
+ list_del(&xaddr->list);
+ kfree(xaddr);
+ vf_work->mc->count--;
+ }
+ kfree(vf_work->mc);
+ }
+
+ /* and finally set rx mode for PF accordingly */
+ mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
+ mbx.xcast.data.mode = vf_work->mode;
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_set_rx_mode(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct xcast_addr_list *mc_list = NULL;
+ u8 mode = 0;
+
+ if (netdev->flags & IFF_PROMISC) {
+ mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
+ } else {
+ if (netdev->flags & IFF_BROADCAST)
+ mode |= BGX_XCAST_BCAST_ACCEPT;
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ mode |= BGX_XCAST_MCAST_ACCEPT;
+ } else if (netdev->flags & IFF_MULTICAST) {
+ mode |= BGX_XCAST_MCAST_FILTER;
+ /* here we need to copy mc addrs */
+ if (netdev_mc_count(netdev)) {
+ struct xcast_addr *xaddr;
+
+ mc_list = kmalloc(sizeof(*mc_list), GFP_ATOMIC);
+ INIT_LIST_HEAD(&mc_list->list);
+ netdev_hw_addr_list_for_each(ha, &netdev->mc) {
+ xaddr = kmalloc(sizeof(*xaddr),
+ GFP_ATOMIC);
+ xaddr->addr = ether_addr_to_u64(ha->addr);
+ list_add_tail(&xaddr->list,
+ &mc_list->list);
+ mc_list->count++;
+ }
+ }
+ }
+ }
+ nic->rx_mode_work.mc = mc_list;
+ nic->rx_mode_work.mode = mode;
+ queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 2 * HZ);
+}
+
static const struct net_device_ops nicvf_netdev_ops = {
.ndo_open = nicvf_open,
.ndo_stop = nicvf_stop,
@@ -1931,6 +2027,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
.ndo_do_ioctl = nicvf_ioctl,
+ .ndo_set_rx_mode = nicvf_set_rx_mode,
};
static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -2071,6 +2168,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&nic->reset_task, nicvf_reset_task);
+ INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
@@ -2109,6 +2208,8 @@ static void nicvf_remove(struct pci_dev *pdev)
nic = netdev_priv(netdev);
pnetdev = nic->pnicvf->netdev;
+ cancel_delayed_work_sync(&nic->rx_mode_work.work);
+
/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
*/
@@ -2140,12 +2241,17 @@ static struct pci_driver nicvf_driver = {
static int __init nicvf_init_module(void)
{
pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-
+ nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
+ WQ_MEM_RECLAIM);
return pci_register_driver(&nicvf_driver);
}
static void __exit nicvf_cleanup_module(void)
{
+ if (nicvf_rx_mode_wq) {
+ destroy_workqueue(nicvf_rx_mode_wq);
+ nicvf_rx_mode_wq = NULL;
+ }
pci_unregister_driver(&nicvf_driver);
}
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC has two Ethernet Interfaces (BGX) each of them could has
up to four Logical MACs configured. Each of BGX has 32 filters to be
configured for filtering ingress packets. The number of filters available
to particular LMAC is from 8 (if we have four LMACs configured per BGX)
up to 32 (in case of only one LMAC is configured per BGX).
At the same time the NIC could present up to 128 VFs to OS as network
interfaces, each of them kernel will configure with set of MAC addresses
for filtering. So to prevent dupes in BGX filter registers from different
network interfaces it is required to cache and track all filter configuration
requests prior to applying them onto BGX filter registers.
This commit is to update LMAC structures with control fields to
allocate/releasing filters tracking list along with implementing
dmac array allocate/release per LMAC.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 42 +++++++++++++++--------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 0dd211605eb1..cf0cc19c03c5 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -37,9 +37,18 @@ enum MCAST_MODE {
#define MCAST_MODE_MASK 0x3
#define BGX_MCAST_MODE(x) (x << 1)
+struct dmac_map {
+ u64 vf_map;
+ u64 dmac;
+};
+
struct lmac {
struct bgx *bgx;
- int dmac;
+ /* actual number of DMACs configured */
+ u8 dmacs_cfg;
+ /* overal number of possible DMACs could be configured per LMAC */
+ u8 dmacs_count;
+ struct dmac_map *dmacs; /* DMAC:VFs tracking filter array */
u8 mac[ETH_ALEN];
u8 lmac_type;
u8 lane_to_sds;
@@ -236,6 +245,17 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
}
EXPORT_SYMBOL(bgx_set_lmac_mac);
+static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
+{
+ struct lmac *lmac = NULL;
+ u8 idx = 0;
+
+ lmac = &bgx->lmac[lmacid];
+ /* reset CAM filters */
+ for (idx = 0; idx < lmac->dmacs_count; idx++)
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), 0);
+}
+
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
struct bgx *bgx = get_bgx(node, bgx_idx);
@@ -481,18 +501,6 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
}
EXPORT_SYMBOL(bgx_get_tx_stats);
-static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac)
-{
- u64 offset;
-
- while (bgx->lmac[lmac].dmac > 0) {
- offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) +
- (lmac * MAX_DMAC_PER_LMAC * sizeof(u64));
- bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0);
- bgx->lmac[lmac].dmac--;
- }
-}
-
/* Configure BGX LMAC in internal loopback mode */
void bgx_lmac_internal_loopback(int node, int bgx_idx,
int lmac_idx, bool enable)
@@ -925,6 +933,11 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4);
}
+ /* actual number of filters available to exact LMAC */
+ lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count);
+ lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs),
+ GFP_KERNEL);
+
/* Enable lmac */
bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
@@ -1011,7 +1024,8 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
cfg &= ~CMR_EN;
bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
- bgx_flush_dmac_addrs(bgx, lmacid);
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ kfree(lmac->dmacs);
if ((lmac->lmac_type != BGX_MODE_XFI) &&
(lmac->lmac_type != BGX_MODE_XLAUI) &&
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC could be partitioned to up to 128 VFs and thus
represented to system. Each VF is mapped to pair BGX:LMAC, and each of VF
is configured by kernel individually. Eventually the bunch of VFs could be
mapped onto same pair BGX:LMAC and thus could cause several multicast
filtering configuration requests to LMAC with the same MAC addresses.
This commit is to add ThunderX NIC BGX filtering manipulation routines.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 141 ++++++++++++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 8 ++
2 files changed, 149 insertions(+)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index cf0cc19c03c5..52fef3dab0a3 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -256,6 +256,147 @@ static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), 0);
}
+static void bgx_lmac_remove_filters(struct lmac *lmac, u8 vf_id)
+{
+ int i = 0;
+
+ if (!lmac)
+ return;
+
+ /* We've got reset filters request from some of attached VF, while the
+ * others might want to keep their configuration. So in this case lets
+ * iterate over all of configured filters and decrease number of
+ * referencies. if some addresses get zero refs remove them from list
+ */
+ for (i = lmac->dmacs_cfg - 1; i >= 0; i--) {
+ lmac->dmacs[i].vf_map &= ~BIT_ULL(vf_id);
+ if (!lmac->dmacs[i].vf_map) {
+ lmac->dmacs_cfg--;
+ lmac->dmacs[i].dmac = 0;
+ lmac->dmacs[i].vf_map = 0;
+ }
+ }
+}
+
+static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
+{
+ u8 i = 0;
+
+ if (!lmac)
+ return;
+
+ /* At the same time we could have several VFs 'attached' to some
+ * particular LMAC, and each VF is represented as network interface
+ * for kernel. So from user perspective it should be possible to
+ * manipulate with its' (VF) receive modes. However from PF
+ * driver perspective we need to keep track of filter configurations
+ * for different VFs to prevent filter values dupes
+ */
+ for (i = 0; i < lmac->dmacs_cfg; i++) {
+ if (lmac->dmacs[i].dmac == dmac) {
+ lmac->dmacs[i].vf_map |= BIT_ULL(vf_id);
+ return -1;
+ }
+ }
+
+ if (!(lmac->dmacs_cfg < lmac->dmacs_count))
+ return -1;
+
+ /* keep it for further tracking */
+ lmac->dmacs[lmac->dmacs_cfg].dmac = dmac;
+ lmac->dmacs[lmac->dmacs_cfg].vf_map = BIT_ULL(vf_id);
+ lmac->dmacs_cfg++;
+ return 0;
+}
+
+static int bgx_set_dmac_cam_filter_mac(struct bgx *bgx, int lmacid, u64 cam_dmac, u8 idx)
+{
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+
+ /* skip zero addresses as meaningless */
+ if (!cam_dmac || !bgx)
+ return -1;
+
+ lmac = &bgx->lmac[lmacid];
+
+ /* configure DCAM filtering for designated LMAC */
+ cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
+ RX_DMACX_CAM_EN | cam_dmac;
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), cfg);
+ return 0;
+}
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 cam_dmac, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ if (!cam_dmac)
+ cam_dmac = ether_addr_to_u64(lmac->mac);
+
+ /* since we might have several VFs attached to particular LMAC
+ * and kernel could call mcast config for each of them with the
+ * same MAC, check if requested MAC is already in filtering list and
+ * updare/prepare list of MACs to be applied later to HW filters
+ */
+ bgx_lmac_save_filter(lmac, cam_dmac, vf_id);
+}
+EXPORT_SYMBOL(bgx_set_dmac_cam_filter);
+
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+ u8 i = 0;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL);
+ if (mode & BGX_XCAST_BCAST_ACCEPT)
+ cfg |= BCAST_ACCEPT;
+ else
+ cfg &= ~BCAST_ACCEPT;
+
+ /* disable all MCASTs and DMAC filtering */
+ cfg &= ~(CAM_ACCEPT | BGX_MCAST_MODE(MCAST_MODE_MASK));
+
+ /* check requested bits and set filtergin mode appropriately */
+ if (mode & (BGX_XCAST_MCAST_ACCEPT)) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_ACCEPT));
+ } else if (mode & BGX_XCAST_MCAST_FILTER) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_CAM_FILTER) | CAM_ACCEPT);
+ for (i = 0; i < lmac->dmacs_cfg; i++)
+ bgx_set_dmac_cam_filter_mac(bgx, lmacid,
+ lmac->dmacs[i].dmac, i);
+ }
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_set_xcast_mode);
+
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+
+ if (!bgx)
+ return;
+
+ bgx_lmac_remove_filters(&bgx->lmac[lmacid], vf_id);
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ bgx_set_xcast_mode(node, bgx_idx, lmacid,
+ (BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT));
+}
+EXPORT_SYMBOL(bgx_reset_xcast_mode);
+
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
struct bgx *bgx = get_bgx(node, bgx_idx);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 52439da62c97..f5de44bc3bdb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -30,6 +30,7 @@
#define DEFAULT_PAUSE_TIME 0xFFFF
#define BGX_ID_MASK 0x3
+#define LMAC_ID_MASK 0x3
#define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2
@@ -205,6 +206,13 @@
#define LMAC_INTR_LINK_UP BIT(0)
#define LMAC_INTR_LINK_DOWN BIT(1)
+#define BGX_XCAST_BCAST_ACCEPT BIT(0)
+#define BGX_XCAST_MCAST_ACCEPT BIT(1)
+#define BGX_XCAST_MCAST_FILTER BIT(2)
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 mac, u8 vf);
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf);
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode);
void octeon_mdiobus_force_mod_depencency(void);
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
Date: Tue, 27 Mar 2018 08:07:29 -0700
> From: Vadim Lomovtsev <[email protected]>
>
> By default CN88XX BGX accepts all incoming multicast and broadcast
> packets and filtering is disabled. The nic driver doesn't provide
> an ability to change such behaviour.
>
> This series is to implement DMAC filtering management for CN88XX
> nic driver allowing user to enable/disable filtering and configure
> specific MAC addresses to filter traffic.
This doesn't even compile:
drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function ?bgx_lmac_save_filter?:
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:286:3: warning: ?return? with no value, in function returning non-void [-Wreturn-type]
return;
^~~~~~
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:281:12: note: declared here
static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
^~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/ethernet/cavium/thunder/nic.h:15:0,
from drivers/net/ethernet/cavium/thunder/thunder_bgx.c:21:
drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function ?bgx_set_dmac_cam_filter_mac?:
drivers/net/ethernet/cavium/thunder/thunder_bgx.h:61:38: warning: left shift count >= width of type [-Wshift-count-overflow]
#define RX_DMACX_CAM_LMACID(x) (x << 49)
^
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:324:8: note: in expansion of macro ?RX_DMACX_CAM_LMACID?
cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
^~~~~~~~~~~~~~~~~~~
Hi David,
Thanks for feedback.
On Tue, Mar 27, 2018 at 01:28:22PM -0400, David Miller wrote:
> From: Vadim Lomovtsev <[email protected]>
> Date: Tue, 27 Mar 2018 08:07:29 -0700
>
> > From: Vadim Lomovtsev <[email protected]>
> >
> > By default CN88XX BGX accepts all incoming multicast and broadcast
> > packets and filtering is disabled. The nic driver doesn't provide
> > an ability to change such behaviour.
> >
> > This series is to implement DMAC filtering management for CN88XX
> > nic driver allowing user to enable/disable filtering and configure
> > specific MAC addresses to filter traffic.
>
> This doesn't even compile:
>
> drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function ‘bgx_lmac_save_filter’:
> drivers/net/ethernet/cavium/thunder/thunder_bgx.c:286:3: warning: ‘return’ with no value, in function returning non-void [-Wreturn-type]
> return;
> ^~~~~~
> drivers/net/ethernet/cavium/thunder/thunder_bgx.c:281:12: note: declared here
> static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
> ^~~~~~~~~~~~~~~~~~~~
> In file included from drivers/net/ethernet/cavium/thunder/nic.h:15:0,
> from drivers/net/ethernet/cavium/thunder/thunder_bgx.c:21:
> drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function ‘bgx_set_dmac_cam_filter_mac’:
> drivers/net/ethernet/cavium/thunder/thunder_bgx.h:61:38: warning: left shift count >= width of type [-Wshift-count-overflow]
> #define RX_DMACX_CAM_LMACID(x) (x << 49)
> ^
> drivers/net/ethernet/cavium/thunder/thunder_bgx.c:324:8: note: in expansion of macro ‘RX_DMACX_CAM_LMACID’
> cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
> ^~~~~~~~~~~~~~~~~~~
Will update and repost series as v2.
Do you have any other comments ?
WBR,
Vadim
Hi Vadim,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on net-next/master]
[also build test WARNING on v4.16-rc7 next-20180328]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Vadim-Lomovtsev/net-thunderx-implement-DMAC-filtering-support/20180328-190332
config: x86_64-randconfig-s1-03281908 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All warnings (new ones prefixed by >>):
drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function 'bgx_lmac_save_filter':
>> drivers/net/ethernet/cavium/thunder/thunder_bgx.c:286:3: warning: 'return' with no value, in function returning non-void [-Wreturn-type]
return;
^~~~~~
drivers/net/ethernet/cavium/thunder/thunder_bgx.c:281:12: note: declared here
static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
^~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/ethernet/cavium/thunder/nic.h:15:0,
from drivers/net/ethernet/cavium/thunder/thunder_bgx.c:21:
drivers/net/ethernet/cavium/thunder/thunder_bgx.c: In function 'bgx_set_dmac_cam_filter_mac':
>> drivers/net/ethernet/cavium/thunder/thunder_bgx.h:61:38: warning: left shift count >= width of type [-Wshift-count-overflow]
#define RX_DMACX_CAM_LMACID(x) (x << 49)
^
>> drivers/net/ethernet/cavium/thunder/thunder_bgx.c:324:8: note: in expansion of macro 'RX_DMACX_CAM_LMACID'
cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
^~~~~~~~~~~~~~~~~~~
vim +/return +286 drivers/net/ethernet/cavium/thunder/thunder_bgx.c
280
> 281 static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
282 {
283 u8 i = 0;
284
285 if (!lmac)
> 286 return;
287
288 /* At the same time we could have several VFs 'attached' to some
289 * particular LMAC, and each VF is represented as network interface
290 * for kernel. So from user perspective it should be possible to
291 * manipulate with its' (VF) receive modes. However from PF
292 * driver perspective we need to keep track of filter configurations
293 * for different VFs to prevent filter values dupes
294 */
295 for (i = 0; i < lmac->dmacs_cfg; i++) {
296 if (lmac->dmacs[i].dmac == dmac) {
297 lmac->dmacs[i].vf_map |= BIT_ULL(vf_id);
298 return -1;
299 }
300 }
301
302 if (!(lmac->dmacs_cfg < lmac->dmacs_count))
303 return -1;
304
305 /* keep it for further tracking */
306 lmac->dmacs[lmac->dmacs_cfg].dmac = dmac;
307 lmac->dmacs[lmac->dmacs_cfg].vf_map = BIT_ULL(vf_id);
308 lmac->dmacs_cfg++;
309 return 0;
310 }
311
312 static int bgx_set_dmac_cam_filter_mac(struct bgx *bgx, int lmacid, u64 cam_dmac, u8 idx)
313 {
314 struct lmac *lmac = NULL;
315 u64 cfg = 0;
316
317 /* skip zero addresses as meaningless */
318 if (!cam_dmac || !bgx)
319 return -1;
320
321 lmac = &bgx->lmac[lmacid];
322
323 /* configure DCAM filtering for designated LMAC */
> 324 cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
325 RX_DMACX_CAM_EN | cam_dmac;
326 bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), cfg);
327 return 0;
328 }
329
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
From: Vadim Lomovtsev <[email protected]>
The kernel calls ndo_set_rx_mode() callback from atomic context which
causes messaging timeouts between VF and PF (as they’re implemented via
MSIx). So in order to handle ndo_set_rx_mode() we need to get rid of it.
This commit implements necessary workqueue related structures to let VF
queue kernel request processing in non-atomic context later.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 069289b4f968..5fc46c5a4f36 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -265,6 +265,22 @@ struct nicvf_drv_stats {
struct cavium_ptp;
+struct xcast_addr {
+ struct list_head list;
+ u64 addr;
+};
+
+struct xcast_addr_list {
+ struct list_head list;
+ int count;
+};
+
+struct nicvf_work {
+ struct delayed_work work;
+ u8 mode;
+ struct xcast_addr_list *mc;
+};
+
struct nicvf {
struct nicvf *pnicvf;
struct net_device *netdev;
@@ -313,6 +329,7 @@ struct nicvf {
struct nicvf_pfc pfc;
struct tasklet_struct qs_err_task;
struct work_struct reset_task;
+ struct nicvf_work rx_mode_work;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
By default CN88XX BGX accepts all incoming multicast and broadcast
packets and filtering is disabled. The nic driver doesn't provide
an ability to change such behaviour.
This series is to implement DMAC filtering management for CN88XX
nic driver allowing user to enable/disable filtering and configure
specific MAC addresses to filter traffic.
Changes from v1:
build issues:
- update code in order to address compiler warnings;
checkpatch.pl reported issues:
- update code in order to fit 80 symbols length;
- update commit descriptions in order to fit 80 symbols length;
Vadim Lomovtsev (7):
net: thunderx: move filter register related macro into proper place
net: thunderx: add MAC address filter tracking for LMAC
net: thunderx: add multicast filter management support
net: thunderx: add new messages for handle ndo_set_rx_mode callback
net: thunderx: add XCAST messages handlers for PF
net: thunderx: add workqueue control structures for handle
ndo_set_rx_mode request
net: thunderx: add ndo_set_rx_mode callback implementation for VF
drivers/net/ethernet/cavium/thunder/nic.h | 29 ++++
drivers/net/ethernet/cavium/thunder/nic_main.c | 45 ++++-
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 110 +++++++++++-
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 201 ++++++++++++++++++++--
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 19 +-
5 files changed, 374 insertions(+), 30 deletions(-)
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC could be partitioned to up to 128 VFs and thus
represented to system. Each VF is mapped to pair BGX:LMAC, and each of VF
is configured by kernel individually. Eventually the bunch of VFs could be
mapped onto same pair BGX:LMAC and thus could cause several multicast
filtering configuration requests to LMAC with the same MAC addresses.
This commit is to add ThunderX NIC BGX filtering manipulation routines.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 144 ++++++++++++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 10 +-
2 files changed, 153 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index de90e6aa5a4f..5d08d2aeb172 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -258,6 +258,150 @@ static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
sizeof(u64), 0);
}
+static void bgx_lmac_remove_filters(struct lmac *lmac, u8 vf_id)
+{
+ int i = 0;
+
+ if (!lmac)
+ return;
+
+ /* We've got reset filters request from some of attached VF, while the
+ * others might want to keep their configuration. So in this case lets
+ * iterate over all of configured filters and decrease number of
+ * referencies. if some addresses get zero refs remove them from list
+ */
+ for (i = lmac->dmacs_cfg - 1; i >= 0; i--) {
+ lmac->dmacs[i].vf_map &= ~BIT_ULL(vf_id);
+ if (!lmac->dmacs[i].vf_map) {
+ lmac->dmacs_cfg--;
+ lmac->dmacs[i].dmac = 0;
+ lmac->dmacs[i].vf_map = 0;
+ }
+ }
+}
+
+static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
+{
+ u8 i = 0;
+
+ if (!lmac)
+ return -1;
+
+ /* At the same time we could have several VFs 'attached' to some
+ * particular LMAC, and each VF is represented as network interface
+ * for kernel. So from user perspective it should be possible to
+ * manipulate with its' (VF) receive modes. However from PF
+ * driver perspective we need to keep track of filter configurations
+ * for different VFs to prevent filter values dupes
+ */
+ for (i = 0; i < lmac->dmacs_cfg; i++) {
+ if (lmac->dmacs[i].dmac == dmac) {
+ lmac->dmacs[i].vf_map |= BIT_ULL(vf_id);
+ return -1;
+ }
+ }
+
+ if (!(lmac->dmacs_cfg < lmac->dmacs_count))
+ return -1;
+
+ /* keep it for further tracking */
+ lmac->dmacs[lmac->dmacs_cfg].dmac = dmac;
+ lmac->dmacs[lmac->dmacs_cfg].vf_map = BIT_ULL(vf_id);
+ lmac->dmacs_cfg++;
+ return 0;
+}
+
+static int bgx_set_dmac_cam_filter_mac(struct bgx *bgx, int lmacid,
+ u64 cam_dmac, u8 idx)
+{
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+
+ /* skip zero addresses as meaningless */
+ if (!cam_dmac || !bgx)
+ return -1;
+
+ lmac = &bgx->lmac[lmacid];
+
+ /* configure DCAM filtering for designated LMAC */
+ cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
+ RX_DMACX_CAM_EN | cam_dmac;
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+ ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), cfg);
+ return 0;
+}
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid,
+ u64 cam_dmac, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ if (!cam_dmac)
+ cam_dmac = ether_addr_to_u64(lmac->mac);
+
+ /* since we might have several VFs attached to particular LMAC
+ * and kernel could call mcast config for each of them with the
+ * same MAC, check if requested MAC is already in filtering list and
+ * updare/prepare list of MACs to be applied later to HW filters
+ */
+ bgx_lmac_save_filter(lmac, cam_dmac, vf_id);
+}
+EXPORT_SYMBOL(bgx_set_dmac_cam_filter);
+
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+ struct lmac *lmac = NULL;
+ u64 cfg = 0;
+ u8 i = 0;
+
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmacid];
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL);
+ if (mode & BGX_XCAST_BCAST_ACCEPT)
+ cfg |= BCAST_ACCEPT;
+ else
+ cfg &= ~BCAST_ACCEPT;
+
+ /* disable all MCASTs and DMAC filtering */
+ cfg &= ~(CAM_ACCEPT | BGX_MCAST_MODE(MCAST_MODE_MASK));
+
+ /* check requested bits and set filtergin mode appropriately */
+ if (mode & (BGX_XCAST_MCAST_ACCEPT)) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_ACCEPT));
+ } else if (mode & BGX_XCAST_MCAST_FILTER) {
+ cfg |= (BGX_MCAST_MODE(MCAST_MODE_CAM_FILTER) | CAM_ACCEPT);
+ for (i = 0; i < lmac->dmacs_cfg; i++)
+ bgx_set_dmac_cam_filter_mac(bgx, lmacid,
+ lmac->dmacs[i].dmac, i);
+ }
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_set_xcast_mode);
+
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf_id)
+{
+ struct bgx *bgx = get_bgx(node, bgx_idx);
+
+ if (!bgx)
+ return;
+
+ bgx_lmac_remove_filters(&bgx->lmac[lmacid], vf_id);
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ bgx_set_xcast_mode(node, bgx_idx, lmacid,
+ (BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT));
+}
+EXPORT_SYMBOL(bgx_reset_xcast_mode);
+
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
struct bgx *bgx = get_bgx(node, bgx_idx);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 52439da62c97..cbdd20b9ee6f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -30,6 +30,7 @@
#define DEFAULT_PAUSE_TIME 0xFFFF
#define BGX_ID_MASK 0x3
+#define LMAC_ID_MASK 0x3
#define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2
@@ -57,7 +58,7 @@
#define BGX_CMRX_RX_FIFO_LEN 0x108
#define BGX_CMR_RX_DMACX_CAM 0x200
#define RX_DMACX_CAM_EN BIT_ULL(48)
-#define RX_DMACX_CAM_LMACID(x) (x << 49)
+#define RX_DMACX_CAM_LMACID(x) (((u64)x) << 49)
#define RX_DMAC_COUNT 32
#define BGX_CMR_RX_STREERING 0x300
#define RX_TRAFFIC_STEER_RULE_COUNT 8
@@ -205,6 +206,13 @@
#define LMAC_INTR_LINK_UP BIT(0)
#define LMAC_INTR_LINK_DOWN BIT(1)
+#define BGX_XCAST_BCAST_ACCEPT BIT(0)
+#define BGX_XCAST_MCAST_ACCEPT BIT(1)
+#define BGX_XCAST_MCAST_FILTER BIT(2)
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 mac, u8 vf);
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf);
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode);
void octeon_mdiobus_force_mod_depencency(void);
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ndo_set_rx_mode() is called from atomic context which causes
messages response timeouts while VF to PF communication via MSIx.
To get rid of that we're copy passed mc list, parse flags and queue
handling of kernel request to ordered workqueue.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 110 ++++++++++++++++++++++-
1 file changed, 109 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7d9c5ffbd041..c8a8faaf17e9 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -21,6 +21,7 @@
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
#include "nic_reg.h"
#include "nic.h"
@@ -67,6 +68,9 @@ module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
+/* workqueue for handling kernel ndo_set_rx_mode() calls */
+static struct workqueue_struct *nicvf_rx_mode_wq;
+
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -1919,6 +1923,100 @@ static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
}
}
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+ struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+ work.work);
+ struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+ union nic_mbx mbx = {};
+ struct xcast_addr *xaddr, *next;
+
+ if (!vf_work)
+ return;
+
+ /* From the inside of VM code flow we have only 128 bits memory
+ * available to send message to host's PF, so send all mc addrs
+ * one by one, starting from flush command in case if kernel
+ * requests to configure specific MAC filtering
+ */
+
+ /* flush DMAC filters and reset RX mode */
+ mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ if (vf_work->mode & BGX_XCAST_MCAST_FILTER) {
+ /* once enabling filtering, we need to signal to PF to add
+ * its' own LMAC to the filter to accept packets for it.
+ */
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = 0;
+ nicvf_send_msg_to_pf(nic, &mbx);
+ }
+
+ /* check if we have any specific MACs to be added to PF DMAC filter */
+ if (vf_work->mc) {
+ /* now go through kernel list of MACs and add them one by one */
+ list_for_each_entry_safe(xaddr, next,
+ &vf_work->mc->list, list) {
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.data.mac = xaddr->addr;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ /* after receiving ACK from PF release memory */
+ list_del(&xaddr->list);
+ kfree(xaddr);
+ vf_work->mc->count--;
+ }
+ kfree(vf_work->mc);
+ }
+
+ /* and finally set rx mode for PF accordingly */
+ mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
+ mbx.xcast.data.mode = vf_work->mode;
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_set_rx_mode(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct xcast_addr_list *mc_list = NULL;
+ u8 mode = 0;
+
+ if (netdev->flags & IFF_PROMISC) {
+ mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
+ } else {
+ if (netdev->flags & IFF_BROADCAST)
+ mode |= BGX_XCAST_BCAST_ACCEPT;
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ mode |= BGX_XCAST_MCAST_ACCEPT;
+ } else if (netdev->flags & IFF_MULTICAST) {
+ mode |= BGX_XCAST_MCAST_FILTER;
+ /* here we need to copy mc addrs */
+ if (netdev_mc_count(netdev)) {
+ struct xcast_addr *xaddr;
+
+ mc_list = kmalloc(sizeof(*mc_list), GFP_ATOMIC);
+ INIT_LIST_HEAD(&mc_list->list);
+ netdev_hw_addr_list_for_each(ha, &netdev->mc) {
+ xaddr = kmalloc(sizeof(*xaddr),
+ GFP_ATOMIC);
+ xaddr->addr =
+ ether_addr_to_u64(ha->addr);
+ list_add_tail(&xaddr->list,
+ &mc_list->list);
+ mc_list->count++;
+ }
+ }
+ }
+ }
+ nic->rx_mode_work.mc = mc_list;
+ nic->rx_mode_work.mode = mode;
+ queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 2 * HZ);
+}
+
static const struct net_device_ops nicvf_netdev_ops = {
.ndo_open = nicvf_open,
.ndo_stop = nicvf_stop,
@@ -1931,6 +2029,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_set_features = nicvf_set_features,
.ndo_bpf = nicvf_xdp,
.ndo_do_ioctl = nicvf_ioctl,
+ .ndo_set_rx_mode = nicvf_set_rx_mode,
};
static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -2071,6 +2170,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&nic->reset_task, nicvf_reset_task);
+ INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
@@ -2109,6 +2210,8 @@ static void nicvf_remove(struct pci_dev *pdev)
nic = netdev_priv(netdev);
pnetdev = nic->pnicvf->netdev;
+ cancel_delayed_work_sync(&nic->rx_mode_work.work);
+
/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
*/
@@ -2140,12 +2243,17 @@ static struct pci_driver nicvf_driver = {
static int __init nicvf_init_module(void)
{
pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-
+ nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
+ WQ_MEM_RECLAIM);
return pci_register_driver(&nicvf_driver);
}
static void __exit nicvf_cleanup_module(void)
{
+ if (nicvf_rx_mode_wq) {
+ destroy_workqueue(nicvf_rx_mode_wq);
+ nicvf_rx_mode_wq = NULL;
+ }
pci_unregister_driver(&nicvf_driver);
}
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The kernel calls ndo_set_rx_mode() callback supplying it will all necessary
info, such as device state flags, multicast mac addresses list and so on.
Since we have only 128 bits to communicate with PF we need to initiate
several requests to PF with small/short operation each based on input data.
So this commit implements following PF messages codes along with new
data structures for them:
NIC_MBOX_MSG_RESET_XCAST to flush all filters configured for this
particular network interface (VF)
NIC_MBOX_MSG_ADD_MCAST to add new MAC address to DMAC filter registers
for this particular network interface (VF)
NIC_MBOX_MSG_SET_XCAST to apply filtering configuration to filter control
register
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 4cacce5d2b16..069289b4f968 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -403,6 +403,9 @@ struct nicvf {
#define NIC_MBOX_MSG_PTP_CFG 0x19 /* HW packet timestamp */
#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */
+#define NIC_MBOX_MSG_RESET_XCAST 0xF2 /* Reset DCAM filtering mode */
+#define NIC_MBOX_MSG_ADD_MCAST 0xF3 /* Add MAC to DCAM filters */
+#define NIC_MBOX_MSG_SET_XCAST 0xF4 /* Set MCAST/BCAST RX mode */
struct nic_cfg_msg {
u8 msg;
@@ -556,6 +559,14 @@ struct set_ptp {
bool enable;
};
+struct xcast {
+ u8 msg;
+ union {
+ u8 mode;
+ u64 mac;
+ } data;
+};
+
/* 128 bit shared memory between PF and each VF */
union nic_mbx {
struct { u8 msg; } msg;
@@ -576,6 +587,7 @@ union nic_mbx {
struct reset_stat_cfg reset_stat;
struct pfc pfc;
struct set_ptp ptp;
+ struct xcast xcast;
};
#define NIC_NODE_ID_MASK 0x03
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
This commit is to add message handling for ndo_set_rx_mode()
callback at PF side.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic_main.c | 45 +++++++++++++++++++++++---
1 file changed, 41 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 7ff66a8194e2..55af04fa03a7 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -21,6 +21,8 @@
#define DRV_NAME "nicpf"
#define DRV_VERSION "1.0"
+#define NIC_VF_PER_MBX_REG 64
+
struct hw_info {
u8 bgx_cnt;
u8 chans_per_lmac;
@@ -1072,6 +1074,40 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_PTP_CFG:
nic_config_timestamp(nic, vf, &mbx.ptp);
break;
+ case NIC_MBOX_MSG_RESET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_reset_xcast_mode(nic->node, bgx, lmac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_ADD_MCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
+ mbx.xcast.data.mac,
+ vf < NIC_VF_PER_MBX_REG ? vf :
+ vf - NIC_VF_PER_MBX_REG);
+ break;
+
+ case NIC_MBOX_MSG_SET_XCAST:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+ break;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1094,7 +1130,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
struct nicpf *nic = (struct nicpf *)nic_irq;
int mbx;
u64 intr;
- u8 vf, vf_per_mbx_reg = 64;
+ u8 vf;
if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
mbx = 0;
@@ -1103,12 +1139,13 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
- for (vf = 0; vf < vf_per_mbx_reg; vf++) {
+ for (vf = 0; vf < NIC_VF_PER_MBX_REG; vf++) {
if (intr & (1ULL << vf)) {
dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
- vf + (mbx * vf_per_mbx_reg));
+ vf + (mbx * NIC_VF_PER_MBX_REG));
- nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
+ nic_handle_mbx_intr(nic, vf +
+ (mbx * NIC_VF_PER_MBX_REG));
nic_clear_mbx_intr(nic, vf, mbx);
}
}
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC has set of registers which allows to configure
filter policy for ingress packets. There are three possible regimes
of filtering multicasts, broadcasts and unicasts: accept all, reject all
and accept filter allowed only.
Current implementation has enum with all of them and two generic macro
for enabling filtering et all (CAM_ACCEPT) and enabling/disabling
broadcast packets, which also should be corrected in order to represent
register bits properly. All these values are private for driver and
there is no need to ‘publish’ them via header file.
This commit is to move filtering register manipulation values from
header file into source with explicit assignment of exact register
values to them to be used while register configuring.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 13 +++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 11 -----------
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 91d34ea40e2c..0dd211605eb1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -24,6 +24,19 @@
#define DRV_NAME "thunder_bgx"
#define DRV_VERSION "1.0"
+/* RX_DMAC_CTL configuration */
+enum MCAST_MODE {
+ MCAST_MODE_REJECT = 0x0,
+ MCAST_MODE_ACCEPT = 0x1,
+ MCAST_MODE_CAM_FILTER = 0x2,
+ RSVD = 0x3
+};
+
+#define BCAST_ACCEPT BIT(0)
+#define CAM_ACCEPT BIT(3)
+#define MCAST_MODE_MASK 0x3
+#define BGX_MCAST_MODE(x) (x << 1)
+
struct lmac {
struct bgx *bgx;
int dmac;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 5a7567d31138..52439da62c97 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -205,17 +205,6 @@
#define LMAC_INTR_LINK_UP BIT(0)
#define LMAC_INTR_LINK_DOWN BIT(1)
-/* RX_DMAC_CTL configuration*/
-enum MCAST_MODE {
- MCAST_MODE_REJECT,
- MCAST_MODE_ACCEPT,
- MCAST_MODE_CAM_FILTER,
- RSVD
-};
-
-#define BCAST_ACCEPT 1
-#define CAM_ACCEPT 1
-
void octeon_mdiobus_force_mod_depencency(void);
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
The ThunderX NIC has two Ethernet Interfaces (BGX) each of them could has
up to four Logical MACs configured. Each of BGX has 32 filters to be
configured for filtering ingress packets. The number of filters available
to particular LMAC is from 8 (if we have four LMACs configured per BGX)
up to 32 (in case of only one LMAC is configured per BGX).
At the same time the NIC could present up to 128 VFs to OS as network
interfaces, each of them kernel will configure with set of MAC addresses
for filtering. So to prevent dupes in BGX filter registers from different
network interfaces it is required to cache and track all filter
configuration requests prior to applying them onto BGX filter registers.
This commit is to update LMAC structures with control fields to
allocate/releasing filters tracking list along with implementing
dmac array allocate/release per LMAC.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 44 +++++++++++++++--------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 0dd211605eb1..de90e6aa5a4f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -37,9 +37,18 @@ enum MCAST_MODE {
#define MCAST_MODE_MASK 0x3
#define BGX_MCAST_MODE(x) (x << 1)
+struct dmac_map {
+ u64 vf_map;
+ u64 dmac;
+};
+
struct lmac {
struct bgx *bgx;
- int dmac;
+ /* actual number of DMACs configured */
+ u8 dmacs_cfg;
+ /* overal number of possible DMACs could be configured per LMAC */
+ u8 dmacs_count;
+ struct dmac_map *dmacs; /* DMAC:VFs tracking filter array */
u8 mac[ETH_ALEN];
u8 lmac_type;
u8 lane_to_sds;
@@ -236,6 +245,19 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
}
EXPORT_SYMBOL(bgx_set_lmac_mac);
+static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
+{
+ struct lmac *lmac = NULL;
+ u8 idx = 0;
+
+ lmac = &bgx->lmac[lmacid];
+ /* reset CAM filters */
+ for (idx = 0; idx < lmac->dmacs_count; idx++)
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+ ((lmacid * lmac->dmacs_count) + idx) *
+ sizeof(u64), 0);
+}
+
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
struct bgx *bgx = get_bgx(node, bgx_idx);
@@ -481,18 +503,6 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
}
EXPORT_SYMBOL(bgx_get_tx_stats);
-static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac)
-{
- u64 offset;
-
- while (bgx->lmac[lmac].dmac > 0) {
- offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) +
- (lmac * MAX_DMAC_PER_LMAC * sizeof(u64));
- bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0);
- bgx->lmac[lmac].dmac--;
- }
-}
-
/* Configure BGX LMAC in internal loopback mode */
void bgx_lmac_internal_loopback(int node, int bgx_idx,
int lmac_idx, bool enable)
@@ -925,6 +935,11 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4);
}
+ /* actual number of filters available to exact LMAC */
+ lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count);
+ lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs),
+ GFP_KERNEL);
+
/* Enable lmac */
bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
@@ -1011,7 +1026,8 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
cfg &= ~CMR_EN;
bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
- bgx_flush_dmac_addrs(bgx, lmacid);
+ bgx_flush_dmac_cam_filter(bgx, lmacid);
+ kfree(lmac->dmacs);
if ((lmac->lmac_type != BGX_MODE_XFI) &&
(lmac->lmac_type != BGX_MODE_XLAUI) &&
--
2.14.3
From: Vadim Lomovtsev <[email protected]>
Date: Fri, 30 Mar 2018 04:59:46 -0700
> From: Vadim Lomovtsev <[email protected]>
>
> By default CN88XX BGX accepts all incoming multicast and broadcast
> packets and filtering is disabled. The nic driver doesn't provide
> an ability to change such behaviour.
>
> This series is to implement DMAC filtering management for CN88XX
> nic driver allowing user to enable/disable filtering and configure
> specific MAC addresses to filter traffic.
>
> Changes from v1:
> build issues:
> - update code in order to address compiler warnings;
> checkpatch.pl reported issues:
> - update code in order to fit 80 symbols length;
> - update commit descriptions in order to fit 80 symbols length;
Series applied.
On Sat, Mar 31, 2018 at 10:07:30PM -0400, David Miller wrote:
> From: Vadim Lomovtsev <[email protected]>
> Date: Fri, 30 Mar 2018 04:59:46 -0700
>
> > From: Vadim Lomovtsev <[email protected]>
> >
> > By default CN88XX BGX accepts all incoming multicast and broadcast
> > packets and filtering is disabled. The nic driver doesn't provide
> > an ability to change such behaviour.
> >
> > This series is to implement DMAC filtering management for CN88XX
> > nic driver allowing user to enable/disable filtering and configure
> > specific MAC addresses to filter traffic.
> >
> > Changes from v1:
> > build issues:
> > - update code in order to address compiler warnings;
> > checkpatch.pl reported issues:
> > - update code in order to fit 80 symbols length;
> > - update commit descriptions in order to fit 80 symbols length;
>
> Series applied.
Thank you.
WBR,
Vadim
At the end of NIC VF initialization VF sends CFG_DONE message to PF without
using nicvf_msg_send_to_pf routine. This potentially could re-write data in
mailbox. This commit is to implement common way of sending CFG_DONE message
by the same way with other configuration messages by using
nicvf_send_msg_to_pf() routine.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic_main.c | 2 +-
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 15 ++++++++++++---
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6c8dcb65ff03..90497a27df18 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1039,7 +1039,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_CFG_DONE:
/* Last message of VF config msg sequence */
nic_enable_vf(nic, vf, true);
- goto unlock;
+ break;
case NIC_MBOX_MSG_SHUTDOWN:
/* First msg in VF teardown sequence */
if (vf >= nic->num_vf_en)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index abf24e7dff2d..19b58fc3ca41 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -169,6 +169,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic)
return 1;
}
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to CFG DONE msg\n");
+ }
+}
+
static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
if (bgx->rx)
@@ -1416,7 +1427,6 @@ int nicvf_open(struct net_device *netdev)
struct nicvf *nic = netdev_priv(netdev);
struct queue_set *qs = nic->qs;
struct nicvf_cq_poll *cq_poll = NULL;
- union nic_mbx mbx = {};
/* wait till all queued set_rx_mode tasks completes if any */
drain_workqueue(nic->nicvf_rx_mode_wq);
@@ -1515,8 +1525,7 @@ int nicvf_open(struct net_device *netdev)
nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
/* Send VF config done msg to PF */
- mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
- nicvf_write_to_mbx(nic, &mbx);
+ nicvf_send_cfg_done(nic);
return 0;
cleanup:
--
2.17.2
In some cases it could happen that nicvf_send_msg_to_pf() could be called
concurrently for the same NIC VF, and thus re-writing mailbox contents and
breaking messaging sequence with PF by re-writing NICVF data.
This commit is to implement mutex for NICVF to protect mailbox registers
and NICVF messaging control data from concurrent access.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 2 ++
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 ++++++++++---
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 227343625e83..86cda3f4b37b 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -329,6 +329,8 @@ struct nicvf {
spinlock_t rx_mode_wq_lock;
/* workqueue for handling kernel ndo_set_rx_mode() calls */
struct workqueue_struct *nicvf_rx_mode_wq;
+ /* mutex to protect VF's mailbox contents from concurrent access */
+ struct mutex rx_mode_mtx;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index da5986ca7bee..2332e3e95e0e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -124,6 +124,9 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
int timeout = NIC_MBOX_MSG_TIMEOUT;
int sleep = 10;
+ int ret = 0;
+
+ mutex_lock(&nic->rx_mode_mtx);
nic->pf_acked = false;
nic->pf_nacked = false;
@@ -136,7 +139,8 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
netdev_err(nic->netdev,
"PF NACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
msleep(sleep);
if (nic->pf_acked)
@@ -146,10 +150,12 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
netdev_err(nic->netdev,
"PF didn't ACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EBUSY;
+ ret = -EBUSY;
+ break;
}
}
- return 0;
+ mutex_unlock(&nic->rx_mode_mtx);
+ return ret;
}
/* Checks if VF is able to comminicate with PF
@@ -2208,6 +2214,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
nic->vf_id);
INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
spin_lock_init(&nic->rx_mode_wq_lock);
+ mutex_init(&nic->rx_mode_mtx);
err = register_netdev(netdev);
if (err) {
--
2.17.2
Correct STREERING to STEERING at macro name for BGX steering register.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 2 +-
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index e337da6ba2a4..673c57b8023f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1217,7 +1217,7 @@ static void bgx_init_hw(struct bgx *bgx)
/* Disable MAC steering (NCSI traffic) */
for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
- bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_STEERING + (i * 8), 0x00);
}
static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index cbdd20b9ee6f..5cbc54e9eb19 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -60,7 +60,7 @@
#define RX_DMACX_CAM_EN BIT_ULL(48)
#define RX_DMACX_CAM_LMACID(x) (((u64)x) << 49)
#define RX_DMAC_COUNT 32
-#define BGX_CMR_RX_STREERING 0x300
+#define BGX_CMR_RX_STEERING 0x300
#define RX_TRAFFIC_STEER_RULE_COUNT 8
#define BGX_CMR_CHAN_MSK_AND 0x450
#define BGX_CMR_BIST_STATUS 0x460
--
2.17.2
Move the link change polling task to VF side in order to
prevent races between VF and PF while sending link change
message(s). This commit is to implement link change request
to be initiated by VF.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 2 +-
.../net/ethernet/cavium/thunder/nic_main.c | 39 ++++++++++++--
.../net/ethernet/cavium/thunder/nicvf_main.c | 52 +++++++++++++------
3 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 86cda3f4b37b..62636c1ed141 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -331,7 +331,7 @@ struct nicvf {
struct workqueue_struct *nicvf_rx_mode_wq;
/* mutex to protect VF's mailbox contents from concurrent access */
struct mutex rx_mode_mtx;
-
+ struct delayed_work link_change_work;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
/* Inbound timestamping is on */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 620dbe082ca0..8ab71dae3988 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -929,6 +929,35 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
}
+static void nic_link_status_get(struct nicpf *nic, u8 vf)
+{
+ union nic_mbx mbx = {};
+ struct bgx_link_status link;
+ u8 bgx, lmac;
+
+ mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+ /* Get BGX, LMAC indices for the VF */
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ /* Get interface link status */
+ bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+ nic->link[vf] = link.link_up;
+ nic->duplex[vf] = link.duplex;
+ nic->speed[vf] = link.speed;
+
+ /* Send a mbox message to VF with current link status */
+ mbx.link_status.link_up = link.link_up;
+ mbx.link_status.duplex = link.duplex;
+ mbx.link_status.speed = link.speed;
+ mbx.link_status.mac_type = link.mac_type;
+
+ /* reply with link status */
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
/* Interrupt handler to handle mailbox messages from VFs */
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
{
@@ -1108,6 +1137,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
break;
+ case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ nic_link_status_get(nic, vf);
+ goto unlock;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1419,9 +1455,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_disable_sriov;
}
- INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
- queue_delayed_work(nic->check_link, &nic->dwork, 0);
-
return 0;
err_disable_sriov:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 2332e3e95e0e..503cfadff4ac 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -242,21 +242,24 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
break;
case NIC_MBOX_MSG_BGX_LINK_CHANGE:
nic->pf_acked = true;
- nic->link_up = mbx.link_status.link_up;
- nic->duplex = mbx.link_status.duplex;
- nic->speed = mbx.link_status.speed;
- nic->mac_type = mbx.link_status.mac_type;
- if (nic->link_up) {
- netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
- nic->speed,
- nic->duplex == DUPLEX_FULL ?
- "Full" : "Half");
- netif_carrier_on(nic->netdev);
- netif_tx_start_all_queues(nic->netdev);
- } else {
- netdev_info(nic->netdev, "Link is Down\n");
- netif_carrier_off(nic->netdev);
- netif_tx_stop_all_queues(nic->netdev);
+ if (nic->link_up != mbx.link_status.link_up) {
+ nic->link_up = mbx.link_status.link_up;
+ nic->duplex = mbx.link_status.duplex;
+ nic->speed = mbx.link_status.speed;
+ nic->mac_type = mbx.link_status.mac_type;
+ if (nic->link_up) {
+ netdev_info(nic->netdev,
+ "Link is Up %d Mbps %s duplex\n",
+ nic->speed,
+ nic->duplex == DUPLEX_FULL ?
+ "Full" : "Half");
+ netif_carrier_on(nic->netdev);
+ netif_tx_start_all_queues(nic->netdev);
+ } else {
+ netdev_info(nic->netdev, "Link is Down\n");
+ netif_carrier_off(nic->netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+ }
}
break;
case NIC_MBOX_MSG_ALLOC_SQS:
@@ -1325,6 +1328,8 @@ int nicvf_stop(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
+ cancel_delayed_work_sync(&nic->link_change_work);
+
/* wait till all queued set_rx_mode tasks completes */
drain_workqueue(nic->nicvf_rx_mode_wq);
@@ -1427,6 +1432,18 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
return nicvf_send_msg_to_pf(nic, &mbx);
}
+static void nicvf_link_status_check_task(struct work_struct *work_arg)
+{
+ struct nicvf *nic = container_of(work_arg,
+ struct nicvf,
+ link_change_work.work);
+ union nic_mbx mbx = {};
+ mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+ nicvf_send_msg_to_pf(nic, &mbx);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 2 * HZ);
+}
+
int nicvf_open(struct net_device *netdev)
{
int cpu, err, qidx;
@@ -1533,6 +1550,11 @@ int nicvf_open(struct net_device *netdev)
/* Send VF config done msg to PF */
nicvf_send_cfg_done(nic);
+ INIT_DELAYED_WORK(&nic->link_change_work,
+ nicvf_link_status_check_task);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 0);
+
return 0;
cleanup:
nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
--
2.17.2
The ThunderX CN88XX NIC Virtual Function driver uses mailbox interface
to communicate to physical function driver. Each of VF has it's own pair
of mailbox registers to read from and write to. The mailbox registers
has no protection from possible races, so it has to be implemented
at software side.
After long term testing by loop of 'ip link set <ifname> up/down'
command it was found that there are two possible scenarios when
race condition appears:
1. VF receives link change message from PF and VF send RX mode
configuration message to PF in the same time from separate thread.
2. PF receives RX mode configuration from VF and in the same time,
in separate thread PF detects link status change and sends appropriate
message to particular VF.
Both cases leads to mailbox data to be rewritten, NIC VF messaging control
data to be updated incorrectly and communication sequence gets broken.
This patch series is to address race condition with VF & PF communication.
Changes:
v1 -> v2
- 0000: correct typo in cover letter subject: 'betwen' -> 'between';
- move link state polling request task from pf to vf
instead of cheking status of mailbox irq;
v2 -> v3
- 0003: change return type of nicvf_send_cfg_done() function
from int to void;
- 0007: update subject and remove unused variable 'netdev'
from nicvf_link_status_check_task() function;
Vadim Lomovtsev (8):
net: thunderx: correct typo in macro name
net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs
to private for each of them.
net: thunderx: make CFG_DONE message to run through generic send-ack
sequence
net: thunderx: add nicvf_send_msg_to_pf result check for
set_rx_mode_task
net: thunderx: rework xcast message structure to make it fit into 64
bit
net: thunderx: add mutex to protect mailbox from concurrent calls for
same VF
net: thunderx: add LINK_CHANGE message handler at nicpf
net: thunderx: remove link change polling code and info from nicpf
drivers/net/ethernet/cavium/thunder/nic.h | 14 +-
.../net/ethernet/cavium/thunder/nic_main.c | 149 ++++++------------
.../net/ethernet/cavium/thunder/nicvf_main.c | 130 ++++++++++-----
.../net/ethernet/cavium/thunder/thunder_bgx.c | 2 +-
.../net/ethernet/cavium/thunder/thunder_bgx.h | 2 +-
5 files changed, 144 insertions(+), 153 deletions(-)
--
2.17.2
To communicate to PF each of ThunderX NIC VF uses mailbox which is
pair of 64 bit registers available to both VFn and PF.
This commit is to change the xcast message structure in order to
fit it into 64 bit.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 6 ++----
drivers/net/ethernet/cavium/thunder/nic_main.c | 4 ++--
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 6 +++---
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 376a96bce33f..227343625e83 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -577,10 +577,8 @@ struct set_ptp {
struct xcast {
u8 msg;
- union {
- u8 mode;
- u64 mac;
- } data;
+ u8 mode;
+ u64 mac:48;
};
/* 128 bit shared memory between PF and each VF */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 90497a27df18..620dbe082ca0 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1094,7 +1094,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
- mbx.xcast.data.mac,
+ mbx.xcast.mac,
vf < NIC_VF_PER_MBX_REG ? vf :
vf - NIC_VF_PER_MBX_REG);
break;
@@ -1106,7 +1106,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
}
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+ bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
break;
default:
dev_err(&nic->pdev->dev,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 45f06504a61b..da5986ca7bee 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1961,7 +1961,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
* its' own LMAC to the filter to accept packets for it.
*/
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
- mbx.xcast.data.mac = 0;
+ mbx.xcast.mac = 0;
if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
goto free_mc;
}
@@ -1971,7 +1971,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
/* now go through kernel list of MACs and add them one by one */
for (idx = 0; idx < mc_addrs->count; idx++) {
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
- mbx.xcast.data.mac = mc_addrs->mc[idx];
+ mbx.xcast.mac = mc_addrs->mc[idx];
if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
goto free_mc;
}
@@ -1979,7 +1979,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
/* and finally set rx mode for PF accordingly */
mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
- mbx.xcast.data.mode = mode;
+ mbx.xcast.mode = mode;
nicvf_send_msg_to_pf(nic, &mbx);
free_mc:
--
2.17.2
Since link change polling routine was moved to nicvf side,
we don't need anymore polling function at nicpf side along
with link status info for all enabled Vfs as at VF side
this info is already tracked.
This commit is to remove unnecessary code & fields from
nicpf structure.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
.../net/ethernet/cavium/thunder/nic_main.c | 114 ++----------------
1 file changed, 12 insertions(+), 102 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 8ab71dae3988..c90252829ed3 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -57,14 +57,8 @@ struct nicpf {
#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF)
u8 *vf_lmac_map;
- struct delayed_work dwork;
- struct workqueue_struct *check_link;
- u8 *link;
- u8 *duplex;
- u32 *speed;
u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
u16 rssi_base[MAX_NUM_VFS_SUPPORTED];
- bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
/* MSI-X */
u8 num_vec;
@@ -929,6 +923,10 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
}
+/* Get BGX LMAC link status and update corresponding VF
+ * if there is a change, valid only if internal L2 switch
+ * is not present otherwise VF link is always treated as up
+ */
static void nic_link_status_get(struct nicpf *nic, u8 vf)
{
union nic_mbx mbx = {};
@@ -944,10 +942,6 @@ static void nic_link_status_get(struct nicpf *nic, u8 vf)
/* Get interface link status */
bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
- nic->link[vf] = link.link_up;
- nic->duplex[vf] = link.duplex;
- nic->speed[vf] = link.speed;
-
/* Send a mbox message to VF with current link status */
mbx.link_status.link_up = link.link_up;
mbx.link_status.duplex = link.duplex;
@@ -970,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
int i;
int ret = 0;
- nic->mbx_lock[vf] = true;
-
mbx_addr = nic_get_mbx_addr(vf);
mbx_data = (u64 *)&mbx;
@@ -986,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
switch (mbx.msg.msg) {
case NIC_MBOX_MSG_READY:
nic_mbx_send_ready(nic, vf);
- if (vf < nic->num_vf_en) {
- nic->link[vf] = 0;
- nic->duplex[vf] = 0;
- nic->speed[vf] = 0;
- }
- goto unlock;
+ return;
case NIC_MBOX_MSG_QS_CFG:
reg_addr = NIC_PF_QSET_0_127_CFG |
(mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1060,7 +1047,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_RSS_SIZE:
nic_send_rss_size(nic, vf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_RSS_CFG:
case NIC_MBOX_MSG_RSS_CFG_CONT:
nic_config_rss(nic, &mbx.rss_cfg);
@@ -1078,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_ALLOC_SQS:
nic_alloc_sqs(nic, &mbx.sqs_alloc);
- goto unlock;
+ return;
case NIC_MBOX_MSG_NICVF_PTR:
nic->nicvf[vf] = mbx.nicvf.nicvf;
break;
case NIC_MBOX_MSG_PNICVF_PTR:
nic_send_pnicvf(nic, vf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_SNICVF_PTR:
nic_send_snicvf(nic, &mbx.nicvf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_BGX_STATS:
nic_get_bgx_stats(nic, &mbx.bgx_stats);
- goto unlock;
+ return;
case NIC_MBOX_MSG_LOOPBACK:
ret = nic_config_loopback(nic, &mbx.lbk);
break;
@@ -1099,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_PFC:
nic_pause_frame(nic, vf, &mbx.pfc);
- goto unlock;
+ return;
case NIC_MBOX_MSG_PTP_CFG:
nic_config_timestamp(nic, vf, &mbx.ptp);
break;
@@ -1143,7 +1130,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
}
nic_link_status_get(nic, vf);
- goto unlock;
+ return;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1157,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
mbx.msg.msg, vf);
nic_mbx_send_nack(nic, vf);
}
-unlock:
- nic->mbx_lock[vf] = false;
}
static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1306,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
return 0;
}
-/* Poll for BGX LMAC link status and update corresponding VF
- * if there is a change, valid only if internal L2 switch
- * is not present otherwise VF link is always treated as up
- */
-static void nic_poll_for_link(struct work_struct *work)
-{
- union nic_mbx mbx = {};
- struct nicpf *nic;
- struct bgx_link_status link;
- u8 vf, bgx, lmac;
-
- nic = container_of(work, struct nicpf, dwork.work);
-
- mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
-
- for (vf = 0; vf < nic->num_vf_en; vf++) {
- /* Poll only if VF is UP */
- if (!nic->vf_enabled[vf])
- continue;
-
- /* Get BGX, LMAC indices for the VF */
- bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- /* Get interface link status */
- bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
-
- /* Inform VF only if link status changed */
- if (nic->link[vf] == link.link_up)
- continue;
-
- if (!nic->mbx_lock[vf]) {
- nic->link[vf] = link.link_up;
- nic->duplex[vf] = link.duplex;
- nic->speed[vf] = link.speed;
-
- /* Send a mbox message to VF with current link status */
- mbx.link_status.link_up = link.link_up;
- mbx.link_status.duplex = link.duplex;
- mbx.link_status.speed = link.speed;
- mbx.link_status.mac_type = link.mac_type;
- nic_send_msg_to_vf(nic, vf, &mbx);
- }
- }
- queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
-}
-
static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
@@ -1420,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!nic->vf_lmac_map)
goto err_release_regions;
- nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
- if (!nic->link)
- goto err_release_regions;
-
- nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
- if (!nic->duplex)
- goto err_release_regions;
-
- nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
- if (!nic->speed)
- goto err_release_regions;
-
/* Initialize hardware */
nic_init_hw(nic);
@@ -1447,19 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_unregister_interrupts;
- /* Register a physical link status poll fn() */
- nic->check_link = alloc_workqueue("check_link_status",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
- if (!nic->check_link) {
- err = -ENOMEM;
- goto err_disable_sriov;
- }
-
return 0;
-err_disable_sriov:
- if (nic->flags & NIC_SRIOV_ENABLED)
- pci_disable_sriov(pdev);
err_unregister_interrupts:
nic_unregister_interrupts(nic);
err_release_regions:
@@ -1480,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev)
if (nic->flags & NIC_SRIOV_ENABLED)
pci_disable_sriov(pdev);
- if (nic->check_link) {
- /* Destroy work Queue */
- cancel_delayed_work_sync(&nic->dwork);
- destroy_workqueue(nic->check_link);
- }
-
nic_unregister_interrupts(nic);
pci_release_regions(pdev);
--
2.17.2
Having one work queue for receive mode configuration ndo_set_rx_mode()
call for all VFs results in making each of them wait till the
set_rx_mode() call completes for another VF if any of close, set
receive mode and change flags calls being already invoked. Potentially
this could cause device state change before appropriate call of receive
mode configuration completes, so the call itself became meaningless,
corrupt data or break configuration sequence.
We don't need any delays in NIC VF configuration sequence so having delayed
work call with 0 delay has no sense.
This commit is to implement one work queue for each NIC VF for set_rx_mode
task and to let them work independently and replacing delayed_work
with work_struct.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 4 ++-
.../net/ethernet/cavium/thunder/nicvf_main.c | 30 ++++++++++---------
2 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index f4d81765221e..376a96bce33f 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -271,7 +271,7 @@ struct xcast_addr_list {
};
struct nicvf_work {
- struct delayed_work work;
+ struct work_struct work;
u8 mode;
struct xcast_addr_list *mc;
};
@@ -327,6 +327,8 @@ struct nicvf {
struct nicvf_work rx_mode_work;
/* spinlock to protect workqueue arguments from concurrent access */
spinlock_t rx_mode_wq_lock;
+ /* workqueue for handling kernel ndo_set_rx_mode() calls */
+ struct workqueue_struct *nicvf_rx_mode_wq;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 88f8a8fa93cd..abf24e7dff2d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -68,9 +68,6 @@ module_param(cpi_alg, int, 0444);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
-/* workqueue for handling kernel ndo_set_rx_mode() calls */
-static struct workqueue_struct *nicvf_rx_mode_wq;
-
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -1311,6 +1308,9 @@ int nicvf_stop(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
+ /* wait till all queued set_rx_mode tasks completes */
+ drain_workqueue(nic->nicvf_rx_mode_wq);
+
mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
nicvf_send_msg_to_pf(nic, &mbx);
@@ -1418,6 +1418,9 @@ int nicvf_open(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
+ /* wait till all queued set_rx_mode tasks completes if any */
+ drain_workqueue(nic->nicvf_rx_mode_wq);
+
netif_carrier_off(netdev);
err = nicvf_register_misc_interrupt(nic);
@@ -1973,7 +1976,7 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
{
struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
- work.work);
+ work);
struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
u8 mode;
struct xcast_addr_list *mc;
@@ -2030,7 +2033,7 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
kfree(nic->rx_mode_work.mc);
nic->rx_mode_work.mc = mc_list;
nic->rx_mode_work.mode = mode;
- queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+ queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
spin_unlock(&nic->rx_mode_wq_lock);
}
@@ -2187,7 +2190,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&nic->reset_task, nicvf_reset_task);
- INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+ nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
+ WQ_MEM_RECLAIM,
+ nic->vf_id);
+ INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
spin_lock_init(&nic->rx_mode_wq_lock);
err = register_netdev(netdev);
@@ -2228,13 +2234,15 @@ static void nicvf_remove(struct pci_dev *pdev)
nic = netdev_priv(netdev);
pnetdev = nic->pnicvf->netdev;
- cancel_delayed_work_sync(&nic->rx_mode_work.work);
-
/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
*/
if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
unregister_netdev(pnetdev);
+ if (nic->nicvf_rx_mode_wq) {
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
+ nic->nicvf_rx_mode_wq = NULL;
+ }
nicvf_unregister_interrupts(nic);
pci_set_drvdata(pdev, NULL);
if (nic->drv_stats)
@@ -2261,17 +2269,11 @@ static struct pci_driver nicvf_driver = {
static int __init nicvf_init_module(void)
{
pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
- nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
- WQ_MEM_RECLAIM);
return pci_register_driver(&nicvf_driver);
}
static void __exit nicvf_cleanup_module(void)
{
- if (nicvf_rx_mode_wq) {
- destroy_workqueue(nicvf_rx_mode_wq);
- nicvf_rx_mode_wq = NULL;
- }
pci_unregister_driver(&nicvf_driver);
}
--
2.17.2
The rx_set_mode invokes number of messages to be send to PF for receive
mode configuration. In case if there any issues we need to stop sending
messages and release allocated memory.
This commit is to implement check of nicvf_msg_send_to_pf() result.
Signed-off-by: Vadim Lomovtsev <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 19b58fc3ca41..45f06504a61b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1953,7 +1953,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
/* flush DMAC filters and reset RX mode */
mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
- nicvf_send_msg_to_pf(nic, &mbx);
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
if (mode & BGX_XCAST_MCAST_FILTER) {
/* once enabling filtering, we need to signal to PF to add
@@ -1961,7 +1962,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
*/
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
mbx.xcast.data.mac = 0;
- nicvf_send_msg_to_pf(nic, &mbx);
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
}
/* check if we have any specific MACs to be added to PF DMAC filter */
@@ -1970,9 +1972,9 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
for (idx = 0; idx < mc_addrs->count; idx++) {
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
mbx.xcast.data.mac = mc_addrs->mc[idx];
- nicvf_send_msg_to_pf(nic, &mbx);
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
}
- kfree(mc_addrs);
}
/* and finally set rx mode for PF accordingly */
@@ -1980,6 +1982,8 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
mbx.xcast.data.mode = mode;
nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+ kfree(mc_addrs);
}
static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
--
2.17.2
sorry for occasionally reply to old thread.
On Wed, Feb 20, 2019 at 11:02:42AM +0000, Vadim Lomovtsev wrote:
> The ThunderX CN88XX NIC Virtual Function driver uses mailbox interface
> to communicate to physical function driver. Each of VF has it's own pair
> of mailbox registers to read from and write to. The mailbox registers
> has no protection from possible races, so it has to be implemented
> at software side.
>
> After long term testing by loop of 'ip link set <ifname> up/down'
> command it was found that there are two possible scenarios when
> race condition appears:
> 1. VF receives link change message from PF and VF send RX mode
> configuration message to PF in the same time from separate thread.
> 2. PF receives RX mode configuration from VF and in the same time,
> in separate thread PF detects link status change and sends appropriate
> message to particular VF.
>
> Both cases leads to mailbox data to be rewritten, NIC VF messaging control
> data to be updated incorrectly and communication sequence gets broken.
>
> This patch series is to address race condition with VF & PF communication.
>
> Changes:
> v1 -> v2
> - 0000: correct typo in cover letter subject: 'betwen' -> 'between';
> - move link state polling request task from pf to vf
> instead of cheking status of mailbox irq;
> v2 -> v3
> - 0003: change return type of nicvf_send_cfg_done() function
> from int to void;
> - 0007: update subject and remove unused variable 'netdev'
> from nicvf_link_status_check_task() function;
>
> Vadim Lomovtsev (8):
> net: thunderx: correct typo in macro name
> net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs
> to private for each of them.
> net: thunderx: make CFG_DONE message to run through generic send-ack
> sequence
> net: thunderx: add nicvf_send_msg_to_pf result check for
> set_rx_mode_task
> net: thunderx: rework xcast message structure to make it fit into 64
> bit
> net: thunderx: add mutex to protect mailbox from concurrent calls for
> same VF
> net: thunderx: add LINK_CHANGE message handler at nicpf
> net: thunderx: remove link change polling code and info from nicpf
>
> drivers/net/ethernet/cavium/thunder/nic.h | 14 +-
> .../net/ethernet/cavium/thunder/nic_main.c | 149 ++++++------------
> .../net/ethernet/cavium/thunder/nicvf_main.c | 130 ++++++++++-----
> .../net/ethernet/cavium/thunder/thunder_bgx.c | 2 +-
> .../net/ethernet/cavium/thunder/thunder_bgx.h | 2 +-
> 5 files changed, 144 insertions(+), 153 deletions(-)
>
> --
> 2.17.2
From: Vadim Lomovtsev <[email protected]>
Date: Wed, 20 Feb 2019 11:02:42 +0000
> The ThunderX CN88XX NIC Virtual Function driver uses mailbox interface
> to communicate to physical function driver. Each of VF has it's own pair
> of mailbox registers to read from and write to. The mailbox registers
> has no protection from possible races, so it has to be implemented
> at software side.
>
> After long term testing by loop of 'ip link set <ifname> up/down'
> command it was found that there are two possible scenarios when
> race condition appears:
> 1. VF receives link change message from PF and VF send RX mode
> configuration message to PF in the same time from separate thread.
> 2. PF receives RX mode configuration from VF and in the same time,
> in separate thread PF detects link status change and sends appropriate
> message to particular VF.
>
> Both cases leads to mailbox data to be rewritten, NIC VF messaging control
> data to be updated incorrectly and communication sequence gets broken.
>
> This patch series is to address race condition with VF & PF communication.
>
> Changes:
> v1 -> v2
> - 0000: correct typo in cover letter subject: 'betwen' -> 'between';
> - move link state polling request task from pf to vf
> instead of cheking status of mailbox irq;
> v2 -> v3
> - 0003: change return type of nicvf_send_cfg_done() function
> from int to void;
> - 0007: update subject and remove unused variable 'netdev'
> from nicvf_link_status_check_task() function;
Series applied, thanks.