2015-08-28 15:10:22

by Aleksey Makarov

Subject: [PATCH net-next 1/8] net: thunderx: fix MAINTAINERS

From: Aleksey Makarov <[email protected]>

The liquidio and thunder drivers have different maintainers.

Signed-off-by: Aleksey Makarov <[email protected]>
---
MAINTAINERS | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4e6dcb6..43cf79e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -928,7 +928,7 @@ M: Sunil Goutham <[email protected]>
M: Robert Richter <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Supported
-F: drivers/net/ethernet/cavium/
+F: drivers/net/ethernet/cavium/thunder/

ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE
M: Alexander Shiyan <[email protected]>
@@ -2543,7 +2543,6 @@ M: Raghu Vatsavayi <[email protected]>
L: [email protected]
W: http://www.cavium.com
S: Supported
-F: drivers/net/ethernet/cavium/
F: drivers/net/ethernet/cavium/liquidio/

CC2520 IEEE-802.15.4 RADIO DRIVER
--
2.5.0


2015-08-28 15:10:30

by Aleksey Makarov

Subject: [PATCH net-next 2/8] net: thunderx: Add receive error stats reporting via ethtool

From: Sunil Goutham <[email protected]>

Added ethtool support to dump receive packet error statistics reported
in the CQE. Also made some small fixes, such as dropping erroneous
packets in the receive handler instead of passing them up the stack.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
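For reference, the ethtool stat tables in this patch map each stat name
to a u64 slot within the stats struct via offsetof(). Below is a minimal
userspace sketch of that table pattern; the struct and field names are
hypothetical stand-ins, not the driver's exact definitions. Once
exported, such counters show up in 'ethtool -S <iface>' output.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for struct nicvf_hw_stats */
struct hw_stats {
	uint64_t rx_bytes;
	uint64_t rx_fcs_errs;
	uint64_t rx_jabber_errs;
};

struct stat_desc {
	const char *name;
	unsigned int index;	/* u64 slot within the struct */
};

#define HW_STAT(field) \
	{ #field, offsetof(struct hw_stats, field) / sizeof(uint64_t) }

static const struct stat_desc hw_stat_descs[] = {
	HW_STAT(rx_bytes),
	HW_STAT(rx_fcs_errs),
	HW_STAT(rx_jabber_errs),
};

int main(void)
{
	struct hw_stats stats = { 123456, 2, 1 };
	size_t i;

	/* Same trick as nicvf_get_ethtool_stats(): treat the struct as
	 * an array of u64 and index it by the table's slot number. */
	for (i = 0; i < sizeof(hw_stat_descs) / sizeof(hw_stat_descs[0]); i++)
		printf("%s: %llu\n", hw_stat_descs[i].name,
		       (unsigned long long)((uint64_t *)&stats)[hw_stat_descs[i].index]);
	return 0;
}
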
drivers/net/ethernet/cavium/thunder/nic.h | 36 +++++++--
.../net/ethernet/cavium/thunder/nicvf_ethtool.c | 34 +++++++--
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 26 ++++---
drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 86 +++++++---------------
drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 41 -----------
5 files changed, 103 insertions(+), 120 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 8aee250..58adfd6 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -190,10 +190,10 @@ enum tx_stats_reg_offset {
};

struct nicvf_hw_stats {
- u64 rx_bytes_ok;
- u64 rx_ucast_frames_ok;
- u64 rx_bcast_frames_ok;
- u64 rx_mcast_frames_ok;
+ u64 rx_bytes;
+ u64 rx_ucast_frames;
+ u64 rx_bcast_frames;
+ u64 rx_mcast_frames;
u64 rx_fcs_errors;
u64 rx_l2_errors;
u64 rx_drop_red;
@@ -204,6 +204,31 @@ struct nicvf_hw_stats {
u64 rx_drop_mcast;
u64 rx_drop_l3_bcast;
u64 rx_drop_l3_mcast;
+ u64 rx_bgx_truncated_pkts;
+ u64 rx_jabber_errs;
+ u64 rx_fcs_errs;
+ u64 rx_bgx_errs;
+ u64 rx_prel2_errs;
+ u64 rx_l2_hdr_malformed;
+ u64 rx_oversize;
+ u64 rx_undersize;
+ u64 rx_l2_len_mismatch;
+ u64 rx_l2_pclp;
+ u64 rx_ip_ver_errs;
+ u64 rx_ip_csum_errs;
+ u64 rx_ip_hdr_malformed;
+ u64 rx_ip_payload_malformed;
+ u64 rx_ip_ttl_errs;
+ u64 rx_l3_pclp;
+ u64 rx_l4_malformed;
+ u64 rx_l4_csum_errs;
+ u64 rx_udp_len_errs;
+ u64 rx_l4_port_errs;
+ u64 rx_tcp_flag_errs;
+ u64 rx_tcp_offset_errs;
+ u64 rx_l4_pclp;
+ u64 rx_truncated_pkts;
+
u64 tx_bytes_ok;
u64 tx_ucast_frames_ok;
u64 tx_bcast_frames_ok;
@@ -222,6 +247,7 @@ struct nicvf_drv_stats {
u64 rx_frames_1518;
u64 rx_frames_jumbo;
u64 rx_drops;
+
/* Tx */
u64 tx_frames_ok;
u64 tx_drops;
@@ -257,7 +283,7 @@ struct nicvf {
u32 cq_coalesce_usecs;

u32 msg_enable;
- struct nicvf_hw_stats stats;
+ struct nicvf_hw_stats hw_stats;
struct nicvf_drv_stats drv_stats;
struct bgx_stats bgx_stats;
struct work_struct reset_task;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index a4228e6..a961aa3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -35,10 +35,10 @@ struct nicvf_stat {
}

static const struct nicvf_stat nicvf_hw_stats[] = {
- NICVF_HW_STAT(rx_bytes_ok),
- NICVF_HW_STAT(rx_ucast_frames_ok),
- NICVF_HW_STAT(rx_bcast_frames_ok),
- NICVF_HW_STAT(rx_mcast_frames_ok),
+ NICVF_HW_STAT(rx_bytes),
+ NICVF_HW_STAT(rx_ucast_frames),
+ NICVF_HW_STAT(rx_bcast_frames),
+ NICVF_HW_STAT(rx_mcast_frames),
NICVF_HW_STAT(rx_fcs_errors),
NICVF_HW_STAT(rx_l2_errors),
NICVF_HW_STAT(rx_drop_red),
@@ -49,6 +49,30 @@ static const struct nicvf_stat nicvf_hw_stats[] = {
NICVF_HW_STAT(rx_drop_mcast),
NICVF_HW_STAT(rx_drop_l3_bcast),
NICVF_HW_STAT(rx_drop_l3_mcast),
+ NICVF_HW_STAT(rx_bgx_truncated_pkts),
+ NICVF_HW_STAT(rx_jabber_errs),
+ NICVF_HW_STAT(rx_fcs_errs),
+ NICVF_HW_STAT(rx_bgx_errs),
+ NICVF_HW_STAT(rx_prel2_errs),
+ NICVF_HW_STAT(rx_l2_hdr_malformed),
+ NICVF_HW_STAT(rx_oversize),
+ NICVF_HW_STAT(rx_undersize),
+ NICVF_HW_STAT(rx_l2_len_mismatch),
+ NICVF_HW_STAT(rx_l2_pclp),
+ NICVF_HW_STAT(rx_ip_ver_errs),
+ NICVF_HW_STAT(rx_ip_csum_errs),
+ NICVF_HW_STAT(rx_ip_hdr_malformed),
+ NICVF_HW_STAT(rx_ip_payload_malformed),
+ NICVF_HW_STAT(rx_ip_ttl_errs),
+ NICVF_HW_STAT(rx_l3_pclp),
+ NICVF_HW_STAT(rx_l4_malformed),
+ NICVF_HW_STAT(rx_l4_csum_errs),
+ NICVF_HW_STAT(rx_udp_len_errs),
+ NICVF_HW_STAT(rx_l4_port_errs),
+ NICVF_HW_STAT(rx_tcp_flag_errs),
+ NICVF_HW_STAT(rx_tcp_offset_errs),
+ NICVF_HW_STAT(rx_l4_pclp),
+ NICVF_HW_STAT(rx_truncated_pkts),
NICVF_HW_STAT(tx_bytes_ok),
NICVF_HW_STAT(tx_ucast_frames_ok),
NICVF_HW_STAT(tx_bcast_frames_ok),
@@ -195,7 +219,7 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev,
nicvf_update_lmac_stats(nic);

for (stat = 0; stat < nicvf_n_hw_stats; stat++)
- *(data++) = ((u64 *)&nic->stats)
+ *(data++) = ((u64 *)&nic->hw_stats)
[nicvf_hw_stats[stat].index];
for (stat = 0; stat < nicvf_n_drv_stats; stat++)
*(data++) = ((u64 *)&nic->drv_stats)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 3b90afb..670ff9b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -456,6 +456,12 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
skb->data, skb->len, true);
}

+ /* If error packet, drop it here */
+ if (err) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
nicvf_set_rx_frame_cnt(nic, skb);

skb_record_rx_queue(skb, cqe_rx->rq_idx);
@@ -1118,7 +1124,7 @@ void nicvf_update_lmac_stats(struct nicvf *nic)
void nicvf_update_stats(struct nicvf *nic)
{
int qidx;
- struct nicvf_hw_stats *stats = &nic->stats;
+ struct nicvf_hw_stats *stats = &nic->hw_stats;
struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
struct queue_set *qs = nic->qs;

@@ -1127,14 +1133,16 @@ void nicvf_update_stats(struct nicvf *nic)
#define GET_TX_STATS(reg) \
nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

- stats->rx_bytes_ok = GET_RX_STATS(RX_OCTS);
- stats->rx_ucast_frames_ok = GET_RX_STATS(RX_UCAST);
- stats->rx_bcast_frames_ok = GET_RX_STATS(RX_BCAST);
- stats->rx_mcast_frames_ok = GET_RX_STATS(RX_MCAST);
+ stats->rx_bytes = GET_RX_STATS(RX_OCTS);
+ stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
+ stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
+ stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
stats->rx_drop_red = GET_RX_STATS(RX_RED);
+ stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
+ stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
@@ -1146,9 +1154,6 @@ void nicvf_update_stats(struct nicvf *nic)
stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
stats->tx_drops = GET_TX_STATS(TX_DROP);

- drv_stats->rx_frames_ok = stats->rx_ucast_frames_ok +
- stats->rx_bcast_frames_ok +
- stats->rx_mcast_frames_ok;
drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
stats->tx_bcast_frames_ok +
stats->tx_mcast_frames_ok;
@@ -1167,14 +1172,15 @@ static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
struct nicvf *nic = netdev_priv(netdev);
- struct nicvf_hw_stats *hw_stats = &nic->stats;
+ struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
struct nicvf_drv_stats *drv_stats = &nic->drv_stats;

nicvf_update_stats(nic);

- stats->rx_bytes = hw_stats->rx_bytes_ok;
+ stats->rx_bytes = hw_stats->rx_bytes;
stats->rx_packets = drv_stats->rx_frames_ok;
stats->rx_dropped = drv_stats->rx_drops;
+ stats->multicast = hw_stats->rx_mcast_frames;

stats->tx_bytes = hw_stats->tx_bytes_ok;
stats->tx_packets = drv_stats->tx_frames_ok;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index ca4240a..4fc40d83 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1371,10 +1371,11 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
int nicvf_check_cqe_rx_errs(struct nicvf *nic,
struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
{
- struct cmp_queue_stats *stats = &cq->stats;
+ struct nicvf_hw_stats *stats = &nic->hw_stats;
+ struct nicvf_drv_stats *drv_stats = &nic->drv_stats;

if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
- stats->rx.errop.good++;
+ drv_stats->rx_frames_ok++;
return 0;
}

@@ -1384,111 +1385,78 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic,
nic->netdev->name,
cqe_rx->err_level, cqe_rx->err_opcode);

- switch (cqe_rx->err_level) {
- case CQ_ERRLVL_MAC:
- stats->rx.errlvl.mac_errs++;
- break;
- case CQ_ERRLVL_L2:
- stats->rx.errlvl.l2_errs++;
- break;
- case CQ_ERRLVL_L3:
- stats->rx.errlvl.l3_errs++;
- break;
- case CQ_ERRLVL_L4:
- stats->rx.errlvl.l4_errs++;
- break;
- }
-
switch (cqe_rx->err_opcode) {
case CQ_RX_ERROP_RE_PARTIAL:
- stats->rx.errop.partial_pkts++;
+ stats->rx_bgx_truncated_pkts++;
break;
case CQ_RX_ERROP_RE_JABBER:
- stats->rx.errop.jabber_errs++;
+ stats->rx_jabber_errs++;
break;
case CQ_RX_ERROP_RE_FCS:
- stats->rx.errop.fcs_errs++;
- break;
- case CQ_RX_ERROP_RE_TERMINATE:
- stats->rx.errop.terminate_errs++;
+ stats->rx_fcs_errs++;
break;
case CQ_RX_ERROP_RE_RX_CTL:
- stats->rx.errop.bgx_rx_errs++;
+ stats->rx_bgx_errs++;
break;
case CQ_RX_ERROP_PREL2_ERR:
- stats->rx.errop.prel2_errs++;
- break;
- case CQ_RX_ERROP_L2_FRAGMENT:
- stats->rx.errop.l2_frags++;
- break;
- case CQ_RX_ERROP_L2_OVERRUN:
- stats->rx.errop.l2_overruns++;
- break;
- case CQ_RX_ERROP_L2_PFCS:
- stats->rx.errop.l2_pfcs++;
- break;
- case CQ_RX_ERROP_L2_PUNY:
- stats->rx.errop.l2_puny++;
+ stats->rx_prel2_errs++;
break;
case CQ_RX_ERROP_L2_MAL:
- stats->rx.errop.l2_hdr_malformed++;
+ stats->rx_l2_hdr_malformed++;
break;
case CQ_RX_ERROP_L2_OVERSIZE:
- stats->rx.errop.l2_oversize++;
+ stats->rx_oversize++;
break;
case CQ_RX_ERROP_L2_UNDERSIZE:
- stats->rx.errop.l2_undersize++;
+ stats->rx_undersize++;
break;
case CQ_RX_ERROP_L2_LENMISM:
- stats->rx.errop.l2_len_mismatch++;
+ stats->rx_l2_len_mismatch++;
break;
case CQ_RX_ERROP_L2_PCLP:
- stats->rx.errop.l2_pclp++;
+ stats->rx_l2_pclp++;
break;
case CQ_RX_ERROP_IP_NOT:
- stats->rx.errop.non_ip++;
+ stats->rx_ip_ver_errs++;
break;
case CQ_RX_ERROP_IP_CSUM_ERR:
- stats->rx.errop.ip_csum_err++;
+ stats->rx_ip_csum_errs++;
break;
case CQ_RX_ERROP_IP_MAL:
- stats->rx.errop.ip_hdr_malformed++;
+ stats->rx_ip_hdr_malformed++;
break;
case CQ_RX_ERROP_IP_MALD:
- stats->rx.errop.ip_payload_malformed++;
+ stats->rx_ip_payload_malformed++;
break;
case CQ_RX_ERROP_IP_HOP:
- stats->rx.errop.ip_hop_errs++;
- break;
- case CQ_RX_ERROP_L3_ICRC:
- stats->rx.errop.l3_icrc_errs++;
+ stats->rx_ip_ttl_errs++;
break;
case CQ_RX_ERROP_L3_PCLP:
- stats->rx.errop.l3_pclp++;
+ stats->rx_l3_pclp++;
break;
case CQ_RX_ERROP_L4_MAL:
- stats->rx.errop.l4_malformed++;
+ stats->rx_l4_malformed++;
break;
case CQ_RX_ERROP_L4_CHK:
- stats->rx.errop.l4_csum_errs++;
+ stats->rx_l4_csum_errs++;
break;
case CQ_RX_ERROP_UDP_LEN:
- stats->rx.errop.udp_len_err++;
+ stats->rx_udp_len_errs++;
break;
case CQ_RX_ERROP_L4_PORT:
- stats->rx.errop.bad_l4_port++;
+ stats->rx_l4_port_errs++;
break;
case CQ_RX_ERROP_TCP_FLAG:
- stats->rx.errop.bad_tcp_flag++;
+ stats->rx_tcp_flag_errs++;
break;
case CQ_RX_ERROP_TCP_OFFSET:
- stats->rx.errop.tcp_offset_errs++;
+ stats->rx_tcp_offset_errs++;
break;
case CQ_RX_ERROP_L4_PCLP:
- stats->rx.errop.l4_pclp++;
+ stats->rx_l4_pclp++;
break;
case CQ_RX_ERROP_RBDR_TRUNC:
- stats->rx.errop.pkt_truncated++;
+ stats->rx_truncated_pkts++;
break;
}

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index f0937b7..dc73872 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -181,47 +181,6 @@ enum CQ_TX_ERROP_E {
};

struct cmp_queue_stats {
- struct rx_stats {
- struct {
- u64 mac_errs;
- u64 l2_errs;
- u64 l3_errs;
- u64 l4_errs;
- } errlvl;
- struct {
- u64 good;
- u64 partial_pkts;
- u64 jabber_errs;
- u64 fcs_errs;
- u64 terminate_errs;
- u64 bgx_rx_errs;
- u64 prel2_errs;
- u64 l2_frags;
- u64 l2_overruns;
- u64 l2_pfcs;
- u64 l2_puny;
- u64 l2_hdr_malformed;
- u64 l2_oversize;
- u64 l2_undersize;
- u64 l2_len_mismatch;
- u64 l2_pclp;
- u64 non_ip;
- u64 ip_csum_err;
- u64 ip_hdr_malformed;
- u64 ip_payload_malformed;
- u64 ip_hop_errs;
- u64 l3_icrc_errs;
- u64 l3_pclp;
- u64 l4_malformed;
- u64 l4_csum_errs;
- u64 udp_len_err;
- u64 bad_l4_port;
- u64 bad_tcp_flag;
- u64 tcp_offset_errs;
- u64 l4_pclp;
- u64 pkt_truncated;
- } errop;
- } rx;
struct tx_stats {
u64 good;
u64 desc_fault;
--
2.5.0

2015-08-28 15:10:35

by Aleksey Makarov

Subject: [PATCH net-next 3/8] net: thunderx: mailboxes: remove code duplication

From: Sunil Goutham <[email protected]>

Use the nicvf_send_msg_to_pf() function in the mailbox code.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
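For reference, the send-and-wait semantics that nicvf_send_msg_to_pf()
provides (and which the removed open-coded loops duplicated) look
roughly like the userspace sketch below; the flag names and timeout
values are illustrative, not the driver's exact ones.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile bool pf_acked, pf_nacked;

/* Stand-in for nicvf_write_to_mbx(); a real VF writes the message into
 * the mailbox registers and the PF's IRQ handler posts the reply. */
static void write_to_mbx(int msg)
{
	(void)msg;
	pf_acked = true;	/* simulate an immediate ACK */
}

static int send_msg_to_pf(int msg)
{
	int timeout = 5000, sleep_ms = 20;

	pf_acked = false;
	pf_nacked = false;
	write_to_mbx(msg);

	/* Poll until the PF ACKs/NACKs or the timeout expires */
	while (!pf_acked && !pf_nacked) {
		usleep(sleep_ms * 1000);
		timeout -= sleep_ms;
		if (timeout <= 0)
			return -1;	/* PF did not respond */
	}
	return pf_nacked ? -1 : 0;
}

int main(void)
{
	printf("send: %d\n", send_msg_to_pf(0x01));
	return 0;
}
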
drivers/net/ethernet/cavium/thunder/nic.h | 3 +-
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 44 ++++++------------------
2 files changed, 11 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 58adfd6..a83f567 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -295,10 +295,9 @@ struct nicvf {
char irq_name[NIC_VF_MSIX_VECTORS][20];
bool irq_allocated[NIC_VF_MSIX_VECTORS];

- bool pf_ready_to_rcv_msg;
+ /* VF <-> PF mailbox communication */
bool pf_acked;
bool pf_nacked;
- bool bgx_stats_acked;
bool set_mac_pending;
} ____cacheline_aligned_in_smp;

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 670ff9b..d4ad36e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -105,7 +105,6 @@ u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
}

/* VF -> PF mailbox communication */
-
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
u64 *msg = (u64 *)mbx;
@@ -147,26 +146,15 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
*/
static int nicvf_check_pf_ready(struct nicvf *nic)
{
- int timeout = 5000, sleep = 20;
union nic_mbx mbx = {};

mbx.msg.msg = NIC_MBOX_MSG_READY;
-
- nic->pf_ready_to_rcv_msg = false;
-
- nicvf_write_to_mbx(nic, &mbx);
-
- while (!nic->pf_ready_to_rcv_msg) {
- msleep(sleep);
- if (nic->pf_ready_to_rcv_msg)
- break;
- timeout -= sleep;
- if (!timeout) {
- netdev_err(nic->netdev,
- "PF didn't respond to READY msg\n");
- return 0;
- }
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to READY msg\n");
+ return 0;
}
+
return 1;
}

@@ -197,7 +185,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
switch (mbx.msg.msg) {
case NIC_MBOX_MSG_READY:
- nic->pf_ready_to_rcv_msg = true;
+ nic->pf_acked = true;
nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
nic->node = mbx.nic_cfg.node_id;
@@ -221,7 +209,6 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
case NIC_MBOX_MSG_BGX_STATS:
nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
nic->pf_acked = true;
- nic->bgx_stats_acked = true;
break;
case NIC_MBOX_MSG_BGX_LINK_CHANGE:
nic->pf_acked = true;
@@ -1083,7 +1070,6 @@ void nicvf_update_lmac_stats(struct nicvf *nic)
{
int stat = 0;
union nic_mbx mbx = {};
- int timeout;

if (!netif_running(nic->netdev))
return;
@@ -1093,14 +1079,9 @@ void nicvf_update_lmac_stats(struct nicvf *nic)
/* Rx stats */
mbx.bgx_stats.rx = 1;
while (stat < BGX_RX_STATS_COUNT) {
- nic->bgx_stats_acked = 0;
mbx.bgx_stats.idx = stat;
- nicvf_send_msg_to_pf(nic, &mbx);
- timeout = 0;
- while ((!nic->bgx_stats_acked) && (timeout < 10)) {
- msleep(2);
- timeout++;
- }
+ if (nicvf_send_msg_to_pf(nic, &mbx))
+ return;
stat++;
}

@@ -1109,14 +1090,9 @@ void nicvf_update_lmac_stats(struct nicvf *nic)
/* Tx stats */
mbx.bgx_stats.rx = 0;
while (stat < BGX_TX_STATS_COUNT) {
- nic->bgx_stats_acked = 0;
mbx.bgx_stats.idx = stat;
- nicvf_send_msg_to_pf(nic, &mbx);
- timeout = 0;
- while ((!nic->bgx_stats_acked) && (timeout < 10)) {
- msleep(2);
- timeout++;
- }
+ if (nicvf_send_msg_to_pf(nic, &mbx))
+ return;
stat++;
}
}
--
2.5.0

2015-08-28 15:10:33

by Aleksey Makarov

Subject: [PATCH net-next 4/8] net: thunderx: Receive hashing HW offload support

From: Sunil Goutham <[email protected]>

Add support for receive-hashing HW offload using the RSS_ALG and
RSS_TAG fields of the CQE_RX descriptor. Also remove the dependency on
a minimum receive queue count for configuring RSS, so that a hash is
always generated.

This hash is used by the RPS logic to distribute flows across multiple
CPUs. The offload can be disabled via ethtool.

Signed-off-by: Robert Richter <[email protected]>
Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
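For reference, once skb_set_hash() records the CQE_RX hash, RPS steers
the flow by reducing that 32-bit hash into the configured CPU map, so
packets of one flow stay on one CPU. The sketch below is an assumed
simplification of the kernel's RPS lookup, using a reciprocal_scale()-
style reduction; the function and variable names are illustrative.

#include <stdio.h>
#include <stdint.h>

/* Assumed simplification: RPS picks a CPU from the queue's CPU map by
 * reducing the 32-bit flow hash, so one flow sticks to one CPU. */
static int pick_cpu(uint32_t rx_hash, const int *cpu_map, int map_len)
{
	/* reciprocal_scale()-style reduction of hash to [0, map_len) */
	return cpu_map[(uint32_t)(((uint64_t)rx_hash * map_len) >> 32)];
}

int main(void)
{
	const int cpu_map[] = { 0, 1, 2, 3 };
	uint32_t flow_hashes[] = { 0x1badcafe, 0xdeadbeef, 0x00c0ffee };

	for (int i = 0; i < 3; i++)
		printf("hash 0x%08x -> cpu %d\n", flow_hashes[i],
		       pick_cpu(flow_hashes[i], cpu_map, 4));
	return 0;
}
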
.../net/ethernet/cavium/thunder/nicvf_ethtool.c | 14 ++++-----
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 35 ++++++++++++++++++++--
2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index a961aa3..1eec2cd 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -525,17 +525,15 @@ static int nicvf_set_rxfh(struct net_device *dev, const u32 *indir,
struct nicvf_rss_info *rss = &nic->rss_info;
int idx;

- if ((nic->qs->rq_cnt <= 1) || (nic->cpi_alg != CPI_ALG_NONE)) {
- rss->enable = false;
- rss->hash_bits = 0;
- return -EIO;
- }
-
- /* We do not allow change in unsupported parameters */
if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;

- rss->enable = true;
+ if (!rss->enable) {
+ netdev_err(nic->netdev,
+ "RSS is disabled, cannot change settings\n");
+ return -EIO;
+ }
+
if (indir) {
for (idx = 0; idx < rss->rss_size; idx++)
rss->ind_tbl[idx] = indir[idx];
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index d4ad36e..afd8ad4 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -313,7 +313,7 @@ static int nicvf_rss_init(struct nicvf *nic)

nicvf_get_rss_size(nic);

- if ((nic->qs->rq_cnt <= 1) || (cpi_alg != CPI_ALG_NONE)) {
+ if (cpi_alg != CPI_ALG_NONE) {
rss->enable = false;
rss->hash_bits = 0;
return 0;
@@ -416,6 +416,34 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
}
}

+static inline void nicvf_set_rxhash(struct net_device *netdev,
+ struct cqe_rx_t *cqe_rx,
+ struct sk_buff *skb)
+{
+ u8 hash_type;
+ u32 hash;
+
+ if (!(netdev->features & NETIF_F_RXHASH))
+ return;
+
+ switch (cqe_rx->rss_alg) {
+ case RSS_ALG_TCP_IP:
+ case RSS_ALG_UDP_IP:
+ hash_type = PKT_HASH_TYPE_L4;
+ hash = cqe_rx->rss_tag;
+ break;
+ case RSS_ALG_IP:
+ hash_type = PKT_HASH_TYPE_L3;
+ hash = cqe_rx->rss_tag;
+ break;
+ default:
+ hash_type = PKT_HASH_TYPE_NONE;
+ hash = 0;
+ }
+
+ skb_set_hash(skb, hash, hash_type);
+}
+
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct napi_struct *napi,
struct cmp_queue *cq,
@@ -451,6 +479,8 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,

nicvf_set_rx_frame_cnt(nic, skb);

+ nicvf_set_rxhash(netdev, cqe_rx, skb);
+
skb_record_rx_queue(skb, cqe_rx->rq_idx);
if (netdev->hw_features & NETIF_F_RXCSUM) {
/* HW by default verifies TCP/UDP/SCTP checksums */
@@ -1272,7 +1302,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_free_netdev;

netdev->features |= (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
- NETIF_F_TSO | NETIF_F_GRO);
+ NETIF_F_TSO | NETIF_F_GRO | NETIF_F_RXHASH);
+
netdev->hw_features = netdev->features;

netdev->netdev_ops = &nicvf_netdev_ops;
--
2.5.0

2015-08-28 15:10:39

by Aleksey Makarov

Subject: [PATCH net-next 5/8] net: thunderx: Support for HW VLAN stripping

From: Sunil Goutham <[email protected]>

This patch configures the HW to strip the 802.1Q header, if present,
from received packets. The stripped VLAN ID and TCI information is
passed on to software via CQE_RX. It also sets the netdev's
'vlan_features' so that other HW offload features can be used for
tagged packets.

This offload feature can be enabled or disabled via ethtool.

The network stack normally ignores RPS for 802.1Q packets, which leads
to low throughput. With this offload enabled, throughput for tagged
packets will be almost the same as for untagged packets.

Note: This patch doesn't enable HW VLAN insertion for transmitted
packets.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
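For reference, enabling or disabling stripping is a single
read-modify-write of bit 25 in NIC_QSET_RQ_GEN_CFG (the bit used in the
patch below), mirrored to any secondary Qsets. A minimal sketch of that
register update pattern, with the register access stubbed out:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define RQ_GEN_CFG_VLAN_STRIP	(1ULL << 25)	/* bit used in the patch */

static uint64_t rq_gen_cfg;	/* stand-in for the HW register */

static uint64_t reg_read(void)    { return rq_gen_cfg; }
static void reg_write(uint64_t v) { rq_gen_cfg = v; }

/* Read-modify-write, as nicvf_config_vlan_stripping() does */
static void config_vlan_stripping(bool enable)
{
	uint64_t cfg = reg_read();

	if (enable)
		cfg |= RQ_GEN_CFG_VLAN_STRIP;
	else
		cfg &= ~RQ_GEN_CFG_VLAN_STRIP;
	reg_write(cfg);
}

int main(void)
{
	config_vlan_stripping(true);
	printf("cfg = 0x%llx\n", (unsigned long long)rq_gen_cfg);
	config_vlan_stripping(false);
	printf("cfg = 0x%llx\n", (unsigned long long)rq_gen_cfg);
	return 0;
}
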
drivers/net/ethernet/cavium/thunder/nic_main.c | 4 ++++
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 28 +++++++++++++++++++---
drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 28 ++++++++++++++++++----
drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2 ++
4 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6e0c031..7dfec4a 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -329,6 +329,10 @@ static void nic_init_hw(struct nicpf *nic)

/* Timer config */
nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
+
+ /* Enable VLAN ethertype matching and stripping */
+ nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
+ (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
}

/* Channel parse index configuration */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index afd8ad4..de51828 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -10,6 +10,7 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
@@ -491,6 +492,11 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,

skb->protocol = eth_type_trans(skb, netdev);

+ /* Check for stripped VLAN */
+ if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ ntohs((__force __be16)cqe_rx->vlan_tci));
+
if (napi && (netdev->features & NETIF_F_GRO))
napi_gro_receive(napi, skb);
else
@@ -1220,6 +1226,18 @@ static void nicvf_reset_task(struct work_struct *work)
nic->netdev->trans_start = jiffies;
}

+static int nicvf_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ netdev_features_t changed = features ^ netdev->features;
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+ nicvf_config_vlan_stripping(nic, features);
+
+ return 0;
+}
+
static const struct net_device_ops nicvf_netdev_ops = {
.ndo_open = nicvf_open,
.ndo_stop = nicvf_stop,
@@ -1228,6 +1246,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_set_mac_address = nicvf_set_mac_address,
.ndo_get_stats64 = nicvf_get_stats64,
.ndo_tx_timeout = nicvf_tx_timeout,
+ .ndo_set_features = nicvf_set_features,
};

static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -1301,10 +1320,13 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_free_netdev;

- netdev->features |= (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
- NETIF_F_TSO | NETIF_F_GRO | NETIF_F_RXHASH);
+ netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+ NETIF_F_TSO | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXHASH);
+
+ netdev->features |= netdev->hw_features;

- netdev->hw_features = netdev->features;
+ netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;

netdev->netdev_ops = &nicvf_netdev_ops;
netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 4fc40d83..b294d67 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -475,6 +475,27 @@ static void nicvf_reclaim_rbdr(struct nicvf *nic,
return;
}

+void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
+{
+ u64 rq_cfg;
+ int sqs;
+
+ rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
+
+ /* Enable first VLAN stripping */
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ rq_cfg |= (1ULL << 25);
+ else
+ rq_cfg &= ~(1ULL << 25);
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+
+ /* Configure Secondary Qsets, if any */
+ for (sqs = 0; sqs < nic->sqs_count; sqs++)
+ if (nic->snicvf[sqs])
+ nicvf_queue_reg_write(nic->snicvf[sqs],
+ NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+}
+
/* Configures receive queue */
static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
int qidx, bool enable)
@@ -524,7 +545,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8);
nicvf_send_msg_to_pf(nic, &mbx);

- nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, qidx, 0x00);
+ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
+ if (!nic->sqs_mode)
+ nicvf_config_vlan_stripping(nic, nic->netdev->features);

/* Enable Receive queue */
rq_cfg.ena = 1;
@@ -961,9 +984,6 @@ nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,

/* Offload checksum calculation to HW */
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb->protocol != htons(ETH_P_IP))
- return;
-
hdr->csum_l3 = 1; /* Enable IP csum calculation */
hdr->l3_offset = skb_network_offset(skb);
hdr->l4_offset = skb_transport_offset(skb);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index dc73872..8b93dd6 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -306,6 +306,8 @@ struct queue_set {

#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)

+void nicvf_config_vlan_stripping(struct nicvf *nic,
+ netdev_features_t features);
int nicvf_set_qset_resources(struct nicvf *nic);
int nicvf_config_data_transfer(struct nicvf *nic, bool enable);
void nicvf_qset_config(struct nicvf *nic, bool enable);
--
2.5.0

2015-08-28 15:10:45

by Aleksey Makarov

Subject: [PATCH net-next 6/8] net: thunderx: Rework interrupt handler

From: Sunil Goutham <[email protected]>

Rework the interrupt handler to avoid checking the IRQ affinity of CQ
interrupts. Separate handlers are now registered for each IRQ,
including RBDR. Also, handlers are registered only for those
interrupts that are actually in use.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
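For reference, the key change is that each CQ IRQ's dev_id is now the
per-queue nicvf_cq_poll, so a handler no longer reads a global status
and scans every queue: it only disables its own interrupt, schedules
NAPI, and clears its bit. A minimal userspace sketch of that per-queue
cookie pattern, with the IRQ plumbing stubbed out:

#include <stdio.h>

struct cq_poll {
	int cq_idx;
	/* the napi_struct lives here in the driver */
};

/* Per-CQ handler: dev_id is the queue's own cq_poll, so there is no
 * need to loop over every queue looking for the one that fired. */
static void cq_intr_handler(void *dev_id)
{
	struct cq_poll *cq = dev_id;

	printf("CQ%d: disable intr, napi_schedule, clear intr\n",
	       cq->cq_idx);
}

int main(void)
{
	struct cq_poll polls[4] = { {0}, {1}, {2}, {3} };

	/* The driver does request_irq(vector, handler, 0, name,
	 * &polls[i]); here we just invoke each handler with its cookie. */
	for (int i = 0; i < 4; i++)
		cq_intr_handler(&polls[i]);
	return 0;
}
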
drivers/net/ethernet/cavium/thunder/nic.h | 1 +
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 172 ++++++++++++---------
drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2 +
3 files changed, 103 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index a83f567..89b997e 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -135,6 +135,7 @@
#define NICVF_TX_TIMEOUT (50 * HZ)

struct nicvf_cq_poll {
+ struct nicvf *nicvf;
u8 cq_idx; /* Completion queue index */
struct napi_struct napi;
};
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index de51828..2198f61 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -653,11 +653,20 @@ static void nicvf_handle_qs_err(unsigned long data)
nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}

+static inline void nicvf_dump_intr_status(struct nicvf *nic)
+{
+ if (netif_msg_intr(nic))
+ netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
+ nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+}
+
static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
{
struct nicvf *nic = (struct nicvf *)nicvf_irq;
u64 intr;

+ nicvf_dump_intr_status(nic);
+
intr = nicvf_reg_read(nic, NIC_VF_INT);
/* Check for spurious interrupt */
if (!(intr & NICVF_INTR_MBOX_MASK))
@@ -668,59 +677,58 @@ static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
return IRQ_HANDLED;
}

-static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq)
+static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
+{
+ struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
+ struct nicvf *nic = cq_poll->nicvf;
+ int qidx = cq_poll->cq_idx;
+
+ nicvf_dump_intr_status(nic);
+
+ /* Disable interrupts */
+ nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+
+ /* Schedule NAPI */
+ napi_schedule(&cq_poll->napi);
+
+ /* Clear interrupt */
+ nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
{
- u64 qidx, intr, clear_intr = 0;
- u64 cq_intr, rbdr_intr, qs_err_intr;
struct nicvf *nic = (struct nicvf *)nicvf_irq;
- struct queue_set *qs = nic->qs;
- struct nicvf_cq_poll *cq_poll = NULL;
+ u8 qidx;

- intr = nicvf_reg_read(nic, NIC_VF_INT);
- if (netif_msg_intr(nic))
- netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
- nic->netdev->name, intr);
-
- qs_err_intr = intr & NICVF_INTR_QS_ERR_MASK;
- if (qs_err_intr) {
- /* Disable Qset err interrupt and schedule softirq */
- nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
- tasklet_hi_schedule(&nic->qs_err_task);
- clear_intr |= qs_err_intr;
- }

- /* Disable interrupts and start polling */
- cq_intr = (intr & NICVF_INTR_CQ_MASK) >> NICVF_INTR_CQ_SHIFT;
- for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
- if (!(cq_intr & (1 << qidx)))
- continue;
- if (!nicvf_is_intr_enabled(nic, NICVF_INTR_CQ, qidx))
+ nicvf_dump_intr_status(nic);
+
+ /* Disable RBDR interrupt and schedule softirq */
+ for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
+ if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
continue;
+ nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
+ tasklet_hi_schedule(&nic->rbdr_task);
+ /* Clear interrupt */
+ nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
+ }

- nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
- clear_intr |= ((1 << qidx) << NICVF_INTR_CQ_SHIFT);
+ return IRQ_HANDLED;
+}

- cq_poll = nic->napi[qidx];
- /* Schedule NAPI */
- if (cq_poll)
- napi_schedule(&cq_poll->napi);
- }
+static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
+{
+ struct nicvf *nic = (struct nicvf *)nicvf_irq;

- /* Handle RBDR interrupts */
- rbdr_intr = (intr & NICVF_INTR_RBDR_MASK) >> NICVF_INTR_RBDR_SHIFT;
- if (rbdr_intr) {
- /* Disable RBDR interrupt and schedule softirq */
- for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
- if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
- continue;
- nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
- tasklet_hi_schedule(&nic->rbdr_task);
- clear_intr |= ((1 << qidx) << NICVF_INTR_RBDR_SHIFT);
- }
- }
+ nicvf_dump_intr_status(nic);
+
+ /* Disable Qset err interrupt and schedule softirq */
+ nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
+ tasklet_hi_schedule(&nic->qs_err_task);
+ nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

- /* Clear interrupts */
- nicvf_reg_write(nic, NIC_VF_INT, clear_intr);
return IRQ_HANDLED;
}

@@ -754,7 +762,7 @@ static void nicvf_disable_msix(struct nicvf *nic)

static int nicvf_register_interrupts(struct nicvf *nic)
{
- int irq, free, ret = 0;
+ int irq, ret = 0;
int vector;

for_each_cq_irq(irq)
@@ -769,44 +777,42 @@ static int nicvf_register_interrupts(struct nicvf *nic)
sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
nic->vf_id, irq - NICVF_INTR_ID_RBDR);

- /* Register all interrupts except mailbox */
- for (irq = 0; irq < NICVF_INTR_ID_SQ; irq++) {
+ /* Register CQ interrupts */
+ for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
vector = nic->msix_entries[irq].vector;
ret = request_irq(vector, nicvf_intr_handler,
- 0, nic->irq_name[irq], nic);
+ 0, nic->irq_name[irq], nic->napi[irq]);
if (ret)
- break;
+ goto err;
nic->irq_allocated[irq] = true;
}

- for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_MISC; irq++) {
+ /* Register RBDR interrupt */
+ for (irq = NICVF_INTR_ID_RBDR;
+ irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
vector = nic->msix_entries[irq].vector;
- ret = request_irq(vector, nicvf_intr_handler,
+ ret = request_irq(vector, nicvf_rbdr_intr_handler,
0, nic->irq_name[irq], nic);
if (ret)
- break;
+ goto err;
nic->irq_allocated[irq] = true;
}

+ /* Register QS error interrupt */
sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
"NICVF%d Qset error", nic->vf_id);
- if (!ret) {
- vector = nic->msix_entries[NICVF_INTR_ID_QS_ERR].vector;
- irq = NICVF_INTR_ID_QS_ERR;
- ret = request_irq(vector, nicvf_intr_handler,
- 0, nic->irq_name[irq], nic);
- if (!ret)
- nic->irq_allocated[irq] = true;
- }
+ irq = NICVF_INTR_ID_QS_ERR;
+ ret = request_irq(nic->msix_entries[irq].vector,
+ nicvf_qs_err_intr_handler,
+ 0, nic->irq_name[irq], nic);
+ if (!ret)
+ nic->irq_allocated[irq] = true;

- if (ret) {
- netdev_err(nic->netdev, "Request irq failed\n");
- for (free = 0; free < irq; free++)
- free_irq(nic->msix_entries[free].vector, nic);
- return ret;
- }
+err:
+ if (ret)
+ netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);

- return 0;
+ return ret;
}

static void nicvf_unregister_interrupts(struct nicvf *nic)
@@ -815,8 +821,14 @@ static void nicvf_unregister_interrupts(struct nicvf *nic)

/* Free registered interrupts */
for (irq = 0; irq < nic->num_vec; irq++) {
- if (nic->irq_allocated[irq])
+ if (!nic->irq_allocated[irq])
+ continue;
+
+ if (irq < NICVF_INTR_ID_SQ)
+ free_irq(nic->msix_entries[irq].vector, nic->napi[irq]);
+ else
free_irq(nic->msix_entries[irq].vector, nic);
+
nic->irq_allocated[irq] = false;
}

@@ -888,6 +900,20 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}

+static inline void nicvf_free_cq_poll(struct nicvf *nic)
+{
+ struct nicvf_cq_poll *cq_poll = NULL;
+ int qidx;
+
+ for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
+ cq_poll = nic->napi[qidx];
+ if (!cq_poll)
+ continue;
+ nic->napi[qidx] = NULL;
+ kfree(cq_poll);
+ }
+}
+
int nicvf_stop(struct net_device *netdev)
{
int irq, qidx;
@@ -922,7 +948,6 @@ int nicvf_stop(struct net_device *netdev)
cq_poll = nic->napi[qidx];
if (!cq_poll)
continue;
- nic->napi[qidx] = NULL;
napi_synchronize(&cq_poll->napi);
/* CQ intr is enabled while napi_complete,
* so disable it now
@@ -931,7 +956,6 @@ int nicvf_stop(struct net_device *netdev)
nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
napi_disable(&cq_poll->napi);
netif_napi_del(&cq_poll->napi);
- kfree(cq_poll);
}

netif_tx_disable(netdev);
@@ -947,6 +971,8 @@ int nicvf_stop(struct net_device *netdev)

nicvf_unregister_interrupts(nic);

+ nicvf_free_cq_poll(nic);
+
return 0;
}

@@ -973,6 +999,7 @@ int nicvf_open(struct net_device *netdev)
goto napi_del;
}
cq_poll->cq_idx = qidx;
+ cq_poll->nicvf = nic;
netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
NAPI_POLL_WEIGHT);
napi_enable(&cq_poll->napi);
@@ -1040,6 +1067,8 @@ int nicvf_open(struct net_device *netdev)
cleanup:
nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
nicvf_unregister_interrupts(nic);
+ tasklet_kill(&nic->qs_err_task);
+ tasklet_kill(&nic->rbdr_task);
napi_del:
for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
cq_poll = nic->napi[qidx];
@@ -1047,9 +1076,8 @@ napi_del:
continue;
napi_disable(&cq_poll->napi);
netif_napi_del(&cq_poll->napi);
- kfree(cq_poll);
- nic->napi[qidx] = NULL;
}
+ nicvf_free_cq_poll(nic);
return err;
}

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 8b93dd6..c2ce270 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -251,6 +251,8 @@ struct cmp_queue {
void *desc;
struct q_desc_mem dmem;
struct cmp_queue_stats stats;
+ int irq;
+ cpumask_t affinity_mask;
} ____cacheline_aligned_in_smp;

struct snd_queue {
--
2.5.0

2015-08-28 15:11:11

by Aleksey Makarov

Subject: [PATCH net-next 7/8] net: thunderx: Support for up to 96 queues for a VF

From: Sunil Goutham <[email protected]>

This patch adds support for handling multiple Qsets assigned to a
single VF, thereby increasing the number of queues from the earlier 8
to the maximum number of CPUs in the system, i.e. 48 queues on a
single-node system and 96 on a dual-node system. The user has no
option to choose which Qsets/VFs get merged; upon request from a VF,
the PF assigns the next free Qsets as secondary Qsets. To maintain
current behavior, the number of queues is kept at 8 by default and can
be increased via ethtool.

If the user wants to unbind the NICVF driver from a secondary Qset, it
should be done after tearing down the primary VF's interface.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
Signed-off-by: Robert Richter <[email protected]>
---
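For reference, the number of secondary Qsets follows from the requested
queue count: each Qset carries up to 8 completion queues, so the count
is rounded up to a multiple of 8 and everything beyond the primary Qset
becomes secondary Qsets. A worked sketch of that arithmetic, assuming
MAX_CMP_QUEUES_PER_QS is 8 as in the driver:

#include <stdio.h>

#define MAX_CMP_QUEUES_PER_QS	8	/* assumption matching the driver */

/* Same computation as nicvf_set_channels() below */
static int sqs_needed(int cqcount)
{
	if (cqcount <= MAX_CMP_QUEUES_PER_QS)
		return 0;
	/* round up, then drop the primary Qset */
	return (cqcount + MAX_CMP_QUEUES_PER_QS - 1) / MAX_CMP_QUEUES_PER_QS - 1;
}

int main(void)
{
	/* e.g. 48 queues -> 5 secondary Qsets, 96 -> 11 */
	int counts[] = { 8, 12, 48, 96 };

	for (int i = 0; i < 4; i++)
		printf("%2d CQs -> %d secondary Qset(s)\n",
		       counts[i], sqs_needed(counts[i]));
	return 0;
}
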
drivers/net/ethernet/cavium/thunder/nic.h | 42 ++++-
drivers/net/ethernet/cavium/thunder/nic_main.c | 173 +++++++++++++++--
.../net/ethernet/cavium/thunder/nicvf_ethtool.c | 136 +++++++++----
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 210 +++++++++++++++++++--
drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 32 +++-
5 files changed, 507 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 89b997e..35b2ee1 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -258,13 +258,23 @@ struct nicvf_drv_stats {
};

struct nicvf {
+ struct nicvf *pnicvf;
struct net_device *netdev;
struct pci_dev *pdev;
u8 vf_id;
u8 node;
- u8 tns_mode;
+ u8 tns_mode:1;
+ u8 sqs_mode:1;
u16 mtu;
struct queue_set *qs;
+#define MAX_SQS_PER_VF_SINGLE_NODE 5
+#define MAX_SQS_PER_VF 11
+ u8 sqs_id;
+ u8 sqs_count; /* Secondary Qset count */
+ struct nicvf *snicvf[MAX_SQS_PER_VF];
+ u8 rx_queues;
+ u8 tx_queues;
+ u8 max_queues;
void __iomem *reg_base;
bool link_up;
u8 duplex;
@@ -330,14 +340,19 @@ struct nicvf {
#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */
#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */
#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */
-#define NIC_MBOX_MSG_CFG_DONE 0x12 /* VF configuration done */
-#define NIC_MBOX_MSG_SHUTDOWN 0x13 /* VF is being shutdown */
+#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */
+#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */
+#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */
+#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */
+#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
+#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */

struct nic_cfg_msg {
u8 msg;
u8 vf_id;
- u8 tns_mode;
u8 node_id;
+ u8 tns_mode:1;
+ u8 sqs_mode:1;
u8 mac_addr[ETH_ALEN];
};

@@ -345,6 +360,7 @@ struct nic_cfg_msg {
struct qs_cfg_msg {
u8 msg;
u8 num;
+ u8 sqs_count;
u64 cfg;
};

@@ -361,6 +377,7 @@ struct sq_cfg_msg {
u8 msg;
u8 qs_num;
u8 sq_num;
+ bool sqs_mode;
u64 cfg;
};

@@ -420,6 +437,21 @@ struct bgx_link_status {
u32 speed;
};

+/* Get Extra Qset IDs */
+struct sqs_alloc {
+ u8 msg;
+ u8 vf_id;
+ u8 qs_count;
+};
+
+struct nicvf_ptr {
+ u8 msg;
+ u8 vf_id;
+ bool sqs_mode;
+ u8 sqs_id;
+ u64 nicvf;
+};
+
/* 128 bit shared memory between PF and each VF */
union nic_mbx {
struct { u8 msg; } msg;
@@ -434,6 +466,8 @@ union nic_mbx {
struct rss_cfg_msg rss_cfg;
struct bgx_stats_msg bgx_stats;
struct bgx_link_status link_status;
+ struct sqs_alloc sqs_alloc;
+ struct nicvf_ptr nicvf;
};

#define NIC_NODE_ID_MASK 0x03
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 7dfec4a..51f3048 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -28,6 +28,11 @@ struct nicpf {
u8 num_vf_en; /* No of VF enabled */
bool vf_enabled[MAX_NUM_VFS_SUPPORTED];
void __iomem *reg_base; /* Register start address */
+ u8 num_sqs_en; /* Secondary qsets enabled */
+ u64 nicvf[MAX_NUM_VFS_SUPPORTED];
+ u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
+ u8 pqs_vf[MAX_NUM_VFS_SUPPORTED];
+ bool sqs_used[MAX_NUM_VFS_SUPPORTED];
struct pkind_cfg pkind;
#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF))
#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
@@ -139,13 +144,15 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf)

mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;

- bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-
- mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
- if (mac)
- ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
+ if (vf < MAX_LMAC) {
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);

+ mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
+ if (mac)
+ ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
+ }
+ mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
mbx.nic_cfg.node_id = nic->node;
nic_send_msg_to_vf(nic, vf, &mbx);
}
@@ -433,6 +440,12 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
qset = cfg->vf_id;

for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
+ u8 svf = cfg->ind_tbl[idx] >> 3;
+
+ if (svf)
+ qset = nic->vf_sqs[cfg->vf_id][svf - 1];
+ else
+ qset = cfg->vf_id;
nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
(qset << 3) | (cfg->ind_tbl[idx] & 0x7));
idx++;
@@ -456,19 +469,31 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
* VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
* VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
*/
-static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx)
+static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
+ struct sq_cfg_msg *sq)
{
u32 bgx, lmac, chan;
u32 tl2, tl3, tl4;
u32 rr_quantum;
+ u8 sq_idx = sq->sq_num;
+ u8 pqs_vnic;
+
+ if (sq->sqs_mode)
+ pqs_vnic = nic->pqs_vf[vnic];
+ else
+ pqs_vnic = vnic;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);

- bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
- lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
/* 24 bytes for FCS, IPG and preamble */
rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);

tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
tl4 += sq_idx;
+ if (sq->sqs_mode)
+ tl4 += vnic * 8;
+
tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
((u64)vnic << NIC_QS_ID_SHIFT) |
@@ -489,6 +514,71 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx)
nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
}

+/* Send primary nicvf pointer to secondary QS's VF */
+static void nic_send_pnicvf(struct nicpf *nic, int sqs)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+ mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]];
+ nic_send_msg_to_vf(nic, sqs, &mbx);
+}
+
+/* Send SQS's nicvf pointer to primary QS's VF */
+static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
+{
+ union nic_mbx mbx = {};
+ int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+ mbx.nicvf.sqs_id = nicvf->sqs_id;
+ mbx.nicvf.nicvf = nic->nicvf[sqs_id];
+ nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
+}
+
+/* Find next available Qset that can be assigned as a
+ * secondary Qset to a VF.
+ */
+static int nic_nxt_avail_sqs(struct nicpf *nic)
+{
+ int sqs;
+
+ for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
+ if (!nic->sqs_used[sqs])
+ nic->sqs_used[sqs] = true;
+ else
+ continue;
+ return sqs + nic->num_vf_en;
+ }
+ return -1;
+}
+
+/* Allocate additional Qsets for requested VF */
+static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
+{
+ union nic_mbx mbx = {};
+ int idx, alloc_qs = 0;
+ int sqs_id;
+
+ if (!nic->num_sqs_en)
+ goto send_mbox;
+
+ for (idx = 0; idx < sqs->qs_count; idx++) {
+ sqs_id = nic_nxt_avail_sqs(nic);
+ if (sqs_id < 0)
+ break;
+ nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
+ nic->pqs_vf[sqs_id] = sqs->vf_id;
+ alloc_qs++;
+ }
+
+send_mbox:
+ mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+ mbx.sqs_alloc.vf_id = sqs->vf_id;
+ mbx.sqs_alloc.qs_count = alloc_qs;
+ nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
+}
+
/* Interrupt handler to handle mailbox messages from VFs */
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
{
@@ -496,6 +586,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
u64 *mbx_data;
u64 mbx_addr;
u64 reg_addr;
+ u64 cfg;
int bgx, lmac;
int i;
int ret = 0;
@@ -516,15 +607,24 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
switch (mbx.msg.msg) {
case NIC_MBOX_MSG_READY:
nic_mbx_send_ready(nic, vf);
- nic->link[vf] = 0;
- nic->duplex[vf] = 0;
- nic->speed[vf] = 0;
+ if (vf < MAX_LMAC) {
+ nic->link[vf] = 0;
+ nic->duplex[vf] = 0;
+ nic->speed[vf] = 0;
+ }
ret = 1;
break;
case NIC_MBOX_MSG_QS_CFG:
reg_addr = NIC_PF_QSET_0_127_CFG |
(mbx.qs.num << NIC_QS_ID_SHIFT);
- nic_reg_write(nic, reg_addr, mbx.qs.cfg);
+ cfg = mbx.qs.cfg;
+ /* Check if its a secondary Qset */
+ if (vf >= nic->num_vf_en) {
+ cfg = cfg & (~0x7FULL);
+ /* Assign this Qset to primary Qset's VF */
+ cfg |= nic->pqs_vf[vf];
+ }
+ nic_reg_write(nic, reg_addr, cfg);
break;
case NIC_MBOX_MSG_RQ_CFG:
reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG |
@@ -552,9 +652,11 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
(mbx.sq.qs_num << NIC_QS_ID_SHIFT) |
(mbx.sq.sq_num << NIC_Q_NUM_SHIFT);
nic_reg_write(nic, reg_addr, mbx.sq.cfg);
- nic_tx_channel_cfg(nic, mbx.qs.num, mbx.sq.sq_num);
+ nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
break;
case NIC_MBOX_MSG_SET_MAC:
+ if (vf >= nic->num_vf_en)
+ break;
lmac = mbx.mac.vf_id;
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
@@ -581,7 +683,22 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_SHUTDOWN:
/* First msg in VF teardown sequence */
nic->vf_enabled[vf] = false;
+ if (vf >= nic->num_vf_en)
+ nic->sqs_used[vf - nic->num_vf_en] = false;
+ nic->pqs_vf[vf] = 0;
+ break;
+ case NIC_MBOX_MSG_ALLOC_SQS:
+ nic_alloc_sqs(nic, &mbx.sqs_alloc);
+ goto unlock;
+ case NIC_MBOX_MSG_NICVF_PTR:
+ nic->nicvf[vf] = mbx.nicvf.nicvf;
break;
+ case NIC_MBOX_MSG_PNICVF_PTR:
+ nic_send_pnicvf(nic, vf);
+ goto unlock;
+ case NIC_MBOX_MSG_SNICVF_PTR:
+ nic_send_snicvf(nic, &mbx.nicvf);
+ goto unlock;
case NIC_MBOX_MSG_BGX_STATS:
nic_get_bgx_stats(nic, &mbx.bgx_stats);
goto unlock;
@@ -610,8 +727,7 @@ static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
if (intr & (1ULL << vf)) {
dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
vf + (mbx * vf_per_mbx_reg));
- if ((vf + (mbx * vf_per_mbx_reg)) > nic->num_vf_en)
- break;
+
nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
nic_clear_mbx_intr(nic, vf, mbx);
}
@@ -717,9 +833,24 @@ static void nic_unregister_interrupts(struct nicpf *nic)
nic_disable_msix(nic);
}

+static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
+{
+ int pos = 0, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
+ u16 total_vf;
+
+ /* Check if its a multi-node environment */
+ if (nr_node_ids > 1)
+ sqs_per_vf = MAX_SQS_PER_VF;
+
+ pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV);
+ pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf);
+ return min(total_vf - vf_en, vf_en * sqs_per_vf);
+}
+
static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
{
int pos = 0;
+ int vf_en;
int err;
u16 total_vf_cnt;

@@ -736,16 +867,20 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
if (!total_vf_cnt)
return 0;

- err = pci_enable_sriov(pdev, nic->num_vf_en);
+ vf_en = nic->num_vf_en;
+ nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en);
+ vf_en += nic->num_sqs_en;
+
+ err = pci_enable_sriov(pdev, vf_en);
if (err) {
dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n",
- nic->num_vf_en);
+ vf_en);
nic->num_vf_en = 0;
return err;
}

dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
- nic->num_vf_en);
+ vf_en);

nic->flags |= NIC_SRIOV_ENABLED;
return 0;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 1eec2cd..e4fa98a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -149,10 +149,33 @@ static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl)
nic->msg_enable = lvl;
}

+static void nicvf_get_qset_strings(struct nicvf *nic, u8 **data, int qset)
+{
+ int stats, qidx;
+ int start_qidx = qset * MAX_RCV_QUEUES_PER_QS;
+
+ for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
+ for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
+ sprintf(*data, "rxq%d: %s", qidx + start_qidx,
+ nicvf_queue_stats[stats].name);
+ *data += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
+ for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
+ sprintf(*data, "txq%d: %s", qidx + start_qidx,
+ nicvf_queue_stats[stats].name);
+ *data += ETH_GSTRING_LEN;
+ }
+ }
+}
+
static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
{
struct nicvf *nic = netdev_priv(netdev);
- int stats, qidx;
+ int stats;
+ int sqs;

if (sset != ETH_SS_STATS)
return;
@@ -167,20 +190,12 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
data += ETH_GSTRING_LEN;
}

- for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
- for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
- sprintf(data, "rxq%d: %s", qidx,
- nicvf_queue_stats[stats].name);
- data += ETH_GSTRING_LEN;
- }
- }
+ nicvf_get_qset_strings(nic, &data, 0);

- for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
- for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
- sprintf(data, "txq%d: %s", qidx,
- nicvf_queue_stats[stats].name);
- data += ETH_GSTRING_LEN;
- }
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ if (!nic->snicvf[sqs])
+ continue;
+ nicvf_get_qset_strings(nic->snicvf[sqs], &data, sqs + 1);
}

for (stats = 0; stats < BGX_RX_STATS_COUNT; stats++) {
@@ -197,21 +212,58 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
static int nicvf_get_sset_count(struct net_device *netdev, int sset)
{
struct nicvf *nic = netdev_priv(netdev);
+ int qstats_count;
+ int sqs;

if (sset != ETH_SS_STATS)
return -EINVAL;

+ qstats_count = nicvf_n_queue_stats *
+ (nic->qs->rq_cnt + nic->qs->sq_cnt);
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ struct nicvf *snic;
+
+ snic = nic->snicvf[sqs];
+ if (!snic)
+ continue;
+ qstats_count += nicvf_n_queue_stats *
+ (snic->qs->rq_cnt + snic->qs->sq_cnt);
+ }
+
return nicvf_n_hw_stats + nicvf_n_drv_stats +
- (nicvf_n_queue_stats *
- (nic->qs->rq_cnt + nic->qs->sq_cnt)) +
+ qstats_count +
BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT;
}

+static void nicvf_get_qset_stats(struct nicvf *nic,
+ struct ethtool_stats *stats, u64 **data)
+{
+ int stat, qidx;
+
+ if (!nic)
+ return;
+
+ for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
+ nicvf_update_rq_stats(nic, qidx);
+ for (stat = 0; stat < nicvf_n_queue_stats; stat++)
+ *((*data)++) = ((u64 *)&nic->qs->rq[qidx].stats)
+ [nicvf_queue_stats[stat].index];
+ }
+
+ for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
+ nicvf_update_sq_stats(nic, qidx);
+ for (stat = 0; stat < nicvf_n_queue_stats; stat++)
+ *((*data)++) = ((u64 *)&nic->qs->sq[qidx].stats)
+ [nicvf_queue_stats[stat].index];
+ }
+}
+
static void nicvf_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
struct nicvf *nic = netdev_priv(netdev);
- int stat, qidx;
+ int stat;
+ int sqs;

nicvf_update_stats(nic);

@@ -225,16 +277,12 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev,
*(data++) = ((u64 *)&nic->drv_stats)
[nicvf_drv_stats[stat].index];

- for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
- for (stat = 0; stat < nicvf_n_queue_stats; stat++)
- *(data++) = ((u64 *)&nic->qs->rq[qidx].stats)
- [nicvf_queue_stats[stat].index];
- }
+ nicvf_get_qset_stats(nic, stats, &data);

- for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
- for (stat = 0; stat < nicvf_n_queue_stats; stat++)
- *(data++) = ((u64 *)&nic->qs->sq[qidx].stats)
- [nicvf_queue_stats[stat].index];
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ if (!nic->snicvf[sqs])
+ continue;
+ nicvf_get_qset_stats(nic->snicvf[sqs], stats, &data);
}

for (stat = 0; stat < BGX_RX_STATS_COUNT; stat++)
@@ -393,7 +441,7 @@ static int nicvf_get_rxnfc(struct net_device *dev,

switch (info->cmd) {
case ETHTOOL_GRXRINGS:
- info->data = nic->qs->rq_cnt;
+ info->data = nic->rx_queues;
ret = 0;
break;
case ETHTOOL_GRXFH:
@@ -556,11 +604,11 @@ static void nicvf_get_channels(struct net_device *dev,

memset(channel, 0, sizeof(*channel));

- channel->max_rx = MAX_RCV_QUEUES_PER_QS;
- channel->max_tx = MAX_SND_QUEUES_PER_QS;
+ channel->max_rx = nic->max_queues;
+ channel->max_tx = nic->max_queues;

- channel->rx_count = nic->qs->rq_cnt;
- channel->tx_count = nic->qs->sq_cnt;
+ channel->rx_count = nic->rx_queues;
+ channel->tx_count = nic->tx_queues;
}

/* Set no of Tx, Rx queues to be used */
@@ -568,24 +616,36 @@ static int nicvf_set_channels(struct net_device *dev,
struct ethtool_channels *channel)
{
struct nicvf *nic = netdev_priv(dev);
- int err = 0;
bool if_up = netif_running(dev);
+ int cqcount;
+ int err = 0;

if (!channel->rx_count || !channel->tx_count)
return -EINVAL;
- if (channel->rx_count > MAX_RCV_QUEUES_PER_QS)
+ if (channel->rx_count > nic->max_queues)
return -EINVAL;
- if (channel->tx_count > MAX_SND_QUEUES_PER_QS)
+ if (channel->tx_count > nic->max_queues)
return -EINVAL;

if (if_up)
nicvf_stop(dev);

- nic->qs->rq_cnt = channel->rx_count;
- nic->qs->sq_cnt = channel->tx_count;
+ cqcount = max(channel->rx_count, channel->tx_count);
+
+ if (cqcount > MAX_CMP_QUEUES_PER_QS) {
+ nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS);
+ nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
+ } else {
+ nic->sqs_count = 0;
+ }
+
+ nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS);
+ nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS);
nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt);

- err = nicvf_set_real_num_queues(dev, nic->qs->sq_cnt, nic->qs->rq_cnt);
+ nic->rx_queues = channel->rx_count;
+ nic->tx_queues = channel->tx_count;
+ err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues);
if (err)
return err;

@@ -593,7 +653,7 @@ static int nicvf_set_channels(struct net_device *dev,
nicvf_open(dev);

netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
- nic->qs->sq_cnt, nic->qs->rq_cnt);
+ nic->tx_queues, nic->rx_queues);

return err;
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 2198f61..9a1091a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -51,6 +51,14 @@ module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

+static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
+{
+ if (nic->sqs_mode)
+ return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
+ else
+ return qidx;
+}
+
static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
struct sk_buff *skb)
{
@@ -193,6 +201,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
if (!nic->set_mac_pending)
ether_addr_copy(nic->netdev->dev_addr,
mbx.nic_cfg.mac_addr);
+ nic->sqs_mode = mbx.nic_cfg.sqs_mode;
nic->link_up = false;
nic->duplex = 0;
nic->speed = 0;
@@ -230,6 +239,26 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
netif_tx_stop_all_queues(nic->netdev);
}
break;
+ case NIC_MBOX_MSG_ALLOC_SQS:
+ nic->sqs_count = mbx.sqs_alloc.qs_count;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_SNICVF_PTR:
+ /* Primary VF: make note of secondary VF's pointer
+ * to be used while packet transmission.
+ */
+ nic->snicvf[mbx.nicvf.sqs_id] =
+ (struct nicvf *)mbx.nicvf.nicvf;
+ nic->pf_acked = true;
+ break;
+ case NIC_MBOX_MSG_PNICVF_PTR:
+ /* Secondary VF/Qset: make note of primary VF's pointer
+ * to be used while packet reception, to handover packet
+ * to primary VF's netdev.
+ */
+ nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
+ nic->pf_acked = true;
+ break;
default:
netdev_err(nic->netdev,
"Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
@@ -338,11 +367,100 @@ static int nicvf_rss_init(struct nicvf *nic)

for (idx = 0; idx < rss->rss_size; idx++)
rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
- nic->qs->rq_cnt);
+ nic->rx_queues);
nicvf_config_rss(nic);
return 1;
}

+/* Request PF to allocate additional Qsets */
+static void nicvf_request_sqs(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ int sqs;
+ int sqs_count = nic->sqs_count;
+ int rx_queues = 0, tx_queues = 0;
+
+ /* Only primary VF should request */
+ if (nic->sqs_mode || !nic->sqs_count)
+ return;
+
+ mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+ mbx.sqs_alloc.vf_id = nic->vf_id;
+ mbx.sqs_alloc.qs_count = nic->sqs_count;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ /* No response from PF */
+ nic->sqs_count = 0;
+ return;
+ }
+
+ /* Return if no Secondary Qsets available */
+ if (!nic->sqs_count)
+ return;
+
+ if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
+ rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;
+ if (nic->tx_queues > MAX_SND_QUEUES_PER_QS)
+ tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS;
+
+ /* Set no of Rx/Tx queues in each of the SQsets */
+ for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+ mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+ mbx.nicvf.vf_id = nic->vf_id;
+ mbx.nicvf.sqs_id = sqs;
+ nicvf_send_msg_to_pf(nic, &mbx);
+
+ nic->snicvf[sqs]->sqs_id = sqs;
+ if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
+ nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
+ rx_queues -= MAX_RCV_QUEUES_PER_QS;
+ } else {
+ nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
+ rx_queues = 0;
+ }
+
+ if (tx_queues > MAX_SND_QUEUES_PER_QS) {
+ nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
+ tx_queues -= MAX_SND_QUEUES_PER_QS;
+ } else {
+ nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
+ tx_queues = 0;
+ }
+
+ nic->snicvf[sqs]->qs->cq_cnt =
+ max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);
+
+ /* Initialize secondary Qset's queues and its interrupts */
+ nicvf_open(nic->snicvf[sqs]->netdev);
+ }
+
+ /* Update stack with actual Rx/Tx queue count allocated */
+ if (sqs_count != nic->sqs_count)
+ nicvf_set_real_num_queues(nic->netdev,
+ nic->tx_queues, nic->rx_queues);
+}
+
+/* Send this Qset's nicvf pointer to PF.
+ * PF in turn sends primary VF's nicvf struct to secondary Qsets/VFs
+ * so that packets received by these Qsets can use primary VF's netdev
+ */
+static void nicvf_send_vf_struct(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
+ mbx.nicvf.sqs_mode = nic->sqs_mode;
+ mbx.nicvf.nicvf = (u64)nic;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_get_primary_vf_struct(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+ nicvf_send_msg_to_pf(nic, &mbx);
+}
+
int nicvf_set_real_num_queues(struct net_device *netdev,
int tx_queues, int rx_queues)
{
@@ -453,6 +571,15 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct sk_buff *skb;
struct nicvf *nic = netdev_priv(netdev);
int err = 0;
+ int rq_idx;
+
+ rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
+
+ if (nic->sqs_mode) {
+ /* Use primary VF's 'nicvf' struct */
+ nic = nic->pnicvf;
+ netdev = nic->netdev;
+ }

/* Check for errors */
err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
@@ -482,7 +609,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,

nicvf_set_rxhash(netdev, cqe_rx, skb);

- skb_record_rx_queue(skb, cqe_rx->rq_idx);
+ skb_record_rx_queue(skb, rq_idx);
if (netdev->hw_features & NETIF_F_RXCSUM) {
/* HW by default verifies TCP/UDP/SCTP checksums */
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -578,8 +705,11 @@ loop:
done:
/* Wakeup TXQ if its stopped earlier due to SQ full */
if (tx_done) {
- txq = netdev_get_tx_queue(netdev, cq_idx);
- if (netif_tx_queue_stopped(txq)) {
+ netdev = nic->pnicvf->netdev;
+ txq = netdev_get_tx_queue(netdev,
+ nicvf_netdev_qidx(nic, cq_idx));
+ nic = nic->pnicvf;
+ if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
netif_tx_start_queue(txq);
nic->drv_stats.txq_wake++;
if (netif_msg_tx_err(nic))
@@ -893,7 +1023,6 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
netdev_warn(netdev,
"%s: Transmit ring full, stopping SQ%d\n",
netdev->name, qid);
-
return NETDEV_TX_BUSY;
}

@@ -926,6 +1055,17 @@ int nicvf_stop(struct net_device *netdev)
nicvf_send_msg_to_pf(nic, &mbx);

netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+
+ /* Teardown secondary qsets first */
+ if (!nic->sqs_mode) {
+ for (qidx = 0; qidx < nic->sqs_count; qidx++) {
+ if (!nic->snicvf[qidx])
+ continue;
+ nicvf_stop(nic->snicvf[qidx]->netdev);
+ nic->snicvf[qidx] = NULL;
+ }
+ }

/* Disable RBDR & QS error interrupts */
for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
@@ -973,6 +1113,10 @@ int nicvf_stop(struct net_device *netdev)

nicvf_free_cq_poll(nic);

+ /* Clear multiqset info */
+ nic->pnicvf = nic;
+ nic->sqs_count = 0;
+
return 0;
}

@@ -1028,10 +1172,16 @@ int nicvf_open(struct net_device *netdev)

/* Configure CPI alorithm */
nic->cpi_alg = cpi_alg;
- nicvf_config_cpi(nic);
+ if (!nic->sqs_mode)
+ nicvf_config_cpi(nic);
+
+ nicvf_request_sqs(nic);
+ if (nic->sqs_mode)
+ nicvf_get_primary_vf_struct(nic);

/* Configure receive side scaling */
- nicvf_rss_init(nic);
+ if (!nic->sqs_mode)
+ nicvf_rss_init(nic);

err = nicvf_register_interrupts(nic);
if (err)
@@ -1282,8 +1432,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct device *dev = &pdev->dev;
struct net_device *netdev;
struct nicvf *nic;
- struct queue_set *qs;
- int err;
+ int err, qcount;

err = pci_enable_device(pdev);
if (err) {
@@ -1309,9 +1458,17 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_release_regions;
}

- netdev = alloc_etherdev_mqs(sizeof(struct nicvf),
- MAX_RCV_QUEUES_PER_QS,
- MAX_SND_QUEUES_PER_QS);
+ qcount = MAX_CMP_QUEUES_PER_QS;
+
+ /* Restrict multiqset support only for host bound VFs */
+ if (pdev->is_virtfn) {
+ /* Set max number of queues per VF */
+ qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
+ qcount = min(qcount,
+ (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+ }
+
+ netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
if (!netdev) {
err = -ENOMEM;
goto err_release_regions;
@@ -1324,6 +1481,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
nic = netdev_priv(netdev);
nic->netdev = netdev;
nic->pdev = pdev;
+ nic->pnicvf = nic;
+ nic->max_queues = qcount;

/* MAP VF's configuration registers */
nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
@@ -1337,20 +1496,26 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_free_netdev;

- qs = nic->qs;
-
- err = nicvf_set_real_num_queues(netdev, qs->sq_cnt, qs->rq_cnt);
- if (err)
- goto err_free_netdev;
-
/* Check if PF is alive and get MAC address for this VF */
err = nicvf_register_misc_interrupt(nic);
if (err)
goto err_free_netdev;

+ nicvf_send_vf_struct(nic);
+
+ /* Check if this VF is in QS only mode */
+ if (nic->sqs_mode)
+ return 0;
+
+ err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
+ if (err)
+ goto err_unregister_interrupts;
+
netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_GRO |
- NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXHASH);
+ NETIF_F_HW_VLAN_CTAG_RX);
+
+ netdev->hw_features |= NETIF_F_RXHASH;

netdev->features |= netdev->hw_features;

@@ -1389,8 +1554,13 @@ static void nicvf_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct nicvf *nic = netdev_priv(netdev);
+ struct net_device *pnetdev = nic->pnicvf->netdev;

- unregister_netdev(netdev);
+ /* Check if this Qset is assigned to different VF.
+ * If yes, clean primary and all secondary Qsets.
+ */
+ if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
+ unregister_netdev(pnetdev);
nicvf_unregister_interrupts(nic);
pci_set_drvdata(pdev, NULL);
free_netdev(netdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index b294d67..63a870a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -478,7 +478,7 @@ static void nicvf_reclaim_rbdr(struct nicvf *nic,
void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
{
u64 rq_cfg;
- int sqs;
+ int sqs = 0;

rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);

@@ -621,6 +621,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
mbx.sq.qs_num = qs->vnic_id;
mbx.sq.sq_num = qidx;
+ mbx.sq.sqs_mode = nic->sqs_mode;
mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
nicvf_send_msg_to_pf(nic, &mbx);

@@ -702,6 +703,7 @@ void nicvf_qset_config(struct nicvf *nic, bool enable)
/* Send a mailbox msg to PF to config Qset */
mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
mbx.qs.num = qs->vnic_id;
+ mbx.qs.sqs_count = nic->sqs_count;

mbx.qs.cfg = 0;
qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
@@ -782,6 +784,10 @@ int nicvf_set_qset_resources(struct nicvf *nic)
qs->rbdr_len = RCV_BUF_COUNT;
qs->sq_len = SND_QUEUE_LEN;
qs->cq_len = CMP_QUEUE_LEN;
+
+ nic->rx_queues = qs->rq_cnt;
+ nic->tx_queues = qs->sq_cnt;
+
return 0;
}

@@ -1025,7 +1031,7 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
* them to SQ for transfer
*/
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
- int qentry, struct sk_buff *skb)
+ int sq_num, int qentry, struct sk_buff *skb)
{
struct tso_t tso;
int seg_subdescs = 0, desc_cnt = 0;
@@ -1085,7 +1091,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,

/* Inform HW to xmit all TSO segments */
nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
- skb_get_queue_mapping(skb), desc_cnt);
+ sq_num, desc_cnt);
nic->drv_stats.tx_tso++;
return 1;
}
@@ -1096,10 +1102,24 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
int i, size;
int subdesc_cnt;
int sq_num, qentry;
- struct queue_set *qs = nic->qs;
+ struct queue_set *qs;
struct snd_queue *sq;

sq_num = skb_get_queue_mapping(skb);
+ if (sq_num >= MAX_SND_QUEUES_PER_QS) {
+ /* Get secondary Qset's SQ structure */
+ i = sq_num / MAX_SND_QUEUES_PER_QS;
+ if (!nic->snicvf[i - 1]) {
+ netdev_warn(nic->netdev,
+ "Secondary Qset#%d's ptr not initialized\n",
+ i - 1);
+ return 1;
+ }
+ nic = (struct nicvf *)nic->snicvf[i - 1];
+ sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
+ }
+
+ qs = nic->qs;
sq = &qs->sq[sq_num];

subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
@@ -1110,7 +1130,7 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)

/* Check if its a TSO packet */
if (skb_shinfo(skb)->gso_size)
- return nicvf_sq_append_tso(nic, sq, qentry, skb);
+ return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);

/* Add SQ header subdesc */
nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len);
@@ -1146,6 +1166,8 @@ doorbell:
return 1;

append_fail:
+ /* Use original PCI dev for debug log */
+ nic = nic->pnicvf;
netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
return 0;
}
--
2.5.0

2015-08-28 15:10:50

by Aleksey Makarov

[permalink] [raw]
Subject: [PATCH net-next 8/8] net: thunderx: Support for internal loopback mode

From: Sunil Goutham <[email protected]>

Support for setting a VF's corresponding BGX LMAC in internal
loopback mode. This mode can be used for verifying basic HW
functionality such as packet I/O, RX checksum validation,
CQ/RBDR interrupts, stats, etc. It is useful when the DUT has no
external network connectivity.

'loopback' mode can be enabled or disabled via ethtool.
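
For example, assuming the VF's interface is eth0, it should be togglable
with:

  # ethtool -K eth0 loopback on
  # ethtool -K eth0 loopback off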

Note: This feature is not supported when the number of VFs enabled is
more than the number of physical interfaces, i.e. active BGX LMACs.

Signed-off-by: Sunil Goutham <[email protected]>
Signed-off-by: Aleksey Makarov <[email protected]>
---
drivers/net/ethernet/cavium/thunder/nic.h | 11 ++++++++
drivers/net/ethernet/cavium/thunder/nic_main.c | 21 +++++++++++++++
drivers/net/ethernet/cavium/thunder/nicvf_main.c | 30 ++++++++++++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 31 +++++++++++++++++++++++
drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 4 +++
5 files changed, 97 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 35b2ee1..d3950b2 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -265,6 +265,7 @@ struct nicvf {
u8 node;
u8 tns_mode:1;
u8 sqs_mode:1;
+ u8 loopback_supported:1;
u16 mtu;
struct queue_set *qs;
#define MAX_SQS_PER_VF_SINGLE_NODE 5
@@ -344,6 +345,7 @@ struct nicvf {
#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */
#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */
#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */
+#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */
#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */

@@ -353,6 +355,7 @@ struct nic_cfg_msg {
u8 node_id;
u8 tns_mode:1;
u8 sqs_mode:1;
+ u8 loopback_supported:1;
u8 mac_addr[ETH_ALEN];
};

@@ -452,6 +455,13 @@ struct nicvf_ptr {
u64 nicvf;
};

+/* Set interface in loopback mode */
+struct set_loopback {
+ u8 msg;
+ u8 vf_id;
+ bool enable;
+};
+
/* 128 bit shared memory between PF and each VF */
union nic_mbx {
struct { u8 msg; } msg;
@@ -468,6 +478,7 @@ union nic_mbx {
struct bgx_link_status link_status;
struct sqs_alloc sqs_alloc;
struct nicvf_ptr nicvf;
+ struct set_loopback lbk;
};

#define NIC_NODE_ID_MASK 0x03
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 51f3048..fd36820 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -154,6 +154,9 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf)
}
mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
mbx.nic_cfg.node_id = nic->node;
+
+ mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
+
nic_send_msg_to_vf(nic, vf, &mbx);
}

@@ -579,6 +582,21 @@ send_mbox:
nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
}

+static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
+{
+ int bgx_idx, lmac_idx;
+
+ if (lbk->vf_id > MAX_LMAC)
+ return -1;
+
+ bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+ lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+
+ bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
+
+ return 0;
+}
+
/* Interrupt handler to handle mailbox messages from VFs */
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
{
@@ -702,6 +720,9 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_BGX_STATS:
nic_get_bgx_stats(nic, &mbx.bgx_stats);
goto unlock;
+ case NIC_MBOX_MSG_LOOPBACK:
+ ret = nic_config_loopback(nic, &mbx.lbk);
+ break;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 9a1091a..49228b6 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -202,6 +202,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
ether_addr_copy(nic->netdev->dev_addr,
mbx.nic_cfg.mac_addr);
nic->sqs_mode = mbx.nic_cfg.sqs_mode;
+ nic->loopback_supported = mbx.nic_cfg.loopback_supported;
nic->link_up = false;
nic->duplex = 0;
nic->speed = 0;
@@ -1404,6 +1405,30 @@ static void nicvf_reset_task(struct work_struct *work)
nic->netdev->trans_start = jiffies;
}

+static int nicvf_config_loopback(struct nicvf *nic,
+ netdev_features_t features)
+{
+ union nic_mbx mbx = {};
+
+ mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
+ mbx.lbk.vf_id = nic->vf_id;
+ mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static netdev_features_t nicvf_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if ((features & NETIF_F_LOOPBACK) &&
+ netif_running(netdev) && !nic->loopback_supported)
+ features &= ~NETIF_F_LOOPBACK;
+
+ return features;
+}
+
static int nicvf_set_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -1413,6 +1438,9 @@ static int nicvf_set_features(struct net_device *netdev,
if (changed & NETIF_F_HW_VLAN_CTAG_RX)
nicvf_config_vlan_stripping(nic, features);

+ if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
+ return nicvf_config_loopback(nic, features);
+
return 0;
}

@@ -1424,6 +1452,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_set_mac_address = nicvf_set_mac_address,
.ndo_get_stats64 = nicvf_get_stats64,
.ndo_tx_timeout = nicvf_tx_timeout,
+ .ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
};

@@ -1518,6 +1547,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->hw_features |= NETIF_F_RXHASH;

netdev->features |= netdev->hw_features;
+ netdev->hw_features |= NETIF_F_LOOPBACK;

netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 5e54186..574c492 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -329,6 +329,37 @@ static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac)
}
}

+/* Configure BGX LMAC in internal loopback mode */
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+ int lmac_idx, bool enable)
+{
+ struct bgx *bgx;
+ struct lmac *lmac;
+ u64 cfg;
+
+ bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ if (!bgx)
+ return;
+
+ lmac = &bgx->lmac[lmac_idx];
+ if (lmac->is_sgmii) {
+ cfg = bgx_reg_read(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL);
+ if (enable)
+ cfg |= PCS_MRX_CTL_LOOPBACK1;
+ else
+ cfg &= ~PCS_MRX_CTL_LOOPBACK1;
+ bgx_reg_write(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL, cfg);
+ } else {
+ cfg = bgx_reg_read(bgx, lmac_idx, BGX_SPUX_CONTROL1);
+ if (enable)
+ cfg |= SPU_CTL_LOOPBACK;
+ else
+ cfg &= ~SPU_CTL_LOOPBACK;
+ bgx_reg_write(bgx, lmac_idx, BGX_SPUX_CONTROL1, cfg);
+ }
+}
+EXPORT_SYMBOL(bgx_lmac_internal_loopback);
+
static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
{
u64 cfg;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index ba4f53b..07b7ec6 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -72,6 +72,7 @@

#define BGX_SPUX_CONTROL1 0x10000
#define SPU_CTL_LOW_POWER BIT_ULL(11)
+#define SPU_CTL_LOOPBACK BIT_ULL(14)
#define SPU_CTL_RESET BIT_ULL(15)
#define BGX_SPUX_STATUS1 0x10008
#define SPU_STATUS1_RCV_LNK BIT_ULL(2)
@@ -126,6 +127,7 @@
#define PCS_MRX_CTL_RST_AN BIT_ULL(9)
#define PCS_MRX_CTL_PWR_DN BIT_ULL(11)
#define PCS_MRX_CTL_AN_EN BIT_ULL(12)
+#define PCS_MRX_CTL_LOOPBACK1 BIT_ULL(14)
#define PCS_MRX_CTL_RESET BIT_ULL(15)
#define BGX_GMP_PCS_MRX_STATUS 0x30008
#define PCS_MRX_STATUS_AN_CPT BIT_ULL(5)
@@ -186,6 +188,8 @@ int bgx_get_lmac_count(int node, int bgx);
const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid);
void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+ int lmac_idx, bool enable);
u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
#define BGX_RX_STATS_COUNT 11
--
2.5.0

2015-08-28 21:26:43

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next 6/8] net: thunderx: Rework interrupt handler

From: Aleksey Makarov <[email protected]>
Date: Fri, 28 Aug 2015 17:59:58 +0300

> @@ -251,6 +251,8 @@ struct cmp_queue {
> void *desc;
> struct q_desc_mem dmem;
> struct cmp_queue_stats stats;
> + int irq;
> + cpumask_t affinity_mask;
> } ____cacheline_aligned_in_smp;
>
> struct snd_queue {

This new "affinity_mask" member is not used, please remove it and respin
this patch series.

Thanks.

2015-08-29 01:45:03

by Alexey Klimov

[permalink] [raw]
Subject: Re: [PATCH net-next 6/8] net: thunderx: Rework interrupt handler

Hi Aleksey,

let me add a few minor points below.

On Fri, Aug 28, 2015 at 5:59 PM, Aleksey Makarov
<[email protected]> wrote:
> From: Sunil Goutham <[email protected]>
>
> Rework interrupt handler to avoid checking IRQ affinity of
> CQ interrupts. Now separate handlers are registered for each IRQ
> including RBDR. Also register interrupt handlers for only those
> which are being used.

Also add nicvf_dump_intr_status() and use it in irq handler(s).
I suggest checking and extending the commit message and rethinking the
commit name. Maybe "net: thunderx: rework interrupt handling and
registration" at least?

Please consider the possibility of splitting this patch into a few
patches too.

>
> Signed-off-by: Sunil Goutham <[email protected]>
> Signed-off-by: Aleksey Makarov <[email protected]>
> ---
> drivers/net/ethernet/cavium/thunder/nic.h | 1 +
> drivers/net/ethernet/cavium/thunder/nicvf_main.c | 172 ++++++++++++---------
> drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2 +
> 3 files changed, 103 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
> index a83f567..89b997e 100644
> --- a/drivers/net/ethernet/cavium/thunder/nic.h
> +++ b/drivers/net/ethernet/cavium/thunder/nic.h
> @@ -135,6 +135,7 @@
> #define NICVF_TX_TIMEOUT (50 * HZ)
>
> struct nicvf_cq_poll {
> + struct nicvf *nicvf;
> u8 cq_idx; /* Completion queue index */
> struct napi_struct napi;
> };
> diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> index de51828..2198f61 100644
> --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> @@ -653,11 +653,20 @@ static void nicvf_handle_qs_err(unsigned long data)
> nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
> }
>
> +static inline void nicvf_dump_intr_status(struct nicvf *nic)
> +{
> + if (netif_msg_intr(nic))
> + netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
> + nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
> +}

Please check if you really need to mark this 'inline' here.

> static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
> {
> struct nicvf *nic = (struct nicvf *)nicvf_irq;
> u64 intr;
>
> + nicvf_dump_intr_status(nic);
> +
> intr = nicvf_reg_read(nic, NIC_VF_INT);
> /* Check for spurious interrupt */
> if (!(intr & NICVF_INTR_MBOX_MASK))
> @@ -668,59 +677,58 @@ static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
> return IRQ_HANDLED;
> }
>
> -static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq)
> +static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
> +{
> + struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
> + struct nicvf *nic = cq_poll->nicvf;
> + int qidx = cq_poll->cq_idx;
> +
> + nicvf_dump_intr_status(nic);
> +
> + /* Disable interrupts */
> + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
> +
> + /* Schedule NAPI */
> + napi_schedule(&cq_poll->napi);
> +
> + /* Clear interrupt */
> + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
> +
> + return IRQ_HANDLED;
> +}

You're not checking for spurious irqs in any of the new irq handlers
here and below, and you schedule napi/tasklets unconditionally. Is that
correct? It looks to me like the previous implementation relied on
reading NIC_VF_INT to determine the irq type and which actions should be
performed. It generally allowed for the possibility that no interrupt
had actually occurred.
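
If the intent is to tolerate spurious interrupts, a minimal guard along
these lines might do (untested sketch, reusing only identifiers that
already appear in this patch):

static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
{
        struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
        struct nicvf *nic = cq_poll->nicvf;
        int qidx = cq_poll->cq_idx;
        u64 intr = nicvf_reg_read(nic, NIC_VF_INT);

        /* Bail out if this CQ's interrupt bit is not actually set */
        if (!(intr & ((1ULL << qidx) << NICVF_INTR_CQ_SHIFT)))
                return IRQ_NONE;

        /* Disable the CQ interrupt, schedule NAPI, then clear it */
        nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
        napi_schedule(&cq_poll->napi);
        nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);

        return IRQ_HANDLED;
}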


> +
> +static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
> {
> - u64 qidx, intr, clear_intr = 0;
> - u64 cq_intr, rbdr_intr, qs_err_intr;
> struct nicvf *nic = (struct nicvf *)nicvf_irq;
> - struct queue_set *qs = nic->qs;
> - struct nicvf_cq_poll *cq_poll = NULL;
> + u8 qidx;
>
> - intr = nicvf_reg_read(nic, NIC_VF_INT);
> - if (netif_msg_intr(nic))
> - netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
> - nic->netdev->name, intr);
> -
> - qs_err_intr = intr & NICVF_INTR_QS_ERR_MASK;
> - if (qs_err_intr) {
> - /* Disable Qset err interrupt and schedule softirq */
> - nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
> - tasklet_hi_schedule(&nic->qs_err_task);
> - clear_intr |= qs_err_intr;
> - }
>
> - /* Disable interrupts and start polling */
> - cq_intr = (intr & NICVF_INTR_CQ_MASK) >> NICVF_INTR_CQ_SHIFT;
> - for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
> - if (!(cq_intr & (1 << qidx)))
> - continue;
> - if (!nicvf_is_intr_enabled(nic, NICVF_INTR_CQ, qidx))
> + nicvf_dump_intr_status(nic);
> +
> + /* Disable RBDR interrupt and schedule softirq */
> + for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
> + if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
> continue;
> + nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
> + tasklet_hi_schedule(&nic->rbdr_task);
> + /* Clear interrupt */
> + nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
> + }
>
> - nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
> - clear_intr |= ((1 << qidx) << NICVF_INTR_CQ_SHIFT);
> + return IRQ_HANDLED;
> +}
>
> - cq_poll = nic->napi[qidx];
> - /* Schedule NAPI */
> - if (cq_poll)
> - napi_schedule(&cq_poll->napi);
> - }
> +static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
> +{
> + struct nicvf *nic = (struct nicvf *)nicvf_irq;
>
> - /* Handle RBDR interrupts */
> - rbdr_intr = (intr & NICVF_INTR_RBDR_MASK) >> NICVF_INTR_RBDR_SHIFT;
> - if (rbdr_intr) {
> - /* Disable RBDR interrupt and schedule softirq */
> - for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
> - if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
> - continue;
> - nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
> - tasklet_hi_schedule(&nic->rbdr_task);
> - clear_intr |= ((1 << qidx) << NICVF_INTR_RBDR_SHIFT);
> - }
> - }
> + nicvf_dump_intr_status(nic);
> +
> + /* Disable Qset err interrupt and schedule softirq */
> + nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
> + tasklet_hi_schedule(&nic->qs_err_task);
> + nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
>
> - /* Clear interrupts */
> - nicvf_reg_write(nic, NIC_VF_INT, clear_intr);
> return IRQ_HANDLED;
> }
>
> @@ -754,7 +762,7 @@ static void nicvf_disable_msix(struct nicvf *nic)
>
> static int nicvf_register_interrupts(struct nicvf *nic)
> {
> - int irq, free, ret = 0;
> + int irq, ret = 0;
> int vector;
>
> for_each_cq_irq(irq)
> @@ -769,44 +777,42 @@ static int nicvf_register_interrupts(struct nicvf *nic)
> sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
> nic->vf_id, irq - NICVF_INTR_ID_RBDR);
>
> - /* Register all interrupts except mailbox */
> - for (irq = 0; irq < NICVF_INTR_ID_SQ; irq++) {
> + /* Register CQ interrupts */
> + for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
> vector = nic->msix_entries[irq].vector;
> ret = request_irq(vector, nicvf_intr_handler,
> - 0, nic->irq_name[irq], nic);
> + 0, nic->irq_name[irq], nic->napi[irq]);
> if (ret)
> - break;
> + goto err;
> nic->irq_allocated[irq] = true;
> }
>
> - for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_MISC; irq++) {
> + /* Register RBDR interrupt */
> + for (irq = NICVF_INTR_ID_RBDR;
> + irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
> vector = nic->msix_entries[irq].vector;
> - ret = request_irq(vector, nicvf_intr_handler,
> + ret = request_irq(vector, nicvf_rbdr_intr_handler,
> 0, nic->irq_name[irq], nic);
> if (ret)
> - break;
> + goto err;
> nic->irq_allocated[irq] = true;
> }
>
> + /* Register QS error interrupt */
> sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
> "NICVF%d Qset error", nic->vf_id);
> - if (!ret) {
> - vector = nic->msix_entries[NICVF_INTR_ID_QS_ERR].vector;
> - irq = NICVF_INTR_ID_QS_ERR;
> - ret = request_irq(vector, nicvf_intr_handler,
> - 0, nic->irq_name[irq], nic);
> - if (!ret)
> - nic->irq_allocated[irq] = true;
> - }
> + irq = NICVF_INTR_ID_QS_ERR;
> + ret = request_irq(nic->msix_entries[irq].vector,
> + nicvf_qs_err_intr_handler,
> + 0, nic->irq_name[irq], nic);
> + if (!ret)
> + nic->irq_allocated[irq] = true;
>
> - if (ret) {
> - netdev_err(nic->netdev, "Request irq failed\n");
> - for (free = 0; free < irq; free++)
> - free_irq(nic->msix_entries[free].vector, nic);
> - return ret;
> - }
> +err:
> + if (ret)
> + netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);
>
> - return 0;
> + return ret;
> }
>
> static void nicvf_unregister_interrupts(struct nicvf *nic)
> @@ -815,8 +821,14 @@ static void nicvf_unregister_interrupts(struct nicvf *nic)
>
> /* Free registered interrupts */
> for (irq = 0; irq < nic->num_vec; irq++) {
> - if (nic->irq_allocated[irq])
> + if (!nic->irq_allocated[irq])
> + continue;
> +
> + if (irq < NICVF_INTR_ID_SQ)
> + free_irq(nic->msix_entries[irq].vector, nic->napi[irq]);
> + else
> free_irq(nic->msix_entries[irq].vector, nic);
> +
> nic->irq_allocated[irq] = false;
> }
>
> @@ -888,6 +900,20 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
> return NETDEV_TX_OK;
> }
>
> +static inline void nicvf_free_cq_poll(struct nicvf *nic)
> +{
> + struct nicvf_cq_poll *cq_poll = NULL;

Please check if you really need to initialize it to NULL here.

> + int qidx;
> +
> + for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
> + cq_poll = nic->napi[qidx];
> + if (!cq_poll)
> + continue;
> + nic->napi[qidx] = NULL;
> + kfree(cq_poll);
> + }
> +}
> +
> int nicvf_stop(struct net_device *netdev)
> {
> int irq, qidx;
> @@ -922,7 +948,6 @@ int nicvf_stop(struct net_device *netdev)
> cq_poll = nic->napi[qidx];
> if (!cq_poll)
> continue;
> - nic->napi[qidx] = NULL;
> napi_synchronize(&cq_poll->napi);
> /* CQ intr is enabled while napi_complete,
> * so disable it now
> @@ -931,7 +956,6 @@ int nicvf_stop(struct net_device *netdev)
> nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
> napi_disable(&cq_poll->napi);
> netif_napi_del(&cq_poll->napi);
> - kfree(cq_poll);
> }
>
> netif_tx_disable(netdev);
> @@ -947,6 +971,8 @@ int nicvf_stop(struct net_device *netdev)
>
> nicvf_unregister_interrupts(nic);
>
> + nicvf_free_cq_poll(nic);
> +
> return 0;
> }
>
> @@ -973,6 +999,7 @@ int nicvf_open(struct net_device *netdev)
> goto napi_del;
> }
> cq_poll->cq_idx = qidx;
> + cq_poll->nicvf = nic;
> netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
> NAPI_POLL_WEIGHT);
> napi_enable(&cq_poll->napi);
> @@ -1040,6 +1067,8 @@ int nicvf_open(struct net_device *netdev)
> cleanup:
> nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
> nicvf_unregister_interrupts(nic);
> + tasklet_kill(&nic->qs_err_task);
> + tasklet_kill(&nic->rbdr_task);
> napi_del:
> for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
> cq_poll = nic->napi[qidx];
> @@ -1047,9 +1076,8 @@ napi_del:
> continue;
> napi_disable(&cq_poll->napi);
> netif_napi_del(&cq_poll->napi);
> - kfree(cq_poll);
> - nic->napi[qidx] = NULL;
> }
> + nicvf_free_cq_poll(nic);
> return err;
> }
>
> diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
> index 8b93dd6..c2ce270 100644
> --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
> +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
> @@ -251,6 +251,8 @@ struct cmp_queue {
> void *desc;
> struct q_desc_mem dmem;
> struct cmp_queue_stats stats;
> + int irq;
> + cpumask_t affinity_mask;
> } ____cacheline_aligned_in_smp;
>
> struct snd_queue {
> --
> 2.5.0
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



--
Best regards, Klimov Alexey

2015-08-29 01:45:09

by Alexey Klimov

[permalink] [raw]
Subject: Re: [PATCH net-next 7/8] net: thunderx: Support for upto 96 queues for a VF

On Fri, Aug 28, 2015 at 5:59 PM, Aleksey Makarov
<[email protected]> wrote:
> From: Sunil Goutham <[email protected]>
>
> This patch adds support for handling multiple qsets assigned to a
> single VF, thereby increasing the number of queues from the earlier 8
> to the max number of CPUs in the system, i.e. 48 queues on a single
> node and 96 on a dual-node system. The user doesn't have the option
> to choose which Qsets/VFs get merged. Upon request from a VF, the PF
> assigns the next free Qsets as secondary qsets. To maintain current
> behavior the number of queues is kept at 8 by default, which can be
> increased via ethtool.
>
> If the user wants to unbind the NICVF driver from a secondary Qset, it
> should be done after tearing down the primary VF's interface.
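
For reference, the queue count would then be raised from userspace with
something like this (the interface name is only an example):

  # ethtool -L eth0 rx 16 tx 16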
>
> Signed-off-by: Sunil Goutham <[email protected]>
> Signed-off-by: Aleksey Makarov <[email protected]>
> Signed-off-by: Robert Richter <[email protected]>
> ---
> drivers/net/ethernet/cavium/thunder/nic.h | 42 ++++-
> drivers/net/ethernet/cavium/thunder/nic_main.c | 173 +++++++++++++++--
> .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 136 +++++++++----
> drivers/net/ethernet/cavium/thunder/nicvf_main.c | 210 +++++++++++++++++++--
> drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 32 +++-
> 5 files changed, 507 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
> index 89b997e..35b2ee1 100644
> --- a/drivers/net/ethernet/cavium/thunder/nic.h
> +++ b/drivers/net/ethernet/cavium/thunder/nic.h
> @@ -258,13 +258,23 @@ struct nicvf_drv_stats {
> };
>
> struct nicvf {
> + struct nicvf *pnicvf;
> struct net_device *netdev;
> struct pci_dev *pdev;
> u8 vf_id;
> u8 node;
> - u8 tns_mode;
> + u8 tns_mode:1;
> + u8 sqs_mode:1;
> u16 mtu;
> struct queue_set *qs;
> +#define MAX_SQS_PER_VF_SINGLE_NODE 5
> +#define MAX_SQS_PER_VF 11
> + u8 sqs_id;
> + u8 sqs_count; /* Secondary Qset count */
> + struct nicvf *snicvf[MAX_SQS_PER_VF];
> + u8 rx_queues;
> + u8 tx_queues;
> + u8 max_queues;
> void __iomem *reg_base;
> bool link_up;
> u8 duplex;
> @@ -330,14 +340,19 @@ struct nicvf {
> #define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */
> #define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */
> #define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */
> -#define NIC_MBOX_MSG_CFG_DONE 0x12 /* VF configuration done */
> -#define NIC_MBOX_MSG_SHUTDOWN 0x13 /* VF is being shutdown */
> +#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */
> +#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */
> +#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */
> +#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */
> +#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */
> +#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */
>
> struct nic_cfg_msg {
> u8 msg;
> u8 vf_id;
> - u8 tns_mode;
> u8 node_id;
> + u8 tns_mode:1;
> + u8 sqs_mode:1;
> u8 mac_addr[ETH_ALEN];
> };
>
> @@ -345,6 +360,7 @@ struct nic_cfg_msg {
> struct qs_cfg_msg {
> u8 msg;
> u8 num;
> + u8 sqs_count;
> u64 cfg;
> };
>
> @@ -361,6 +377,7 @@ struct sq_cfg_msg {
> u8 msg;
> u8 qs_num;
> u8 sq_num;
> + bool sqs_mode;
> u64 cfg;
> };
>
> @@ -420,6 +437,21 @@ struct bgx_link_status {
> u32 speed;
> };
>
> +/* Get Extra Qset IDs */
> +struct sqs_alloc {
> + u8 msg;
> + u8 vf_id;
> + u8 qs_count;
> +};
> +
> +struct nicvf_ptr {
> + u8 msg;
> + u8 vf_id;
> + bool sqs_mode;
> + u8 sqs_id;
> + u64 nicvf;
> +};
> +
> /* 128 bit shared memory between PF and each VF */
> union nic_mbx {
> struct { u8 msg; } msg;
> @@ -434,6 +466,8 @@ union nic_mbx {
> struct rss_cfg_msg rss_cfg;
> struct bgx_stats_msg bgx_stats;
> struct bgx_link_status link_status;
> + struct sqs_alloc sqs_alloc;
> + struct nicvf_ptr nicvf;
> };
>
> #define NIC_NODE_ID_MASK 0x03
> diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
> index 7dfec4a..51f3048 100644
> --- a/drivers/net/ethernet/cavium/thunder/nic_main.c
> +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
> @@ -28,6 +28,11 @@ struct nicpf {
> u8 num_vf_en; /* No of VF enabled */
> bool vf_enabled[MAX_NUM_VFS_SUPPORTED];
> void __iomem *reg_base; /* Register start address */
> + u8 num_sqs_en; /* Secondary qsets enabled */
> + u64 nicvf[MAX_NUM_VFS_SUPPORTED];
> + u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
> + u8 pqs_vf[MAX_NUM_VFS_SUPPORTED];
> + bool sqs_used[MAX_NUM_VFS_SUPPORTED];
> struct pkind_cfg pkind;
> #define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF))
> #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
> @@ -139,13 +144,15 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf)
>
> mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
>
> - bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
> - lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
> -
> - mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
> - if (mac)
> - ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
> + if (vf < MAX_LMAC) {
> + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
> + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
>
> + mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
> + if (mac)
> + ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
> + }
> + mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
> mbx.nic_cfg.node_id = nic->node;
> nic_send_msg_to_vf(nic, vf, &mbx);
> }
> @@ -433,6 +440,12 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
> qset = cfg->vf_id;
>
> for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
> + u8 svf = cfg->ind_tbl[idx] >> 3;
> +
> + if (svf)
> + qset = nic->vf_sqs[cfg->vf_id][svf - 1];
> + else
> + qset = cfg->vf_id;
> nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
> (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
> idx++;
> @@ -456,19 +469,31 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
> * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
> * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
> */
> -static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx)
> +static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
> + struct sq_cfg_msg *sq)
> {
> u32 bgx, lmac, chan;
> u32 tl2, tl3, tl4;
> u32 rr_quantum;
> + u8 sq_idx = sq->sq_num;
> + u8 pqs_vnic;
> +
> + if (sq->sqs_mode)
> + pqs_vnic = nic->pqs_vf[vnic];
> + else
> + pqs_vnic = vnic;
> +
> + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
> + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
>
> - bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
> - lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
> /* 24 bytes for FCS, IPG and preamble */
> rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
>
> tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
> tl4 += sq_idx;
> + if (sq->sqs_mode)
> + tl4 += vnic * 8;
> +
> tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
> nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
> ((u64)vnic << NIC_QS_ID_SHIFT) |
> @@ -489,6 +514,71 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx)
> nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
> }
>
> +/* Send primary nicvf pointer to secondary QS's VF */
> +static void nic_send_pnicvf(struct nicpf *nic, int sqs)
> +{
> + union nic_mbx mbx = {};
> +
> + mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
> + mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]];
> + nic_send_msg_to_vf(nic, sqs, &mbx);
> +}
> +
> +/* Send SQS's nicvf pointer to primary QS's VF */
> +static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
> +{
> + union nic_mbx mbx = {};
> + int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
> +
> + mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
> + mbx.nicvf.sqs_id = nicvf->sqs_id;
> + mbx.nicvf.nicvf = nic->nicvf[sqs_id];
> + nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
> +}
> +
> +/* Find next available Qset that can be assigned as a
> + * secondary Qset to a VF.
> + */
> +static int nic_nxt_avail_sqs(struct nicpf *nic)
> +{
> + int sqs;
> +
> + for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
> + if (!nic->sqs_used[sqs])
> + nic->sqs_used[sqs] = true;
> + else
> + continue;
> + return sqs + nic->num_vf_en;
> + }
> + return -1;
> +}
> +
> +/* Allocate additional Qsets for requested VF */
> +static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
> +{
> + union nic_mbx mbx = {};
> + int idx, alloc_qs = 0;
> + int sqs_id;
> +
> + if (!nic->num_sqs_en)
> + goto send_mbox;
> +
> + for (idx = 0; idx < sqs->qs_count; idx++) {
> + sqs_id = nic_nxt_avail_sqs(nic);
> + if (sqs_id < 0)
> + break;
> + nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
> + nic->pqs_vf[sqs_id] = sqs->vf_id;
> + alloc_qs++;
> + }
> +
> +send_mbox:
> + mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
> + mbx.sqs_alloc.vf_id = sqs->vf_id;
> + mbx.sqs_alloc.qs_count = alloc_qs;
> + nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
> +}
> +
> /* Interrupt handler to handle mailbox messages from VFs */
> static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
> {
> @@ -496,6 +586,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
> u64 *mbx_data;
> u64 mbx_addr;
> u64 reg_addr;
> + u64 cfg;
> int bgx, lmac;
> int i;
> int ret = 0;
> @@ -516,15 +607,24 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
> switch (mbx.msg.msg) {
> case NIC_MBOX_MSG_READY:
> nic_mbx_send_ready(nic, vf);
> - nic->link[vf] = 0;
> - nic->duplex[vf] = 0;
> - nic->speed[vf] = 0;
> + if (vf < MAX_LMAC) {
> + nic->link[vf] = 0;
> + nic->duplex[vf] = 0;
> + nic->speed[vf] = 0;
> + }
> ret = 1;
> break;
> case NIC_MBOX_MSG_QS_CFG:
> reg_addr = NIC_PF_QSET_0_127_CFG |
> (mbx.qs.num << NIC_QS_ID_SHIFT);
> - nic_reg_write(nic, reg_addr, mbx.qs.cfg);
> + cfg = mbx.qs.cfg;
> + /* Check if its a secondary Qset */
> + if (vf >= nic->num_vf_en) {
> + cfg = cfg & (~0x7FULL);
> + /* Assign this Qset to primary Qset's VF */
> + cfg |= nic->pqs_vf[vf];
> + }
> + nic_reg_write(nic, reg_addr, cfg);
> break;
> case NIC_MBOX_MSG_RQ_CFG:
> reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG |
> @@ -552,9 +652,11 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
> (mbx.sq.qs_num << NIC_QS_ID_SHIFT) |
> (mbx.sq.sq_num << NIC_Q_NUM_SHIFT);
> nic_reg_write(nic, reg_addr, mbx.sq.cfg);
> - nic_tx_channel_cfg(nic, mbx.qs.num, mbx.sq.sq_num);
> + nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
> break;
> case NIC_MBOX_MSG_SET_MAC:
> + if (vf >= nic->num_vf_en)
> + break;
> lmac = mbx.mac.vf_id;
> bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
> lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
> @@ -581,7 +683,22 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
> case NIC_MBOX_MSG_SHUTDOWN:
> /* First msg in VF teardown sequence */
> nic->vf_enabled[vf] = false;
> + if (vf >= nic->num_vf_en)
> + nic->sqs_used[vf - nic->num_vf_en] = false;
> + nic->pqs_vf[vf] = 0;
> + break;
> + case NIC_MBOX_MSG_ALLOC_SQS:
> + nic_alloc_sqs(nic, &mbx.sqs_alloc);
> + goto unlock;
> + case NIC_MBOX_MSG_NICVF_PTR:
> + nic->nicvf[vf] = mbx.nicvf.nicvf;
> break;
> + case NIC_MBOX_MSG_PNICVF_PTR:
> + nic_send_pnicvf(nic, vf);
> + goto unlock;
> + case NIC_MBOX_MSG_SNICVF_PTR:
> + nic_send_snicvf(nic, &mbx.nicvf);
> + goto unlock;
> case NIC_MBOX_MSG_BGX_STATS:
> nic_get_bgx_stats(nic, &mbx.bgx_stats);
> goto unlock;
> @@ -610,8 +727,7 @@ static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
> if (intr & (1ULL << vf)) {
> dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
> vf + (mbx * vf_per_mbx_reg));
> - if ((vf + (mbx * vf_per_mbx_reg)) > nic->num_vf_en)
> - break;
> +
> nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
> nic_clear_mbx_intr(nic, vf, mbx);
> }
> @@ -717,9 +833,24 @@ static void nic_unregister_interrupts(struct nicpf *nic)
> nic_disable_msix(nic);
> }
>
> +static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
> +{
> + int pos = 0, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;

Please check if you really need to initialize 'pos' to zero here.


> + u16 total_vf;
> +
> + /* Check if its a multi-node environment */
> + if (nr_node_ids > 1)
> + sqs_per_vf = MAX_SQS_PER_VF;
> +
> + pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV);
> + pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf);
> + return min(total_vf - vf_en, vf_en * sqs_per_vf);
> +}
> +
> static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
> {
> int pos = 0;
> + int vf_en;
> int err;
> u16 total_vf_cnt;
>
> @@ -736,16 +867,20 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
> if (!total_vf_cnt)
> return 0;
>
> - err = pci_enable_sriov(pdev, nic->num_vf_en);
> + vf_en = nic->num_vf_en;
> + nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en);
> + vf_en += nic->num_sqs_en;
> +
> + err = pci_enable_sriov(pdev, vf_en);
> if (err) {
> dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n",
> - nic->num_vf_en);
> + vf_en);
> nic->num_vf_en = 0;
> return err;
> }
>
> dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
> - nic->num_vf_en);
> + vf_en);
>
> nic->flags |= NIC_SRIOV_ENABLED;
> return 0;
> diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
> index 1eec2cd..e4fa98a 100644
> --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
> +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
> @@ -149,10 +149,33 @@ static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl)
> nic->msg_enable = lvl;
> }
>
> +static void nicvf_get_qset_strings(struct nicvf *nic, u8 **data, int qset)
> +{
> + int stats, qidx;
> + int start_qidx = qset * MAX_RCV_QUEUES_PER_QS;
> +
> + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
> + for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
> + sprintf(*data, "rxq%d: %s", qidx + start_qidx,
> + nicvf_queue_stats[stats].name);
> + *data += ETH_GSTRING_LEN;
> + }
> + }
> +
> + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
> + for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
> + sprintf(*data, "txq%d: %s", qidx + start_qidx,
> + nicvf_queue_stats[stats].name);
> + *data += ETH_GSTRING_LEN;
> + }
> + }
> +}
> +
> static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
> {
> struct nicvf *nic = netdev_priv(netdev);
> - int stats, qidx;
> + int stats;
> + int sqs;
>
> if (sset != ETH_SS_STATS)
> return;
> @@ -167,20 +190,12 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
> data += ETH_GSTRING_LEN;
> }
>
> - for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
> - for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
> - sprintf(data, "rxq%d: %s", qidx,
> - nicvf_queue_stats[stats].name);
> - data += ETH_GSTRING_LEN;
> - }
> - }
> + nicvf_get_qset_strings(nic, &data, 0);
>
> - for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
> - for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
> - sprintf(data, "txq%d: %s", qidx,
> - nicvf_queue_stats[stats].name);
> - data += ETH_GSTRING_LEN;
> - }
> + for (sqs = 0; sqs < nic->sqs_count; sqs++) {
> + if (!nic->snicvf[sqs])
> + continue;
> + nicvf_get_qset_strings(nic->snicvf[sqs], &data, sqs + 1);
> }
>
> for (stats = 0; stats < BGX_RX_STATS_COUNT; stats++) {
> @@ -197,21 +212,58 @@ static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
> static int nicvf_get_sset_count(struct net_device *netdev, int sset)
> {
> struct nicvf *nic = netdev_priv(netdev);
> + int qstats_count;
> + int sqs;
>
> if (sset != ETH_SS_STATS)
> return -EINVAL;
>
> + qstats_count = nicvf_n_queue_stats *
> + (nic->qs->rq_cnt + nic->qs->sq_cnt);
> + for (sqs = 0; sqs < nic->sqs_count; sqs++) {
> + struct nicvf *snic;
> +
> + snic = nic->snicvf[sqs];
> + if (!snic)
> + continue;
> + qstats_count += nicvf_n_queue_stats *
> + (snic->qs->rq_cnt + snic->qs->sq_cnt);
> + }
> +
> return nicvf_n_hw_stats + nicvf_n_drv_stats +
> - (nicvf_n_queue_stats *
> - (nic->qs->rq_cnt + nic->qs->sq_cnt)) +
> + qstats_count +
> BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT;
> }
>
> +static void nicvf_get_qset_stats(struct nicvf *nic,
> + struct ethtool_stats *stats, u64 **data)
> +{
> + int stat, qidx;
> +
> + if (!nic)
> + return;
> +
> + for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
> + nicvf_update_rq_stats(nic, qidx);
> + for (stat = 0; stat < nicvf_n_queue_stats; stat++)
> + *((*data)++) = ((u64 *)&nic->qs->rq[qidx].stats)
> + [nicvf_queue_stats[stat].index];
> + }
> +
> + for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
> + nicvf_update_sq_stats(nic, qidx);
> + for (stat = 0; stat < nicvf_n_queue_stats; stat++)
> + *((*data)++) = ((u64 *)&nic->qs->sq[qidx].stats)
> + [nicvf_queue_stats[stat].index];
> + }
> +}
> +
> static void nicvf_get_ethtool_stats(struct net_device *netdev,
> struct ethtool_stats *stats, u64 *data)
> {
> struct nicvf *nic = netdev_priv(netdev);
> - int stat, qidx;
> + int stat;
> + int sqs;
>
> nicvf_update_stats(nic);
>
> @@ -225,16 +277,12 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev,
> *(data++) = ((u64 *)&nic->drv_stats)
> [nicvf_drv_stats[stat].index];
>
> - for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
> - for (stat = 0; stat < nicvf_n_queue_stats; stat++)
> - *(data++) = ((u64 *)&nic->qs->rq[qidx].stats)
> - [nicvf_queue_stats[stat].index];
> - }
> + nicvf_get_qset_stats(nic, stats, &data);
>
> - for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
> - for (stat = 0; stat < nicvf_n_queue_stats; stat++)
> - *(data++) = ((u64 *)&nic->qs->sq[qidx].stats)
> - [nicvf_queue_stats[stat].index];
> + for (sqs = 0; sqs < nic->sqs_count; sqs++) {
> + if (!nic->snicvf[sqs])
> + continue;
> + nicvf_get_qset_stats(nic->snicvf[sqs], stats, &data);
> }
>
> for (stat = 0; stat < BGX_RX_STATS_COUNT; stat++)
> @@ -393,7 +441,7 @@ static int nicvf_get_rxnfc(struct net_device *dev,
>
> switch (info->cmd) {
> case ETHTOOL_GRXRINGS:
> - info->data = nic->qs->rq_cnt;
> + info->data = nic->rx_queues;
> ret = 0;
> break;
> case ETHTOOL_GRXFH:
> @@ -556,11 +604,11 @@ static void nicvf_get_channels(struct net_device *dev,
>
> memset(channel, 0, sizeof(*channel));
>
> - channel->max_rx = MAX_RCV_QUEUES_PER_QS;
> - channel->max_tx = MAX_SND_QUEUES_PER_QS;
> + channel->max_rx = nic->max_queues;
> + channel->max_tx = nic->max_queues;
>
> - channel->rx_count = nic->qs->rq_cnt;
> - channel->tx_count = nic->qs->sq_cnt;
> + channel->rx_count = nic->rx_queues;
> + channel->tx_count = nic->tx_queues;
> }
>
> /* Set no of Tx, Rx queues to be used */
> @@ -568,24 +616,36 @@ static int nicvf_set_channels(struct net_device *dev,
> struct ethtool_channels *channel)
> {
> struct nicvf *nic = netdev_priv(dev);
> - int err = 0;
> bool if_up = netif_running(dev);
> + int cqcount;

> + int err = 0;


Probably this initialization is not needed.

>
> if (!channel->rx_count || !channel->tx_count)
> return -EINVAL;
> - if (channel->rx_count > MAX_RCV_QUEUES_PER_QS)
> + if (channel->rx_count > nic->max_queues)
> return -EINVAL;
> - if (channel->tx_count > MAX_SND_QUEUES_PER_QS)
> + if (channel->tx_count > nic->max_queues)
> return -EINVAL;
>
> if (if_up)
> nicvf_stop(dev);
>
> - nic->qs->rq_cnt = channel->rx_count;
> - nic->qs->sq_cnt = channel->tx_count;
> + cqcount = max(channel->rx_count, channel->tx_count);
> +
> + if (cqcount > MAX_CMP_QUEUES_PER_QS) {
> + nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS);
> + nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
> + } else {
> + nic->sqs_count = 0;
> + }
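
Checking the math here, assuming MAX_CMP_QUEUES_PER_QS is 8: cqcount = 32
gives sqs_count = roundup(32, 8) / 8 - 1 = 3 secondary qsets, and
cqcount = 20 gives roundup(20, 8) / 8 - 1 = 2. That looks correct to me.
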
> +
> + nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS);
> + nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS);
> nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt);
>
> - err = nicvf_set_real_num_queues(dev, nic->qs->sq_cnt, nic->qs->rq_cnt);
> + nic->rx_queues = channel->rx_count;
> + nic->tx_queues = channel->tx_count;
> + err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues);
> if (err)
> return err;
>
> @@ -593,7 +653,7 @@ static int nicvf_set_channels(struct net_device *dev,
> nicvf_open(dev);
>
> netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
> - nic->qs->sq_cnt, nic->qs->rq_cnt);
> + nic->tx_queues, nic->rx_queues);
>
> return err;
> }
> diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> index 2198f61..9a1091a 100644
> --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
> @@ -51,6 +51,14 @@ module_param(cpi_alg, int, S_IRUGO);
> MODULE_PARM_DESC(cpi_alg,
> "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
>
> +static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
> +{
> + if (nic->sqs_mode)
> + return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
> + else
> + return qidx;
> +}
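
Just to confirm my reading, assuming MAX_CMP_QUEUES_PER_QS is 8: queue 2
of secondary qset 0 maps to netdev queue (0 + 1) * 8 + 2 = 10, right?
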
> +
> static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
> struct sk_buff *skb)
> {
> @@ -193,6 +201,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
> if (!nic->set_mac_pending)
> ether_addr_copy(nic->netdev->dev_addr,
> mbx.nic_cfg.mac_addr);
> + nic->sqs_mode = mbx.nic_cfg.sqs_mode;
> nic->link_up = false;
> nic->duplex = 0;
> nic->speed = 0;
> @@ -230,6 +239,26 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
> netif_tx_stop_all_queues(nic->netdev);
> }
> break;
> + case NIC_MBOX_MSG_ALLOC_SQS:
> + nic->sqs_count = mbx.sqs_alloc.qs_count;
> + nic->pf_acked = true;
> + break;
> + case NIC_MBOX_MSG_SNICVF_PTR:
> + /* Primary VF: make note of secondary VF's pointer
> + * to be used while packet transmission.
> + */
> + nic->snicvf[mbx.nicvf.sqs_id] =
> + (struct nicvf *)mbx.nicvf.nicvf;
> + nic->pf_acked = true;
> + break;
> + case NIC_MBOX_MSG_PNICVF_PTR:
> + /* Secondary VF/Qset: make note of primary VF's pointer
> + * to be used while packet reception, to handover packet
> + * to primary VF's netdev.
> + */
> + nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
> + nic->pf_acked = true;
> + break;
> default:
> netdev_err(nic->netdev,
> "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
> @@ -338,11 +367,100 @@ static int nicvf_rss_init(struct nicvf *nic)
>
> for (idx = 0; idx < rss->rss_size; idx++)
> rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
> - nic->qs->rq_cnt);
> + nic->rx_queues);
> nicvf_config_rss(nic);
> return 1;
> }
>
> +/* Request PF to allocate additional Qsets */
> +static void nicvf_request_sqs(struct nicvf *nic)
> +{
> + union nic_mbx mbx = {};
> + int sqs;
> + int sqs_count = nic->sqs_count;
> + int rx_queues = 0, tx_queues = 0;
> +
> + /* Only primary VF should request */
> + if (nic->sqs_mode || !nic->sqs_count)
> + return;
> +
> + mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
> + mbx.sqs_alloc.vf_id = nic->vf_id;
> + mbx.sqs_alloc.qs_count = nic->sqs_count;
> + if (nicvf_send_msg_to_pf(nic, &mbx)) {
> + /* No response from PF */
> + nic->sqs_count = 0;
> + return;
> + }
> +
> + /* Return if no Secondary Qsets available */
> + if (!nic->sqs_count)
> + return;
> +
> + if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
> + rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;
> + if (nic->tx_queues > MAX_SND_QUEUES_PER_QS)
> + tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS;
> +
> + /* Set no of Rx/Tx queues in each of the SQsets */
> + for (sqs = 0; sqs < nic->sqs_count; sqs++) {
> + mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
> + mbx.nicvf.vf_id = nic->vf_id;
> + mbx.nicvf.sqs_id = sqs;
> + nicvf_send_msg_to_pf(nic, &mbx);
> +
> + nic->snicvf[sqs]->sqs_id = sqs;
> + if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
> + nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
> + rx_queues -= MAX_RCV_QUEUES_PER_QS;
> + } else {
> + nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
> + rx_queues = 0;
> + }
> +
> + if (tx_queues > MAX_SND_QUEUES_PER_QS) {
> + nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
> + tx_queues -= MAX_SND_QUEUES_PER_QS;
> + } else {
> + nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
> + tx_queues = 0;
> + }
> +
> + nic->snicvf[sqs]->qs->cq_cnt =
> + max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);
> +
> + /* Initialize secondary Qset's queues and its interrupts */
> + nicvf_open(nic->snicvf[sqs]->netdev);
> + }
> +
> + /* Update stack with actual Rx/Tx queue count allocated */
> + if (sqs_count != nic->sqs_count)
> + nicvf_set_real_num_queues(nic->netdev,
> + nic->tx_queues, nic->rx_queues);
> +}
> +
> +/* Send this Qset's nicvf pointer to PF.
> + * PF in turn sends primary VF's nicvf struct to secondary Qsets/VFs
> + * so that packets received by these Qsets can use primary VF's netdev
> + */
> +static void nicvf_send_vf_struct(struct nicvf *nic)
> +{
> + union nic_mbx mbx = {};
> +
> + mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
> + mbx.nicvf.sqs_mode = nic->sqs_mode;
> + mbx.nicvf.nicvf = (u64)nic;
> + nicvf_send_msg_to_pf(nic, &mbx);
> +}
> +
> +static void nicvf_get_primary_vf_struct(struct nicvf *nic)
> +{
> + union nic_mbx mbx = {};
> +
> + mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
> + nicvf_send_msg_to_pf(nic, &mbx);
> +}
> +
> int nicvf_set_real_num_queues(struct net_device *netdev,
> int tx_queues, int rx_queues)
> {
> @@ -453,6 +571,15 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
> struct sk_buff *skb;
> struct nicvf *nic = netdev_priv(netdev);
> int err = 0;
> + int rq_idx;
> +
> + rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
> +
> + if (nic->sqs_mode) {
> + /* Use primary VF's 'nicvf' struct */
> + nic = nic->pnicvf;
> + netdev = nic->netdev;
> + }
>
> /* Check for errors */
> err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
> @@ -482,7 +609,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
>
> nicvf_set_rxhash(netdev, cqe_rx, skb);
>
> - skb_record_rx_queue(skb, cqe_rx->rq_idx);
> + skb_record_rx_queue(skb, rq_idx);
> if (netdev->hw_features & NETIF_F_RXCSUM) {
> /* HW by default verifies TCP/UDP/SCTP checksums */
> skb->ip_summed = CHECKSUM_UNNECESSARY;
> @@ -578,8 +705,11 @@ loop:
> done:
> /* Wakeup TXQ if its stopped earlier due to SQ full */
> if (tx_done) {
> - txq = netdev_get_tx_queue(netdev, cq_idx);
> - if (netif_tx_queue_stopped(txq)) {
> + netdev = nic->pnicvf->netdev;
> + txq = netdev_get_tx_queue(netdev,
> + nicvf_netdev_qidx(nic, cq_idx));
> + nic = nic->pnicvf;
> + if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
> netif_tx_start_queue(txq);
> nic->drv_stats.txq_wake++;
> if (netif_msg_tx_err(nic))
> @@ -893,7 +1023,6 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
> netdev_warn(netdev,
> "%s: Transmit ring full, stopping SQ%d\n",
> netdev->name, qid);
> -
> return NETDEV_TX_BUSY;
> }
>
> @@ -926,6 +1055,17 @@ int nicvf_stop(struct net_device *netdev)
> nicvf_send_msg_to_pf(nic, &mbx);
>
> netif_carrier_off(netdev);
> + netif_tx_stop_all_queues(nic->netdev);
> +
> + /* Teardown secondary qsets first */
> + if (!nic->sqs_mode) {
> + for (qidx = 0; qidx < nic->sqs_count; qidx++) {
> + if (!nic->snicvf[qidx])
> + continue;
> + nicvf_stop(nic->snicvf[qidx]->netdev);
> + nic->snicvf[qidx] = NULL;
> + }
> + }
>
> /* Disable RBDR & QS error interrupts */
> for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
> @@ -973,6 +1113,10 @@ int nicvf_stop(struct net_device *netdev)
>
> nicvf_free_cq_poll(nic);
>
> + /* Clear multiqset info */
> + nic->pnicvf = nic;
> + nic->sqs_count = 0;
> +
> return 0;
> }
>
> @@ -1028,10 +1172,16 @@ int nicvf_open(struct net_device *netdev)
>
> /* Configure CPI algorithm */
> nic->cpi_alg = cpi_alg;
> - nicvf_config_cpi(nic);
> + if (!nic->sqs_mode)
> + nicvf_config_cpi(nic);
> +
> + nicvf_request_sqs(nic);
> + if (nic->sqs_mode)
> + nicvf_get_primary_vf_struct(nic);
>
> /* Configure receive side scaling */
> - nicvf_rss_init(nic);
> + if (!nic->sqs_mode)
> + nicvf_rss_init(nic);
>
> err = nicvf_register_interrupts(nic);
> if (err)
> @@ -1282,8 +1432,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> struct device *dev = &pdev->dev;
> struct net_device *netdev;
> struct nicvf *nic;
> - struct queue_set *qs;
> - int err;
> + int err, qcount;
>
> err = pci_enable_device(pdev);
> if (err) {
> @@ -1309,9 +1458,17 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> goto err_release_regions;
> }
>
> - netdev = alloc_etherdev_mqs(sizeof(struct nicvf),
> - MAX_RCV_QUEUES_PER_QS,
> - MAX_SND_QUEUES_PER_QS);
> + qcount = MAX_CMP_QUEUES_PER_QS;
> +
> + /* Restrict multiqset support only for host bound VFs */
> + if (pdev->is_virtfn) {
> + /* Set max number of queues per VF */
> + qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
> + qcount = min(qcount,
> + (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
> + }
> +
> + netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
> if (!netdev) {
> err = -ENOMEM;
> goto err_release_regions;
> @@ -1324,6 +1481,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> nic = netdev_priv(netdev);
> nic->netdev = netdev;
> nic->pdev = pdev;
> + nic->pnicvf = nic;
> + nic->max_queues = qcount;
>
> /* MAP VF's configuration registers */
> nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
> @@ -1337,20 +1496,26 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> if (err)
> goto err_free_netdev;
>
> - qs = nic->qs;
> -
> - err = nicvf_set_real_num_queues(netdev, qs->sq_cnt, qs->rq_cnt);
> - if (err)
> - goto err_free_netdev;
> -
> /* Check if PF is alive and get MAC address for this VF */
> err = nicvf_register_misc_interrupt(nic);
> if (err)
> goto err_free_netdev;
>
> + nicvf_send_vf_struct(nic);
> +
> + /* Check if this VF is in QS only mode */
> + if (nic->sqs_mode)
> + return 0;
> +
> + err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
> + if (err)
> + goto err_unregister_interrupts;
> +
> netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
> NETIF_F_TSO | NETIF_F_GRO |
> - NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXHASH);
> + NETIF_F_HW_VLAN_CTAG_RX);
> +
> + netdev->hw_features |= NETIF_F_RXHASH;
>
> netdev->features |= netdev->hw_features;
>
> @@ -1389,8 +1554,13 @@ static void nicvf_remove(struct pci_dev *pdev)
> {
> struct net_device *netdev = pci_get_drvdata(pdev);
> struct nicvf *nic = netdev_priv(netdev);
> + struct net_device *pnetdev = nic->pnicvf->netdev;
>
> - unregister_netdev(netdev);
> + /* Check if this Qset is assigned to a different VF.
> + * If yes, clean up the primary and all secondary Qsets.
> + */
> + if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
> + unregister_netdev(pnetdev);
> nicvf_unregister_interrupts(nic);
> pci_set_drvdata(pdev, NULL);
> free_netdev(netdev);
> diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
> index b294d67..63a870a 100644
> --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
> +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c


> @@ -478,7 +478,7 @@ static void nicvf_reclaim_rbdr(struct nicvf *nic,
> void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
> {
> u64 rq_cfg;
> - int sqs;
> + int sqs = 0;
>
> rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
>

This change in nicvf_config_vlan_stripping() is probably not part of
this patch; it fits better into 5/8. But please check whether you
really need this change at all.

> @@ -621,6 +621,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
> mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
> mbx.sq.qs_num = qs->vnic_id;
> mbx.sq.sq_num = qidx;
> + mbx.sq.sqs_mode = nic->sqs_mode;
> mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
> nicvf_send_msg_to_pf(nic, &mbx);
>
> @@ -702,6 +703,7 @@ void nicvf_qset_config(struct nicvf *nic, bool enable)
> /* Send a mailbox msg to PF to config Qset */
> mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
> mbx.qs.num = qs->vnic_id;
> + mbx.qs.sqs_count = nic->sqs_count;
>
> mbx.qs.cfg = 0;
> qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
> @@ -782,6 +784,10 @@ int nicvf_set_qset_resources(struct nicvf *nic)
> qs->rbdr_len = RCV_BUF_COUNT;
> qs->sq_len = SND_QUEUE_LEN;
> qs->cq_len = CMP_QUEUE_LEN;
> +
> + nic->rx_queues = qs->rq_cnt;
> + nic->tx_queues = qs->sq_cnt;
> +
> return 0;
> }
>
> @@ -1025,7 +1031,7 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
> * them to SQ for transfer
> */
> static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
> - int qentry, struct sk_buff *skb)
> + int sq_num, int qentry, struct sk_buff *skb)
> {
> struct tso_t tso;
> int seg_subdescs = 0, desc_cnt = 0;
> @@ -1085,7 +1091,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
>
> /* Inform HW to xmit all TSO segments */
> nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
> - skb_get_queue_mapping(skb), desc_cnt);
> + sq_num, desc_cnt);
> nic->drv_stats.tx_tso++;
> return 1;
> }
> @@ -1096,10 +1102,24 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
> int i, size;
> int subdesc_cnt;
> int sq_num, qentry;
> - struct queue_set *qs = nic->qs;
> + struct queue_set *qs;
> struct snd_queue *sq;
>
> sq_num = skb_get_queue_mapping(skb);
> + if (sq_num >= MAX_SND_QUEUES_PER_QS) {
> + /* Get secondary Qset's SQ structure */
> + i = sq_num / MAX_SND_QUEUES_PER_QS;
> + if (!nic->snicvf[i - 1]) {
> + netdev_warn(nic->netdev,
> + "Secondary Qset#%d's ptr not initialized\n",
> + i - 1);
> + return 1;
> + }
> + nic = (struct nicvf *)nic->snicvf[i - 1];
> + sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
> + }
> +
> + qs = nic->qs;
> sq = &qs->sq[sq_num];
>
> subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
> @@ -1110,7 +1130,7 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
>
> /* Check if its a TSO packet */
> if (skb_shinfo(skb)->gso_size)
> - return nicvf_sq_append_tso(nic, sq, qentry, skb);
> + return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
>
> /* Add SQ header subdesc */
> nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len);
> @@ -1146,6 +1166,8 @@ doorbell:
> return 1;
>
> append_fail:
> + /* Use original PCI dev for debug log */
> + nic = nic->pnicvf;
> netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
> return 0;
> }
> --
> 2.5.0



--
Best regards, Klimov Alexey

2015-08-29 05:10:53

by David Miller

[permalink] [raw]
Subject: Re: [PATCH net-next 7/8] net: thunderx: Support for upto 96 queues for a VF

From: Alexey Klimov <[email protected]>
Date: Sat, 29 Aug 2015 04:45:03 +0300

>> @@ -717,9 +833,24 @@ static void nic_unregister_interrupts(struct nicpf *nic)
>> nic_disable_msix(nic);
>> }
>>
>> +static int nic_num_sqs_en(struct nicpf *nic, int vf_en)
>> +{
>> + int pos = 0, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
>
> Please check if you really need to initialize 'pos' to zero here.

Please do _NOT_ quote hundreds of lines of code only to give feedback
on one particular hunk.

Quote _ONLY_ the exact context required, nothing more.

Everyone has to scroll through all of this unrelated crap you quoted,
and that makes more work for everyone.

Think particularly of _ME_ who has to be aware of what's going on in
every discussion thread for every patch that gets posted to this list.

2015-08-30 09:20:44

by Aleksey Makarov

[permalink] [raw]
Subject: Re: [PATCH net-next 6/8] net: thunderx: Rework interrupt handler

On 29.08.2015 04:44, Alexey Klimov wrote:

>> -static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq)
>> +static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
>> +{
>> + struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
>> + struct nicvf *nic = cq_poll->nicvf;
>> + int qidx = cq_poll->cq_idx;
>> +
>> + nicvf_dump_intr_status(nic);
>> +
>> + /* Disable interrupts */
>> + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
>> +
>> + /* Schedule NAPI */
>> + napi_schedule(&cq_poll->napi);
>> +
>> + /* Clear interrupt */
>> + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
>> +
>> + return IRQ_HANDLED;
>> +}
>
> You're not considering spurious IRQs in any of the new IRQ handlers here
> and below, and you schedule NAPI/tasklets unconditionally. Is that
> correct? To me it looks like the previous implementation relied on
> reading NIC_VF_INT to understand the IRQ type and what actions should be
> performed. It generally allowed for the possibility that no interrupt
> had occurred.

1. The previous version of the handler did not handle spurious
interrupts either. That probably means that the author of the patch
knows for sure that they do not happen.

2. Instead of reading the status register, the new version registers
different handlers for different IRQs. I don't see why that would be
wrong.

I am going to address your other suggestions in the next version of the
patchset.

Thank you
Aleksey Makarov

2015-08-30 16:31:08

by Sunil Kovvuri

[permalink] [raw]
Subject: Re: [PATCH net-next 6/8] net: thunderx: Rework interrupt handler

On Sun, Aug 30, 2015 at 2:50 PM, Aleksey Makarov <[email protected]> wrote:
> On 29.08.2015 04:44, Alexey Klimov wrote:
>
>>> -static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq)
>>> +static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
>>> +{
>>> + struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
>>> + struct nicvf *nic = cq_poll->nicvf;
>>> + int qidx = cq_poll->cq_idx;
>>> +
>>> + nicvf_dump_intr_status(nic);
>>> +
>>> + /* Disable interrupts */
>>> + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
>>> +
>>> + /* Schedule NAPI */
>>> + napi_schedule(&cq_poll->napi);
>>> +
>>> + /* Clear interrupt */
>>> + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
>>> +
>>> + return IRQ_HANDLED;
>>> +}
>>
>>
>> You're not considering spurious IRQs in any of the new IRQ handlers here
>> and below, and you schedule NAPI/tasklets unconditionally. Is that
>> correct? To me it looks like the previous implementation relied on
>> reading NIC_VF_INT to understand the IRQ type and what actions should be
>> performed. It generally allowed for the possibility that no interrupt
>> had occurred.
>
>
> 1. The previous version of the handler did not handle spurious interrupts
> either. That probably means that the author of the patch knows for sure
> that they do not happen.

Yes, no spurious interrupts are expected from the hardware.
Even if one occurs, the NAPI poll routine will handle it, as the valid
descriptor count would be zero.
I don't think it makes sense to check for a spurious interrupt upon
every interrupt.
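
To illustrate: even a stray schedule degenerates into a no-op poll.
Roughly like this -- the nicvf_cq_intr_handler()/nicvf_enable_intr()
names and signatures are assumed here, not quoted from the driver:

static int nicvf_poll(struct napi_struct *napi, int budget)
{
	struct nicvf_cq_poll *cq_poll =
		container_of(napi, struct nicvf_cq_poll, napi);
	struct nicvf *nic = cq_poll->nicvf;
	int work_done;

	/* On a spurious interrupt the CQ holds no valid descriptors,
	 * so work_done is 0 and the poll completes immediately.
	 */
	work_done = nicvf_cq_intr_handler(nic, cq_poll->cq_idx, napi, budget);
	if (work_done < budget) {
		napi_complete(napi);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, cq_poll->cq_idx);
	}
	return work_done;
}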

>
> 2. Instead of reading the status register, the new version registers
> different handlers for different IRQs. I don't see why that would be
> wrong.

The previous implementation results in scheduling multiple NAPI poll
handlers on the same CPU even if the IRQs' affinities are set to
different CPUs. Hence they are separated now.
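
With one vector per CQ the affinities can then actually take effect,
e.g. by spreading the vectors at setup time. A sketch, assuming the
msix_entries[] layout and a per-nic 'node' field:

	/* Spread CQ vectors across CPUs so each NAPI poll runs on the
	 * CPU its IRQ fires on, instead of piling onto one CPU.
	 */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		int cpu = cpumask_local_spread(qidx, nic->node);

		irq_set_affinity_hint(nic->msix_entries[qidx].vector,
				      cpumask_of(cpu));
	}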

>
> I am going to address your other suggestions in the next version of the
> patchset.
>
> Thank you
> Aleksey Makarov
>