From: sunil.kovvuri@gmail.com
To: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	sgoutham@cavium.com, robert.richter@caviumnetworks.com,
	Radoslaw Biernacki
Subject: [PATCH v2 2/4] net: thunderx: Add multiqset support for dataplane apps
Date: Thu, 21 Apr 2016 12:27:50 +0530
Message-Id: <1461221872-38841-3-git-send-email-sunil.kovvuri@gmail.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <1461221872-38841-1-git-send-email-sunil.kovvuri@gmail.com>
References: <1461221872-38841-1-git-send-email-sunil.kovvuri@gmail.com>

From: Radoslaw Biernacki

This patch adds PF support for allocating additional Qsets to
dataplane apps such as DPDK. Until now the PF, upon a host-bound
interface's request, allocated Qsets from the free pool; with this
change a dataplane app can request specific Qsets instead of leaving
the choice to the PF.

It also adds validation checks in several places. These are needed
for correct secondary Qset allocation when interfaces in different
domains (host, VFIO, DPDK, etc.) exist at the same time. The checks
include:

 - Check that RSS indirection table entries are valid (see the
   entry-layout sketch below).
 - When a host-bound interface requests additional Qsets, the PF must
   assign only those in the host domain, i.e. both the primary VF and
   its secondary VFs must be bound to the same driver. Hence PCI
   driver checks are added.
 - If a dataplane app terminates without a proper shutdown, then on
   restart it may request the same or different SQsets than were
   assigned before. This case is handled; otherwise the application
   would not recover.

Also removed 'sqs_used', which became redundant with the new SQset
allocation scheme.
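For illustration, the RSS indirection table entry layout that the
first check validates looks like this (a sketch derived from
nic_config_rss() in the diff below, not itself part of the patch):

	u8 entry = cfg->ind_tbl[idx];
	u8 svf   = entry >> 3;	/* 0: VF's own primary Qset,
				 * 1..MAX_SQS_PER_VF: index into
				 * vf_sqs[vf][svf - 1] */
	u8 rq    = entry & 0x7;	/* RQ within the selected Qset */

With this patch the PF falls back to the VF's primary Qset whenever
'svf' is out of range or the mapped Qset is not owned by the
requesting VF.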
Signed-off-by: Radoslaw Biernacki
Signed-off-by: Sunil Goutham
---
 drivers/net/ethernet/cavium/thunder/nic.h        |   5 +-
 drivers/net/ethernet/cavium/thunder/nic_main.c   | 197 +++++++++++++++++++---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c |   2 +-
 3 files changed, 174 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index e2ac9bd..b63278a 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -463,11 +463,12 @@ struct bgx_link_status {
 	u32    speed;
 };
 
-/* Get Extra Qset IDs */
+/* Allocate additional SQS to VF */
 struct sqs_alloc {
 	u8    msg;
-	u8    vf_id;
+	u8    spec; /* 1 - For specific SQS allocation, 0 - For PF's choice */
 	u8    qs_count;
+	u8    svf[MAX_SQS_PER_VF]; /* SQS VF ids for specific allocation */
 };
 
 struct nicvf_ptr {
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 77ee260..cd5e7a4 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -29,9 +29,9 @@ struct nicpf {
 	void __iomem		*reg_base;	/* Register start address */
 	u8			num_sqs_en;	/* Secondary qsets enabled */
 	u64			nicvf[MAX_NUM_VFS_SUPPORTED];
+#define	NIC_VF_UNASSIGNED	((u8)0xFF)
 	u8			vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
 	u8			pqs_vf[MAX_NUM_VFS_SUPPORTED];
-	bool			sqs_used[MAX_NUM_VFS_SUPPORTED];
 	struct pkind_cfg	pkind;
 #define	NIC_SET_VF_LMAC_MAP(bgx, lmac)	(((bgx & 0xF) << 4) | (lmac & 0xF))
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
@@ -46,6 +46,7 @@ struct nicpf {
 	u16			rssi_base[MAX_NUM_VFS_SUPPORTED];
 	u16			rss_ind_tbl_size;
 	bool			mbx_lock[MAX_NUM_VFS_SUPPORTED];
+	struct pci_dev		*vf_pdev[MAX_NUM_VFS_SUPPORTED];
 
 	/* MSI-X */
 	bool			msix_enabled;
@@ -458,10 +459,18 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
 	for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
 		u8 svf = cfg->ind_tbl[idx] >> 3;
 
-		if (svf)
+		if (svf && (svf <= MAX_SQS_PER_VF)) {
 			qset = nic->vf_sqs[cfg->vf_id][svf - 1];
-		else
+			if ((qset >= MAX_NUM_VFS_SUPPORTED) ||
+			    (nic->pqs_vf[qset] != cfg->vf_id)) {
+				dev_err(&nic->pdev->dev,
+					"Invalid rss table entry %d from VF %d\n",
+					cfg->ind_tbl[idx], cfg->vf_id);
+				qset = cfg->vf_id;
+			}
+		} else {
 			qset = cfg->vf_id;
+		}
 		nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
 			      (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
 		idx++;
@@ -550,7 +559,19 @@ static void nic_send_pnicvf(struct nicpf *nic, int sqs)
 static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
 {
 	union nic_mbx mbx = {};
-	int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
+	int sqs_id;
+
+	if (nicvf->sqs_id >= MAX_SQS_PER_VF) {
+		nic_mbx_send_nack(nic, nicvf->vf_id);
+		return;
+	}
+
+	sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
+	if ((sqs_id < nic->num_vf_en) ||
+	    (nic->pqs_vf[sqs_id] != nicvf->vf_id)) {
+		nic_mbx_send_nack(nic, nicvf->vf_id);
+		return;
+	}
 
 	mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
 	mbx.nicvf.sqs_id = nicvf->sqs_id;
@@ -558,47 +579,152 @@ static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
 	nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
 }
 
+#ifdef CONFIG_PCI_IOV
+/* Find and take reference to all vf devices */
+static void nic_get_vf_pdev(struct nicpf *nic, int vf_en)
+{
+	struct pci_dev *pdev = nic->pdev;
+	struct pci_dev *vfdev;
+	u16 vid = pdev->vendor;
+	u16 devid;
+	int vf = 0, pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos)
+		return;
+	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &devid);
+
+	vfdev = pci_get_device(vid, devid, NULL);
+	for (; vfdev; vfdev = pci_get_device(vid, devid, vfdev)) {
+		if (!vfdev->is_virtfn)
+			continue;
+		if (vfdev->physfn != pdev)
+			continue;
+		if (vf >= vf_en)
+			continue;
+		nic->vf_pdev[vf] = vfdev;
+		pci_dev_get(vfdev);
+		++vf;
+	}
+}
+#endif
+
+/* Release references to all vf devices */
+static void nic_put_vf_pdev(struct nicpf *nic)
+{
+	int vf;
+
+	for (vf = 0; vf < MAX_NUM_VFS_SUPPORTED; vf++) {
+		struct pci_dev *vfdev = nic->vf_pdev[vf];
+
+		nic->vf_pdev[vf] = NULL;
+		if (vfdev)
+			pci_dev_put(vfdev);
+	}
+}
+
+/* Check if pri.VF and sec.VF are in same domain, i.e. bound to same driver */
+static bool nic_check_svf_drv(struct nicpf *nic, u8 pvf, u8 svf)
+{
+	return pci_dev_driver(nic->vf_pdev[pvf]) ==
+	       pci_dev_driver(nic->vf_pdev[svf]);
+}
+
 /* Find next available Qset that can be assigned as a
  * secondary Qset to a VF.
  */
-static int nic_nxt_avail_sqs(struct nicpf *nic)
+static int nic_nxt_avail_sqs(struct nicpf *nic, u8 pvf)
 {
 	int sqs;
 
-	for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
-		if (!nic->sqs_used[sqs])
-			nic->sqs_used[sqs] = true;
+	for (sqs = nic->num_vf_en;
+	     sqs < (nic->num_vf_en + nic->num_sqs_en); sqs++) {
+		if ((nic->pqs_vf[sqs] == NIC_VF_UNASSIGNED) &&
+		    nic_check_svf_drv(nic, pvf, sqs))
+			nic->pqs_vf[sqs] = pvf;
 		else
 			continue;
-		return sqs + nic->num_vf_en;
+		return sqs;
 	}
 	return -1;
 }
 
 /* Allocate additional Qsets for requested VF */
-static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
+static void nic_alloc_sqs(struct nicpf *nic, u8 pvf, struct sqs_alloc *sqs)
 {
 	union nic_mbx mbx = {};
 	int idx, alloc_qs = 0;
 	int sqs_id;
 
-	if (!nic->num_sqs_en)
+	if (!nic->num_sqs_en || (sqs->qs_count > MAX_SQS_PER_VF))
 		goto send_mbox;
 
-	for (idx = 0; idx < sqs->qs_count; idx++) {
-		sqs_id = nic_nxt_avail_sqs(nic);
-		if (sqs_id < 0)
-			break;
-		nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
-		nic->pqs_vf[sqs_id] = sqs->vf_id;
-		alloc_qs++;
+	if (sqs->spec) {
+		for (idx = 0; idx < sqs->qs_count; idx++) {
+			sqs_id = sqs->svf[idx];
+
+			/* Check if desired SQS is within the allowed range */
+			if (!((sqs_id >= nic->num_vf_en) &&
+			      (sqs_id < (nic->num_vf_en + nic->num_sqs_en)))) {
+				dev_err(&nic->pdev->dev,
+					"Req SQS is invalid sqs->svf[%d]=%u",
+					idx, sqs_id);
+				break;
+			}
+
+			/* Check if desired SQS is free or assigned to a PVF */
+			if ((nic->pqs_vf[sqs_id] != NIC_VF_UNASSIGNED) &&
+			    (nic->pqs_vf[sqs_id] != pvf)) {
+				dev_err(&nic->pdev->dev,
+					"SQS%d is already allocated to VF%u",
+					sqs_id, nic->pqs_vf[sqs_id]);
+				break;
+			}
+
+			/* Check if SQS is bound to the same driver as PVF */
+			if (!nic_check_svf_drv(nic, pvf, sqs_id)) {
+				dev_err(&nic->pdev->dev,
					"SQS%d uses a different driver", sqs_id);
+				break;
+			}
+		}
+
+		if (idx != sqs->qs_count)
+			goto send_mbox;
+
+		/* Clear any existing assignments */
+		for (idx = 0; idx < MAX_SQS_PER_VF; idx++)
+			nic->vf_sqs[pvf][idx] = NIC_VF_UNASSIGNED;
+		for (idx = nic->num_vf_en;
+		     idx < (nic->num_vf_en + nic->num_sqs_en); idx++) {
+			if (nic->pqs_vf[idx] == pvf)
+				nic->pqs_vf[idx] = NIC_VF_UNASSIGNED;
+		}
+
+		/* Populate VF's SQS table */
+		for (idx = 0; idx < sqs->qs_count; idx++) {
+			sqs_id = sqs->svf[idx];
+			nic->vf_sqs[pvf][idx] = sqs_id;
+			nic->pqs_vf[sqs_id] = pvf;
+			mbx.sqs_alloc.svf[idx] = sqs_id;
+		}
+		alloc_qs = idx;
+	} else {
+		for (idx = 0; idx < sqs->qs_count; idx++) {
+			sqs_id = nic_nxt_avail_sqs(nic, pvf);
+			if (sqs_id < 0)
+				break;
+			nic->vf_sqs[pvf][idx] = sqs_id;
+			nic->pqs_vf[sqs_id] = pvf;
+			mbx.sqs_alloc.svf[idx] = sqs_id;
+		}
+		alloc_qs = idx;
 	}
 
 send_mbox:
 	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
-	mbx.sqs_alloc.vf_id = sqs->vf_id;
 	mbx.sqs_alloc.qs_count = alloc_qs;
-	nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
+	nic_send_msg_to_vf(nic, pvf, &mbx);
 }
 
 static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
@@ -776,13 +902,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 		goto unlock;
 	case NIC_MBOX_MSG_SHUTDOWN:
 		/* First msg in VF teardown sequence */
-		if (vf >= nic->num_vf_en)
-			nic->sqs_used[vf - nic->num_vf_en] = false;
-		nic->pqs_vf[vf] = 0;
+		if (vf < nic->num_vf_en) {
+			for (i = 0; i < MAX_SQS_PER_VF; i++)
+				nic->vf_sqs[vf][i] = NIC_VF_UNASSIGNED;
+		}
+		nic->pqs_vf[vf] = NIC_VF_UNASSIGNED;
 		nic_enable_vf(nic, vf, false);
 		break;
 	case NIC_MBOX_MSG_ALLOC_SQS:
-		nic_alloc_sqs(nic, &mbx.sqs_alloc);
+		nic_alloc_sqs(nic, vf, &mbx.sqs_alloc);
 		goto unlock;
 	case NIC_MBOX_MSG_NICVF_PTR:
 		nic->nicvf[vf] = mbx.nicvf.nicvf;
@@ -979,6 +1107,10 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
 		return err;
 	}
 
+#ifdef CONFIG_PCI_IOV
+	nic_get_vf_pdev(nic, vf_en);
+#endif
+
 	dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
 		 vf_en);
 
@@ -1035,7 +1167,7 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
 	struct nicpf *nic;
-	int err;
+	int err, vf, sqs;
 
 	BUILD_BUG_ON(sizeof(union nic_mbx) > 16);
 
@@ -1090,6 +1222,13 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* Set RSS TBL size for each VF */
 	nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
 
+	/* Initialize all VF's primary Qset */
+	for (vf = 0; vf < MAX_NUM_VFS_SUPPORTED; vf++) {
+		nic->pqs_vf[vf] = NIC_VF_UNASSIGNED;
+		for (sqs = 0; sqs < MAX_SQS_PER_VF; sqs++)
+			nic->vf_sqs[vf][sqs] = NIC_VF_UNASSIGNED;
+	}
+
 	/* Register interrupts */
 	err = nic_register_interrupts(nic);
 	if (err)
@@ -1114,8 +1253,10 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
 err_disable_sriov:
-	if (nic->flags & NIC_SRIOV_ENABLED)
+	if (nic->flags & NIC_SRIOV_ENABLED) {
+		nic_put_vf_pdev(nic);
 		pci_disable_sriov(pdev);
+	}
 err_unregister_interrupts:
 	nic_unregister_interrupts(nic);
 err_release_regions:
@@ -1130,8 +1271,10 @@ static void nic_remove(struct pci_dev *pdev)
 {
 	struct nicpf *nic = pci_get_drvdata(pdev);
 
-	if (nic->flags & NIC_SRIOV_ENABLED)
+	if (nic->flags & NIC_SRIOV_ENABLED) {
+		nic_put_vf_pdev(nic);
 		pci_disable_sriov(pdev);
+	}
 
 	if (nic->check_link) {
 		/* Destroy work Queue */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index bfee298..87d0f56 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -386,7 +386,7 @@ static void nicvf_request_sqs(struct nicvf *nic)
 		return;
 
 	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
-	mbx.sqs_alloc.vf_id = nic->vf_id;
+	mbx.sqs_alloc.spec = 0; /* Let PF choose which SQS to alloc */
 	mbx.sqs_alloc.qs_count = nic->sqs_count;
 	if (nicvf_send_msg_to_pf(nic, &mbx)) {
 		/* No response from PF */
-- 
1.7.1
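As a usage sketch (illustrative only, not part of this patch): a
dataplane-bound VF driver could request specific SQS VF ids through
the new 'spec' field, mirroring nicvf_request_sqs() above.
'wanted_svf' is a hypothetical array of desired SQS VF ids:

	union nic_mbx mbx = {};
	int i;

	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
	mbx.sqs_alloc.spec = 1;	/* ask for these exact SQS VFs */
	mbx.sqs_alloc.qs_count = nic->sqs_count;
	for (i = 0; i < nic->sqs_count; i++)
		mbx.sqs_alloc.svf[i] = wanted_svf[i];	/* hypothetical id list */
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		/* No response from PF */
		return;
	}

The PF validates every requested id (range, current ownership, and
driver domain) and replies with a qs_count of 0 if any check fails.
Since an id already assigned to the same primary VF passes the
ownership check, this also covers the restart-after-crash case
described in the commit message.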