From: Corentin Labbe Subject: Re: [PATCH v3 1/3] drivers: crypto: Add Support for Octeon-tx CPT Engine Date: Wed, 21 Dec 2016 14:23:47 +0100 Message-ID: <20161221132347.GA21051@Red> References: <1482321373-407-1-git-send-email-george.cherian@cavium.com> <1482321373-407-2-git-send-email-george.cherian@cavium.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: herbert@gondor.apana.org.au, davem@davemloft.net, linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org, david.daney@cavium.com To: george.cherian@cavium.com Return-path: Content-Disposition: inline In-Reply-To: <1482321373-407-2-git-send-email-george.cherian@cavium.com> Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-crypto.vger.kernel.org Hello I have some comment inline On Wed, Dec 21, 2016 at 11:56:11AM +0000, george.cherian@cavium.com wrote: > From: George Cherian > > Enable the Physical Function diver for the Cavium Crypto Engine (CPT) typo driver > found in Octeon-tx series of SoC's. CPT is the Cryptographic Acceleration > Unit. CPT includes microcoded GigaCypher symmetric engines (SEs) and > asymmetric engines (AEs). > > Signed-off-by: George Cherian > Reviewed-by: David Daney > --- > drivers/crypto/cavium/cpt/Kconfig | 16 + > drivers/crypto/cavium/cpt/Makefile | 2 + > drivers/crypto/cavium/cpt/cpt_common.h | 158 +++++++ > drivers/crypto/cavium/cpt/cpt_hw_types.h | 658 +++++++++++++++++++++++++++++ > drivers/crypto/cavium/cpt/cptpf.h | 69 +++ > drivers/crypto/cavium/cpt/cptpf_main.c | 703 +++++++++++++++++++++++++++++++ > drivers/crypto/cavium/cpt/cptpf_mbox.c | 163 +++++++ > 7 files changed, 1769 insertions(+) > create mode 100644 drivers/crypto/cavium/cpt/Kconfig > create mode 100644 drivers/crypto/cavium/cpt/Makefile > create mode 100644 drivers/crypto/cavium/cpt/cpt_common.h > create mode 100644 drivers/crypto/cavium/cpt/cpt_hw_types.h > create mode 100644 drivers/crypto/cavium/cpt/cptpf.h > create mode 100644 drivers/crypto/cavium/cpt/cptpf_main.c > create mode 100644 drivers/crypto/cavium/cpt/cptpf_mbox.c > > diff --git a/drivers/crypto/cavium/cpt/Kconfig b/drivers/crypto/cavium/cpt/Kconfig > new file mode 100644 > index 0000000..247f1cb > --- /dev/null > +++ b/drivers/crypto/cavium/cpt/Kconfig > @@ -0,0 +1,16 @@ > +# > +# Cavium crypto device configuration > +# > + > +config CRYPTO_DEV_CPT > + tristate > + > +config CAVIUM_CPT > + tristate "Cavium Cryptographic Accelerator driver" > + depends on ARCH_THUNDER > + select CRYPTO_DEV_CPT Could you add some COMPILE_TEST ? [...] > +struct microcode { > + u8 is_mc_valid; > + u8 is_ae; > + u8 group; > + u8 num_cores; > + u32 code_size; > + u64 core_mask; > + u8 version[32]; I see this "32" in some other place, perhaps you could use a define [...] > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include Header need to be sorted [...] > +static void cpt_disable_cores(struct cpt_device *cpt, u64 coremask, > + u8 type, u8 grp) > +{ > + u64 pf_exe_ctl; > + u32 timeout = 0xFFFFFFFF; > + u64 grpmask = 0; > + struct device *dev = &cpt->pdev->dev; > + > + if (type == AE_TYPES) > + coremask = (coremask << cpt->max_se_cores); > + > + /* Disengage the cores from groups */ > + grpmask = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); > + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), > + (grpmask & ~coremask)); > + udelay(CSR_DELAY); > + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); > + while (grp & coremask) { > + dev_err(dev, "Cores still busy %llx", coremask); > + grp = cpt_read_csr64(cpt->reg_base, > + CPTX_PF_EXEC_BUSY(0)); > + if (timeout--) > + break; The timeout seems enormous and you will flooding syslog with dev_err() > + } > + > + /* Disable the cores */ > + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); > + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), > + (pf_exe_ctl & ~coremask)); > + udelay(CSR_DELAY); > +} > + > +/* > + * Enable cores specified by coremask > + */ > +static void cpt_enable_cores(struct cpt_device *cpt, u64 coremask, > + u8 type) > +{ > + u64 pf_exe_ctl; > + > + if (type == AE_TYPES) > + coremask = (coremask << cpt->max_se_cores); > + > + pf_exe_ctl = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0)); > + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), > + (pf_exe_ctl | coremask)); > + udelay(CSR_DELAY); > +} > + > +static void cpt_configure_group(struct cpt_device *cpt, u8 grp, > + u64 coremask, u8 type) > +{ > + u64 pf_gx_en = 0; > + > + if (type == AE_TYPES) > + coremask = (coremask << cpt->max_se_cores); > + > + pf_gx_en = cpt_read_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp)); > + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), > + (pf_gx_en | coremask)); > + udelay(CSR_DELAY); > +} > + > +static void cpt_disable_mbox_interrupts(struct cpt_device *cpt) > +{ > + /* Clear mbox(0) interupts for all vfs */ > + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1CX(0, 0), ~0ull); > +} > + > +static void cpt_disable_ecc_interrupts(struct cpt_device *cpt) > +{ > + /* Clear ecc(0) interupts for all vfs */ > + cpt_write_csr64(cpt->reg_base, CPTX_PF_ECC0_ENA_W1C(0), ~0ull); > +} > + > +static void cpt_disable_exec_interrupts(struct cpt_device *cpt) > +{ > + /* Clear exec interupts for all vfs */ > + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXEC_ENA_W1C(0), ~0ull); > +} > + > +static void cpt_disable_all_interrupts(struct cpt_device *cpt) > +{ > + cpt_disable_mbox_interrupts(cpt); > + cpt_disable_ecc_interrupts(cpt); > + cpt_disable_exec_interrupts(cpt); > +} > + > +static void cpt_enable_mbox_interrupts(struct cpt_device *cpt) > +{ > + /* Set mbox(0) interupts for all vfs */ > + cpt_write_csr64(cpt->reg_base, CPTX_PF_MBOX_ENA_W1SX(0, 0), ~0ull); > +} > + > +static int cpt_load_microcode(struct cpt_device *cpt, struct microcode *mcode) > +{ > + int ret = 0, core = 0, shift = 0; > + u32 total_cores = 0; > + struct device *dev = &cpt->pdev->dev; > + > + if (!mcode || !mcode->code) { > + dev_err(dev, "Either the mcode is null or data is NULL\n"); > + return 1; This is not a standard error code > + } > + > + if (mcode->code_size == 0) { > + dev_err(dev, "microcode size is 0\n"); > + return 1; the same > + } > + > + /* Assumes 0-9 are SE cores for UCODE_BASE registers and > + * AE core bases follow > + */ > + if (mcode->is_ae) { > + core = CPT_MAX_SE_CORES; /* start couting from 10 */ > + total_cores = CPT_MAX_TOTAL_CORES; /* upto 15 */ > + } else { > + core = 0; /* start couting from 0 */ > + total_cores = CPT_MAX_SE_CORES; /* upto 9 */ > + } > + > + /* Point to microcode for each core of the group */ > + for (; core < total_cores ; core++, shift++) { > + if (mcode->core_mask & (1 << shift)) { > + cpt_write_csr64(cpt->reg_base, > + CPTX_PF_ENGX_UCODE_BASE(0, core), > + (u64)mcode->phys_base); > + } > + } > + return ret; > +} > + > +static int do_cpt_init(struct cpt_device *cpt, struct microcode *mcode) > +{ > + int ret = 0; > + struct device *dev = &cpt->pdev->dev; > + > + /* Make device not ready */ > + cpt->flags &= ~CPT_FLAG_DEVICE_READY; > + /* Disable All PF interrupts */ > + cpt_disable_all_interrupts(cpt); > + /* Calculate mcode group and coremasks */ > + if (mcode->is_ae) { > + if (mcode->num_cores > cpt->max_ae_cores) { > + dev_err(dev, "Requested for more cores than available AE cores\n"); > + ret = -1; This is not a standard error code > + goto cpt_init_fail; > + } > + > + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { > + dev_err(dev, "Can't load, all eight microcode groups in use"); > + return -ENFILE; > + } > + > + mcode->group = cpt->next_group; > + /* Convert requested cores to mask */ > + mcode->core_mask = GENMASK(mcode->num_cores, 0); > + cpt_disable_cores(cpt, mcode->core_mask, AE_TYPES, > + mcode->group); > + /* Load microcode for AE engines */ > + if (cpt_load_microcode(cpt, mcode)) { > + dev_err(dev, "Microcode load Failed for %s\n", > + mcode->version); > + ret = -1; again and you loose the error code given by cpt_load_microcode > + goto cpt_init_fail; > + } > + cpt->next_group++; > + /* Configure group mask for the mcode */ > + cpt_configure_group(cpt, mcode->group, mcode->core_mask, > + AE_TYPES); > + /* Enable AE cores for the group mask */ > + cpt_enable_cores(cpt, mcode->core_mask, AE_TYPES); > + } else { > + if (mcode->num_cores > cpt->max_se_cores) { > + dev_err(dev, "Requested for more cores than available SE cores\n"); > + ret = -1; Again > + goto cpt_init_fail; > + } > + if (cpt->next_group >= CPT_MAX_CORE_GROUPS) { > + dev_err(dev, "Can't load, all eight microcode groups in use"); > + return -ENFILE; > + } > + > + mcode->group = cpt->next_group; > + /* Covert requested cores to mask */ > + mcode->core_mask = GENMASK(mcode->num_cores, 0); > + cpt_disable_cores(cpt, mcode->core_mask, SE_TYPES, > + mcode->group); > + /* Load microcode for SE engines */ > + if (cpt_load_microcode(cpt, mcode)) { > + dev_err(dev, "Microcode load Failed for %s\n", > + mcode->version); > + ret = -1; Again > + goto cpt_init_fail; > + } > + cpt->next_group++; > + /* Configure group mask for the mcode */ > + cpt_configure_group(cpt, mcode->group, mcode->core_mask, > + SE_TYPES); > + /* Enable SE cores for the group mask */ > + cpt_enable_cores(cpt, mcode->core_mask, SE_TYPES); > + } > + > + /* Enabled PF mailbox interrupts */ > + cpt_enable_mbox_interrupts(cpt); > + cpt->flags |= CPT_FLAG_DEVICE_READY; > + > + return ret; > + > +cpt_init_fail: > + /* Enabled PF mailbox interrupts */ > + cpt_enable_mbox_interrupts(cpt); > + > + return ret; > +} > + > +struct ucode_header { > + u8 version[32]; > + u32 code_length; > + u32 data_length; > + u64 sram_address; > +}; > + > +static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae) > +{ > + const struct firmware *fw_entry; > + struct device *dev = &cpt->pdev->dev; > + struct ucode_header *ucode; > + struct microcode *mcode; > + int j, ret = 0; > + > + ret = request_firmware(&fw_entry, fw, dev); > + if (ret) > + return ret; I think you could also check for a minimal firmware size [...] > +static void cpt_disable_all_cores(struct cpt_device *cpt) > +{ > + u32 grp, timeout = 0xFFFFFFFF; > + struct device *dev = &cpt->pdev->dev; > + > + /* Disengage the cores from groups */ > + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { > + cpt_write_csr64(cpt->reg_base, CPTX_PF_GX_EN(0, grp), 0); > + udelay(CSR_DELAY); > + } > + > + grp = cpt_read_csr64(cpt->reg_base, CPTX_PF_EXEC_BUSY(0)); > + while (grp) { > + dev_err(dev, "Cores still busy"); > + grp = cpt_read_csr64(cpt->reg_base, > + CPTX_PF_EXEC_BUSY(0)); > + if (timeout--) > + break; > + } Same problem than cpt_disable_cores > + /* Disable the cores */ > + cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0); > +} > + > +/** > + * Ensure all cores are disenganed from all groups by typo engaged > + * calling cpt_disable_all_cores() before calling this > + * function. > + */ > +static void cpt_unload_microcode(struct cpt_device *cpt) > +{ > + u32 grp = 0, core; > + > + /* Free microcode bases and reset group masks */ > + for (grp = 0; grp < CPT_MAX_CORE_GROUPS; grp++) { > + struct microcode *mcode = &cpt->mcode[grp]; > + > + if (cpt->mcode[grp].code) > + dma_free_coherent(&cpt->pdev->dev, mcode->code_size, > + mcode->code, mcode->phys_base); > + mcode->code = NULL; > + //mcode->base = NULL; This is not a standard comment > + } > + /* Clear UCODE_BASE registers for all engines */ > + for (core = 0; core < CPT_MAX_TOTAL_CORES; core++) > + cpt_write_csr64(cpt->reg_base, > + CPTX_PF_ENGX_UCODE_BASE(0, core), 0ull); > +} > + > +static int cpt_device_init(struct cpt_device *cpt) > +{ > + u64 bist; > + struct device *dev = &cpt->pdev->dev; > + > + /* Reset the PF when probed first */ > + cpt_reset(cpt); > + mdelay((100)); double parenthesis > + > + /*Check BIST status*/ > + bist = (u64)cpt_check_bist_status(cpt); > + if (bist) { > + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); > + return -ENODEV; > + } > + > + bist = cpt_check_exe_bist_status(cpt); > + if (bist) { > + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); > + return -ENODEV; > + } > + > + /*Get CLK frequency*/ > + /*Get max enabled cores */ > + cpt_find_max_enabled_cores(cpt); > + /*Disable all cores*/ > + cpt_disable_all_cores(cpt); > + /*Reset device parameters*/ > + cpt->next_mc_idx = 0; > + cpt->next_group = 0; > + /* PF is ready */ > + cpt->flags |= CPT_FLAG_DEVICE_READY; > + > + return 0; > +} > + > +static int cpt_register_interrupts(struct cpt_device *cpt) > +{ > + int ret; > + struct device *dev = &cpt->pdev->dev; > + > + /* Enable MSI-X */ > + ret = cpt_enable_msix(cpt); > + if (ret) > + return ret; > + > + /* Register mailbox interrupt handlers */ > + ret = request_irq(cpt->msix_entries[CPT_PF_INT_VEC_E_MBOXX(0)].vector, > + cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); > + if (ret) > + goto fail; > + > + cpt->irq_allocated[CPT_PF_INT_VEC_E_MBOXX(0)] = true; > + > + /* Enable mailbox interrupt */ > + cpt_enable_mbox_interrupts(cpt); > + return 0; > + > +fail: > + dev_err(dev, "Request irq failed\n"); > + cpt_free_all_interrupts(cpt); > + return ret; > +} > + > +static void cpt_unregister_interrupts(struct cpt_device *cpt) > +{ > + cpt_free_all_interrupts(cpt); > + cpt_disable_msix(cpt); > +} > + > +static int cpt_sriov_init(struct cpt_device *cpt, int num_vfs) > +{ > + int pos = 0; > + int err; > + u16 total_vf_cnt; > + struct pci_dev *pdev = cpt->pdev; > + > + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); > + if (!pos) { > + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); > + return -ENODEV; > + } > + > + cpt->num_vf_en = num_vfs; /* User requested VFs */ > + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); > + if (total_vf_cnt < cpt->num_vf_en) > + cpt->num_vf_en = total_vf_cnt; > + > + if (!total_vf_cnt) > + return 0; > + > + /*Enabled the available VFs */ > + err = pci_enable_sriov(pdev, cpt->num_vf_en); > + if (err) { > + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", > + cpt->num_vf_en); > + cpt->num_vf_en = 0; > + return err; > + } > + > + /* TODO: Optionally enable static VQ priorities feature */ > + > + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", > + cpt->num_vf_en); > + > + cpt->flags |= CPT_FLAG_SRIOV_ENABLED; > + > + return 0; > +} > + > +static int cpt_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > +{ > + struct device *dev = &pdev->dev; > + struct cpt_device *cpt; > + int err; > + > + if (num_vfs > 16) { > + pr_warn("Invalid vf count %d, Resetting it to 4(default)\n", > + num_vfs); Why not using dev_warn ? > + num_vfs = 4; > + } > + > + cpt = devm_kzalloc(dev, sizeof(struct cpt_device), GFP_KERNEL); Use sizeof(*cpt) like checkpatch will said. [...] > +static void cpt_shutdown(struct pci_dev *pdev) > +{ > + struct cpt_device *cpt = pci_get_drvdata(pdev); > + > + if (!cpt) > + return; > + > + dev_info(&pdev->dev, "Shutdown device %x:%x.\n", > + (u32)pdev->vendor, (u32)pdev->device); > + > + cpt_unregister_interrupts(cpt); > + pci_release_regions(pdev); > + pci_disable_device(pdev); > + pci_set_drvdata(pdev, NULL); > + kzfree(cpt); since cpt is allocated with devm_, this kzfree is unnecessary Thanks Regards Corentin Labbe