2017-10-30 13:46:54

by Horia Geantă

[permalink] [raw]
Subject: [PATCH RESEND 4/4] dma: caam: add dma memcpy driver

From: Radu Alexe <[email protected]>

This module introduces a memcpy DMA driver based on the DMA capabilities
of the CAAM hardware block. CAAM DMA is a platform driver that is only
probed if the device is defined in the device tree. The driver creates
a DMA channel for each JR of the CAAM. This introduces a dependency on
the JR driver. Therefore a deferring mechanism was used to ensure that
the CAAM DMA driver is probed only after the JR driver.

Signed-off-by: Radu Alexe <[email protected]>
Signed-off-by: Tudor Ambarus <[email protected]>
Signed-off-by: Rajiv Vishwakarma <[email protected]>
---
drivers/dma/Kconfig | 17 ++
drivers/dma/Makefile | 1 +
drivers/dma/caam_dma.c | 444 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 462 insertions(+)
create mode 100644 drivers/dma/caam_dma.c

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index fadc4d8783bd..0df48307dac1 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -600,6 +600,23 @@ config ZX_DMA
help
Support the DMA engine for ZTE ZX family platform devices.

+config CRYPTO_DEV_FSL_CAAM_DMA
+ tristate "CAAM DMA engine support"
+ depends on CRYPTO_DEV_FSL_CAAM_JR
+ default y
+ select DMA_ENGINE
+ select ASYNC_CORE
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Selecting this will offload the DMA operations for users of
+ the scatter gather memcopy API to the CAAM via job rings. The
+ CAAM is a hardware module that provides hardware acceleration to
+ cryptographic operations. It has a built-in DMA controller that can
+ be programmed to read/write cryptographic data. This module defines
+ a DMA driver that uses the DMA capabilities of the CAAM.
+
+ To compile this as a module, choose M here: the module
+ will be called caam_dma.

# driver files
source "drivers/dma/bestcomm/Kconfig"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f08f8de1b567..37563454d624 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_TI_EDMA) += edma.o
obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
obj-$(CONFIG_ZX_DMA) += zx_dma.o
obj-$(CONFIG_ST_FDMA) += st_fdma.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_DMA) += caam_dma.o

obj-y += qcom/
obj-y += xilinx/
diff --git a/drivers/dma/caam_dma.c b/drivers/dma/caam_dma.c
new file mode 100644
index 000000000000..dfd5409864b0
--- /dev/null
+++ b/drivers/dma/caam_dma.c
@@ -0,0 +1,444 @@
+/*
+ * caam support for SG DMA
+ *
+ * Copyright 2016 Freescale Semiconductor, Inc
+ * Copyright 2017 NXP
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+
+#include <linux/dmaengine.h>
+#include "dmaengine.h"
+
+#include "../crypto/caam/regs.h"
+#include "../crypto/caam/jr.h"
+#include "../crypto/caam/error.h"
+#include "../crypto/caam/intern.h"
+#include "../crypto/caam/desc_constr.h"
+
+#define DESC_DMA_MEMCPY_LEN ((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / \
+ CAAM_CMD_SZ)
+
+/* This is max chunk size of a DMA transfer. If a buffer is larger than this
+ * value it is internally broken into chunks of max CAAM_DMA_CHUNK_SIZE bytes
+ * and for each chunk a DMA transfer request is issued.
+ * This value is the largest number on 16 bits that is a multiple of 256 bytes
+ * (the largest configurable CAAM DMA burst size).
+ */
+#define CAAM_DMA_CHUNK_SIZE 65280
+
+struct caam_dma_sh_desc {
+ u32 desc[DESC_DMA_MEMCPY_LEN] ____cacheline_aligned;
+ dma_addr_t desc_dma;
+};
+
+/* caam dma extended descriptor */
+struct caam_dma_edesc {
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head node;
+ struct caam_dma_ctx *ctx;
+ dma_addr_t src_dma;
+ dma_addr_t dst_dma;
+ unsigned int src_len;
+ unsigned int dst_len;
+ u32 jd[] ____cacheline_aligned;
+};
+
+/*
+ * caam_dma_ctx - per jr/channel context
+ * @chan: dma channel used by async_tx API
+ * @node: list_head used to attach to the global dma_ctx_list
+ * @jrdev: Job Ring device
+ * @submit_q: queue of pending (submitted, but not enqueued) jobs
+ * @done_not_acked: jobs that have been completed by jr, but maybe not acked
+ * @edesc_lock: protects extended descriptor
+ */
+struct caam_dma_ctx {
+ struct dma_chan chan;
+ struct list_head node;
+ struct device *jrdev;
+ struct list_head submit_q;
+ struct list_head done_not_acked;
+ spinlock_t edesc_lock;
+};
+
+static struct dma_device *dma_dev;
+static struct caam_dma_sh_desc *dma_sh_desc;
+static LIST_HEAD(dma_ctx_list);
+
+static dma_cookie_t caam_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct caam_dma_edesc *edesc = NULL;
+ struct caam_dma_ctx *ctx = NULL;
+ dma_cookie_t cookie;
+
+ edesc = container_of(tx, struct caam_dma_edesc, async_tx);
+ ctx = container_of(tx->chan, struct caam_dma_ctx, chan);
+
+ spin_lock_bh(&ctx->edesc_lock);
+
+ cookie = dma_cookie_assign(tx);
+ list_add_tail(&edesc->node, &ctx->submit_q);
+
+ spin_unlock_bh(&ctx->edesc_lock);
+
+ return cookie;
+}
+
+static void caam_jr_chan_free_edesc(struct caam_dma_edesc *edesc)
+{
+ struct caam_dma_ctx *ctx = edesc->ctx;
+ struct caam_dma_edesc *_edesc = NULL;
+
+ spin_lock_bh(&ctx->edesc_lock);
+
+ list_add_tail(&edesc->node, &ctx->done_not_acked);
+ list_for_each_entry_safe(edesc, _edesc, &ctx->done_not_acked, node) {
+ if (async_tx_test_ack(&edesc->async_tx)) {
+ list_del(&edesc->node);
+ kfree(edesc);
+ }
+ }
+
+ spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static void caam_dma_done(struct device *dev, u32 *hwdesc, u32 err,
+ void *context)
+{
+ struct caam_dma_edesc *edesc = context;
+ struct caam_dma_ctx *ctx = edesc->ctx;
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ if (err)
+ caam_jr_strstatus(ctx->jrdev, err);
+
+ dma_run_dependencies(&edesc->async_tx);
+
+ spin_lock_bh(&ctx->edesc_lock);
+ dma_cookie_complete(&edesc->async_tx);
+ spin_unlock_bh(&ctx->edesc_lock);
+
+ callback = edesc->async_tx.callback;
+ callback_param = edesc->async_tx.callback_param;
+
+ dma_descriptor_unmap(&edesc->async_tx);
+
+ caam_jr_chan_free_edesc(edesc);
+
+ if (callback)
+ callback(callback_param);
+}
+
+static void caam_dma_memcpy_init_job_desc(struct caam_dma_edesc *edesc)
+{
+ u32 *jd = edesc->jd;
+ u32 *sh_desc = dma_sh_desc->desc;
+ dma_addr_t desc_dma = dma_sh_desc->desc_dma;
+
+ /* init the job descriptor */
+ init_job_desc_shared(jd, desc_dma, desc_len(sh_desc), HDR_REVERSE);
+
+ /* set SEQIN PTR */
+ append_seq_in_ptr(jd, edesc->src_dma, edesc->src_len, 0);
+
+ /* set SEQOUT PTR */
+ append_seq_out_ptr(jd, edesc->dst_dma, edesc->dst_len, 0);
+
+#ifdef DEBUG
+ print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, jd, desc_bytes(jd), 1);
+#endif
+}
+
+static struct dma_async_tx_descriptor *
+caam_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct caam_dma_edesc *edesc;
+ struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+ chan);
+
+ edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN, GFP_DMA | GFP_NOWAIT);
+ if (!edesc)
+ return ERR_PTR(-ENOMEM);
+
+ dma_async_tx_descriptor_init(&edesc->async_tx, chan);
+ edesc->async_tx.tx_submit = caam_dma_tx_submit;
+ edesc->async_tx.flags = flags;
+ edesc->async_tx.cookie = -EBUSY;
+
+ edesc->src_dma = src;
+ edesc->src_len = len;
+ edesc->dst_dma = dst;
+ edesc->dst_len = len;
+ edesc->ctx = ctx;
+
+ caam_dma_memcpy_init_job_desc(edesc);
+
+ return &edesc->async_tx;
+}
+
+/* This function can be called in an interrupt context */
+static void caam_dma_issue_pending(struct dma_chan *chan)
+{
+ struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+ chan);
+ struct caam_dma_edesc *edesc, *_edesc;
+
+ spin_lock_bh(&ctx->edesc_lock);
+ list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
+ if (caam_jr_enqueue(ctx->jrdev, edesc->jd,
+ caam_dma_done, edesc) < 0)
+ break;
+ list_del(&edesc->node);
+ }
+ spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static void caam_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+ chan);
+ struct caam_dma_edesc *edesc, *_edesc;
+
+ spin_lock_bh(&ctx->edesc_lock);
+ list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
+ list_del(&edesc->node);
+ kfree(edesc);
+ }
+ list_for_each_entry_safe(edesc, _edesc, &ctx->done_not_acked, node) {
+ list_del(&edesc->node);
+ kfree(edesc);
+ }
+ spin_unlock_bh(&ctx->edesc_lock);
+}
+
+static int caam_dma_jr_chan_bind(void)
+{
+ struct device *jrdev;
+ struct caam_dma_ctx *ctx;
+ int bonds = 0;
+ int i;
+
+ for (i = 0; i < caam_jr_driver_probed(); i++) {
+ jrdev = caam_jridx_alloc(i);
+ if (IS_ERR(jrdev)) {
+ pr_err("job ring device %d allocation failed\n", i);
+ continue;
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ caam_jr_free(jrdev);
+ continue;
+ }
+
+ ctx->chan.device = dma_dev;
+ ctx->chan.private = ctx;
+
+ ctx->jrdev = jrdev;
+
+ INIT_LIST_HEAD(&ctx->submit_q);
+ INIT_LIST_HEAD(&ctx->done_not_acked);
+ INIT_LIST_HEAD(&ctx->node);
+ spin_lock_init(&ctx->edesc_lock);
+
+ dma_cookie_init(&ctx->chan);
+
+ /* add the context of this channel to the context list */
+ list_add_tail(&ctx->node, &dma_ctx_list);
+
+ /* add this channel to the device chan list */
+ list_add_tail(&ctx->chan.device_node, &dma_dev->channels);
+
+ bonds++;
+ }
+
+ return bonds;
+}
+
+static inline void caam_jr_dma_free(struct dma_chan *chan)
+{
+ struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
+ chan);
+
+ list_del(&ctx->node);
+ list_del(&chan->device_node);
+ caam_jr_free(ctx->jrdev);
+ kfree(ctx);
+}
+
+static void set_caam_dma_desc(u32 *desc)
+{
+ u32 *jmp_cmd;
+
+ /* dma shared descriptor */
+ init_sh_desc(desc, HDR_SHARE_NEVER | (1 << HDR_START_IDX_SHIFT));
+
+ /* REG1 = CAAM_DMA_CHUNK_SIZE */
+ append_math_add_imm_u32(desc, REG1, ZERO, IMM, CAAM_DMA_CHUNK_SIZE);
+
+ /* REG0 = SEQINLEN - CAAM_DMA_CHUNK_SIZE */
+ append_math_sub_imm_u32(desc, REG0, SEQINLEN, IMM, CAAM_DMA_CHUNK_SIZE);
+
+ /* if (REG0 > 0)
+ * jmp to LABEL1
+ */
+ jmp_cmd = append_jump(desc, JUMP_TEST_INVALL | JUMP_COND_MATH_N |
+ JUMP_COND_MATH_Z);
+
+ /* REG1 = SEQINLEN */
+ append_math_sub(desc, REG1, SEQINLEN, ZERO, CAAM_CMD_SZ);
+
+ /* LABEL1 */
+ set_jump_tgt_here(desc, jmp_cmd);
+
+ /* VARSEQINLEN = REG1 */
+ append_math_add(desc, VARSEQINLEN, REG1, ZERO, CAAM_CMD_SZ);
+
+ /* VARSEQOUTLEN = REG1 */
+ append_math_add(desc, VARSEQOUTLEN, REG1, ZERO, CAAM_CMD_SZ);
+
+ /* do FIFO STORE */
+ append_seq_fifo_store(desc, 0, FIFOST_TYPE_METADATA | LDST_VLF);
+
+ /* do FIFO LOAD */
+ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
+ FIFOLD_TYPE_IFIFO | LDST_VLF);
+
+ /* if (REG0 > 0)
+ * jmp 0xF8 (after shared desc header)
+ */
+ append_jump(desc, JUMP_TEST_INVALL | JUMP_COND_MATH_N |
+ JUMP_COND_MATH_Z | 0xF8);
+
+#ifdef DEBUG
+ print_hex_dump(KERN_ERR, "caam dma shdesc@" __stringify(__LINE__) ": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+
+static int __init caam_dma_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device *ctrldev = dev->parent;
+ struct dma_chan *chan, *_chan;
+ u32 *sh_desc;
+ int err = -ENOMEM;
+ int bonds;
+
+ if (!caam_jr_driver_probed()) {
+ dev_info(dev, "Defer probing after JR driver probing\n");
+ return -EPROBE_DEFER;
+ }
+
+ dma_dev = kzalloc(sizeof(*dma_dev), GFP_KERNEL);
+ if (!dma_dev)
+ return -ENOMEM;
+
+ dma_sh_desc = kzalloc(sizeof(*dma_sh_desc), GFP_KERNEL | GFP_DMA);
+ if (!dma_sh_desc)
+ goto desc_err;
+
+ sh_desc = dma_sh_desc->desc;
+ set_caam_dma_desc(sh_desc);
+ dma_sh_desc->desc_dma = dma_map_single(ctrldev, sh_desc,
+ desc_bytes(sh_desc),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ctrldev, dma_sh_desc->desc_dma)) {
+ dev_err(dev, "unable to map dma descriptor\n");
+ goto map_err;
+ }
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ bonds = caam_dma_jr_chan_bind();
+ if (!bonds) {
+ err = -ENODEV;
+ goto jr_bind_err;
+ }
+
+ dma_dev->dev = dev;
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+ dma_dev->device_tx_status = dma_cookie_status;
+ dma_dev->device_issue_pending = caam_dma_issue_pending;
+ dma_dev->device_prep_dma_memcpy = caam_dma_prep_memcpy;
+ dma_dev->device_free_chan_resources = caam_dma_free_chan_resources;
+
+ err = dma_async_device_register(dma_dev);
+ if (err) {
+ dev_err(dev, "Failed to register CAAM DMA engine\n");
+ goto jr_bind_err;
+ }
+
+ dev_info(dev, "caam dma support with %d job rings\n", bonds);
+
+ return err;
+
+jr_bind_err:
+ list_for_each_entry_safe(chan, _chan, &dma_dev->channels, device_node)
+ caam_jr_dma_free(chan);
+
+ dma_unmap_single(ctrldev, dma_sh_desc->desc_dma, desc_bytes(sh_desc),
+ DMA_TO_DEVICE);
+map_err:
+ kfree(dma_sh_desc);
+desc_err:
+ kfree(dma_dev);
+ return err;
+}
+
+static int caam_dma_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device *ctrldev = dev->parent;
+ struct caam_dma_ctx *ctx, *_ctx;
+
+ dma_async_device_unregister(dma_dev);
+
+ list_for_each_entry_safe(ctx, _ctx, &dma_ctx_list, node) {
+ list_del(&ctx->node);
+ caam_jr_free(ctx->jrdev);
+ kfree(ctx);
+ }
+
+ dma_unmap_single(ctrldev, dma_sh_desc->desc_dma,
+ desc_bytes(dma_sh_desc->desc), DMA_TO_DEVICE);
+
+ kfree(dma_sh_desc);
+ kfree(dma_dev);
+
+ dev_info(dev, "caam dma support disabled\n");
+ return 0;
+}
+
+static const struct of_device_id caam_dma_match[] = {
+ { .compatible = "fsl,sec-v5.4-dma", },
+ { .compatible = "fsl,sec-v5.0-dma", },
+ { .compatible = "fsl,sec-v4.0-dma", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, caam_dma_match);
+
+static struct platform_driver caam_dma_driver = {
+ .driver = {
+ .name = "caam-dma",
+ .of_match_table = caam_dma_match,
+ },
+ .probe = caam_dma_probe,
+ .remove = caam_dma_remove,
+};
+module_platform_driver(caam_dma_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("NXP CAAM support for SG DMA");
+MODULE_AUTHOR("NXP Semiconductors");
--
2.14.2.606.g7451fcd


2017-10-31 12:01:20

by Vinod Koul

[permalink] [raw]
Subject: Re: [PATCH RESEND 4/4] dma: caam: add dma memcpy driver

On Mon, Oct 30, 2017 at 03:46:54PM +0200, Horia Geantă wrote:

> @@ -600,6 +600,23 @@ config ZX_DMA
> help
> Support the DMA engine for ZTE ZX family platform devices.
>
> +config CRYPTO_DEV_FSL_CAAM_DMA

File is sorted alphabetically

> + tristate "CAAM DMA engine support"
> + depends on CRYPTO_DEV_FSL_CAAM_JR
> + default y

why should it be default?

> --- /dev/null
> +++ b/drivers/dma/caam_dma.c
> @@ -0,0 +1,444 @@
> +/*
> + * caam support for SG DMA
> + *
> + * Copyright 2016 Freescale Semiconductor, Inc
> + * Copyright 2017 NXP
> + */

that is interesting, no license text?

> +
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/interrupt.h>

why do you need this

> +#include <linux/slab.h>
> +#include <linux/debugfs.h>

i didn't see any debugfs code, why do you need this

alphabetical sort pls
> +
> +#include <linux/dmaengine.h>
> +#include "dmaengine.h"
> +
> +#include "../crypto/caam/regs.h"
> +#include "../crypto/caam/jr.h"
> +#include "../crypto/caam/error.h"
> +#include "../crypto/caam/intern.h"
> +#include "../crypto/caam/desc_constr.h"

ah that puts very hard assumptions on locations of the subsystems. Please do
not do that and move the required stuff into common header location in
include/

> +/* This is max chunk size of a DMA transfer. If a buffer is larger than this
> + * value it is internally broken into chunks of max CAAM_DMA_CHUNK_SIZE bytes
> + * and for each chunk a DMA transfer request is issued.
> + * This value is the largest number on 16 bits that is a multiple of 256 bytes
> + * (the largest configurable CAAM DMA burst size).
> + */

Kernel comments style we follow is:

/*
* this is for
* multi line comment
*/

Pls fix this in the file

> +#define CAAM_DMA_CHUNK_SIZE 65280
> +
> +struct caam_dma_sh_desc {

sh?

> +struct caam_dma_ctx {
> + struct dma_chan chan;
> + struct list_head node;
> + struct device *jrdev;
> + struct list_head submit_q;

call it pending

> +static struct dma_device *dma_dev;
> +static struct caam_dma_sh_desc *dma_sh_desc;
> +static LIST_HEAD(dma_ctx_list);

why do you need so many globals?

> +static dma_cookie_t caam_dma_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> + struct caam_dma_edesc *edesc = NULL;
> + struct caam_dma_ctx *ctx = NULL;
> + dma_cookie_t cookie;
> +
> + edesc = container_of(tx, struct caam_dma_edesc, async_tx);
> + ctx = container_of(tx->chan, struct caam_dma_ctx, chan);
> +
> + spin_lock_bh(&ctx->edesc_lock);

why _bh, i didnt see any irqs or tasklets here which is actually odd, so
what is going on

> +
> + cookie = dma_cookie_assign(tx);
> + list_add_tail(&edesc->node, &ctx->submit_q);
> +
> + spin_unlock_bh(&ctx->edesc_lock);
> +
> + return cookie;
> +}

we have a virtual channel wrapper where we do the same stuff as above, so
consider reusing that

> +static void caam_dma_memcpy_init_job_desc(struct caam_dma_edesc *edesc)
> +{
> + u32 *jd = edesc->jd;
> + u32 *sh_desc = dma_sh_desc->desc;
> + dma_addr_t desc_dma = dma_sh_desc->desc_dma;
> +
> + /* init the job descriptor */
> + init_job_desc_shared(jd, desc_dma, desc_len(sh_desc), HDR_REVERSE);
> +
> + /* set SEQIN PTR */
> + append_seq_in_ptr(jd, edesc->src_dma, edesc->src_len, 0);
> +
> + /* set SEQOUT PTR */
> + append_seq_out_ptr(jd, edesc->dst_dma, edesc->dst_len, 0);
> +
> +#ifdef DEBUG
> + print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ",
> + DUMP_PREFIX_ADDRESS, 16, 4, jd, desc_bytes(jd), 1);

ah this make you compile kernel. Consider the dynamic printk helpers for
printing

> +/* This function can be called in an interrupt context */
> +static void caam_dma_issue_pending(struct dma_chan *chan)
> +{
> + struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
> + chan);
> + struct caam_dma_edesc *edesc, *_edesc;
> +
> + spin_lock_bh(&ctx->edesc_lock);
> + list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
> + if (caam_jr_enqueue(ctx->jrdev, edesc->jd,
> + caam_dma_done, edesc) < 0)

what does the caam_jr_enqueue() do?

> +static int caam_dma_jr_chan_bind(void)
> +{
> + struct device *jrdev;
> + struct caam_dma_ctx *ctx;
> + int bonds = 0;
> + int i;
> +
> + for (i = 0; i < caam_jr_driver_probed(); i++) {
> + jrdev = caam_jridx_alloc(i);
> + if (IS_ERR(jrdev)) {
> + pr_err("job ring device %d allocation failed\n", i);
> + continue;
> + }
> +
> + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> + if (!ctx) {
> + caam_jr_free(jrdev);
> + continue;

you want to continue?

> + dma_dev->dev = dev;
> + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
> + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
> + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
> + dma_dev->device_tx_status = dma_cookie_status;
> + dma_dev->device_issue_pending = caam_dma_issue_pending;
> + dma_dev->device_prep_dma_memcpy = caam_dma_prep_memcpy;
> + dma_dev->device_free_chan_resources = caam_dma_free_chan_resources;
> +
> + err = dma_async_device_register(dma_dev);
> + if (err) {
> + dev_err(dev, "Failed to register CAAM DMA engine\n");
> + goto jr_bind_err;
> + }
> +
> + dev_info(dev, "caam dma support with %d job rings\n", bonds);

that is very noisy

> +MODULE_LICENSE("Dual BSD/GPL");
> +MODULE_DESCRIPTION("NXP CAAM support for SG DMA");
> +MODULE_AUTHOR("NXP Semiconductors");

No MODULE_ALIAS, how did you load the driver

--
~Vinod

2017-11-02 10:16:39

by kbuild test robot

[permalink] [raw]
Subject: Re: [PATCH RESEND 4/4] dma: caam: add dma memcpy driver

Hi Radu,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on cryptodev/master]
[also build test WARNING on v4.14-rc7 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Horia-Geant/add-CAAM-DMA-memcpy-driver/20171102-081734
base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
config: arm-allmodconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm

All warnings (new ones prefixed by >>):

>> WARNING: drivers/dma/caam_dma.o(.data+0x8): Section mismatch in reference from the variable caam_dma_driver to the function .init.text:caam_dma_probe()
The variable caam_dma_driver references
the function __init caam_dma_probe()
If the reference is valid then annotate the
variable with __init* or __refdata (see linux/init.h) or name the variable:


---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.34 kB)
.config.gz (62.51 kB)
Download all attachments

2017-11-08 14:36:31

by Radu Andrei Alexe

[permalink] [raw]
Subject: Re: [PATCH RESEND 4/4] dma: caam: add dma memcpy driver

On 10/31/2017 2:01 PM, Vinod Koul wrote:
> On Mon, Oct 30, 2017 at 03:46:54PM +0200, Horia Geantă wrote:
>
>> @@ -600,6 +600,23 @@ config ZX_DMA
>> help
>> Support the DMA engine for ZTE ZX family platform devices.
>>
>> +config CRYPTO_DEV_FSL_CAAM_DMA
>
> File is sorted alphabetically
>

I didn't know. I will change the position in file.

>> + tristate "CAAM DMA engine support"
>> + depends on CRYPTO_DEV_FSL_CAAM_JR
>> + default y
>
> why should it be default?
>

My mistake. It remained to 'y' from testing. I will change it to 'n'.

>> --- /dev/null
>> +++ b/drivers/dma/caam_dma.c
>> @@ -0,0 +1,444 @@
>> +/*
>> + * caam support for SG DMA
>> + *
>> + * Copyright 2016 Freescale Semiconductor, Inc
>> + * Copyright 2017 NXP
>> + */
>
> that is interesting, no license text?
>

Thanks for the catch. The next patch will contain the full license text.

>> +
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/dma-mapping.h>
>> +#include <linux/interrupt.h>
>
> why do you need this
>

I don't. I thought I removed any not-needed header files. I will remove it.

>> +#include <linux/slab.h>
>> +#include <linux/debugfs.h>
>
> i didn't see any debugfs code, why do you need this
>

I don't. I will remove it.

> alphabetical sort pls

It will be done.

>> +
>> +#include <linux/dmaengine.h>
>> +#include "dmaengine.h"
>> +
>> +#include "../crypto/caam/regs.h"
>> +#include "../crypto/caam/jr.h"
>> +#include "../crypto/caam/error.h"
>> +#include "../crypto/caam/intern.h"
>> +#include "../crypto/caam/desc_constr.h"
>
> ah that puts very hard assumptions on locations of the subsystems. Please do
> not do that and move the required stuff into common header location in
> include/
>

Unfortunately this is not possible. The functionality used by CAAM DMA
from the CAAM subsystem should not be exported to be used by other
modules. It is because this driver is so tightly coupled with the CAAM
driver(s) that it needs access to its 'internal' functionality (that
should not be otherwise shared with anyone).

>> +/* This is max chunk size of a DMA transfer. If a buffer is larger than this
>> + * value it is internally broken into chunks of max CAAM_DMA_CHUNK_SIZE bytes
>> + * and for each chunk a DMA transfer request is issued.
>> + * This value is the largest number on 16 bits that is a multiple of 256 bytes
>> + * (the largest configurable CAAM DMA burst size).
>> + */
>
> Kernel comments style we follow is:
>
> /*
> * this is for
> * multi line comment
> */
>
> Pls fix this in the file
>

It will be done.

>> +#define CAAM_DMA_CHUNK_SIZE 65280
>> +
>> +struct caam_dma_sh_desc {
>
> sh?
>

It means "shared". It is obvious to anyone who is familiar with the CAAM
system.

>> +struct caam_dma_ctx {
>> + struct dma_chan chan;
>> + struct list_head node;
>> + struct device *jrdev;
>> + struct list_head submit_q;
>
> call it pending
>

It will be done.

>> +static struct dma_device *dma_dev;
>> +static struct caam_dma_sh_desc *dma_sh_desc;
>> +static LIST_HEAD(dma_ctx_list);
>
> why do you need so many globals?
>

How many globals are too many? I can group them in a common structure.
But I'm not sure how would that help.

>> +static dma_cookie_t caam_dma_tx_submit(struct dma_async_tx_descriptor *tx)
>> +{
>> + struct caam_dma_edesc *edesc = NULL;
>> + struct caam_dma_ctx *ctx = NULL;
>> + dma_cookie_t cookie;
>> +
>> + edesc = container_of(tx, struct caam_dma_edesc, async_tx);
>> + ctx = container_of(tx->chan, struct caam_dma_ctx, chan);
>> +
>> + spin_lock_bh(&ctx->edesc_lock);
>
> why _bh, i didnt see any irqs or tasklets here which is actually odd, so
> what is going on
>

The tasklet is hidden inside the JR driver from the CAAM subsystem (see
drivers/crypto/caam/jr.c). The function caam_dma_done(), which is called
from the JR tasklet handler, also uses the spin_lock. I need to disable
bottom half to make sure that I don't run into a deadlock when I'm
preempted.

>> +
>> + cookie = dma_cookie_assign(tx);
>> + list_add_tail(&edesc->node, &ctx->submit_q);
>> +
>> + spin_unlock_bh(&ctx->edesc_lock);
>> +
>> + return cookie;
>> +}
>
> we have a virtual channel wrapper where we do the same stuff as above, so
> consider reusing that
>

Some of the functionality that the virtual channel might provide cannot
be extracted because it is embedded into the JR driver (e.g. the
tasklet). Therefore the use of the virtual channel is inefficient at
best if not even impossible.

>> +static void caam_dma_memcpy_init_job_desc(struct caam_dma_edesc *edesc)
>> +{
>> + u32 *jd = edesc->jd;
>> + u32 *sh_desc = dma_sh_desc->desc;
>> + dma_addr_t desc_dma = dma_sh_desc->desc_dma;
>> +
>> + /* init the job descriptor */
>> + init_job_desc_shared(jd, desc_dma, desc_len(sh_desc), HDR_REVERSE);
>> +
>> + /* set SEQIN PTR */
>> + append_seq_in_ptr(jd, edesc->src_dma, edesc->src_len, 0);
>> +
>> + /* set SEQOUT PTR */
>> + append_seq_out_ptr(jd, edesc->dst_dma, edesc->dst_len, 0);
>> +
>> +#ifdef DEBUG
>> + print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ",
>> + DUMP_PREFIX_ADDRESS, 16, 4, jd, desc_bytes(jd), 1);
>
> ah this make you compile kernel. Consider the dynamic printk helpers for
> printing
>

It will be done.

>> +/* This function can be called in an interrupt context */
>> +static void caam_dma_issue_pending(struct dma_chan *chan)
>> +{
>> + struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
>> + chan);
>> + struct caam_dma_edesc *edesc, *_edesc;
>> +
>> + spin_lock_bh(&ctx->edesc_lock);
>> + list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
>> + if (caam_jr_enqueue(ctx->jrdev, edesc->jd,
>> + caam_dma_done, edesc) < 0)
>
> what does the caam_jr_enqueue() do?
>

Enqueues a job descriptor (jd) into a job ring (jr). Additionally the
"caam_dma_done()" function is registered to be used as a callback when
the job finishes. For more details see drivers/crypto/caam/jr.c.

>> +static int caam_dma_jr_chan_bind(void)
>> +{
>> + struct device *jrdev;
>> + struct caam_dma_ctx *ctx;
>> + int bonds = 0;
>> + int i;
>> +
>> + for (i = 0; i < caam_jr_driver_probed(); i++) {
>> + jrdev = caam_jridx_alloc(i);
>> + if (IS_ERR(jrdev)) {
>> + pr_err("job ring device %d allocation failed\n", i);
>> + continue;
>> + }
>> +
>> + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>> + if (!ctx) {
>> + caam_jr_free(jrdev);
>> + continue;
>
> you want to continue?
>

Yes. If on the next loop memory becomes available I would like to
allocate and use another job ring(jr). If there is no memory still then
it will do nothing.

>> + dma_dev->dev = dev;
>> + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
>> + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
>> + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
>> + dma_dev->device_tx_status = dma_cookie_status;
>> + dma_dev->device_issue_pending = caam_dma_issue_pending;
>> + dma_dev->device_prep_dma_memcpy = caam_dma_prep_memcpy;
>> + dma_dev->device_free_chan_resources = caam_dma_free_chan_resources;
>> +
>> + err = dma_async_device_register(dma_dev);
>> + if (err) {
>> + dev_err(dev, "Failed to register CAAM DMA engine\n");
>> + goto jr_bind_err;
>> + }
>> +
>> + dev_info(dev, "caam dma support with %d job rings\n", bonds);
>
> that is very noisy
>

I want to print a line that informs that the caam_dma driver has been
successfully probed. Do you have any other suggestion?

>> +MODULE_LICENSE("Dual BSD/GPL");
>> +MODULE_DESCRIPTION("NXP CAAM support for SG DMA");
>> +MODULE_AUTHOR("NXP Semiconductors");
>
> No MODULE_ALIAS, how did you load the driver
>

It was built in as default. I will add a MODULE_ALIAS().

Thanks for the remarks.
Radu

2017-11-16 06:11:01

by Vinod Koul

[permalink] [raw]
Subject: Re: [PATCH RESEND 4/4] dma: caam: add dma memcpy driver

On Wed, Nov 08, 2017 at 02:36:31PM +0000, Radu Andrei Alexe wrote:
> On 10/31/2017 2:01 PM, Vinod Koul wrote:
> > On Mon, Oct 30, 2017 at 03:46:54PM +0200, Horia Geantă wrote:
> >> +/*
> >> + * caam support for SG DMA
> >> + *
> >> + * Copyright 2016 Freescale Semiconductor, Inc
> >> + * Copyright 2017 NXP
> >> + */
> >
> > that is interesting, no license text?
> >
>
> Thanks for the catch. The next patch will contain the full license text.

or as is the "new" practice, you may used SPDX tags

> >> +#include "../crypto/caam/regs.h"
> >> +#include "../crypto/caam/jr.h"
> >> +#include "../crypto/caam/error.h"
> >> +#include "../crypto/caam/intern.h"
> >> +#include "../crypto/caam/desc_constr.h"
> >
> > ah that puts very hard assumptions on locations of the subsystems. Please do
> > not do that and move the required stuff into common header location in
> > include/
> >
>
> Unfortunately this is not possible. The functionality used by CAAM DMA
> from the CAAM subsystem should not be exported to be used by other
> modules. It is because this driver is so tightly coupled with the CAAM
> driver(s) that it needs access to it's 'internal' functionality (that
> should not be otherwise shared with anyone).

Which other driver would be interested in your CAAM implementation except
you. Realistically speaking none, so it is perfectly fine to do so in a
header at a right place!

> >> +struct caam_dma_sh_desc {
> >
> > sh?
> >
>
> It means "shared". It is obvious to anyone who is familiar with the CAAM
> system.

Unfortunately I am not. As a patch writer you have the onus to explain it to
people who live outside the CAAM world

> >> +static struct dma_device *dma_dev;
> >> +static struct caam_dma_sh_desc *dma_sh_desc;
> >> +static LIST_HEAD(dma_ctx_list);
> >
> > why do you need so many globals?
> >
>
> How many globals are too many? I can group them in a common structure.
> But I'm not sure how would that help.

I prefer none. Make sure the struct instance is not global and allocated in
your driver probe routine.


> >> +static dma_cookie_t caam_dma_tx_submit(struct dma_async_tx_descriptor *tx)
> >> +{
> >> + struct caam_dma_edesc *edesc = NULL;
> >> + struct caam_dma_ctx *ctx = NULL;
> >> + dma_cookie_t cookie;
> >> +
> >> + edesc = container_of(tx, struct caam_dma_edesc, async_tx);
> >> + ctx = container_of(tx->chan, struct caam_dma_ctx, chan);
> >> +
> >> + spin_lock_bh(&ctx->edesc_lock);
> >
> > why _bh, i didnt see any irqs or tasklets here which is actually odd, so
> > what is going on
> >
>
> The tasklet is hidden inside the JR driver from the CAAM subsystem (see
> drivers/crypto/caam/jr.c). The function caam_dma_done(), which is called
> from the JR tasklet handler, also uses the spin_lock. I need to disable
> bottom half to make sure that I don't run into a deadlock when I'm
> preempted.

This would be the right time to explain why. I understand that your subsystem
has tightly coupled DMA but that doesn't really mean the DMA controller should
not have its own IRQ and bh. This makes things harder to review and follow.

> >> + cookie = dma_cookie_assign(tx);
> >> + list_add_tail(&edesc->node, &ctx->submit_q);
> >> +
> >> + spin_unlock_bh(&ctx->edesc_lock);
> >> +
> >> + return cookie;
> >> +}
> >
> > we have a virtual channel wrapper where we do the same stuff as above, so
> > consider reusing that
> >
>
> Some of the functionality that the virtual channel might provide cannot
> be extracted because it is embedded into the JR driver (e.g. the
> tasklet). Therefore the use of the virtual channel is inefficient at
> best if not even impossible.

Which itself is problematic in first place and no explanation provided on
why. Yes you have a special hardware, so does every second person!

> >> +static void caam_dma_issue_pending(struct dma_chan *chan)
> >> +{
> >> + struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx,
> >> + chan);
> >> + struct caam_dma_edesc *edesc, *_edesc;
> >> +
> >> + spin_lock_bh(&ctx->edesc_lock);
> >> + list_for_each_entry_safe(edesc, _edesc, &ctx->submit_q, node) {
> >> + if (caam_jr_enqueue(ctx->jrdev, edesc->jd,
> >> + caam_dma_done, edesc) < 0)
> >
> > what does the caam_jr_enqueue() do?
> >
>
> Enqueues a job descriptor (jd) into a job ring (jr). Additionally the
> "caam_dma_done()" function is registered to be used as a callback when
> the job finishes. For more details see drivers/crypto/caam/jr.c.

Sorry I am going to refuse to look at other references. A patch should
provide enough explanation on its own for me to review.

> >> + err = dma_async_device_register(dma_dev);
> >> + if (err) {
> >> + dev_err(dev, "Failed to register CAAM DMA engine\n");
> >> + goto jr_bind_err;
> >> + }
> >> +
> >> + dev_info(dev, "caam dma support with %d job rings\n", bonds);
> >
> > that is very noisy
> >
>
> I want to print a line that informs that the caam_dma driver has been
> successfully probed. Do you have any other suggestion?

Please remove the line, or worst make this debug print

Overall I am not very excited about this implementation. I think things can
be improved and decoupled from crypto driver and things done independently.
If you do so this driver can be reused across different crypto
implementations where you have the same DMA controller.

Thanks
--
~Vinod