2023-12-14 11:43:27

by Md Sadre Alam

[permalink] [raw]
Subject: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w

Add BAM/DMA support for crypto register read/write.
With this change, multiple crypto registers can be
written through the BAM in a single transaction.

Signed-off-by: Md Sadre Alam <[email protected]>
---
drivers/crypto/qce/core.h | 9 ++
drivers/crypto/qce/dma.c | 233 ++++++++++++++++++++++++++++++++++++++
drivers/crypto/qce/dma.h | 24 +++-
3 files changed, 265 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/qce/core.h b/drivers/crypto/qce/core.h
index 25e2af45c047..bf28dedd1509 100644
--- a/drivers/crypto/qce/core.h
+++ b/drivers/crypto/qce/core.h
@@ -40,6 +40,8 @@ struct qce_device {
int burst_size;
unsigned int pipe_pair_id;
dma_addr_t base_dma;
+ __le32 *reg_read_buf;
+ dma_addr_t reg_buf_phys;
int (*async_req_enqueue)(struct qce_device *qce,
struct crypto_async_request *req);
void (*async_req_done)(struct qce_device *qce, int ret);
@@ -59,4 +61,11 @@ struct qce_algo_ops {
int (*async_req_handle)(struct crypto_async_request *async_req);
};

+int qce_write_reg_dma(struct qce_device *qce, unsigned int offset, u32 val,
+ int cnt);
+int qce_read_reg_dma(struct qce_device *qce, unsigned int offset, void *buff,
+ int cnt);
+void qce_clear_bam_transaction(struct qce_device *qce);
+int qce_submit_cmd_desc(struct qce_device *qce, unsigned long flags);
+struct qce_bam_transaction *qce_alloc_bam_txn(struct qce_dma_data *dma);
#endif /* _CORE_H_ */
diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c
index 46db5bf366b4..85c8d4107afa 100644
--- a/drivers/crypto/qce/dma.c
+++ b/drivers/crypto/qce/dma.c
@@ -4,12 +4,220 @@
*/

#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
#include <crypto/scatterwalk.h>

#include "dma.h"
+#include "core.h"
+
+/*
+ * Convert a CPU pointer that lies inside qce->reg_read_buf into the matching
+ * DMA address: reg_buf_phys plus the byte offset of @vaddr from the start of
+ * the buffer.
+ */
+#define QCE_REG_BUF_DMA_ADDR(qce, vaddr)				\
+	((qce)->reg_buf_phys +						\
+	 ((u8 *)(vaddr) - (u8 *)(qce)->reg_read_buf))
+
+/*
+ * qce_clear_bam_transaction() - rewind the BAM transaction bookkeeping
+ * @qce: crypto engine device whose transaction state is reset
+ *
+ * Resets both command-element indices and both scatterlist entry counters to
+ * zero so that the next register read/write is queued from the start of the
+ * arrays. The command elements and scatterlist entries themselves are left
+ * untouched.
+ */
+void qce_clear_bam_transaction(struct qce_device *qce)
+{
+	struct qce_bam_transaction *qce_bam_txn = qce->dma.qce_bam_txn;
+
+	qce_bam_txn->qce_bam_ce_index = 0;
+	qce_bam_txn->qce_pre_bam_ce_index = 0;
+	qce_bam_txn->qce_write_sgl_cnt = 0;
+	qce_bam_txn->qce_read_sgl_cnt = 0;
+}
+
+/*
+ * qce_dma_prep_cmd_sg() - map, prepare and submit one command scatterlist
+ * @qce: crypto engine device
+ * @chan: DMA channel the descriptor is queued on
+ * @qce_bam_sgl: scatterlist describing the command elements
+ * @qce_sgl_cnt: number of valid entries in @qce_bam_sgl
+ * @flags: dmaengine prep flags handed to dmaengine_prep_slave_sg()
+ * @dir: dmaengine transfer direction (DMA_MEM_TO_DEV or DMA_DEV_TO_MEM)
+ * @cb: completion callback stored in the descriptor, may be NULL
+ * @cb_param: argument for @cb
+ *
+ * Return: 0 on success, -EINVAL for an empty scatterlist or a failed prep,
+ * -ENOMEM when the scatterlist cannot be mapped, or the dmaengine submit
+ * error.
+ */
+static int qce_dma_prep_cmd_sg(struct qce_device *qce, struct dma_chan *chan,
+			       struct scatterlist *qce_bam_sgl,
+			       int qce_sgl_cnt, unsigned long flags,
+			       enum dma_transfer_direction dir,
+			       dma_async_tx_callback cb, void *cb_param)
+{
+	struct qce_desc_info *desc = qce->dma.qce_bam_txn->qce_desc;
+	struct dma_async_tx_descriptor *dma_desc;
+	enum dma_data_direction map_dir;
+	dma_cookie_t cookie;
+
+	if (!qce_bam_sgl || !qce_sgl_cnt)
+		return -EINVAL;
+
+	/*
+	 * dma_map_sg()/dma_unmap_sg() take an enum dma_data_direction, not the
+	 * dmaengine enum dma_transfer_direction, so translate explicitly
+	 * instead of relying on the two enums sharing numeric values.
+	 */
+	map_dir = (dir == DMA_MEM_TO_DEV) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+	if (!dma_map_sg(qce->dev, qce_bam_sgl, qce_sgl_cnt, map_dir)) {
+		dev_err(qce->dev, "failure in mapping sgl for cmd desc\n");
+		return -ENOMEM;
+	}
+
+	dma_desc = dmaengine_prep_slave_sg(chan, qce_bam_sgl, qce_sgl_cnt,
+					   dir, flags);
+	if (!dma_desc) {
+		pr_err("%s:failure in prep cmd desc\n", __func__);
+		dma_unmap_sg(qce->dev, qce_bam_sgl, qce_sgl_cnt, map_dir);
+		/*
+		 * Do not free @desc here: it belongs to the BAM transaction
+		 * and is released in qce_dma_release(). Freeing it on this
+		 * path would leave a dangling pointer and a later double free.
+		 */
+		return -EINVAL;
+	}
+
+	/*
+	 * NOTE(review): the transaction owns a single qce_desc, so when
+	 * qce_submit_cmd_desc() queues both a read and a write list the
+	 * second call overwrites the first descriptor pointer - confirm that
+	 * nothing needs the earlier one.
+	 */
+	desc->dma_desc = dma_desc;
+	desc->dma_desc->callback = cb;
+	desc->dma_desc->callback_param = cb_param;
+
+	cookie = dmaengine_submit(desc->dma_desc);
+
+	return dma_submit_error(cookie);
+}
+
+/*
+ * qce_submit_cmd_desc() - queue the pending register command lists
+ * @qce: crypto engine device
+ * @flags: not used by this function; DMA_PREP_CMD is always passed down
+ *
+ * Submits the read scatterlist first and the write scatterlist second, each
+ * only when it holds at least one entry, then kicks the DMA engine.
+ *
+ * Return: 0 on success or the error from qce_dma_prep_cmd_sg().
+ */
+int qce_submit_cmd_desc(struct qce_device *qce, unsigned long flags)
+{
+	struct qce_bam_transaction *txn = qce->dma.qce_bam_txn;
+	/*
+	 * Command descriptors always use the consumer pipe, as recommended
+	 * by the HPG.
+	 */
+	struct dma_chan *cmd_chan = qce->dma.rxchan;
+	int err;
+
+	if (txn->qce_read_sgl_cnt) {
+		err = qce_dma_prep_cmd_sg(qce, cmd_chan,
+					  txn->qce_reg_read_sgl,
+					  txn->qce_read_sgl_cnt,
+					  DMA_PREP_CMD, DMA_DEV_TO_MEM,
+					  NULL, NULL);
+		if (err) {
+			pr_err("error while submiting cmd desc for rx\n");
+			return err;
+		}
+	}
+
+	if (txn->qce_write_sgl_cnt) {
+		err = qce_dma_prep_cmd_sg(qce, cmd_chan,
+					  txn->qce_reg_write_sgl,
+					  txn->qce_write_sgl_cnt,
+					  DMA_PREP_CMD, DMA_MEM_TO_DEV,
+					  NULL, NULL);
+		if (err) {
+			pr_err("error while submiting cmd desc for tx\n");
+			return err;
+		}
+	}
+
+	qce_dma_issue_pending(&qce->dma);
+
+	return 0;
+}
+
+/*
+ * qce_prep_dma_command_desc() - append one register command to the transaction
+ * @qce: crypto engine device
+ * @dma: DMA data holding the BAM transaction
+ * @read: true to queue a register read, false to queue a register write
+ * @addr: bus address of the register
+ * @buff: for reads, a pointer inside qce->reg_read_buf that receives the
+ *        value; for writes, a pointer to the 32-bit value to write
+ * @size: number of command elements the new scatterlist entry spans
+ *
+ * Fills the command element at the current index, advances the index by
+ * @size and adds a scatterlist entry (read or write list, depending on @read)
+ * covering the elements between the previous and the new index.
+ *
+ * The request is dropped with an error message when @size is not positive or
+ * when the command-element array or the selected scatterlist has no room
+ * left, instead of writing past the end of those fixed-size arrays.
+ */
+static void qce_prep_dma_command_desc(struct qce_device *qce,
+				      struct qce_dma_data *dma, bool read,
+				      unsigned int addr, void *buff, int size)
+{
+	struct qce_bam_transaction *txn = dma->qce_bam_txn;
+	struct bam_cmd_element *ce, *first_ce;
+	struct scatterlist *sgl;
+	unsigned int ce_bytes;
+	u32 *sgl_cnt;
+
+	/* Only the target scatterlist and its counter differ per direction. */
+	if (read) {
+		sgl = txn->qce_reg_read_sgl;
+		sgl_cnt = &txn->qce_read_sgl_cnt;
+	} else {
+		sgl = txn->qce_reg_write_sgl;
+		sgl_cnt = &txn->qce_write_sgl_cnt;
+	}
+
+	if (size <= 0 ||
+	    txn->qce_bam_ce_index >= QCE_BAM_CMD_ELEMENT_SIZE ||
+	    size > QCE_BAM_CMD_ELEMENT_SIZE - txn->qce_bam_ce_index ||
+	    *sgl_cnt >= QCE_BAM_CMD_SGL_SIZE) {
+		dev_err(qce->dev, "no room left in BAM command transaction\n");
+		return;
+	}
+
+	ce = &txn->qce_bam_ce[txn->qce_bam_ce_index];
+	if (read)
+		bam_prep_ce(ce, addr, BAM_READ_COMMAND,
+			    QCE_REG_BUF_DMA_ADDR(qce, (unsigned int *)buff));
+	else
+		/*
+		 * NOTE(review): qce_write_reg_dma() passes the address of a
+		 * CPU-endian u32 here - confirm no cpu_to_le32() is needed.
+		 */
+		bam_prep_ce_le32(ce, addr, BAM_WRITE_COMMAND,
+				 *((__le32 *)buff));
+
+	/* The new scatterlist entry covers [previous index, new index). */
+	first_ce = &txn->qce_bam_ce[txn->qce_pre_bam_ce_index];
+	txn->qce_bam_ce_index += size;
+	ce_bytes = (txn->qce_bam_ce_index - txn->qce_pre_bam_ce_index) *
+		   sizeof(struct bam_cmd_element);
+
+	sg_set_buf(&sgl[*sgl_cnt], first_ce, ce_bytes);
+	++*sgl_cnt;
+
+	txn->qce_pre_bam_ce_index = txn->qce_bam_ce_index;
+}
+
+/*
+ * qce_write_reg_dma() - queue a register write in the BAM transaction
+ * @qce: crypto engine device
+ * @offset: register offset, added to the low 32 bits of qce->base_dma
+ * @val: value to write
+ * @cnt: command element count forwarded to qce_prep_dma_command_desc()
+ *
+ * Only queues the command; nothing is sent to the hardware here.
+ *
+ * Return: always 0.
+ */
+int qce_write_reg_dma(struct qce_device *qce,
+		      unsigned int offset, u32 val, int cnt)
+{
+	unsigned int reg_addr = (unsigned int)qce->base_dma + offset;
+
+	qce_prep_dma_command_desc(qce, &qce->dma, false, reg_addr, &val, cnt);
+
+	return 0;
+}
+
+/*
+ * qce_read_reg_dma() - queue a register read in the BAM transaction
+ * @qce: crypto engine device
+ * @offset: register offset, added to the low 32 bits of qce->base_dma
+ * @buff: caller buffer that receives 4 bytes copied from qce->reg_read_buf
+ * @cnt: command element count forwarded to qce_prep_dma_command_desc()
+ *
+ * The read command always targets the start of qce->reg_read_buf.
+ *
+ * Return: always 0.
+ */
+int qce_read_reg_dma(struct qce_device *qce,
+		     unsigned int offset, void *buff, int cnt)
+{
+	unsigned int reg_addr = (unsigned int)qce->base_dma + offset;
+	void *bounce = qce->reg_read_buf;
+
+	qce_prep_dma_command_desc(qce, &qce->dma, true, reg_addr, bounce, cnt);
+
+	/*
+	 * NOTE(review): the command was only queued above; nothing in this
+	 * function submits it or waits for completion, so this copy looks
+	 * like it returns whatever reg_read_buf held before - confirm.
+	 */
+	memcpy(buff, bounce, 4);
+
+	return 0;
+}
+
+/*
+ * qce_alloc_bam_txn() - allocate and initialise the BAM transaction
+ * @dma: DMA data that will own the transaction
+ *
+ * Allocates the transaction and its single descriptor-info object, both
+ * zero-initialised so that the command-element indices and scatterlist
+ * counters start at 0, and initialises the read and write scatterlists.
+ * On success the transaction is also stored in @dma->qce_bam_txn; on failure
+ * that pointer is set to NULL.
+ *
+ * Return: the new transaction, or NULL when an allocation fails.
+ */
+struct qce_bam_transaction *qce_alloc_bam_txn(struct qce_dma_data *dma)
+{
+	struct qce_bam_transaction *qce_bam_txn;
+
+	dma->qce_bam_txn = NULL;
+
+	/* kzalloc: the indices and counters are read before any explicit clear. */
+	qce_bam_txn = kzalloc(sizeof(*qce_bam_txn), GFP_KERNEL);
+	if (!qce_bam_txn)
+		return NULL;
+
+	qce_bam_txn->qce_desc = kzalloc(sizeof(*qce_bam_txn->qce_desc),
+					GFP_KERNEL);
+	if (!qce_bam_txn->qce_desc) {
+		kfree(qce_bam_txn);
+		return NULL;
+	}
+
+	sg_init_table(qce_bam_txn->qce_reg_write_sgl, QCE_BAM_CMD_SGL_SIZE);
+	sg_init_table(qce_bam_txn->qce_reg_read_sgl, QCE_BAM_CMD_SGL_SIZE);
+
+	dma->qce_bam_txn = qce_bam_txn;
+
+	return qce_bam_txn;
+}

int qce_dma_request(struct device *dev, struct qce_dma_data *dma)
{
+ struct qce_device *qce = container_of(dma, struct qce_device, dma);
int ret;

dma->txchan = dma_request_chan(dev, "tx");
@@ -31,6 +239,21 @@ int qce_dma_request(struct device *dev, struct qce_dma_data *dma)

dma->ignore_buf = dma->result_buf + QCE_RESULT_BUF_SZ;

+ dma->qce_bam_txn = qce_alloc_bam_txn(dma);
+ if (!dma->qce_bam_txn) {
+ pr_err("Failed to allocate bam transaction\n");
+ return -ENOMEM;
+ }
+
+ qce->reg_read_buf = dmam_alloc_coherent(qce->dev,
+ QCE_MAX_REG_READ *
+ sizeof(*qce->reg_read_buf),
+ &qce->reg_buf_phys, GFP_KERNEL);
+ if (!qce->reg_read_buf) {
+ pr_err("Failed to allocate reg_read_buf\n");
+ return -ENOMEM;
+ }
+
return 0;
error_nomem:
dma_release_channel(dma->rxchan);
@@ -41,9 +264,19 @@ int qce_dma_request(struct device *dev, struct qce_dma_data *dma)

void qce_dma_release(struct qce_dma_data *dma) /* Release both DMA channels and free the buffers and BAM transaction hanging off @dma. */
{
+ struct qce_device *qce = container_of(dma,
+ struct qce_device, dma);
+
dma_release_channel(dma->txchan);
dma_release_channel(dma->rxchan);
kfree(dma->result_buf);
+ if (qce->reg_read_buf) /* NOTE(review): buffer comes from dmam_alloc_coherent() in qce_dma_request(); presumably device-managed - confirm an explicit free is wanted */
+ dmam_free_coherent(qce->dev, QCE_MAX_REG_READ *
+ sizeof(*qce->reg_read_buf),
+ qce->reg_read_buf,
+ qce->reg_buf_phys);
+ kfree(dma->qce_bam_txn->qce_desc); /* dereferences qce_bam_txn unconditionally, so it must be non-NULL here */
+ kfree(dma->qce_bam_txn);
}

struct scatterlist *
diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h
index 786402169360..f10991590b3f 100644
--- a/drivers/crypto/qce/dma.h
+++ b/drivers/crypto/qce/dma.h
@@ -7,6 +7,7 @@
#define _DMA_H_

#include <linux/dmaengine.h>
+#include <linux/dma/qcom_bam_dma.h>

/* maximum data transfer block size between BAM and CE */
#define QCE_BAM_BURST_SIZE 64
@@ -14,6 +15,10 @@
#define QCE_AUTHIV_REGS_CNT 16
#define QCE_AUTH_BYTECOUNT_REGS_CNT 4
#define QCE_CNTRIV_REGS_CNT 4
+#define QCE_BAM_CMD_SGL_SIZE 64
+#define QCE_BAM_CMD_ELEMENT_SIZE 64
+#define QCE_DMA_DESC_FLAG_BAM_NWD (0x0004)
+#define QCE_MAX_REG_READ 8

struct qce_result_dump {
u32 auth_iv[QCE_AUTHIV_REGS_CNT];
@@ -27,13 +32,30 @@ struct qce_result_dump {
#define QCE_RESULT_BUF_SZ \
ALIGN(sizeof(struct qce_result_dump), QCE_BAM_BURST_SIZE)

+struct qce_bam_transaction { /* bookkeeping for register reads/writes queued as BAM command elements */
+ struct bam_cmd_element qce_bam_ce[QCE_BAM_CMD_ELEMENT_SIZE]; /* command elements, filled in order */
+ struct scatterlist qce_reg_write_sgl[QCE_BAM_CMD_SGL_SIZE]; /* entries covering queued write commands */
+ struct scatterlist qce_reg_read_sgl[QCE_BAM_CMD_SGL_SIZE]; /* entries covering queued read commands */
+ struct qce_desc_info *qce_desc; /* single descriptor-info object, allocated in qce_alloc_bam_txn() */
+ u32 qce_bam_ce_index; /* index of the next command element to fill */
+ u32 qce_pre_bam_ce_index; /* index where the next scatterlist entry starts */
+ u32 qce_write_sgl_cnt; /* number of used entries in qce_reg_write_sgl */
+ u32 qce_read_sgl_cnt; /* number of used entries in qce_reg_read_sgl */
+};
+
struct qce_dma_data {
struct dma_chan *txchan;
struct dma_chan *rxchan;
struct qce_result_dump *result_buf;
+ struct qce_bam_transaction *qce_bam_txn;
void *ignore_buf;
};

+struct qce_desc_info { /* per-transaction DMA descriptor state */
+ struct dma_async_tx_descriptor *dma_desc; /* descriptor most recently prepared in qce_dma_prep_cmd_sg() */
+ enum dma_data_direction dir; /* NOTE(review): not written by any code in this patch - confirm it is needed */
+};
+
int qce_dma_request(struct device *dev, struct qce_dma_data *dma);
void qce_dma_release(struct qce_dma_data *dma);
int qce_dma_prep_sgs(struct qce_dma_data *dma, struct scatterlist *sg_in,
@@ -44,5 +66,5 @@ int qce_dma_terminate_all(struct qce_dma_data *dma);
struct scatterlist *
qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add,
unsigned int max_len);
-
+void qce_dma_issue_cmd_desc_pending(struct qce_dma_data *dma, bool read);
#endif /* _DMA_H_ */
--
2.34.1



2023-12-15 00:12:58

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w

Hi Md,

kernel test robot noticed the following build errors:

[auto build test ERROR on herbert-cryptodev-2.6/master]
[also build test ERROR on vkoul-dmaengine/next linus/master v6.7-rc5 next-20231214]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Md-Sadre-Alam/crypto-qce-Add-support-for-crypto-address-read/20231214-194404
base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
patch link: https://lore.kernel.org/r/20231214114239.2635325-3-quic_mdalam%40quicinc.com
patch subject: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w
config: arm-randconfig-004-20231215 (https://download.01.org/0day-ci/archive/20231215/[email protected]/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231215/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

In file included from drivers/crypto/qce/dma.c:11:
>> drivers/crypto/qce/core.h:32:24: error: field has incomplete type 'struct tasklet_struct'
struct tasklet_struct done_tasklet;
^
drivers/crypto/qce/core.h:32:9: note: forward declaration of 'struct tasklet_struct'
struct tasklet_struct done_tasklet;
^
drivers/crypto/qce/dma.c:44:17: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
qce_sgl_cnt, dir)) {
~~~~~~~~~~~~~^~~~
include/linux/dma-mapping.h:419:58: note: expanded from macro 'dma_map_sg'
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
~~~~~~~~~~~~~~~~ ^
drivers/crypto/qce/dma.c:53:52: warning: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
dma_unmap_sg(qce->dev, qce_bam_sgl, qce_sgl_cnt, dir);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
include/linux/dma-mapping.h:420:62: note: expanded from macro 'dma_unmap_sg'
#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
~~~~~~~~~~~~~~~~~~ ^
2 warnings and 1 error generated.


vim +32 drivers/crypto/qce/core.h

ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 10
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 11 /**
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 12 * struct qce_device - crypto engine device structure
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 13 * @queue: crypto request queue
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 14 * @lock: the lock protects queue and req
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 15 * @done_tasklet: done tasklet object
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 16 * @req: current active request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 17 * @result: result of current transform
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 18 * @base: virtual IO base
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 19 * @dev: pointer to device structure
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 20 * @core: core device clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 21 * @iface: interface clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 22 * @bus: bus clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 23 * @dma: pointer to dma data
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 24 * @burst_size: the crypto burst size
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 25 * @pipe_pair_id: which pipe pair id the device using
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 26 * @async_req_enqueue: invoked by every algorithm to enqueue a request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 27 * @async_req_done: invoked by every algorithm to finish its request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 28 */
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 29 struct qce_device {
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 30 struct crypto_queue queue;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 31 spinlock_t lock;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 @32 struct tasklet_struct done_tasklet;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 33 struct crypto_async_request *req;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 34 int result;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 35 void __iomem *base;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 36 struct device *dev;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 37 struct clk *core, *iface, *bus;
694ff00c9bb387 Thara Gopinath 2023-02-22 38 struct icc_path *mem_path;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 39 struct qce_dma_data dma;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 40 int burst_size;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 41 unsigned int pipe_pair_id;
f666e78afa2c49 Md Sadre Alam 2023-12-14 42 dma_addr_t base_dma;
74826d774de8a8 Md Sadre Alam 2023-12-14 43 __le32 *reg_read_buf;
74826d774de8a8 Md Sadre Alam 2023-12-14 44 dma_addr_t reg_buf_phys;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 45 int (*async_req_enqueue)(struct qce_device *qce,
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 46 struct crypto_async_request *req);
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 47 void (*async_req_done)(struct qce_device *qce, int ret);
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 48 };
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 49

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-15 00:24:59

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w

Hi Md,

kernel test robot noticed the following build errors:

[auto build test ERROR on herbert-cryptodev-2.6/master]
[also build test ERROR on vkoul-dmaengine/next linus/master v6.7-rc5 next-20231214]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Md-Sadre-Alam/crypto-qce-Add-support-for-crypto-address-read/20231214-194404
base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
patch link: https://lore.kernel.org/r/20231214114239.2635325-3-quic_mdalam%40quicinc.com
patch subject: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w
config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20231215/[email protected]/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231215/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All error/warnings (new ones prefixed by >>):

In file included from drivers/crypto/qce/dma.c:11:
>> drivers/crypto/qce/core.h:32:31: error: field 'done_tasklet' has incomplete type
32 | struct tasklet_struct done_tasklet;
| ^~~~~~~~~~~~
In file included from drivers/crypto/qce/dma.c:7:
drivers/crypto/qce/dma.c: In function 'qce_dma_prep_cmd_sg':
>> drivers/crypto/qce/dma.c:44:38: warning: implicit conversion from 'enum dma_transfer_direction' to 'enum dma_data_direction' [-Wenum-conversion]
44 | qce_sgl_cnt, dir)) {
| ^~~
include/linux/dma-mapping.h:419:58: note: in definition of macro 'dma_map_sg'
419 | #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
| ^
drivers/crypto/qce/dma.c:53:66: warning: implicit conversion from 'enum dma_transfer_direction' to 'enum dma_data_direction' [-Wenum-conversion]
53 | dma_unmap_sg(qce->dev, qce_bam_sgl, qce_sgl_cnt, dir);
| ^~~
include/linux/dma-mapping.h:420:62: note: in definition of macro 'dma_unmap_sg'
420 | #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
| ^


vim +/done_tasklet +32 drivers/crypto/qce/core.h

ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 10
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 11 /**
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 12 * struct qce_device - crypto engine device structure
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 13 * @queue: crypto request queue
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 14 * @lock: the lock protects queue and req
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 15 * @done_tasklet: done tasklet object
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 16 * @req: current active request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 17 * @result: result of current transform
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 18 * @base: virtual IO base
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 19 * @dev: pointer to device structure
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 20 * @core: core device clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 21 * @iface: interface clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 22 * @bus: bus clock
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 23 * @dma: pointer to dma data
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 24 * @burst_size: the crypto burst size
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 25 * @pipe_pair_id: which pipe pair id the device using
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 26 * @async_req_enqueue: invoked by every algorithm to enqueue a request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 27 * @async_req_done: invoked by every algorithm to finish its request
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 28 */
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 29 struct qce_device {
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 30 struct crypto_queue queue;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 31 spinlock_t lock;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 @32 struct tasklet_struct done_tasklet;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 33 struct crypto_async_request *req;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 34 int result;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 35 void __iomem *base;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 36 struct device *dev;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 37 struct clk *core, *iface, *bus;
694ff00c9bb387 Thara Gopinath 2023-02-22 38 struct icc_path *mem_path;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 39 struct qce_dma_data dma;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 40 int burst_size;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 41 unsigned int pipe_pair_id;
f666e78afa2c49 Md Sadre Alam 2023-12-14 42 dma_addr_t base_dma;
74826d774de8a8 Md Sadre Alam 2023-12-14 43 __le32 *reg_read_buf;
74826d774de8a8 Md Sadre Alam 2023-12-14 44 dma_addr_t reg_buf_phys;
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 45 int (*async_req_enqueue)(struct qce_device *qce,
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 46 struct crypto_async_request *req);
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 47 void (*async_req_done)(struct qce_device *qce, int ret);
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 48 };
ec8f5d8f6f76b9 Stanimir Varbanov 2014-06-25 49

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-02-22 11:06:55

by Sricharan Ramabadhran

[permalink] [raw]
Subject: Re: [PATCH 02/11] crypto: qce - Add bam dma support for crypto register r/w



On 12/14/2023 5:12 PM, Md Sadre Alam wrote:
> Add BAM/DMA support for crypto register read/write.
> With this change multiple crypto register will get
> Written using bam in one go.
>
> Signed-off-by: Md Sadre Alam <[email protected]>
> ---
> drivers/crypto/qce/core.h | 9 ++
> drivers/crypto/qce/dma.c | 233 ++++++++++++++++++++++++++++++++++++++
> drivers/crypto/qce/dma.h | 24 +++-
> 3 files changed, 265 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/crypto/qce/core.h b/drivers/crypto/qce/core.h
> index 25e2af45c047..bf28dedd1509 100644
> --- a/drivers/crypto/qce/core.h
> +++ b/drivers/crypto/qce/core.h
> @@ -40,6 +40,8 @@ struct qce_device {
> int burst_size;
> unsigned int pipe_pair_id;
> dma_addr_t base_dma;
> + __le32 *reg_read_buf;
> + dma_addr_t reg_buf_phys;
> int (*async_req_enqueue)(struct qce_device *qce,
> struct crypto_async_request *req);
> void (*async_req_done)(struct qce_device *qce, int ret);
> @@ -59,4 +61,11 @@ struct qce_algo_ops {
> int (*async_req_handle)(struct crypto_async_request *async_req);
> };
>
> +int qce_write_reg_dma(struct qce_device *qce, unsigned int offset, u32 val,
> + int cnt);
> +int qce_read_reg_dma(struct qce_device *qce, unsigned int offset, void *buff,
> + int cnt);
> +void qce_clear_bam_transaction(struct qce_device *qce);
> +int qce_submit_cmd_desc(struct qce_device *qce, unsigned long flags);
> +struct qce_bam_transaction *qce_alloc_bam_txn(struct qce_dma_data *dma);
> #endif /* _CORE_H_ */
> diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c
> index 46db5bf366b4..85c8d4107afa 100644
> --- a/drivers/crypto/qce/dma.c
> +++ b/drivers/crypto/qce/dma.c
> @@ -4,12 +4,220 @@
> */
>
> #include <linux/dmaengine.h>
> +#include <linux/dma-mapping.h>
> #include <crypto/scatterwalk.h>
>
> #include "dma.h"
> +#include "core.h"

alphabetical order

> +
> +#define QCE_REG_BUF_DMA_ADDR(qce, vaddr) \
> + ((qce)->reg_buf_phys + \
> + ((uint8_t *)(vaddr) - (uint8_t *)(qce)->reg_read_buf))
> +
> +void qce_clear_bam_transaction(struct qce_device *qce)
> +{
> + struct qce_bam_transaction *qce_bam_txn = qce->dma.qce_bam_txn;
> +
> + qce_bam_txn->qce_bam_ce_index = 0;
> + qce_bam_txn->qce_write_sgl_cnt = 0;
> + qce_bam_txn->qce_read_sgl_cnt = 0;
> + qce_bam_txn->qce_bam_ce_index = 0;
> + qce_bam_txn->qce_pre_bam_ce_index = 0;
> +}
> +

memset ?

> +static int qce_dma_prep_cmd_sg(struct qce_device *qce, struct dma_chan *chan,
> + struct scatterlist *qce_bam_sgl,
> + int qce_sgl_cnt, unsigned long flags,
> + enum dma_transfer_direction dir,
> + dma_async_tx_callback cb, void *cb_param)
> +{

Fix the alignment.

> + struct dma_async_tx_descriptor *dma_desc;
> + struct qce_desc_info *desc;
> + dma_cookie_t cookie;
> +
> + desc = qce->dma.qce_bam_txn->qce_desc;
> +
> + if (!qce_bam_sgl || !qce_sgl_cnt)
> + return -EINVAL;
> +
> + if (!dma_map_sg(qce->dev, qce_bam_sgl,
> + qce_sgl_cnt, dir)) {
> + dev_err(qce->dev, "failure in mapping sgl for cmd desc\n");
> + return -ENOMEM;
> + }
> +
> + dma_desc = dmaengine_prep_slave_sg(chan, qce_bam_sgl, qce_sgl_cnt,
> + dir, flags);
> + if (!dma_desc) {
> + pr_err("%s:failure in prep cmd desc\n", __func__);
> + dma_unmap_sg(qce->dev, qce_bam_sgl, qce_sgl_cnt, dir);
> + kfree(desc);
> + return -EINVAL;
> + }
> +
> + desc->dma_desc = dma_desc;
> + desc->dma_desc->callback = cb;
> + desc->dma_desc->callback_param = cb_param;
> +

you are overwriting same qce_desc here ?

> + cookie = dmaengine_submit(desc->dma_desc);
> +
> + return dma_submit_error(cookie);
> +}
> +
> +int qce_submit_cmd_desc(struct qce_device *qce, unsigned long flags)
> +{
> + struct qce_bam_transaction *qce_bam_txn = qce->dma.qce_bam_txn;
> + struct dma_chan *chan = qce->dma.rxchan;
> + unsigned long desc_flags;
> + int ret = 0;
> +
> + desc_flags = DMA_PREP_CMD;
> +
> + /* For command descriptor always use consumer pipe
> + * it recomended as per HPG
> + */
> +
> + if (qce_bam_txn->qce_read_sgl_cnt) {
> + ret = qce_dma_prep_cmd_sg(qce, chan,
> + qce_bam_txn->qce_reg_read_sgl,
> + qce_bam_txn->qce_read_sgl_cnt,
> + desc_flags, DMA_DEV_TO_MEM,
> + NULL, NULL);

alignment.

> + if (ret) {
> + pr_err("error while submiting cmd desc for rx\n");
> + return ret;
> + }
> + }
> +
> + if (qce_bam_txn->qce_write_sgl_cnt) {
> + ret = qce_dma_prep_cmd_sg(qce, chan,

Here chan is still pointing to rxchan. Is this correct ?

> + qce_bam_txn->qce_reg_write_sgl,
> + qce_bam_txn->qce_write_sgl_cnt,
> + desc_flags, DMA_MEM_TO_DEV,
> + NULL, NULL);
> + }
> +
> + if (ret) {
> + pr_err("error while submiting cmd desc for tx\n");
> + return ret;
> + }
> +
> + qce_dma_issue_pending(&qce->dma);
> +
> + return ret;
> +}
> +
> +static void qce_prep_dma_command_desc(struct qce_device *qce,
> + struct qce_dma_data *dma, bool read, unsigned int addr,
> + void *buff, int size)
> +{

alignment

> + struct qce_bam_transaction *qce_bam_txn = dma->qce_bam_txn;
> + struct bam_cmd_element *qce_bam_ce_buffer;
> + int qce_bam_ce_size, cnt, index;
> +
> + index = qce_bam_txn->qce_bam_ce_index;
> + qce_bam_ce_buffer = &qce_bam_txn->qce_bam_ce[index];
> + if (read)
> + bam_prep_ce(qce_bam_ce_buffer, addr, BAM_READ_COMMAND,
> + QCE_REG_BUF_DMA_ADDR(qce,
> + (unsigned int *)buff));
> + else
> + bam_prep_ce_le32(qce_bam_ce_buffer, addr, BAM_WRITE_COMMAND,
> + *((__le32 *)buff));
> +
> + if (read) {
> + cnt = qce_bam_txn->qce_read_sgl_cnt;
> + qce_bam_ce_buffer = &qce_bam_txn->qce_bam_ce
> + [qce_bam_txn->qce_pre_bam_ce_index];
> + qce_bam_txn->qce_bam_ce_index += size;
> + qce_bam_ce_size = (qce_bam_txn->qce_bam_ce_index -
> + qce_bam_txn->qce_pre_bam_ce_index) *
> + sizeof(struct bam_cmd_element);
> +
> + sg_set_buf(&qce_bam_txn->qce_reg_read_sgl[cnt],
> + qce_bam_ce_buffer,
> + qce_bam_ce_size);
> +
> + ++qce_bam_txn->qce_read_sgl_cnt;
> + qce_bam_txn->qce_pre_bam_ce_index =
> + qce_bam_txn->qce_bam_ce_index;
> + } else {
> + cnt = qce_bam_txn->qce_write_sgl_cnt;
> + qce_bam_ce_buffer = &qce_bam_txn->qce_bam_ce
> + [qce_bam_txn->qce_pre_bam_ce_index];
> + qce_bam_txn->qce_bam_ce_index += size;
> + qce_bam_ce_size = (qce_bam_txn->qce_bam_ce_index -
> + qce_bam_txn->qce_pre_bam_ce_index) *
> + sizeof(struct bam_cmd_element);
> +
> + sg_set_buf(&qce_bam_txn->qce_reg_write_sgl[cnt],
> + qce_bam_ce_buffer,
> + qce_bam_ce_size);
> +
> + ++qce_bam_txn->qce_write_sgl_cnt;
> + qce_bam_txn->qce_pre_bam_ce_index =
> + qce_bam_txn->qce_bam_ce_index;
> + }
> +}

Above piece of hunk can be improved.
*) Between read/write only array name is different, rest can be made
common
*) Can use some standard circular buffer apis, wrapping should be
taken care of.

> +
> +int qce_write_reg_dma(struct qce_device *qce,
> + unsigned int offset, u32 val, int cnt)
> +{
> + void *buff;
> + unsigned int reg_addr;
> +
> + buff = &val;
> +
> + reg_addr = ((unsigned int)(qce->base_dma) + offset);

Is this type-cast really required ?
The entire function can be folded in one line ?

> + qce_prep_dma_command_desc(qce, &qce->dma, false, reg_addr, buff, cnt);
> +
> + return 0;
> +}
> +
> +int qce_read_reg_dma(struct qce_device *qce,
> + unsigned int offset, void *buff, int cnt)
> +{
> + void *vaddr;
> + unsigned int reg_addr;
> +
> + reg_addr = ((unsigned int)(qce->base_dma) + offset);

same comment as above.

> + vaddr = qce->reg_read_buf;
> +
> + qce_prep_dma_command_desc(qce, &qce->dma, true, reg_addr, vaddr, cnt);
> + memcpy(buff, vaddr, 4);
> +
> + return 0;
> +}
> +
> +struct qce_bam_transaction *qce_alloc_bam_txn(struct qce_dma_data *dma)
> +{
> + struct qce_bam_transaction *qce_bam_txn;
> +
> + dma->qce_bam_txn = kmalloc(sizeof(*qce_bam_txn), GFP_KERNEL);
> + if (!dma->qce_bam_txn)
> + return NULL;
> +
> + dma->qce_bam_txn->qce_desc = kzalloc(sizeof(struct qce_desc_info),
> + GFP_KERNEL);

only one instance ?

> + if (!dma->qce_bam_txn->qce_desc) {
> + kfree(dma->qce_bam_txn);
> + return NULL;
> + }
> +
> + sg_init_table(dma->qce_bam_txn->qce_reg_write_sgl,
> + QCE_BAM_CMD_SGL_SIZE);
> +
> + sg_init_table(dma->qce_bam_txn->qce_reg_read_sgl,
> + QCE_BAM_CMD_SGL_SIZE);
> +
> + qce_bam_txn = dma->qce_bam_txn;
> +
> + return qce_bam_txn;

return dma->qce_bam_txn ??

> +}
>
> int qce_dma_request(struct device *dev, struct qce_dma_data *dma)
> {
> + struct qce_device *qce = container_of(dma, struct qce_device, dma);
> int ret;
>
> dma->txchan = dma_request_chan(dev, "tx");
> @@ -31,6 +239,21 @@ int qce_dma_request(struct device *dev, struct qce_dma_data *dma)
>
> dma->ignore_buf = dma->result_buf + QCE_RESULT_BUF_SZ;
>
> + dma->qce_bam_txn = qce_alloc_bam_txn(dma);
> + if (!dma->qce_bam_txn) {
> + pr_err("Failed to allocate bam transaction\n");
> + return -ENOMEM;
> + }
> +
> + qce->reg_read_buf = dmam_alloc_coherent(qce->dev,
> + QCE_MAX_REG_READ *
> + sizeof(*qce->reg_read_buf),
> + &qce->reg_buf_phys, GFP_KERNEL);

alignment

> + if (!qce->reg_read_buf) {
> + pr_err("Failed to allocate reg_read_buf\n");
> + return -ENOMEM;
> + }
> +
> return 0;
> error_nomem:
> dma_release_channel(dma->rxchan);
> @@ -41,9 +264,19 @@ int qce_dma_request(struct device *dev, struct qce_dma_data *dma)
>
> void qce_dma_release(struct qce_dma_data *dma)
> {
> + struct qce_device *qce = container_of(dma,
> + struct qce_device, dma);
> +
> dma_release_channel(dma->txchan);
> dma_release_channel(dma->rxchan);
> kfree(dma->result_buf);
> + if (qce->reg_read_buf)

is this check required ?

> + dmam_free_coherent(qce->dev, QCE_MAX_REG_READ *
> + sizeof(*qce->reg_read_buf),
> + qce->reg_read_buf,
> + qce->reg_buf_phys);
> + kfree(dma->qce_bam_txn->qce_desc);
> + kfree(dma->qce_bam_txn);
> }
>
> struct scatterlist *
> diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h
> index 786402169360..f10991590b3f 100644
> --- a/drivers/crypto/qce/dma.h
> +++ b/drivers/crypto/qce/dma.h
> @@ -7,6 +7,7 @@
> #define _DMA_H_
>
> #include <linux/dmaengine.h>
> +#include <linux/dma/qcom_bam_dma.h>
>
> /* maximum data transfer block size between BAM and CE */
> #define QCE_BAM_BURST_SIZE 64
> @@ -14,6 +15,10 @@
> #define QCE_AUTHIV_REGS_CNT 16
> #define QCE_AUTH_BYTECOUNT_REGS_CNT 4
> #define QCE_CNTRIV_REGS_CNT 4
> +#define QCE_BAM_CMD_SGL_SIZE 64
> +#define QCE_BAM_CMD_ELEMENT_SIZE 64
> +#define QCE_DMA_DESC_FLAG_BAM_NWD (0x0004)
> +#define QCE_MAX_REG_READ 8
>
> struct qce_result_dump {
> u32 auth_iv[QCE_AUTHIV_REGS_CNT];
> @@ -27,13 +32,30 @@ struct qce_result_dump {
> #define QCE_RESULT_BUF_SZ \
> ALIGN(sizeof(struct qce_result_dump), QCE_BAM_BURST_SIZE)
>
> +struct qce_bam_transaction {
> + struct bam_cmd_element qce_bam_ce[QCE_BAM_CMD_ELEMENT_SIZE];

Any reason why this is not dmam_alloc_coherent ?

Regards,
Sricharan