Subject: [PATCH] crypto: caam/qi - optimize frame queue cleanup

Add reference counter incremented for each frame enqueued in CAAM
and replace unconditional sleep in empty_caam_fq() with polling the
reference counter.

When CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y boot time on LS1043A
platform with this optimization decreases from ~1100s to ~11s.

Signed-off-by: Valentin Ciocoi Radulescu <[email protected]>
---
drivers/crypto/caam/qi.c | 60 +++++++++++++++++++++++++++++++-----------------
drivers/crypto/caam/qi.h | 4 +++-
2 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index dacf2fa..b390b93 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -4,7 +4,7 @@
* Queue Interface backend functionality
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017, 2019 NXP
+ * Copyright 2016-2017, 2019-2020 NXP
*/

#include <linux/cpumask.h>
@@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)

do {
ret = qman_enqueue(req->drv_ctx->req_fq, &fd);
- if (likely(!ret))
+ if (likely(!ret)) {
+ refcount_inc(&req->drv_ctx->refcnt);
return 0;
+ }

if (ret != -EBUSY)
break;
@@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,

fd = &msg->ern.fd;

- if (qm_fd_get_format(fd) != qm_fd_compound) {
- dev_err(qidev, "Non-compound FD from CAAM\n");
- return;
- }
-
drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
if (!drv_req) {
dev_err(qidev,
@@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
return;
}

+ refcount_dec(&drv_req->drv_ctx->refcnt);
+
+ if (qm_fd_get_format(fd) != qm_fd_compound) {
+ dev_err(qidev, "Non-compound FD from CAAM\n");
+ return;
+ }
+
dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);

@@ -287,9 +291,10 @@ static int kill_fq(struct device *qidev, struct qman_fq *fq)
return ret;
}

-static int empty_caam_fq(struct qman_fq *fq)
+static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx)
{
int ret;
+ int retries = 10;
struct qm_mcr_queryfq_np np;

/* Wait till the older CAAM FQ get empty */
@@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq)
msleep(20);
} while (1);

- /*
- * Give extra time for pending jobs from this FQ in holding tanks
- * to get processed
- */
- msleep(20);
+ /* Wait until pending jobs from this FQ are processed by CAAM */
+ do {
+ if (refcount_read(&drv_ctx->refcnt) == 1)
+ break;
+
+ msleep(20);
+ } while (--retries);
+
+ if (!retries)
+ dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n",
+ refcount_read(&drv_ctx->refcnt), fq->fqid);
+
return 0;
}

@@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
drv_ctx->req_fq = new_fq;

/* Empty and remove the older FQ */
- ret = empty_caam_fq(old_fq);
+ ret = empty_caam_fq(old_fq, drv_ctx);
if (ret) {
dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret);

@@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
return ERR_PTR(-ENOMEM);
}

+ /* init reference counter used to track references to request FQ */
+ refcount_set(&drv_ctx->refcnt, 1);
+
drv_ctx->qidev = qidev;
return drv_ctx;
}
@@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_stop;

fd = &dqrr->fd;
+
+ drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
+ if (unlikely(!drv_req)) {
+ dev_err(qidev,
+ "Can't find original request for caam response\n");
+ return qman_cb_dqrr_consume;
+ }
+
+ refcount_dec(&drv_req->drv_ctx->refcnt);
+
status = be32_to_cpu(fd->status);
if (unlikely(status)) {
u32 ssrc = status & JRSTA_SSRC_MASK;
@@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_consume;
}

- drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
- if (unlikely(!drv_req)) {
- dev_err(qidev,
- "Can't find original request for caam response\n");
- return qman_cb_dqrr_consume;
- }
-
dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);

diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index 8489589..5894f16 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -3,7 +3,7 @@
* Public definitions for the CAAM/QI (Queue Interface) backend.
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2017, 2020 NXP
*/

#ifndef __QI_H__
@@ -52,6 +52,7 @@ enum optype {
* @context_a: shared descriptor dma address
* @req_fq: to-CAAM request frame queue
* @rsp_fq: from-CAAM response frame queue
+ * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ
* @cpu: cpu on which to receive CAAM response
* @op_type: operation type
* @qidev: device pointer for CAAM/QI backend
@@ -62,6 +63,7 @@ struct caam_drv_ctx {
dma_addr_t context_a;
struct qman_fq *req_fq;
struct qman_fq *rsp_fq;
+ refcount_t refcnt;
int cpu;
enum optype op_type;
struct device *qidev;
--
2.7.4


2020-02-05 09:50:01

by Horia Geanta

[permalink] [raw]
Subject: Re: [PATCH] crypto: caam/qi - optimize frame queue cleanup

On 1/31/2020 4:15 PM, Valentin Ciocoi Radulescu wrote:
> Add reference counter incremented for each frame enqueued in CAAM
> and replace unconditional sleep in empty_caam_fq() with polling the
> reference counter.
>
> When CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y boot time on LS1043A
> platform with this optimization decreases from ~1100s to ~11s.
>
> Signed-off-by: Valentin Ciocoi Radulescu <[email protected]>
Reviewed-by: Horia Geant? <[email protected]>

Thanks,
Horia

2020-02-13 09:21:03

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH] crypto: caam/qi - optimize frame queue cleanup

On Fri, Jan 31, 2020 at 02:15:56PM +0000, Valentin Ciocoi Radulescu wrote:
> Add reference counter incremented for each frame enqueued in CAAM
> and replace unconditional sleep in empty_caam_fq() with polling the
> reference counter.
>
> When CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y boot time on LS1043A
> platform with this optimization decreases from ~1100s to ~11s.
>
> Signed-off-by: Valentin Ciocoi Radulescu <[email protected]>
> ---
> drivers/crypto/caam/qi.c | 60 +++++++++++++++++++++++++++++++-----------------
> drivers/crypto/caam/qi.h | 4 +++-
> 2 files changed, 42 insertions(+), 22 deletions(-)

Patch applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt