2016-08-18 12:14:08

by Romain Perier

Subject: [PATCH 0/2] Improve DMA chaining for ahash requests

This series contains performance improvements for ahash requests.
So far, ahash requests were systematically not chained at the DMA level.
However, in some cases, for example when using IPsec, some ahash
requests can be processed directly by the engine and have no
intermediate partial update states.

This series first reworks the way outer IVs are copied from the SRAM
into the dma pool. To do so, we introduce a common dma pool, for all
types of requests, that contains the outer results (such as the IV or
the digest). Then, for ahash requests that can be processed directly by
the engine, the outer results are copied from the SRAM into this common
dma pool, and these requests are allowed to be chained at the DMA level.
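
As an illustration, the heart of the change is the generic helper
introduced in patch 1, which appends a TDMA descriptor copying a region
of the SRAM into the common pool. A minimal sketch of its use for a
directly computable ahash request, with the offset and the 96-byte size
taken from patch 2:

	/*
	 * Copy the outer results (IV/digest area) from the SRAM into
	 * the common "result" dma pool, so that the 'complete'
	 * operation no longer has to read the engine registers for
	 * this request.
	 */
	ret = mv_cesa_dma_add_result_op(chain, CESA_SA_MAC_IIV_SRAM_OFFSET,
					96, CESA_TDMA_SRC_IN_SRAM, flags);
	if (ret)
		return ERR_PTR(-ENOMEM);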


Benchmarking results with iperf through IPsec
=============================================
              ESP            AH

Before        373 Mbits/s    530 Mbits/s
After         413 Mbits/s    578 Mbits/s
Improvement   +11%           +9%


Romain Perier (2):
crypto: marvell - Use an unique pool to copy results of requests
crypto: marvell - Don't break chain for computable last ahash requests

drivers/crypto/marvell/cesa.c | 4 +--
drivers/crypto/marvell/cesa.h | 6 ++--
drivers/crypto/marvell/cipher.c | 2 +-
drivers/crypto/marvell/hash.c | 69 ++++++++++++++++++++++++++++++++---------
drivers/crypto/marvell/tdma.c | 16 +++++-----
5 files changed, 68 insertions(+), 29 deletions(-)

--
2.8.1


2016-08-18 12:12:14

by Romain Perier

Subject: [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests

Currently, the driver breaks the chain for all kinds of hash requests
in order not to override the intermediate states of partial ahash
updates. However, some final ahash requests can be processed directly
by the engine, and thus have no intermediate state. This is typically
the case for most of the HMAC requests processed via IPsec.

This commit adds a TDMA descriptor that copies the outer results for
this kind of request into the "result" dma pool, and then allows these
requests to be chained at the DMA level. The 'complete' operation is
also updated to retrieve the MAC digest from the right location.

Signed-off-by: Romain Perier <[email protected]>
---
drivers/crypto/marvell/hash.c | 69 +++++++++++++++++++++++++++++++++----------
1 file changed, 54 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 9f28468..1a91662 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -312,24 +312,48 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
int i;

digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
- for (i = 0; i < digsize / 4; i++)
- creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));

- if (creq->last_req) {
+ if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
+ !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
+ struct mv_cesa_tdma_desc *tdma = NULL;
+
+ for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
+ u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
+ if (type == CESA_TDMA_RESULT)
+ break;
+ }
+
+ BUG_ON(!tdma);
+
/*
- * Hardware's MD5 digest is in little endian format, but
- * SHA in big endian format
+	 * Result is already in the correct endianness when the SA is
+ * used
*/
- if (creq->algo_le) {
- __le32 *result = (void *)ahashreq->result;
+ __le32 *data = tdma->data + 0x40;
+ for (i = 0; i < digsize / 4; i++)
+ creq->state[i] = cpu_to_le32(data[i]);

- for (i = 0; i < digsize / 4; i++)
- result[i] = cpu_to_le32(creq->state[i]);
- } else {
- __be32 *result = (void *)ahashreq->result;
+ memcpy(ahashreq->result, data, digsize);
+ } else {
+ for (i = 0; i < digsize / 4; i++)
+ creq->state[i] = readl_relaxed(engine->regs +
+ CESA_IVDIG(i));
+ if (creq->last_req) {
+ /*
+ * Hardware's MD5 digest is in little endian format, but
+ * SHA in big endian format
+ */
+ if (creq->algo_le) {
+ __le32 *result = (void *)ahashreq->result;
+
+ for (i = 0; i < digsize / 4; i++)
+ result[i] = cpu_to_le32(creq->state[i]);
+ } else {
+ __be32 *result = (void *)ahashreq->result;

- for (i = 0; i < digsize / 4; i++)
- result[i] = cpu_to_be32(creq->state[i]);
+ for (i = 0; i < digsize / 4; i++)
+ result[i] = cpu_to_be32(creq->state[i]);
+ }
}
}

@@ -504,6 +528,11 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
CESA_SA_DESC_CFG_LAST_FRAG,
CESA_SA_DESC_CFG_FRAG_MSK);

+ ret = mv_cesa_dma_add_result_op(chain,
+ CESA_SA_MAC_IIV_SRAM_OFFSET, 96,
+ CESA_TDMA_SRC_IN_SRAM, flags);
+ if (ret)
+ return ERR_PTR(-ENOMEM);
return op;
}

@@ -564,6 +593,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
struct mv_cesa_op_ctx *op = NULL;
unsigned int frag_len;
int ret;
+ u32 type;

basereq->chain.first = NULL;
basereq->chain.last = NULL;
@@ -635,6 +665,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
goto err_free_tdma;
}

+ type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
+
if (op) {
/* Add dummy desc to wait for crypto operation end */
ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
@@ -648,8 +680,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
else
creq->cache_ptr = 0;

- basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
- CESA_TDMA_BREAK_CHAIN);
+ basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
+ /*
+ * If results are copied via DMA, this means that this
+ * request can be directly processed by the engine,
+ * without partial updates. So we can chain it at the
+ * DMA level with other requests.
+ */
+ if (type != CESA_TDMA_RESULT)
+ basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;

return 0;

--
2.8.1

2016-08-18 12:14:09

by Romain Perier

Subject: [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests

So far, we used a dedicated dma pool to copy the outer IV result for
cipher requests. Instead of using one dma pool per type of outer data,
we prefer to use a common dma pool that contains the part of the SRAM
that is likely to be used later by the 'complete' operation. In this
way, any type of result can be retrieved by DMA, for cipher or ahash
requests alike.

Signed-off-by: Romain Perier <[email protected]>
---
drivers/crypto/marvell/cesa.c | 4 ++--
drivers/crypto/marvell/cesa.h | 6 +++---
drivers/crypto/marvell/cipher.c | 2 +-
drivers/crypto/marvell/tdma.c | 16 ++++++++--------
4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 37dadb2..4d308ad 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -375,8 +375,8 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
if (!dma->padding_pool)
return -ENOMEM;

- dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
- if (!dma->iv_pool)
+ dma->result_pool = dmam_pool_create("cesa_result", dev, 96, 1, 0);
+ if (!dma->result_pool)
return -ENOMEM;

cesa->dma = dma;
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index e423d33..3be1aa3 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
#define CESA_TDMA_DUMMY 0
#define CESA_TDMA_DATA 1
#define CESA_TDMA_OP 2
-#define CESA_TDMA_IV 3
+#define CESA_TDMA_RESULT 3

/**
* struct mv_cesa_tdma_desc - TDMA descriptor
@@ -393,7 +393,7 @@ struct mv_cesa_dev_dma {
struct dma_pool *op_pool;
struct dma_pool *cache_pool;
struct dma_pool *padding_pool;
- struct dma_pool *iv_pool;
+ struct dma_pool *result_pool;
};

/**
@@ -839,7 +839,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
memset(chain, 0, sizeof(*chain));
}

-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
u32 size, u32 flags, gfp_t gfp_flags);

struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index d19dc96..bd575b1 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -373,7 +373,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,

/* Add output data for IV */
ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
- ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
+ ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
ivsize, CESA_TDMA_SRC_IN_SRAM, flags);

if (ret)
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 9fd7a5f..499a1d3 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -69,8 +69,8 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
if (type == CESA_TDMA_OP)
dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
le32_to_cpu(tdma->src));
- else if (type == CESA_TDMA_IV)
- dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
+ else if (type == CESA_TDMA_RESULT)
+ dma_pool_free(cesa_dev->dma->result_pool, tdma->data,
le32_to_cpu(tdma->dst));

tdma = tdma->next;
@@ -209,29 +209,29 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
return new_tdma;
}

-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
u32 size, u32 flags, gfp_t gfp_flags)
{

struct mv_cesa_tdma_desc *tdma;
- u8 *iv;
+ u8 *result;
dma_addr_t dma_handle;

tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
if (IS_ERR(tdma))
return PTR_ERR(tdma);

- iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
- if (!iv)
+ result = dma_pool_alloc(cesa_dev->dma->result_pool, gfp_flags, &dma_handle);
+ if (!result)
return -ENOMEM;

tdma->byte_cnt = cpu_to_le32(size | BIT(31));
tdma->src = src;
tdma->dst = cpu_to_le32(dma_handle);
- tdma->data = iv;
+ tdma->data = result;

flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
- tdma->flags = flags | CESA_TDMA_IV;
+ tdma->flags = flags | CESA_TDMA_RESULT;
return 0;
}

--
2.8.1

2016-08-20 06:27:02

by Boris Brezillon

Subject: Re: [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests

Hi Romain,

On Thu, 18 Aug 2016 14:12:13 +0200
Romain Perier <[email protected]> wrote:

> So far, we used a dedicated dma pool to copy the outer IV result for
> cipher requests. Instead of using one dma pool per type of outer data,
> we prefer to use a common dma pool that contains the part of the SRAM
> that is likely to be used later by the 'complete' operation. In this
> way, any type of result can be retrieved by DMA, for cipher or ahash
> requests alike.

Can't we just re-use the op_pool (and drop the cesa_iv/result pool)?
It may be suboptimal in terms of memory usage, but adding more pools
also adds some overhead.
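
That is, a sketch of the idea, assuming an op context is at least as
large as the copied result:

	result = dma_pool_alloc(cesa_dev->dma->op_pool, gfp_flags,
				&dma_handle);

mv_cesa_dma_cleanup() would then free CESA_TDMA_RESULT entries back to
op_pool as well.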

>
> Signed-off-by: Romain Perier <[email protected]>
> ---
> drivers/crypto/marvell/cesa.c | 4 ++--
> drivers/crypto/marvell/cesa.h | 6 +++---
> drivers/crypto/marvell/cipher.c | 2 +-
> drivers/crypto/marvell/tdma.c | 16 ++++++++--------
> 4 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
> index 37dadb2..4d308ad 100644
> --- a/drivers/crypto/marvell/cesa.c
> +++ b/drivers/crypto/marvell/cesa.c
> @@ -375,8 +375,8 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
> if (!dma->padding_pool)
> return -ENOMEM;
>
> - dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
> - if (!dma->iv_pool)
> + dma->result_pool = dmam_pool_create("cesa_result", dev, 96, 1, 0);

It's better to use a sizeof(xxx) here to avoid bugs if the context size
grows.
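
Something like this, where the struct name is only a placeholder for
whatever type ends up describing the copied SRAM area:

	dma->result_pool = dmam_pool_create("cesa_result", dev,
					    sizeof(struct mv_cesa_result),
					    1, 0);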

The rest looks good.

Thanks,

Boris

> + if (!dma->result_pool)
> return -ENOMEM;
>
> cesa->dma = dma;
> diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
> index e423d33..3be1aa3 100644
> --- a/drivers/crypto/marvell/cesa.h
> +++ b/drivers/crypto/marvell/cesa.h
> @@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
> #define CESA_TDMA_DUMMY 0
> #define CESA_TDMA_DATA 1
> #define CESA_TDMA_OP 2
> -#define CESA_TDMA_IV 3
> +#define CESA_TDMA_RESULT 3
>
> /**
> * struct mv_cesa_tdma_desc - TDMA descriptor
> @@ -393,7 +393,7 @@ struct mv_cesa_dev_dma {
> struct dma_pool *op_pool;
> struct dma_pool *cache_pool;
> struct dma_pool *padding_pool;
> - struct dma_pool *iv_pool;
> + struct dma_pool *result_pool;
> };
>
> /**
> @@ -839,7 +839,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
> memset(chain, 0, sizeof(*chain));
> }
>
> -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> u32 size, u32 flags, gfp_t gfp_flags);
>
> struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
> diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
> index d19dc96..bd575b1 100644
> --- a/drivers/crypto/marvell/cipher.c
> +++ b/drivers/crypto/marvell/cipher.c
> @@ -373,7 +373,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
>
> /* Add output data for IV */
> ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
> - ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
> + ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
> ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
>
> if (ret)
> diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
> index 9fd7a5f..499a1d3 100644
> --- a/drivers/crypto/marvell/tdma.c
> +++ b/drivers/crypto/marvell/tdma.c
> @@ -69,8 +69,8 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
> if (type == CESA_TDMA_OP)
> dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
> le32_to_cpu(tdma->src));
> - else if (type == CESA_TDMA_IV)
> - dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
> + else if (type == CESA_TDMA_RESULT)
> + dma_pool_free(cesa_dev->dma->result_pool, tdma->data,
> le32_to_cpu(tdma->dst));
>
> tdma = tdma->next;
> @@ -209,29 +209,29 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
> return new_tdma;
> }
>
> -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> u32 size, u32 flags, gfp_t gfp_flags)
> {
>
> struct mv_cesa_tdma_desc *tdma;
> - u8 *iv;
> + u8 *result;
> dma_addr_t dma_handle;
>
> tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
> if (IS_ERR(tdma))
> return PTR_ERR(tdma);
>
> - iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
> - if (!iv)
> + result = dma_pool_alloc(cesa_dev->dma->result_pool, gfp_flags, &dma_handle);
> + if (!result)
> return -ENOMEM;
>
> tdma->byte_cnt = cpu_to_le32(size | BIT(31));
> tdma->src = src;
> tdma->dst = cpu_to_le32(dma_handle);
> - tdma->data = iv;
> + tdma->data = result;
>
> flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
> - tdma->flags = flags | CESA_TDMA_IV;
> + tdma->flags = flags | CESA_TDMA_RESULT;
> return 0;
> }
>

2016-08-20 07:18:00

by Boris Brezillon

Subject: Re: [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests

On Thu, 18 Aug 2016 14:12:14 +0200
Romain Perier <[email protected]> wrote:

> Currently, the driver breaks the chain for all kinds of hash requests
> in order not to override the intermediate states of partial ahash
> updates. However, some final ahash requests can be processed directly
> by the engine, and thus have no intermediate state. This is typically
> the case for most of the HMAC requests processed via IPsec.
>
> This commit adds a TDMA descriptor that copies the outer results for
> this kind of request into the "result" dma pool, and then allows these
> requests to be chained at the DMA level. The 'complete' operation is
> also updated to retrieve the MAC digest from the right location.
>
> Signed-off-by: Romain Perier <[email protected]>
> ---
> drivers/crypto/marvell/hash.c | 69 +++++++++++++++++++++++++++++++++----------
> 1 file changed, 54 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
> index 9f28468..1a91662 100644
> --- a/drivers/crypto/marvell/hash.c
> +++ b/drivers/crypto/marvell/hash.c
> @@ -312,24 +312,48 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
> int i;
>
> digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
> - for (i = 0; i < digsize / 4; i++)
> - creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
>
> - if (creq->last_req) {
> + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
> + !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
> + struct mv_cesa_tdma_desc *tdma = NULL;
> +
> + for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
> + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
> + if (type == CESA_TDMA_RESULT)
> + break;
> + }
> +
> + BUG_ON(!tdma);

Let's try to use BUG_ON() only when we can't recover from a failure.
That's not the case here: just print an error message, or even better,
move this check into ->process(), where you can return an error code
(note that this requires slightly reworking the way errors are handled
in mv_cesa_tdma_process()).
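
Something along these lines, for instance. This is only a sketch; the
exact shape depends on how the error handling in
mv_cesa_tdma_process() gets reworked:

	/*
	 * In the ahash ->process() path: fail gracefully instead of
	 * calling BUG_ON() when no result descriptor is found.
	 */
	for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
		if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT)
			break;
	}

	if (!tdma) {
		dev_err(cesa_dev->dev,
			"no result descriptor found in the tdma chain\n");
		return -EINVAL;
	}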

> +
> /*
> - * Hardware's MD5 digest is in little endian format, but
> - * SHA in big endian format
> +	 * Result is already in the correct endianness when the SA is
> + * used
> */
> - if (creq->algo_le) {
> - __le32 *result = (void *)ahashreq->result;
> + __le32 *data = tdma->data + 0x40;
> + for (i = 0; i < digsize / 4; i++)
> + creq->state[i] = cpu_to_le32(data[i]);
>
> - for (i = 0; i < digsize / 4; i++)
> - result[i] = cpu_to_le32(creq->state[i]);
> - } else {
> - __be32 *result = (void *)ahashreq->result;
> + memcpy(ahashreq->result, data, digsize);

Is it safe to do that when you're in big endian? (That's not a
rhetorical question, I always have a hard time figuring out when
endianness conversions should be done.)
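
For reference, if an explicit conversion turned out to be necessary, I
would expect it to mirror the register path, e.g. for the big endian
SHA case (a sketch only, assuming the copied words are little endian):

	__be32 *result = (void *)ahashreq->result;

	for (i = 0; i < digsize / 4; i++)
		result[i] = cpu_to_be32(le32_to_cpu(data[i]));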

> + } else {
> + for (i = 0; i < digsize / 4; i++)
> + creq->state[i] = readl_relaxed(engine->regs +
> + CESA_IVDIG(i));
> + if (creq->last_req) {
> + /*
> + * Hardware's MD5 digest is in little endian format, but
> + * SHA in big endian format
> + */
> + if (creq->algo_le) {
> + __le32 *result = (void *)ahashreq->result;
> +
> + for (i = 0; i < digsize / 4; i++)
> + result[i] = cpu_to_le32(creq->state[i]);
> + } else {
> + __be32 *result = (void *)ahashreq->result;
>
> - for (i = 0; i < digsize / 4; i++)
> - result[i] = cpu_to_be32(creq->state[i]);
> + for (i = 0; i < digsize / 4; i++)
> + result[i] = cpu_to_be32(creq->state[i]);
> + }
> }
> }
>
> @@ -504,6 +528,11 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
> CESA_SA_DESC_CFG_LAST_FRAG,
> CESA_SA_DESC_CFG_FRAG_MSK);
>
> + ret = mv_cesa_dma_add_result_op(chain,
> + CESA_SA_MAC_IIV_SRAM_OFFSET, 96,
> + CESA_TDMA_SRC_IN_SRAM, flags);
> + if (ret)
> + return ERR_PTR(-ENOMEM);
> return op;
> }
>
> @@ -564,6 +593,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> struct mv_cesa_op_ctx *op = NULL;
> unsigned int frag_len;
> int ret;
> + u32 type;

How about naming this variable break_chain? This would be clearer IMO.

bool break_chain = false;

>
> basereq->chain.first = NULL;
> basereq->chain.last = NULL;
> @@ -635,6 +665,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> goto err_free_tdma;
> }
>
> + type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
> +

	if ((basereq->chain.last->flags & CESA_TDMA_TYPE_MSK) !=
	    CESA_TDMA_RESULT)
		break_chain = true;

> if (op) {
> /* Add dummy desc to wait for crypto operation end */
> ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
> @@ -648,8 +680,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> else
> creq->cache_ptr = 0;
>
> - basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
> - CESA_TDMA_BREAK_CHAIN);
> + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
> + /*
> + * If results are copied via DMA, this means that this
> + * request can be directly processed by the engine,
> + * without partial updates. So we can chain it at the
> + * DMA level with other requests.
> + */

Move the comment to where you're assigning break_chain to true.

> + if (type != CESA_TDMA_RESULT)

if (break_chain)

> + basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
>
> return 0;
>