2023-06-09 10:44:08

by Meenakshi Aggarwal

[permalink] [raw]
Subject: [PATCH v2 1/5] crypto:caam - avoid allocating memory at crypto request runtime for skcipher

From: Meenakshi Aggarwal <[email protected]>

Remove the CRYPTO_ALG_ALLOCATES_MEMORY flag and allocate the memory
that the driver needs to fulfil a request within the crypto
request object.
The extra size needed for the base extended descriptor, hw
descriptor commands, link tables and IV is computed during frontend
driver (caamalg) initialization and saved in the reqsize field,
which indicates how much memory could be needed per request.

In reqsize we reserve memory for a maximum of 4 entries
for src and 1 for IV, and the same for dst, both aligned.
If the driver needs more than the 4 entries maximum, the memory
is dynamically allocated, at runtime.

Signed-off-by: Iuliana Prodan <[email protected]>
Signed-off-by: Meenakshi Aggarwal <[email protected]>
---
drivers/crypto/caam/caamalg.c | 73 ++++++++++++++++++++++++++---------
1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index feb86013dbf6..dbc5d5eaf695 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -911,6 +911,7 @@ struct aead_edesc {
* @iv_dma: dma address of iv for checking continuity and link table
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @bklog: stored to determine if the request needs backlog
+ * @free: stored to determine if skcipher_edesc needs to be freed
* @sec4_sg_dma: bus physical mapped address of h/w link table
* @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
@@ -924,6 +925,7 @@ struct skcipher_edesc {
dma_addr_t iv_dma;
int sec4_sg_bytes;
bool bklog;
+ bool free;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
u32 hw_desc[];
@@ -1049,7 +1051,8 @@ static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err,
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);

- kfree(edesc);
+ if (edesc->free)
+ kfree(edesc);

/*
* If no backlog flag, the completion of the request is done
@@ -1690,20 +1693,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,

sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);

- /*
- * allocate space for base edesc and hw desc commands, link tables, IV
- */
+ /* Check if there's enough space for edesc saved in req */
aligned_size = sizeof(*edesc) + desc_bytes + sec4_sg_bytes;
aligned_size = ALIGN(aligned_size, dma_get_cache_alignment());
aligned_size += ~(ARCH_KMALLOC_MINALIGN - 1) &
(dma_get_cache_alignment() - 1);
aligned_size += ALIGN(ivsize, dma_get_cache_alignment());
- edesc = kzalloc(aligned_size, flags);
- if (!edesc) {
- dev_err(jrdev, "could not allocate extended descriptor\n");
- caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
- return ERR_PTR(-ENOMEM);
+
+ if (aligned_size > (crypto_skcipher_reqsize(skcipher) -
+ sizeof(struct caam_skcipher_req_ctx))) {
+ /*
+ * allocate space for base edesc and hw desc commands,
+ * link tables, IV
+ */
+ edesc = kzalloc(aligned_size, flags);
+ if (!edesc) {
+ caam_unmap(jrdev, req->src, req->dst, src_nents,
+ dst_nents, 0, 0, 0, 0);
+ return ERR_PTR(-ENOMEM);
+ }
+ edesc->free = true;
+ } else {
+ /*
+ * get address for base edesc and hw desc commands,
+ * link tables, IV
+ */
+ edesc = (struct skcipher_edesc *)((u8 *)rctx +
+ sizeof(struct caam_skcipher_req_ctx));
+ /* clear memory */
+ memset(edesc, 0, sizeof(*edesc));
}

edesc->src_nents = src_nents;
@@ -1725,7 +1743,8 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents,
dst_nents, 0, 0, 0, 0);
- kfree(edesc);
+ if (edesc->free)
+ kfree(edesc);
return ERR_PTR(-ENOMEM);
}

@@ -1755,7 +1774,8 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
dev_err(jrdev, "unable to map S/G table\n");
caam_unmap(jrdev, req->src, req->dst, src_nents,
dst_nents, iv_dma, ivsize, 0, 0);
- kfree(edesc);
+ if (edesc->free)
+ kfree(edesc);
return ERR_PTR(-ENOMEM);
}
}
@@ -1786,7 +1806,8 @@ static int skcipher_do_one_req(struct crypto_engine *engine, void *areq)

if (ret != -EINPROGRESS) {
skcipher_unmap(ctx->jrdev, rctx->edesc, req);
- kfree(rctx->edesc);
+ if (rctx->edesc->free)
+ kfree(rctx->edesc);
} else {
ret = 0;
}
@@ -1863,7 +1884,8 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)

if ((ret != -EINPROGRESS) && (ret != -EBUSY)) {
skcipher_unmap(jrdev, edesc, req);
- kfree(edesc);
+ if (edesc->free)
+ kfree(edesc);
}

return ret;
@@ -3415,10 +3437,22 @@ static int caam_cra_init(struct crypto_skcipher *tfm)
container_of(alg, typeof(*caam_alg), skcipher);
struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK;
- int ret = 0;
+ int ret = 0, extra_reqsize = 0;

ctx->enginectx.op.do_one_request = skcipher_do_one_req;

+ /*
+ * Compute extra space needed for base edesc and
+ * hw desc commands, link tables, IV
+ */
+ extra_reqsize = sizeof(struct skcipher_edesc) +
+ DESC_JOB_IO_LEN * CAAM_CMD_SZ + /* hw desc commands */
+ /* link tables for src and dst:
+ * 4 entries max + 1 for IV, aligned = 8
+ */
+ (16 * sizeof(struct sec4_sg_entry)) +
+ AES_BLOCK_SIZE; /* ivsize */
+
if (alg_aai == OP_ALG_AAI_XTS) {
const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
struct crypto_skcipher *fallback;
@@ -3433,9 +3467,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm)

ctx->fallback = fallback;
crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx) +
- crypto_skcipher_reqsize(fallback));
+ crypto_skcipher_reqsize(fallback) +
+ extra_reqsize);
} else {
- crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx));
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx) +
+ extra_reqsize);
}

ret = caam_init_common(ctx, &caam_alg->caam, false);
@@ -3508,8 +3544,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
alg->base.cra_module = THIS_MODULE;
alg->base.cra_priority = CAAM_CRA_PRIORITY;
alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
- alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
- CRYPTO_ALG_KERN_DRIVER_ONLY);
+ alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY);

alg->init = caam_cra_init;
alg->exit = caam_cra_exit;
--
2.25.1



2023-06-10 02:19:04

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH v2 1/5] crypto:caam - avoid allocating memory at crypto request runtime for skcipher

On Fri, Jun 09, 2023 at 12:38:38PM +0200, [email protected] wrote:
> If the driver needs more than the 4 entries maximum, the memory
> is dynamically allocated, at runtime.

Again, this is not currently allowed. Please see my comment on v1:
https://lore.kernel.org/linux-crypto/20230523165503.GA864814@google.com

- Eric

2023-06-12 08:31:58

by Meenakshi Aggarwal

[permalink] [raw]
Subject: RE: [PATCH v2 1/5] crypto:caam - avoid allocating memory at crypto request runtime for skcipher

Hi Eric,


Because of the CRYPTO_ALG_ALLOCATES_MEMORY flag, caam algorithms are currently being skipped by dm-crypt, and we want to enable dm-crypt caam offload support.
As per earlier discussions, we are pre-allocating memory for 4 S/G entries and, if needed, extra memory will be allocated at runtime.

I suggest we update the documentation of CRYPTO_ALG_ALLOCATES_MEMORY with the above information.

Thanks,
Meenakshi

> -----Original Message-----
> From: Eric Biggers <[email protected]>
> Sent: Saturday, June 10, 2023 7:49 AM
> To: Meenakshi Aggarwal <[email protected]>
> Cc: Horia Geanta <[email protected]>; Varun Sethi <[email protected]>;
> Pankaj Gupta <[email protected]>; Gaurav Jain <[email protected]>;
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]; Iuliana Prodan
> <[email protected]>
> Subject: Re: [PATCH v2 1/5] crypto:caam - avoid allocating memory at crypto
> request runtime for skcipher
>
> On Fri, Jun 09, 2023 at 12:38:38PM +0200, [email protected]
> wrote:
> > If the driver needs more than the 4 entries maximum, the memory is
> > dynamically allocated, at runtime.
>
> Again, this is not currently allowed. Please see my comment on v1:
> https://lore.kernel.org/linux-crypto/20230523165503.GA864814@google.com
>
> - Eric