2021-05-21 11:49:04

by Ard Biesheuvel

[permalink] [raw]
Subject: [PATCH v5 4/5] crypto: arm64/aes-ccm - remove non-SIMD fallback path

AES/CCM on arm64 is implemented as a synchronous AEAD, and so it is
guaranteed by the API that it is only invoked in task or softirq
context. Since softirqs are now only handled when the SIMD is not
being used in the task context that was interrupted to service the
softirq, we no longer need a fallback path. Let's remove it.

Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm64/crypto/aes-ce-ccm-core.S | 1 +
arch/arm64/crypto/aes-ce-ccm-glue.c | 181 ++++++--------------
2 files changed, 53 insertions(+), 129 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 99a028e298ed..8adff299fcd3 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -124,6 +124,7 @@ SYM_FUNC_START(ce_aes_ccm_final)
SYM_FUNC_END(ce_aes_ccm_final)

.macro aes_ccm_do_crypt,enc
+ cbz x2, 5f
ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.16b}, [x5] /* load mac */
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index f6d19b0dc893..8effd18429ac 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -99,36 +99,8 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
u32 abytes, u32 *macp)
{
- if (crypto_simd_usable()) {
- kernel_neon_begin();
- ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
- num_rounds(key));
- kernel_neon_end();
- } else {
- if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
- int added = min(abytes, AES_BLOCK_SIZE - *macp);
-
- crypto_xor(&mac[*macp], in, added);
-
- *macp += added;
- in += added;
- abytes -= added;
- }
-
- while (abytes >= AES_BLOCK_SIZE) {
- aes_encrypt(key, mac, mac);
- crypto_xor(mac, in, AES_BLOCK_SIZE);
-
- in += AES_BLOCK_SIZE;
- abytes -= AES_BLOCK_SIZE;
- }
-
- if (abytes > 0) {
- aes_encrypt(key, mac, mac);
- crypto_xor(mac, in, abytes);
- *macp = abytes;
- }
- }
+ ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+ num_rounds(key));
}

static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
@@ -171,54 +143,6 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
} while (len);
}

-static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
- struct crypto_aes_ctx *ctx, bool enc)
-{
- u8 buf[AES_BLOCK_SIZE];
- int err = 0;
-
- while (walk->nbytes) {
- int blocks = walk->nbytes / AES_BLOCK_SIZE;
- u32 tail = walk->nbytes % AES_BLOCK_SIZE;
- u8 *dst = walk->dst.virt.addr;
- u8 *src = walk->src.virt.addr;
- u32 nbytes = walk->nbytes;
-
- if (nbytes == walk->total && tail > 0) {
- blocks++;
- tail = 0;
- }
-
- do {
- u32 bsize = AES_BLOCK_SIZE;
-
- if (nbytes < AES_BLOCK_SIZE)
- bsize = nbytes;
-
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- aes_encrypt(ctx, buf, walk->iv);
- aes_encrypt(ctx, mac, mac);
- if (enc)
- crypto_xor(mac, src, bsize);
- crypto_xor_cpy(dst, src, buf, bsize);
- if (!enc)
- crypto_xor(mac, dst, bsize);
- dst += bsize;
- src += bsize;
- nbytes -= bsize;
- } while (--blocks);
-
- err = skcipher_walk_done(walk, tail);
- }
-
- if (!err) {
- aes_encrypt(ctx, buf, iv0);
- aes_encrypt(ctx, mac, mac);
- crypto_xor(mac, buf, AES_BLOCK_SIZE);
- }
- return err;
-}
-
static int ccm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -233,41 +157,41 @@ static int ccm_encrypt(struct aead_request *req)
if (err)
return err;

- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
-
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);

err = skcipher_walk_aead_encrypt(&walk, req, false);
+ if (unlikely(err))
+ return err;

- if (crypto_simd_usable()) {
- while (walk.nbytes) {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ kernel_neon_begin();

- if (walk.nbytes == walk.total)
- tail = 0;
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);

- kernel_neon_begin();
- ce_aes_ccm_encrypt(walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes - tail, ctx->key_enc,
- num_rounds(ctx), mac, walk.iv);
- kernel_neon_end();
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;

+ if (walk.nbytes == walk.total)
+ tail = 0;
+
+ ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ if (walk.nbytes == walk.total)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (walk.nbytes) {
err = skcipher_walk_done(&walk, tail);
+ if (unlikely(err))
+ return err;
+ if (unlikely(walk.nbytes))
+ kernel_neon_begin();
}
- if (!err) {
- kernel_neon_begin();
- ce_aes_ccm_final(mac, buf, ctx->key_enc,
- num_rounds(ctx));
- kernel_neon_end();
- }
- } else {
- err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
- }
- if (err)
- return err;
+ } while (walk.nbytes);

/* copy authtag to end of dst */
scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
@@ -291,42 +215,41 @@ static int ccm_decrypt(struct aead_request *req)
if (err)
return err;

- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
-
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);

err = skcipher_walk_aead_decrypt(&walk, req, false);
+ if (unlikely(err))
+ return err;

- if (crypto_simd_usable()) {
- while (walk.nbytes) {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ kernel_neon_begin();

- if (walk.nbytes == walk.total)
- tail = 0;
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == walk.total)
+ tail = 0;
+
+ ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);

- kernel_neon_begin();
- ce_aes_ccm_decrypt(walk.dst.virt.addr,
- walk.src.virt.addr,
- walk.nbytes - tail, ctx->key_enc,
- num_rounds(ctx), mac, walk.iv);
- kernel_neon_end();
+ if (walk.nbytes == walk.total)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));

+ kernel_neon_end();
+
+ if (walk.nbytes) {
err = skcipher_walk_done(&walk, tail);
+ if (unlikely(err))
+ return err;
+ if (unlikely(walk.nbytes))
+ kernel_neon_begin();
}
- if (!err) {
- kernel_neon_begin();
- ce_aes_ccm_final(mac, buf, ctx->key_enc,
- num_rounds(ctx));
- kernel_neon_end();
- }
- } else {
- err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
- }
-
- if (err)
- return err;
+ } while (walk.nbytes);

/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src,
--
2.20.1


2021-05-24 21:52:58

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH v5 4/5] crypto: arm64/aes-ccm - remove non-SIMD fallback path

On Fri, May 21, 2021 at 12:20:52PM +0200, Ard Biesheuvel wrote:
> AES/CCM on arm64 is implemented as a synchronous AEAD, and so it is
> guaranteed by the API that it is only invoked in task or softirq
> context. Since softirqs are now only handled when the SIMD is not
> being used in the task context that was interrupted to service the
> softirq, we no longer need a fallback path. Let's remove it.
>
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
> arch/arm64/crypto/aes-ce-ccm-core.S | 1 +
> arch/arm64/crypto/aes-ce-ccm-glue.c | 181 ++++++--------------
> 2 files changed, 53 insertions(+), 129 deletions(-)

This doesn't just remove the no-SIMD fallback, but it also does some
refactoring. Notably, it starts to process all the authenticated data in one
kernel_neon_begin() / kernel_neon_end() pair rather than many. Can you explain
why that is okay now when previously it wasn't, and also split this into two
separate commits?

- Eric

2021-05-26 09:14:26

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v5 4/5] crypto: arm64/aes-ccm - remove non-SIMD fallback path

On Mon, 24 May 2021 at 23:51, Eric Biggers <[email protected]> wrote:
>
> On Fri, May 21, 2021 at 12:20:52PM +0200, Ard Biesheuvel wrote:
> > AES/CCM on arm64 is implemented as a synchronous AEAD, and so it is
> > guaranteed by the API that it is only invoked in task or softirq
> > context. Since softirqs are now only handled when the SIMD is not
> > being used in the task context that was interrupted to service the
> > softirq, we no longer need a fallback path. Let's remove it.
> >
> > Signed-off-by: Ard Biesheuvel <[email protected]>
> > ---
> > arch/arm64/crypto/aes-ce-ccm-core.S | 1 +
> > arch/arm64/crypto/aes-ce-ccm-glue.c | 181 ++++++--------------
> > 2 files changed, 53 insertions(+), 129 deletions(-)
>
> This doesn't just remove the no-SIMD fallback, but it also does some
> refactoring. Notably, it starts to process all the authenticated data in one
> kernel_neon_begin() / kernel_neon_end() pair rather than many. Can you explain
> why that is okay now when previously it wasn't, and also split this into two
> separate commits?
>

OK.

For the record, the reason is that, even though kernel_neon_begin/end
are reasonably cheap these days, the common case for CCM (given its
use in networking contexts) is for the auth/encrypt/finalize routines
to each be called a single time, without any potentially sleeping
calls into the skcipher walk layer in between. Now that the
kernel_neon_begin/end pair is doing more work (disabling softirq
processing as well as preemption), it was a suitable occasion to do
some refactoring that I have had on my list for a while now.