2020-11-29 18:23:41

by Ard Biesheuvel

[permalink] [raw]
Subject: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation

From: Steve deRosier <[email protected]>

Add ccm(aes) implementation from linux-wireless mailing list (see
http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).

This eliminates the FPU context store/restore overhead that the more
general ccm_base(ctr(aes-aesni),aes-aesni) case incurs during MAC calculation.

Suggested-by: Ben Greear <[email protected]>
Co-developed-by: Steve deRosier <[email protected]>
Signed-off-by: Steve deRosier <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
---
Ben,

This is almost a rewrite of the original patch, switching to the new
skcipher API, using the existing SIMD helper, and dropping numerous unrelated
changes. The basic approach is almost identical, though, so I expect this
to perform on par with, or perhaps slightly faster than, the original.

Could you please confirm with some numbers?

Thanks,
Ard.


arch/x86/crypto/aesni-intel_glue.c | 310 ++++++++++++++++++++
1 file changed, 310 insertions(+)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ad8a7188a2bf..f59f3c8772a6 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -513,6 +513,298 @@ static int ctr_crypt(struct skcipher_request *req)
return err;
}

+/*
+ * Install the AES key of a ccm(aes) AEAD transform.
+ *
+ * The transform's context and the raw key are handed to
+ * aes_set_key_common(); whatever that helper returns is passed straight
+ * back to the caller.
+ */
+static int aesni_ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	return aes_set_key_common(crypto_aead_tfm(tfm), crypto_aead_ctx(tfm),
+				  in_key, key_len);
+}
+
+/*
+ * Validate a requested authentication tag length.
+ *
+ * Only even sizes of at least 4 bytes are accepted; anything else is
+ * rejected with -EINVAL.  @tfm is not consulted.
+ */
+static int aesni_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	return (authsize >= 4 && !(authsize & 1)) ? 0 : -EINVAL;
+}
+
+/*
+ * Store @msglen as a big-endian integer in the @csize-byte field at @block.
+ *
+ * The whole field is cleared first.  Since @msglen is a 32-bit quantity, at
+ * most the four low-order bytes of the field are then filled in.
+ *
+ * Returns 0 on success, or -EOVERFLOW when @csize is below 4 and @msglen
+ * cannot be represented in @csize bytes.
+ */
+static int ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+	__be32 data;
+
+	memset(block, 0, csize);
+	block += csize;
+
+	/*
+	 * A length of exactly 1 << (8 * csize) already needs csize + 1 bytes,
+	 * so check for any bit set above the field rather than comparing
+	 * against the limit with '>', which would let that value through and
+	 * encode it as 0.
+	 */
+	if (csize >= 4)
+		csize = 4;
+	else if (msglen >> (8 * csize))
+		return -EOVERFLOW;
+
+	/* copy the csize least significant bytes of the big-endian value */
+	data = cpu_to_be32(msglen);
+	memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+	return 0;
+}
+
+/*
+ * Build the first CBC-MAC input block in @maciv from the request IV.
+ *
+ * @req:    AEAD request; req->iv[0] carries the CCM 'L' parameter minus one,
+ *          followed by the nonce
+ * @maciv:  AES_BLOCK_SIZE byte output buffer
+ * @msglen: length of the message to be authenticated
+ *
+ * As a side effect the trailing L bytes of req->iv are cleared.
+ *
+ * Returns 0 on success, -EINVAL if L is outside 2..8, or -EOVERFLOW if
+ * @msglen cannot be represented in L bytes.
+ */
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	u32 l = req->iv[0] + 1;
+
+	/* verify that CCM dimension 'L' is set correctly in the IV */
+	if (l < 2 || l > 8)
+		return -EINVAL;
+
+	/* verify that msglen can in fact be represented in L bytes */
+	if (l < 4 && msglen >> (8 * l))
+		return -EOVERFLOW;
+
+	/* flags byte and nonce come straight from the IV */
+	memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+	/*
+	 * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+	 * - bits 0..2 : max # of bytes required to represent msglen, minus 1
+	 *               (already set by caller)
+	 * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+	 * - bit 6     : indicates presence of authenticate-only data
+	 */
+	maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+	if (req->assoclen)
+		maciv[0] |= 0x40;
+
+	memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+
+	/*
+	 * Even if the CCM spec allows L values of up to 8, msglen is a u32
+	 * here, so any bytes of the length field above the low four end up
+	 * as zero.  The field is written byte-wise by ccm_set_msg_len(), so
+	 * no particular alignment of maciv is required.
+	 */
+	return ccm_set_msg_len(maciv + AES_BLOCK_SIZE - l, msglen, l);
+}
+
+/*
+ * Fold @n bytes at @data into the CBC-MAC state held in @mac.
+ *
+ * @idata is a one-block staging buffer that already contains @ilen pending
+ * bytes from earlier calls.  Whole blocks are passed through
+ * aesni_cbc_enc() with @mac acting as both chaining value and output;
+ * input that does not complete a block is parked in @idata.
+ *
+ * Returns the number of bytes pending in @idata afterwards.
+ */
+static int compute_mac(struct crypto_aes_ctx *ctx, u8 mac[], u8 *data, int n,
+		       unsigned int ilen, u8 *idata)
+{
+	const unsigned int blk = AES_BLOCK_SIZE;
+	int remaining = n;
+	int fill = blk - ilen;
+
+	/* top up the staging buffer and flush it once it holds a full block */
+	if (remaining >= fill) {
+		memcpy(idata + ilen, data, fill);
+		aesni_cbc_enc(ctx, mac, idata, AES_BLOCK_SIZE, mac);
+
+		data += fill;
+		remaining -= fill;
+		ilen = 0;
+	}
+
+	/* consume every remaining complete block directly from the input */
+	for (; remaining >= blk; remaining -= blk, data += blk)
+		aesni_cbc_enc(ctx, mac, data, AES_BLOCK_SIZE, mac);
+
+	/* stash a trailing partial block for the next call */
+	if (remaining) {
+		memcpy(idata + ilen, data, remaining);
+		ilen += remaining;
+	}
+
+	return ilen;
+}
+
+/*
+ * Authenticate the associated data of @req.
+ *
+ * A length tag (2 bytes, or 6 bytes once assoclen reaches 0xff00) is placed
+ * in front of the associated data, which is then read from the scatterlist
+ * @src and fed into the CBC-MAC state @mac through compute_mac().
+ */
+static void ccm_calculate_auth_mac(struct aead_request *req,
+				   struct crypto_aes_ctx *ctx, u8 mac[],
+				   struct scatterlist *src)
+{
+	struct {
+		__be16 l;
+		__be32 h;
+	} __packed *ltag;
+	unsigned int remaining = req->assoclen;
+	u8 idata[AES_BLOCK_SIZE];
+	struct scatter_walk walk;
+	unsigned int ilen = 2;
+
+	/* the length tag occupies the start of the staging block */
+	ltag = (void *)idata;
+	if (remaining >= 0xff00) {
+		ltag->l = cpu_to_be16(0xfffe);
+		ltag->h = cpu_to_be32(remaining);
+		ilen = 6;
+	} else {
+		ltag->l = cpu_to_be16(remaining);
+	}
+
+	scatterwalk_start(&walk, src);
+
+	while (remaining) {
+		int n = scatterwalk_clamp(&walk, remaining);
+		u8 *p;
+
+		/* nothing left in this entry: move on to the next one */
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, remaining);
+		}
+
+		p = scatterwalk_map(&walk);
+		ilen = compute_mac(ctx, mac, p, n, ilen, idata);
+		remaining -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, remaining);
+	}
+
+	if (!ilen)
+		return;
+
+	/*
+	 * XORing only the pending bytes into the MAC is the same as XORing
+	 * in a zero-padded block; the result is then encrypted.
+	 */
+	crypto_xor(mac, idata, ilen);
+	aesni_enc(ctx, mac, mac);
+}
+
+/*
+ * CCM encryption entry point.
+ *
+ * The initial MAC block is built by ccm_init_mac().  Inside one
+ * kernel_fpu_begin()/kernel_fpu_end() section the associated data and the
+ * plaintext are folded into the MAC with aesni_cbc_enc() while the
+ * ciphertext is produced with aesni_ctr_enc().  Finally the MAC is
+ * encrypted with the counter block whose trailing 'l' bytes are zero, and
+ * crypto_aead_authsize() bytes of it are written to req->dst directly
+ * behind the ciphertext.
+ *
+ * Returns 0 on success, the error from ccm_init_mac() when the IV or the
+ * length is rejected, or the error reported by the skcipher walk.
+ */
+static int aesni_ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen);
+	if (err)
+		return err;
+
+	kernel_fpu_begin();
+
+	aesni_enc(ctx, mac, mac);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src);
+
+	/* ccm_init_mac() cleared the counter bytes; payload starts at 1 */
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		/* MAC the plaintext first: dst may alias src */
+		for (n = 0; n < len; n += AES_BLOCK_SIZE)
+			aesni_cbc_enc(ctx, mac, walk.src.virt.addr + n,
+				      AES_BLOCK_SIZE, mac);
+
+		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
+			      walk.iv);
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		/* zero-pad the trailing partial block for the MAC */
+		u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
+
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
+
+		ctr_crypt_final(ctx, &walk);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	/*
+	 * Rebuild the counter-zero block in req->iv rather than in walk.iv:
+	 * the walk has completed by now, so walk.iv should not be relied upon
+	 * any longer (NOTE(review): skcipher_walk_done() may release a
+	 * temporary buffer that walk.iv points into - confirm).  Only the
+	 * trailing l counter bytes of req->iv are expected to have changed
+	 * since ccm_init_mac(), and those are cleared here.
+	 */
+	memset(req->iv + AES_BLOCK_SIZE - l, 0, l);
+	aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, req->iv);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+fail:
+	kernel_fpu_end();
+	return err;
+}
+
+/*
+ * CCM decryption entry point.
+ *
+ * The stored tag (the last authsize bytes of the src data) is read first.
+ * Inside one kernel_fpu_begin()/kernel_fpu_end() section the ciphertext is
+ * decrypted with aesni_ctr_enc() and the resulting plaintext, together
+ * with the associated data, is folded into the MAC with aesni_cbc_enc().
+ * The MAC is then encrypted with the counter block whose trailing 'l'
+ * bytes are zero and compared against the stored tag.
+ *
+ * Returns 0 on success, -EBADMSG if the tags differ, the error from
+ * ccm_init_mac() when the IV or the length is rejected, or the error
+ * reported by the skcipher walk.
+ */
+static int aesni_ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	unsigned int authsize = crypto_aead_authsize(aead);
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 __aligned(8) tag[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen - authsize);
+	if (err)
+		return err;
+
+	/* copy authtag from end of src */
+	scatterwalk_map_and_copy(tag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	kernel_fpu_begin();
+
+	aesni_enc(ctx, mac, mac);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src);
+
+	/* ccm_init_mac() cleared the counter bytes; payload starts at 1 */
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		/* decrypt first: the MAC is computed over the plaintext */
+		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
+			      walk.iv);
+
+		for (n = 0; n < len; n += AES_BLOCK_SIZE)
+			aesni_cbc_enc(ctx, mac, walk.dst.virt.addr + n,
+				      AES_BLOCK_SIZE, mac);
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		/* zero-pad the trailing partial block for the MAC */
+		u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
+
+		ctr_crypt_final(ctx, &walk);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	/*
+	 * Rebuild the counter-zero block in req->iv rather than in walk.iv:
+	 * the walk has completed by now, so walk.iv should not be relied upon
+	 * any longer (NOTE(review): skcipher_walk_done() may release a
+	 * temporary buffer that walk.iv points into - confirm).  Only the
+	 * trailing l counter bytes of req->iv are expected to have changed
+	 * since ccm_init_mac(), and those are cleared here.
+	 */
+	memset(req->iv + AES_BLOCK_SIZE - l, 0, l);
+	aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, req->iv);
+
+	/* compare calculated auth tag with the stored one */
+	if (crypto_memneq(mac, tag, authsize))
+		err = -EBADMSG;
+
+fail:
+	kernel_fpu_end();
+	return err;
+}
+
static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
@@ -1044,6 +1336,24 @@ static struct aead_alg aesni_aeads[] = { {
.cra_alignmask = AESNI_ALIGN - 1,
.cra_module = THIS_MODULE,
},
+}, {
+ .setkey = aesni_ccm_setkey,
+ .setauthsize = aesni_ccm_setauthsize,
+ .encrypt = aesni_ccm_encrypt,
+ .decrypt = aesni_ccm_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_name = "__ccm(aes)",
+ .cra_driver_name = "__ccm-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = AESNI_ALIGN - 1,
+ .cra_module = THIS_MODULE,
+ },
} };
#else
static struct aead_alg aesni_aeads[0];
--
2.17.1


2020-11-29 18:24:12

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation

On Sun, 29 Nov 2020 at 19:20, Ard Biesheuvel <[email protected]> wrote:
>
> From: Steve deRosier <[email protected]>
>

Whoops - please ignore this line.

> Add ccm(aes) implementation from linux-wireless mailing list (see
> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
>
> This eliminates FPU context store/restore overhead existing in more
> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
>
> Suggested-by: Ben Greear <[email protected]>
> Co-developed-by: Steve deRosier <[email protected]>
> Signed-off-by: Steve deRosier <[email protected]>
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
> Ben,
>
> This is almost a rewrite of the original patch, switching to the new
> skcipher API, using the existing SIMD helper, and drop numerous unrelated
> changes. The basic approach is almost identical, though, so I expect this
> to perform on par or perhaps slightly faster than the original.
>
> Could you please confirm with some numbers?
>
> Thanks,
> Ard.
>
>
> arch/x86/crypto/aesni-intel_glue.c | 310 ++++++++++++++++++++
> 1 file changed, 310 insertions(+)
>
> diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
> index ad8a7188a2bf..f59f3c8772a6 100644
> --- a/arch/x86/crypto/aesni-intel_glue.c
> +++ b/arch/x86/crypto/aesni-intel_glue.c
> @@ -513,6 +513,298 @@ static int ctr_crypt(struct skcipher_request *req)
> return err;
> }
>
> +static int aesni_ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
> + unsigned int key_len)
> +{
> + struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
> +
> + return aes_set_key_common(crypto_aead_tfm(tfm), ctx, in_key, key_len);
> +}
> +
> +static int aesni_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
> +{
> + if ((authsize & 1) || authsize < 4)
> + return -EINVAL;
> + return 0;
> +}
> +
> +static int ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
> +{
> + __be32 data;
> +
> + memset(block, 0, csize);
> + block += csize;
> +
> + if (csize >= 4)
> + csize = 4;
> + else if (msglen > (1 << (8 * csize)))
> + return -EOVERFLOW;
> +
> + data = cpu_to_be32(msglen);
> + memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
> +
> + return 0;
> +}
> +
> +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
> +{
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
> + u32 l = req->iv[0] + 1;
> +
> + /* verify that CCM dimension 'L' is set correctly in the IV */
> + if (l < 2 || l > 8)
> + return -EINVAL;
> +
> + /* verify that msglen can in fact be represented in L bytes */
> + if (l < 4 && msglen >> (8 * l))
> + return -EOVERFLOW;
> +
> + /*
> + * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
> + * uses a u32 type to represent msglen so the top 4 bytes are always 0.
> + */
> + n[0] = 0;
> + n[1] = cpu_to_be32(msglen);
> +
> + memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
> +
> + /*
> + * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
> + * - bits 0..2 : max # of bytes required to represent msglen, minus 1
> + * (already set by caller)
> + * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
> + * - bit 6 : indicates presence of authenticate-only data
> + */
> + maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
> + if (req->assoclen)
> + maciv[0] |= 0x40;
> +
> + memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
> + return ccm_set_msg_len(maciv + AES_BLOCK_SIZE - l, msglen, l);
> +}
> +
> +static int compute_mac(struct crypto_aes_ctx *ctx, u8 mac[], u8 *data, int n,
> + unsigned int ilen, u8 *idata)
> +{
> + unsigned int bs = AES_BLOCK_SIZE;
> + u8 *odata = mac;
> + int datalen, getlen;
> +
> + datalen = n;
> +
> + /* first time in here, block may be partially filled. */
> + getlen = bs - ilen;
> + if (datalen >= getlen) {
> + memcpy(idata + ilen, data, getlen);
> +
> + aesni_cbc_enc(ctx, odata, idata, AES_BLOCK_SIZE, odata);
> +
> + datalen -= getlen;
> + data += getlen;
> + ilen = 0;
> + }
> +
> + /* now encrypt rest of data */
> + while (datalen >= bs) {
> + aesni_cbc_enc(ctx, odata, data, AES_BLOCK_SIZE, odata);
> +
> + datalen -= bs;
> + data += bs;
> + }
> +
> + /* check and see if there's leftover data that wasn't
> + * enough to fill a block.
> + */
> + if (datalen) {
> + memcpy(idata + ilen, data, datalen);
> + ilen += datalen;
> + }
> + return ilen;
> +}
> +
> +static void ccm_calculate_auth_mac(struct aead_request *req,
> + struct crypto_aes_ctx *ctx, u8 mac[],
> + struct scatterlist *src)
> +{
> + unsigned int len = req->assoclen;
> + struct scatter_walk walk;
> + u8 idata[AES_BLOCK_SIZE];
> + unsigned int ilen;
> + struct {
> + __be16 l;
> + __be32 h;
> + } __packed *ltag = (void *)idata;
> +
> + /* prepend the AAD with a length tag */
> + if (len < 0xff00) {
> + ltag->l = cpu_to_be16(len);
> + ilen = 2;
> + } else {
> + ltag->l = cpu_to_be16(0xfffe);
> + ltag->h = cpu_to_be32(len);
> + ilen = 6;
> + }
> +
> + scatterwalk_start(&walk, src);
> +
> + while (len) {
> + u8 *src;
> + int n;
> +
> + n = scatterwalk_clamp(&walk, len);
> + if (!n) {
> + scatterwalk_start(&walk, sg_next(walk.sg));
> + n = scatterwalk_clamp(&walk, len);
> + }
> + src = scatterwalk_map(&walk);
> +
> + ilen = compute_mac(ctx, mac, src, n, ilen, idata);
> + len -= n;
> +
> + scatterwalk_unmap(src);
> + scatterwalk_advance(&walk, n);
> + scatterwalk_done(&walk, 0, len);
> + }
> +
> + /* any leftover needs padding and then encrypted */
> + if (ilen) {
> + crypto_xor(mac, idata, ilen);
> + aesni_enc(ctx, mac, mac);
> + }
> +}
> +
> +static int aesni_ccm_encrypt(struct aead_request *req)
> +{
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
> + u8 __aligned(8) mac[AES_BLOCK_SIZE];
> + struct skcipher_walk walk;
> + u32 l = req->iv[0] + 1;
> + int err;
> +
> + err = ccm_init_mac(req, mac, req->cryptlen);
> + if (err)
> + return err;
> +
> + kernel_fpu_begin();
> +
> + aesni_enc(ctx, mac, mac);
> +
> + if (req->assoclen)
> + ccm_calculate_auth_mac(req, ctx, mac, req->src);
> +
> + req->iv[AES_BLOCK_SIZE - 1] = 0x1;
> + err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int len = walk.nbytes & AES_BLOCK_MASK;
> + int n;
> +
> + for (n = 0; n < len; n += AES_BLOCK_SIZE)
> + aesni_cbc_enc(ctx, mac, walk.src.virt.addr + n,
> + AES_BLOCK_SIZE, mac);
> +
> + aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
> + walk.iv);
> +
> + err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
> + }
> + if (walk.nbytes) {
> + u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
> +
> + memcpy(buf, walk.src.virt.addr, walk.nbytes);
> + aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
> +
> + ctr_crypt_final(ctx, &walk);
> +
> + err = skcipher_walk_done(&walk, 0);
> + }
> +
> + if (err)
> + goto fail;
> +
> + memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
> + aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
> +
> + /* copy authtag to end of dst */
> + scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
> + crypto_aead_authsize(aead), 1);
> +
> +fail:
> + kernel_fpu_end();
> + return err;
> +}
> +
> +static int aesni_ccm_decrypt(struct aead_request *req)
> +{
> + struct crypto_aead *aead = crypto_aead_reqtfm(req);
> + struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
> + unsigned int authsize = crypto_aead_authsize(aead);
> + u8 __aligned(8) mac[AES_BLOCK_SIZE];
> + u8 __aligned(8) tag[AES_BLOCK_SIZE];
> + struct skcipher_walk walk;
> + u32 l = req->iv[0] + 1;
> + int err;
> +
> + err = ccm_init_mac(req, mac, req->cryptlen - authsize);
> + if (err)
> + return err;
> +
> + /* copy authtag from end of src */
> + scatterwalk_map_and_copy(tag, req->src,
> + req->assoclen + req->cryptlen - authsize,
> + authsize, 0);
> +
> + kernel_fpu_begin();
> +
> + aesni_enc(ctx, mac, mac);
> +
> + if (req->assoclen)
> + ccm_calculate_auth_mac(req, ctx, mac, req->src);
> +
> + req->iv[AES_BLOCK_SIZE - 1] = 0x1;
> + err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> + while (walk.nbytes >= AES_BLOCK_SIZE) {
> + int len = walk.nbytes & AES_BLOCK_MASK;
> + int n;
> +
> + aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
> + walk.iv);
> +
> + for (n = 0; n < len; n += AES_BLOCK_SIZE)
> + aesni_cbc_enc(ctx, mac, walk.dst.virt.addr + n,
> + AES_BLOCK_SIZE, mac);
> +
> + err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
> + }
> + if (walk.nbytes) {
> + u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
> +
> + ctr_crypt_final(ctx, &walk);
> +
> + memcpy(buf, walk.dst.virt.addr, walk.nbytes);
> + aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
> +
> + err = skcipher_walk_done(&walk, 0);
> + }
> +
> + if (err)
> + goto fail;
> +
> + memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
> + aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
> +
> + /* compare calculated auth tag with the stored one */
> + if (crypto_memneq(mac, tag, authsize))
> + err = -EBADMSG;
> +
> +fail:
> + kernel_fpu_end();
> + return err;
> +}
> +
> static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
> unsigned int keylen)
> {
> @@ -1044,6 +1336,24 @@ static struct aead_alg aesni_aeads[] = { {
> .cra_alignmask = AESNI_ALIGN - 1,
> .cra_module = THIS_MODULE,
> },
> +}, {
> + .setkey = aesni_ccm_setkey,
> + .setauthsize = aesni_ccm_setauthsize,
> + .encrypt = aesni_ccm_encrypt,
> + .decrypt = aesni_ccm_decrypt,
> + .ivsize = AES_BLOCK_SIZE,
> + .chunksize = AES_BLOCK_SIZE,
> + .maxauthsize = AES_BLOCK_SIZE,
> + .base = {
> + .cra_name = "__ccm(aes)",
> + .cra_driver_name = "__ccm-aesni",
> + .cra_priority = 400,
> + .cra_flags = CRYPTO_ALG_INTERNAL,
> + .cra_blocksize = 1,
> + .cra_ctxsize = sizeof(struct crypto_aes_ctx),
> + .cra_alignmask = AESNI_ALIGN - 1,
> + .cra_module = THIS_MODULE,
> + },
> } };
> #else
> static struct aead_alg aesni_aeads[0];
> --
> 2.17.1
>

2020-11-30 23:13:54

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation

On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
> From: Steve deRosier <[email protected]>
>
> Add ccm(aes) implementation from linux-wireless mailing list (see
> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
>
> This eliminates FPU context store/restore overhead existing in more
> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
>
> Suggested-by: Ben Greear <[email protected]>
> Co-developed-by: Steve deRosier <[email protected]>
> Signed-off-by: Steve deRosier <[email protected]>
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
> Ben,
>
> This is almost a rewrite of the original patch, switching to the new
> skcipher API, using the existing SIMD helper, and drop numerous unrelated
> changes. The basic approach is almost identical, though, so I expect this
> to perform on par or perhaps slightly faster than the original.
>
> Could you please confirm with some numbers?

I tried this on my apu2 platform, here is perf top during a TCP download using
rx-sw-crypt (ie, the aesni cpu decrypt path):

18.77% [kernel] [k] acpi_idle_enter
14.68% [kernel] [k] kernel_fpu_begin
4.45% [kernel] [k] __crypto_xor
3.46% [kernel] [k] _aesni_enc1

Total throughput is 127Mbps or so. This is with your patch applied to 5.8.0+
kernel (it applied clean with 'git am')

Is there a good way to verify at runtime that I've properly applied your patch?

On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
and perf-top shows:

13.33% [kernel] [k] acpi_idle_do_entry
9.21% [kernel] [k] _aesni_enc1
4.49% [unknown] [.] 0x00007fbc3f00adb6
4.34% [unknown] [.] 0x00007fbc3f00adba
3.85% [kernel] [k] memcpy


So, new patch is not working that well for me...

Thanks,
Ben


--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com

2020-12-01 06:35:19

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation

On Mon, 30 Nov 2020 at 23:48, Ben Greear <[email protected]> wrote:
>
> On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
> > From: Steve deRosier <[email protected]>
> >
> > Add ccm(aes) implementation from linux-wireless mailing list (see
> > http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
> >
> > This eliminates FPU context store/restore overhead existing in more
> > general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
> >
> > Suggested-by: Ben Greear <[email protected]>
> > Co-developed-by: Steve deRosier <[email protected]>
> > Signed-off-by: Steve deRosier <[email protected]>
> > Signed-off-by: Ard Biesheuvel <[email protected]>
> > ---
> > Ben,
> >
> > This is almost a rewrite of the original patch, switching to the new
> > skcipher API, using the existing SIMD helper, and drop numerous unrelated
> > changes. The basic approach is almost identical, though, so I expect this
> > to perform on par or perhaps slightly faster than the original.
> >
> > Could you please confirm with some numbers?
>
> I tried this on my apu2 platform, here is perf top during a TCP download using
> rx-sw-crypt (ie, the aesni cpu decrypt path):
>
> 18.77% [kernel] [k] acpi_idle_enter
> 14.68% [kernel] [k] kernel_fpu_begin
> 4.45% [kernel] [k] __crypto_xor
> 3.46% [kernel] [k] _aesni_enc1
>
> Total throughput is 127Mbps or so. This is with your patch applied to 5.8.0+
> kernel (it applied clean with 'git am')
>
> Is there a good way to verify at runtime that I've properly applied your patch?
>
> On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
> and perf-top shows:
>
> 13.33% [kernel] [k] acpi_idle_do_entry
> 9.21% [kernel] [k] _aesni_enc1
> 4.49% [unknown] [.] 0x00007fbc3f00adb6
> 4.34% [unknown] [.] 0x00007fbc3f00adba
> 3.85% [kernel] [k] memcpy
>
>
> So, new patch is not working that well for me...
>

That is odd. The net number of invocations of kernel_fpu_begin()
should be the same, so I cannot explain why they suddenly take more
time. I am starting to think that this is a different issue
altogether.

One thing that you could try is dropping the '.cra_alignmask' line as
we don't actually need it, but I am skeptical that this is the cause
of this.

2020-12-01 14:28:49

by Ben Greear

[permalink] [raw]
Subject: Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation

On 11/30/20 10:32 PM, Ard Biesheuvel wrote:
> On Mon, 30 Nov 2020 at 23:48, Ben Greear <[email protected]> wrote:
>>
>> On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
>>> From: Steve deRosier <[email protected]>
>>>
>>> Add ccm(aes) implementation from linux-wireless mailing list (see
>>> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
>>>
>>> This eliminates FPU context store/restore overhead existing in more
>>> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
>>>
>>> Suggested-by: Ben Greear <[email protected]>
>>> Co-developed-by: Steve deRosier <[email protected]>
>>> Signed-off-by: Steve deRosier <[email protected]>
>>> Signed-off-by: Ard Biesheuvel <[email protected]>
>>> ---
>>> Ben,
>>>
>>> This is almost a rewrite of the original patch, switching to the new
>>> skcipher API, using the existing SIMD helper, and drop numerous unrelated
>>> changes. The basic approach is almost identical, though, so I expect this
>>> to perform on par or perhaps slightly faster than the original.
>>>
>>> Could you please confirm with some numbers?
>>
>> I tried this on my apu2 platform, here is perf top during a TCP download using
>> rx-sw-crypt (ie, the aesni cpu decrypt path):
>>
>> 18.77% [kernel] [k] acpi_idle_enter
>> 14.68% [kernel] [k] kernel_fpu_begin
>> 4.45% [kernel] [k] __crypto_xor
>> 3.46% [kernel] [k] _aesni_enc1
>>
>> Total throughput is 127Mbps or so. This is with your patch applied to 5.8.0+
>> kernel (it applied clean with 'git am')
>>
>> Is there a good way to verify at runtime that I've properly applied your patch?
>>
>> On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
>> and perf-top shows:
>>
>> 13.33% [kernel] [k] acpi_idle_do_entry
>> 9.21% [kernel] [k] _aesni_enc1
>> 4.49% [unknown] [.] 0x00007fbc3f00adb6
>> 4.34% [unknown] [.] 0x00007fbc3f00adba
>> 3.85% [kernel] [k] memcpy
>>
>>
>> So, new patch is not working that well for me...
>>
>
> That is odd. The net number of invocations of kernel_fpu_begin()
> should be the same, so I cannot explain why they suddenly take more
> time. I am starting to think that this is a different issue
> altogether.
>
> One thing that you could try is dropping the '.cra_alignmask' line as
> we don't actually need it, but I am skeptical that this is the cause
> of this.

Here is tcrypt output from the 5.8 kernel with your patch:

testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[54886.223056] test 0 (160 bit key, 16 byte blocks):
[54887.222241] 723747 operations in 1 seconds (11579952 bytes)
[54887.222274] test 1 (160 bit key, 64 byte blocks):
[54888.222216] 676632 operations in 1 seconds (43304448 bytes)
[54888.222251] test 2 (160 bit key, 256 byte blocks):
[54889.222178] 485715 operations in 1 seconds (124343040 bytes)
[54889.222197] test 3 (160 bit key, 512 byte blocks):
[54890.222169] 355708 operations in 1 seconds (182122496 bytes)
[54890.222188] test 4 (160 bit key, 1024 byte blocks):
[54891.222190] 237094 operations in 1 seconds (242784256 bytes)
[54891.222210] test 5 (160 bit key, 2048 byte blocks):
[54892.222169] 151576 operations in 1 seconds (310427648 bytes)
[54892.222189] test 6 (160 bit key, 4096 byte blocks):
[54893.222182] 89871 operations in 1 seconds (368111616 bytes)
[54893.222230] test 7 (160 bit key, 8192 byte blocks):
[54894.222144] 47446 operations in 1 seconds (388677632 bytes)
[54894.232292]
testing speed of gcm(aes) (generic-gcm-aesni) encryption
[54894.232310] test 0 (128 bit key, 16 byte blocks):
[54895.232121] 744793 operations in 1 seconds (11916688 bytes)
[54895.232139] test 1 (128 bit key, 64 byte blocks):
[54896.232147] 693209 operations in 1 seconds (44365376 bytes)
[54896.232166] test 2 (128 bit key, 256 byte blocks):
[54897.232108] 494839 operations in 1 seconds (126678784 bytes)
[54897.232127] test 3 (128 bit key, 512 byte blocks):
[54898.232129] 356805 operations in 1 seconds (182684160 bytes)
[54898.232148] test 4 (128 bit key, 1024 byte blocks):
[54899.232093] 238977 operations in 1 seconds (244712448 bytes)
[54899.232112] test 5 (128 bit key, 2048 byte blocks):
[54900.232086] 151400 operations in 1 seconds (310067200 bytes)
[54900.232107] test 6 (128 bit key, 4096 byte blocks):
[54901.232080] 88499 operations in 1 seconds (362491904 bytes)
[54901.232128] test 7 (128 bit key, 8192 byte blocks):
[54902.232073] 46508 operations in 1 seconds (380993536 bytes)
[54902.232093] test 8 (192 bit key, 16 byte blocks):
[54903.232055] 734289 operations in 1 seconds (11748624 bytes)
[54903.232074] test 9 (192 bit key, 64 byte blocks):
[54904.232046] 676257 operations in 1 seconds (43280448 bytes)
[54904.232066] test 10 (192 bit key, 256 byte blocks):
[54905.232037] 480367 operations in 1 seconds (122973952 bytes)
[54905.232057] test 11 (192 bit key, 512 byte blocks):
[54906.232028] 344775 operations in 1 seconds (176524800 bytes)
[54906.232048] test 12 (192 bit key, 1024 byte blocks):
[54907.232021] 246743 operations in 1 seconds (252664832 bytes)
[54907.232041] test 13 (192 bit key, 2048 byte blocks):
[54908.232013] 149042 operations in 1 seconds (305238016 bytes)
[54908.232033] test 14 (192 bit key, 4096 byte blocks):
[54909.232034] 83689 operations in 1 seconds (342790144 bytes)
[54909.232053] test 15 (192 bit key, 8192 byte blocks):
[54910.232004] 43424 operations in 1 seconds (355729408 bytes)
[54910.232042] test 16 (256 bit key, 16 byte blocks):
[54911.232030] 720990 operations in 1 seconds (11535840 bytes)
[54911.232050] test 17 (256 bit key, 64 byte blocks):
[54912.232006] 666866 operations in 1 seconds (42679424 bytes)
[54912.232054] test 18 (256 bit key, 256 byte blocks):
[54913.231997] 459305 operations in 1 seconds (117582080 bytes)
[54913.232018] test 19 (256 bit key, 512 byte blocks):
[54914.231958] 322779 operations in 1 seconds (165262848 bytes)
[54914.231979] test 20 (256 bit key, 1024 byte blocks):
[54915.231970] 229525 operations in 1 seconds (235033600 bytes)
[54915.231990] test 21 (256 bit key, 2048 byte blocks):
[54916.231975] 137955 operations in 1 seconds (282531840 bytes)
[54916.231995] test 22 (256 bit key, 4096 byte blocks):
[54917.231998] 75876 operations in 1 seconds (310788096 bytes)
[54917.232035] test 23 (256 bit key, 8192 byte blocks):
[54918.231938] 39803 operations in 1 seconds (326066176 bytes)
[54918.232046]
testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) decryption
[54918.232060] test 0 (160 bit key, 16 byte blocks):
[54919.231914] 711193 operations in 1 seconds (11379088 bytes)
[54919.231933] test 1 (160 bit key, 64 byte blocks):
[54920.231912] 683171 operations in 1 seconds (43722944 bytes)
[54920.231932] test 2 (160 bit key, 256 byte blocks):
[54921.231926] 490569 operations in 1 seconds (125585664 bytes)
[54921.231946] test 3 (160 bit key, 512 byte blocks):
[54922.231904] 354731 operations in 1 seconds (181622272 bytes)
[54922.231938] test 4 (160 bit key, 1024 byte blocks):
[54923.231879] 236161 operations in 1 seconds (241828864 bytes)
[54923.231930] test 5 (160 bit key, 2048 byte blocks):
[54924.231897] 148859 operations in 1 seconds (304863232 bytes)
[54924.231917] test 6 (160 bit key, 4096 byte blocks):
[54925.231866] 87114 operations in 1 seconds (356818944 bytes)
[54925.231885] test 7 (160 bit key, 8192 byte blocks):
[54926.231888] 46273 operations in 1 seconds (379068416 bytes)
[54926.232049]
testing speed of gcm(aes) (generic-gcm-aesni) decryption
[54926.232064] test 0 (128 bit key, 16 byte blocks):
[54927.231841] 743417 operations in 1 seconds (11894672 bytes)
[54927.231892] test 1 (128 bit key, 64 byte blocks):
[54928.231832] 708360 operations in 1 seconds (45335040 bytes)
[54928.231851] test 2 (128 bit key, 256 byte blocks):
[54929.231853] 501092 operations in 1 seconds (128279552 bytes)
[54929.231872] test 3 (128 bit key, 512 byte blocks):
[54930.231830] 362779 operations in 1 seconds (185742848 bytes)
[54930.231848] test 4 (128 bit key, 1024 byte blocks):
[54931.231808] 238285 operations in 1 seconds (244003840 bytes)
[54931.231828] test 5 (128 bit key, 2048 byte blocks):
[54932.231800] 149171 operations in 1 seconds (305502208 bytes)
[54932.231849] test 6 (128 bit key, 4096 byte blocks):
[54933.231821] 87536 operations in 1 seconds (358547456 bytes)
[54933.231841] test 7 (128 bit key, 8192 byte blocks):
[54934.231783] 46091 operations in 1 seconds (377577472 bytes)
[54934.231803] test 8 (192 bit key, 16 byte blocks):
[54935.231773] 730135 operations in 1 seconds (11682160 bytes)
[54935.231792] test 9 (192 bit key, 64 byte blocks):
[54936.231762] 694952 operations in 1 seconds (44476928 bytes)
[54936.231782] test 10 (192 bit key, 256 byte blocks):
[54937.231754] 479033 operations in 1 seconds (122632448 bytes)
[54937.231774] test 11 (192 bit key, 512 byte blocks):
[54938.231747] 339268 operations in 1 seconds (173705216 bytes)
[54938.231767] test 12 (192 bit key, 1024 byte blocks):
[54939.231744] 216619 operations in 1 seconds (221817856 bytes)
[54939.231763] test 13 (192 bit key, 2048 byte blocks):
[54940.231758] 136358 operations in 1 seconds (279261184 bytes)
[54940.231778] test 14 (192 bit key, 4096 byte blocks):
[54941.231719] 79845 operations in 1 seconds (327045120 bytes)
[54941.231756] test 15 (192 bit key, 8192 byte blocks):
[54942.231740] 42121 operations in 1 seconds (345055232 bytes)
[54942.231761] test 16 (256 bit key, 16 byte blocks):
[54943.231712] 718082 operations in 1 seconds (11489312 bytes)
[54943.231733] test 17 (256 bit key, 64 byte blocks):
[54944.231691] 677413 operations in 1 seconds (43354432 bytes)
[54944.231711] test 18 (256 bit key, 256 byte blocks):
[54945.231683] 463746 operations in 1 seconds (118718976 bytes)
[54945.231703] test 19 (256 bit key, 512 byte blocks):
[54946.231710] 321881 operations in 1 seconds (164803072 bytes)
[54946.231744] test 20 (256 bit key, 1024 byte blocks):
[54947.231667] 224947 operations in 1 seconds (230345728 bytes)
[54947.231687] test 21 (256 bit key, 2048 byte blocks):
[54948.231661] 136130 operations in 1 seconds (278794240 bytes)
[54948.231681] test 22 (256 bit key, 4096 byte blocks):
[54949.231667] 75775 operations in 1 seconds (310374400 bytes)
[54949.231701] test 23 (256 bit key, 8192 byte blocks):
[54950.231677] 39429 operations in 1 seconds (323002368 bytes)


And here are the results for the 5.4 kernel with the old patch:

testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[ 189.151375] test 0 (160 bit key, 16 byte blocks):
[ 190.150706] 813049 operations in 1 seconds (13008784 bytes)
[ 190.150725] test 1 (160 bit key, 64 byte blocks):
[ 191.150708] 774554 operations in 1 seconds (49571456 bytes)
[ 191.150726] test 2 (160 bit key, 256 byte blocks):
[ 192.150714] 532955 operations in 1 seconds (136436480 bytes)
[ 192.150732] test 3 (160 bit key, 512 byte blocks):
[ 193.150663] 376599 operations in 1 seconds (192818688 bytes)
[ 193.150681] test 4 (160 bit key, 1024 byte blocks):
[ 194.150655] 262476 operations in 1 seconds (268775424 bytes)
[ 194.150703] test 5 (160 bit key, 2048 byte blocks):
[ 195.150673] 160616 operations in 1 seconds (328941568 bytes)
[ 195.150693] test 6 (160 bit key, 4096 byte blocks):
[ 196.150667] 90413 operations in 1 seconds (370331648 bytes)
[ 196.150687] test 7 (160 bit key, 8192 byte blocks):
[ 197.150658] 47446 operations in 1 seconds (388677632 bytes)
[ 197.150783]
testing speed of gcm(aes) (generic-gcm-aesni) encryption
[ 197.150797] test 0 (128 bit key, 16 byte blocks):
[ 198.150641] 851015 operations in 1 seconds (13616240 bytes)
[ 198.150660] test 1 (128 bit key, 64 byte blocks):
[ 199.150629] 815656 operations in 1 seconds (52201984 bytes)
[ 199.150648] test 2 (128 bit key, 256 byte blocks):
[ 200.150617] 553263 operations in 1 seconds (141635328 bytes)
[ 200.150675] test 3 (128 bit key, 512 byte blocks):
[ 201.150611] 386949 operations in 1 seconds (198117888 bytes)
[ 201.150660] test 4 (128 bit key, 1024 byte blocks):
[ 202.150601] 268681 operations in 1 seconds (275129344 bytes)
[ 202.150635] test 5 (128 bit key, 2048 byte blocks):
[ 203.150588] 162482 operations in 1 seconds (332763136 bytes)
[ 203.150607] test 6 (128 bit key, 4096 byte blocks):
[ 204.150549] 92852 operations in 1 seconds (380321792 bytes)
[ 204.150569] test 7 (128 bit key, 8192 byte blocks):
[ 205.150571] 48214 operations in 1 seconds (394969088 bytes)
[ 205.150592] test 8 (192 bit key, 16 byte blocks):
[ 206.150526] 832863 operations in 1 seconds (13325808 bytes)
[ 206.150546] test 9 (192 bit key, 64 byte blocks):
[ 207.150545] 784489 operations in 1 seconds (50207296 bytes)
[ 207.150566] test 10 (192 bit key, 256 byte blocks):
[ 208.150506] 530243 operations in 1 seconds (135742208 bytes)
[ 208.150526] test 11 (192 bit key, 512 byte blocks):
[ 209.150506] 366099 operations in 1 seconds (187442688 bytes)
[ 209.150532] test 12 (192 bit key, 1024 byte blocks):
[ 210.150488] 250462 operations in 1 seconds (256473088 bytes)
[ 210.150509] test 13 (192 bit key, 2048 byte blocks):
[ 211.150486] 151644 operations in 1 seconds (310566912 bytes)
[ 211.150507] test 14 (192 bit key, 4096 byte blocks):
[ 212.150474] 84226 operations in 1 seconds (344989696 bytes)
[ 212.150494] test 15 (192 bit key, 8192 byte blocks):
[ 213.150560] 43609 operations in 1 seconds (357244928 bytes)
[ 213.150581] test 16 (256 bit key, 16 byte blocks):
[ 214.150445] 804817 operations in 1 seconds (12877072 bytes)
[ 214.150464] test 17 (256 bit key, 64 byte blocks):
[ 215.150447] 764872 operations in 1 seconds (48951808 bytes)
[ 215.150467] test 18 (256 bit key, 256 byte blocks):
[ 216.150451] 501522 operations in 1 seconds (128389632 bytes)
[ 216.150471] test 19 (256 bit key, 512 byte blocks):
[ 217.150463] 339614 operations in 1 seconds (173882368 bytes)
[ 217.150495] test 20 (256 bit key, 1024 byte blocks):
[ 218.150406] 238889 operations in 1 seconds (244622336 bytes)
[ 218.150426] test 21 (256 bit key, 2048 byte blocks):
[ 219.150406] 141513 operations in 1 seconds (289818624 bytes)
[ 219.150426] test 22 (256 bit key, 4096 byte blocks):
[ 220.150432] 77995 operations in 1 seconds (319467520 bytes)
[ 220.150453] test 23 (256 bit key, 8192 byte blocks):
[ 221.150410] 40279 operations in 1 seconds (329965568 bytes)
[ 221.150546]
testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) decryption
[ 221.150561] test 0 (160 bit key, 16 byte blocks):
[ 222.150393] 758689 operations in 1 seconds (12139024 bytes)
[ 222.150426] test 1 (160 bit key, 64 byte blocks):
[ 223.150351] 599877 operations in 1 seconds (38392128 bytes)
[ 223.150399] test 2 (160 bit key, 256 byte blocks):
[ 224.150339] 453279 operations in 1 seconds (116039424 bytes)
[ 224.150360] test 3 (160 bit key, 512 byte blocks):
[ 225.150367] 332659 operations in 1 seconds (170321408 bytes)
[ 225.150392] test 4 (160 bit key, 1024 byte blocks):
[ 226.150375] 258949 operations in 1 seconds (265163776 bytes)
[ 226.150394] test 5 (160 bit key, 2048 byte blocks):
[ 227.150345] 157536 operations in 1 seconds (322633728 bytes)
[ 227.150382] test 6 (160 bit key, 4096 byte blocks):
[ 228.150341] 89150 operations in 1 seconds (365158400 bytes)
[ 228.150360] test 7 (160 bit key, 8192 byte blocks):
[ 229.150291] 46679 operations in 1 seconds (382394368 bytes)
[ 229.150420]
testing speed of gcm(aes) (generic-gcm-aesni) decryption
[ 229.150435] test 0 (128 bit key, 16 byte blocks):
[ 230.150312] 784010 operations in 1 seconds (12544160 bytes)
[ 230.150331] test 1 (128 bit key, 64 byte blocks):
[ 231.150271] 616765 operations in 1 seconds (39472960 bytes)
[ 231.150290] test 2 (128 bit key, 256 byte blocks):
[ 232.150251] 456053 operations in 1 seconds (116749568 bytes)
[ 232.150271] test 3 (128 bit key, 512 byte blocks):
[ 233.150245] 339125 operations in 1 seconds (173632000 bytes)
[ 233.150264] test 4 (128 bit key, 1024 byte blocks):
[ 234.150251] 260288 operations in 1 seconds (266534912 bytes)
[ 234.150300] test 5 (128 bit key, 2048 byte blocks):
[ 235.150225] 158126 operations in 1 seconds (323842048 bytes)
[ 235.150245] test 6 (128 bit key, 4096 byte blocks):
[ 236.150203] 89756 operations in 1 seconds (367640576 bytes)
[ 236.150222] test 7 (128 bit key, 8192 byte blocks):
[ 237.150238] 46408 operations in 1 seconds (380174336 bytes)
[ 237.150258] test 8 (192 bit key, 16 byte blocks):
[ 238.150185] 767710 operations in 1 seconds (12283360 bytes)
[ 238.150204] test 9 (192 bit key, 64 byte blocks):
[ 239.150223] 602290 operations in 1 seconds (38546560 bytes)
[ 239.150243] test 10 (192 bit key, 256 byte blocks):
[ 240.150156] 440038 operations in 1 seconds (112649728 bytes)
[ 240.150177] test 11 (192 bit key, 512 byte blocks):
[ 241.150144] 321800 operations in 1 seconds (164761600 bytes)
[ 241.150164] test 12 (192 bit key, 1024 byte blocks):
[ 242.150137] 213059 operations in 1 seconds (218172416 bytes)
[ 242.150186] test 13 (192 bit key, 2048 byte blocks):
[ 243.150119] 134641 operations in 1 seconds (275744768 bytes)
[ 243.150138] test 14 (192 bit key, 4096 byte blocks):
[ 244.150110] 78540 operations in 1 seconds (321699840 bytes)
[ 244.150131] test 15 (192 bit key, 8192 byte blocks):
[ 245.150124] 41604 operations in 1 seconds (340819968 bytes)
[ 245.150144] test 16 (256 bit key, 16 byte blocks):
[ 246.150143] 749367 operations in 1 seconds (11989872 bytes)
[ 246.150179] test 17 (256 bit key, 64 byte blocks):
[ 247.150101] 584427 operations in 1 seconds (37403328 bytes)
[ 247.150121] test 18 (256 bit key, 256 byte blocks):
[ 248.150087] 427519 operations in 1 seconds (109444864 bytes)
[ 248.150107] test 19 (256 bit key, 512 byte blocks):
[ 249.150046] 309171 operations in 1 seconds (158295552 bytes)
[ 249.150065] test 20 (256 bit key, 1024 byte blocks):
[ 250.150058] 236908 operations in 1 seconds (242593792 bytes)
[ 250.150078] test 21 (256 bit key, 2048 byte blocks):
[ 251.150027] 139251 operations in 1 seconds (285186048 bytes)
[ 251.150048] test 22 (256 bit key, 4096 byte blocks):
[ 252.150066] 76453 operations in 1 seconds (313151488 bytes)
[ 252.150118] test 23 (256 bit key, 8192 byte blocks):
[ 253.150039] 39852 operations in 1 seconds (326467584 bytes)


Thanks,
Ben

--
Ben Greear <[email protected]>
Candela Technologies Inc http://www.candelatech.com