Get rid of pointless indirect calls where the target of the call is decided
at boot and never changes. Also, make the size of the key struct variable,
and only carry the extra keys needed for aggregation when using a version
of the algorithm that makes use of them.
Ard Biesheuvel (5):
crypto: arm64/ghash - drop PMULL based shash
crypto: arm64/gcm - disentangle ghash and gcm setkey() routines
crypto: arm64/gcm - use variably sized key struct
crypto: arm64/gcm - use inline helper to suppress indirect calls
crypto: arm/ghash - use variably sized key struct
arch/arm/crypto/ghash-ce-glue.c | 51 ++--
arch/arm64/crypto/ghash-ce-glue.c | 257 +++++++-------------
2 files changed, 118 insertions(+), 190 deletions(-)
--
2.20.1
There are two ways to implement SIMD accelerated GCM on arm64:
- using the PMULL instructions for carryless 64x64->128 bit multiplication,
in which case the architecture guarantees that the AES instructions are
available as well, and so we can use the AEAD implementation that combines
both,
- using the PMULL instructions for carryless 8x8->16 bit multiplication,
which is implemented as a shash, and can be combined with any ctr(aes)
implementation by the generic GCM AEAD template driver.
So let's drop the 64x64->128 bit PMULL based shash driver, which is never
needed for GCM, and is not suitable for use anywhere else.
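To put the second case in crypto API terms (an illustration, not part of the
change itself): on a core that lacks the 64-bit PMULL instructions, a request
such as

	crypto_alloc_aead("gcm(aes)", 0, 0);

can still be served, because the generic GCM template in crypto/gcm.c composes
the AEAD from a ctr(aes) cipher and a ghash implementation, and the
"ghash-neon" shash kept by this patch can provide the latter.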
Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm64/crypto/ghash-ce-glue.c | 90 +++-----------------
1 file changed, 12 insertions(+), 78 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 22831d3b7f62..be63d8b5152c 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -113,12 +113,8 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
/* avoid hogging the CPU for too long */
#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE)
-static int __ghash_update(struct shash_desc *desc, const u8 *src,
- unsigned int len,
- void (*simd_update)(int blocks, u64 dg[],
- const char *src,
- struct ghash_key const *k,
- const char *head))
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -145,7 +141,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
ghash_do_update(chunk, ctx->digest, src, key,
partial ? ctx->buf : NULL,
- simd_update);
+ pmull_ghash_update_p8);
blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
@@ -157,19 +153,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
return 0;
}
-static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
- unsigned int len)
-{
- return __ghash_update(desc, src, len, pmull_ghash_update_p8);
-}
-
-static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
- unsigned int len)
-{
- return __ghash_update(desc, src, len, pmull_ghash_update_p64);
-}
-
-static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
+static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -189,26 +173,6 @@ static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
return 0;
}
-static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
-{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
- if (partial) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-
- memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
-
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
- pmull_ghash_update_p64);
- }
- put_unaligned_be64(ctx->digest[1], dst);
- put_unaligned_be64(ctx->digest[0], dst + 8);
-
- *ctx = (struct ghash_desc_ctx){};
- return 0;
-}
-
static void ghash_reflect(u64 h[], const be128 *k)
{
u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;
@@ -254,7 +218,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
return __ghash_setkey(key, inkey, keylen);
}
-static struct shash_alg ghash_alg[] = {{
+static struct shash_alg ghash_alg = {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-neon",
.base.cra_priority = 150,
@@ -264,25 +228,11 @@ static struct shash_alg ghash_alg[] = {{
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
- .update = ghash_update_p8,
- .final = ghash_final_p8,
- .setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
-}, {
- .base.cra_name = "ghash",
- .base.cra_driver_name = "ghash-ce",
- .base.cra_priority = 200,
- .base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
- .base.cra_module = THIS_MODULE,
-
- .digestsize = GHASH_DIGEST_SIZE,
- .init = ghash_init,
- .update = ghash_update_p64,
- .final = ghash_final_p64,
+ .update = ghash_update,
+ .final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
-}};
+};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
@@ -641,37 +591,21 @@ static struct aead_alg gcm_aes_alg = {
static int __init ghash_ce_mod_init(void)
{
- int ret;
-
if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
if (cpu_have_named_feature(PMULL))
- ret = crypto_register_shashes(ghash_alg,
- ARRAY_SIZE(ghash_alg));
- else
- /* only register the first array element */
- ret = crypto_register_shash(ghash_alg);
+ return crypto_register_aead(&gcm_aes_alg);
- if (ret)
- return ret;
-
- if (cpu_have_named_feature(PMULL)) {
- ret = crypto_register_aead(&gcm_aes_alg);
- if (ret)
- crypto_unregister_shashes(ghash_alg,
- ARRAY_SIZE(ghash_alg));
- }
- return ret;
+ return crypto_register_shash(&ghash_alg);
}
static void __exit ghash_ce_mod_exit(void)
{
if (cpu_have_named_feature(PMULL))
- crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+ crypto_unregister_aead(&gcm_aes_alg);
else
- crypto_unregister_shash(ghash_alg);
- crypto_unregister_aead(&gcm_aes_alg);
+ crypto_unregister_shash(&ghash_alg);
}
static const struct cpu_feature ghash_cpu_feature[] = {
--
2.20.1
The remaining ghash implementation does not support aggregation, and so
there is no point in including the precomputed powers of H in the key
struct. So move that into the GCM setkey routine, and get rid of the
shared sub-routine entirely.
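For context, the reason the aggregating code wants four powers of H in the
first place (this is standard GHASH aggregation, nothing introduced here):
folding four blocks at a time rewrites

	Y' = ((((Y ^ B1)*H ^ B2)*H ^ B3)*H ^ B4)*H
	   = (Y ^ B1)*H^4 ^ B2*H^3 ^ B3*H^2 ^ B4*H

so H^2, H^3 and H^4 can be applied independently per block, while the
non-aggregating shash only ever multiplies by H.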
Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm64/crypto/ghash-ce-glue.c | 47 +++++++++-----------
1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index be63d8b5152c..921fa69b5ded 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -184,29 +184,6 @@ static void ghash_reflect(u64 h[], const be128 *k)
h[1] ^= 0xc200000000000000UL;
}
-static int __ghash_setkey(struct ghash_key *key,
- const u8 *inkey, unsigned int keylen)
-{
- be128 h;
-
- /* needed for the fallback */
- memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-
- ghash_reflect(key->h, &key->k);
-
- h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h2, &h);
-
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h3, &h);
-
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h4, &h);
-
- return 0;
-}
-
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
@@ -215,7 +192,11 @@ static int ghash_setkey(struct crypto_shash *tfm,
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
- return __ghash_setkey(key, inkey, keylen);
+ /* needed for the fallback */
+ memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+
+ ghash_reflect(key->h, &key->k);
+ return 0;
}
static struct shash_alg ghash_alg = {
@@ -251,6 +232,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
u8 key[GHASH_BLOCK_SIZE];
+ be128 h;
int ret;
ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
@@ -259,7 +241,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
- return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
+ /* needed for the fallback */
+ memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
+
+ ghash_reflect(ctx->ghash_key.h, &ctx->ghash_key.k);
+
+ h = ctx->ghash_key.k;
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h2, &h);
+
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h3, &h);
+
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h4, &h);
+
+ return 0;
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
--
2.20.1
Introduce an inline wrapper for ghash_do_update() that incorporates
the indirect call to the asm routine that is passed as an argument,
and keep the non-SIMD fallback code out of line. This ensures that
all references to the function pointer are inlined where the address
is taken, removing the need for any indirect calls to begin with.
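In reduced form, the trick looks like this (a kernel-style sketch for
illustration only, not the driver code): because the wrapper is
__always_inline, the function pointer argument is a compile time constant at
every call site, so the compiler can emit a direct call there.

	static __always_inline
	void do_update(int blocks, void (*simd_update)(int blocks))
	{
		simd_update(blocks);		/* inlined into each caller ... */
	}

	static void p8_update(int blocks);

	static void caller(int blocks)
	{
		do_update(blocks, p8_update);	/* ... becomes a direct call to p8_update() */
	}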
Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm64/crypto/ghash-ce-glue.c | 85 +++++++++++---------
1 file changed, 46 insertions(+), 39 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 2ae95dcf648f..da1034867aaa 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -69,36 +69,43 @@ static int ghash_init(struct shash_desc *desc)
}
static void ghash_do_update(int blocks, u64 dg[], const char *src,
- struct ghash_key *key, const char *head,
- void (*simd_update)(int blocks, u64 dg[],
- const char *src,
- u64 const h[][2],
- const char *head))
+ struct ghash_key *key, const char *head)
{
- if (likely(crypto_simd_usable() && simd_update)) {
+ be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+ do {
+ const u8 *in = src;
+
+ if (head) {
+ in = head;
+ blocks++;
+ head = NULL;
+ } else {
+ src += GHASH_BLOCK_SIZE;
+ }
+
+ crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+ gf128mul_lle(&dst, &key->k);
+ } while (--blocks);
+
+ dg[0] = be64_to_cpu(dst.b);
+ dg[1] = be64_to_cpu(dst.a);
+}
+
+static __always_inline
+void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key *key, const char *head,
+ void (*simd_update)(int blocks, u64 dg[],
+ const char *src,
+ u64 const h[][2],
+ const char *head))
+{
+ if (likely(crypto_simd_usable())) {
kernel_neon_begin();
simd_update(blocks, dg, src, key->h, head);
kernel_neon_end();
} else {
- be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
-
- do {
- const u8 *in = src;
-
- if (head) {
- in = head;
- blocks++;
- head = NULL;
- } else {
- src += GHASH_BLOCK_SIZE;
- }
-
- crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
- gf128mul_lle(&dst, &key->k);
- } while (--blocks);
-
- dg[0] = be64_to_cpu(dst.b);
- dg[1] = be64_to_cpu(dst.a);
+ ghash_do_update(blocks, dg, src, key, head);
}
}
@@ -131,9 +138,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
do {
int chunk = min(blocks, MAX_BLOCKS);
- ghash_do_update(chunk, ctx->digest, src, key,
- partial ? ctx->buf : NULL,
- pmull_ghash_update_p8);
+ ghash_do_simd_update(chunk, ctx->digest, src, key,
+ partial ? ctx->buf : NULL,
+ pmull_ghash_update_p8);
blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
@@ -155,8 +162,8 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
- pmull_ghash_update_p8);
+ ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
+ pmull_ghash_update_p8);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -280,9 +287,9 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
int blocks = count / GHASH_BLOCK_SIZE;
- ghash_do_update(blocks, dg, src, &ctx->ghash_key,
- *buf_count ? buf : NULL,
- pmull_ghash_update_p64);
+ ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
+ *buf_count ? buf : NULL,
+ pmull_ghash_update_p64);
src += blocks * GHASH_BLOCK_SIZE;
count %= GHASH_BLOCK_SIZE;
@@ -326,8 +333,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
if (buf_count) {
memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
- ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
+ ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
+ pmull_ghash_update_p64);
}
}
@@ -403,7 +410,7 @@ static int gcm_encrypt(struct aead_request *req)
} while (--remaining > 0);
ghash_do_update(blocks, dg, walk.dst.virt.addr,
- &ctx->ghash_key, NULL, NULL);
+ &ctx->ghash_key, NULL);
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
@@ -422,7 +429,7 @@ static int gcm_encrypt(struct aead_request *req)
tag = (u8 *)&lengths;
ghash_do_update(1, dg, tag, &ctx->ghash_key,
- walk.nbytes ? buf : NULL, NULL);
+ walk.nbytes ? buf : NULL);
if (walk.nbytes)
err = skcipher_walk_done(&walk, 0);
@@ -507,7 +514,7 @@ static int gcm_decrypt(struct aead_request *req)
u8 *dst = walk.dst.virt.addr;
ghash_do_update(blocks, dg, walk.src.virt.addr,
- &ctx->ghash_key, NULL, NULL);
+ &ctx->ghash_key, NULL);
do {
aes_encrypt(&ctx->aes_key, buf, iv);
@@ -530,7 +537,7 @@ static int gcm_decrypt(struct aead_request *req)
tag = (u8 *)&lengths;
ghash_do_update(1, dg, tag, &ctx->ghash_key,
- walk.nbytes ? buf : NULL, NULL);
+ walk.nbytes ? buf : NULL);
if (walk.nbytes) {
aes_encrypt(&ctx->aes_key, buf, iv);
--
2.20.1
Now that the ghash and gcm drivers are split, we no longer need to allocate
a key struct for the former that carries powers of H that are only used by
the latter. Also, take this opportunity to clean up the code a little bit.
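The shape of the result, summarised (this mirrors the hunks below): the powers
of H become a trailing flexible array member, so sizeof(struct ghash_key) only
covers the fixed part, and each registration reserves exactly the number of
128-bit entries it needs.

	struct ghash_key {
		be128	k;	/* needed for the non-SIMD fallback */
		u64	h[][2];	/* 1 reflected power of H for ghash, 4 for gcm */
	};

	/* shash: .cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2])       */
	/* AEAD:  .cra_ctxsize = sizeof(struct gcm_aes_ctx) + 4 * sizeof(u64[2]) */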
Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm64/crypto/ghash-ce-glue.c | 49 +++++++++-----------
1 file changed, 21 insertions(+), 28 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 921fa69b5ded..2ae95dcf648f 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -31,12 +31,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GCM_IV_SIZE 12
struct ghash_key {
- u64 h[2];
- u64 h2[2];
- u64 h3[2];
- u64 h4[2];
-
be128 k;
+ u64 h[][2];
};
struct ghash_desc_ctx {
@@ -51,22 +47,18 @@ struct gcm_aes_ctx {
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
- struct ghash_key const *k, u64 dg[],
- u8 ctr[], u32 const rk[], int rounds,
- u8 tag[]);
+ u64 const h[][2], u64 dg[], u8 ctr[],
+ u32 const rk[], int rounds, u8 tag[]);
asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
- struct ghash_key const *k, u64 dg[],
- u8 ctr[], u32 const rk[], int rounds,
- u8 tag[]);
+ u64 const h[][2], u64 dg[], u8 ctr[],
+ u32 const rk[], int rounds, u8 tag[]);
static int ghash_init(struct shash_desc *desc)
{
@@ -80,12 +72,12 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head,
void (*simd_update)(int blocks, u64 dg[],
const char *src,
- struct ghash_key const *k,
+ u64 const h[][2],
const char *head))
{
if (likely(crypto_simd_usable() && simd_update)) {
kernel_neon_begin();
- simd_update(blocks, dg, src, key, head);
+ simd_update(blocks, dg, src, key->h, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -195,7 +187,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
- ghash_reflect(key->h, &key->k);
+ ghash_reflect(key->h[0], &key->k);
return 0;
}
@@ -204,7 +196,7 @@ static struct shash_alg ghash_alg = {
.base.cra_driver_name = "ghash-neon",
.base.cra_priority = 150,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
.digestsize = GHASH_DIGEST_SIZE,
@@ -244,17 +236,17 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
/* needed for the fallback */
memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
- ghash_reflect(ctx->ghash_key.h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);
h = ctx->ghash_key.k;
gf128mul_lle(&h, &ctx->ghash_key.k);
- ghash_reflect(ctx->ghash_key.h2, &h);
+ ghash_reflect(ctx->ghash_key.h[1], &h);
gf128mul_lle(&h, &ctx->ghash_key.k);
- ghash_reflect(ctx->ghash_key.h3, &h);
+ ghash_reflect(ctx->ghash_key.h[2], &h);
gf128mul_lle(&h, &ctx->ghash_key.k);
- ghash_reflect(ctx->ghash_key.h4, &h);
+ ghash_reflect(ctx->ghash_key.h[3], &h);
return 0;
}
@@ -380,8 +372,8 @@ static int gcm_encrypt(struct aead_request *req)
}
kernel_neon_begin();
- pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
- iv, ctx->aes_key.key_enc, nrounds,
+ pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();
@@ -494,8 +486,8 @@ static int gcm_decrypt(struct aead_request *req)
}
kernel_neon_begin();
- pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
- iv, ctx->aes_key.key_enc, nrounds,
+ pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();
@@ -582,7 +574,8 @@ static struct aead_alg gcm_aes_alg = {
.base.cra_driver_name = "gcm-aes-ce",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct gcm_aes_ctx),
+ .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
+ 4 * sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
--
2.20.1
Of the two versions of GHASH that the ARM driver implements, only one
performs aggregation, and so the other one has no use for the powers
of H to be precomputed, or space to be allocated for them in the key
struct. So make the context size dependent on which version is being
selected, and while at it, use a static key to carry this decision,
and get rid of the function pointer.
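In outline (the relevant lines from the hunks below), the boot-time choice
moves out of a function pointer and into a jump label, so the hot path ends up
with a branch that is patched once at init time instead of an indirect call:

	static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);

	/* at module init */
	if (elf_hwcap2 & HWCAP2_PMULL) {
		ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
		static_branch_enable(&use_p64);
	}

	/* hot path */
	if (static_branch_likely(&use_p64))
		pmull_ghash_update_p64(blocks, dg, src, key->h, head);
	else
		pmull_ghash_update_p8(blocks, dg, src, key->h, head);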
Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/arm/crypto/ghash-ce-glue.c | 51 +++++++++-----------
1 file changed, 24 insertions(+), 27 deletions(-)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index a00fd329255f..f13401f3e669 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -16,6 +16,7 @@
#include <crypto/gf128mul.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
@@ -27,12 +28,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
- u64 h[2];
- u64 h2[2];
- u64 h3[2];
- u64 h4[2];
-
be128 k;
+ u64 h[][2];
};
struct ghash_desc_ctx {
@@ -46,16 +43,12 @@ struct ghash_async_ctx {
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
static int ghash_init(struct shash_desc *desc)
{
@@ -70,7 +63,10 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
- pmull_ghash_update(blocks, dg, src, key, head);
+ if (static_branch_likely(&use_p64))
+ pmull_ghash_update_p64(blocks, dg, src, key->h, head);
+ else
+ pmull_ghash_update_p8(blocks, dg, src, key->h, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -161,25 +157,26 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
- be128 h;
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
- ghash_reflect(key->h, &key->k);
+ ghash_reflect(key->h[0], &key->k);
- h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h2, &h);
+ if (static_branch_likely(&use_p64)) {
+ be128 h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h3, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[1], &h);
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h4, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[2], &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[3], &h);
+ }
return 0;
}
@@ -195,7 +192,7 @@ static struct shash_alg ghash_alg = {
.base.cra_driver_name = "ghash-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
@@ -354,10 +351,10 @@ static int __init ghash_ce_mod_init(void)
if (!(elf_hwcap & HWCAP_NEON))
return -ENODEV;
- if (elf_hwcap2 & HWCAP2_PMULL)
- pmull_ghash_update = pmull_ghash_update_p64;
- else
- pmull_ghash_update = pmull_ghash_update_p8;
+ if (elf_hwcap2 & HWCAP2_PMULL) {
+ ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
+ static_branch_enable(&use_p64);
+ }
err = crypto_register_shash(&ghash_alg);
if (err)
--
2.20.1
On Mon, Jun 29, 2020 at 09:39:25AM +0200, Ard Biesheuvel wrote:
> Of the two versions of GHASH that the ARM driver implements, only one
> performs aggregation, and so the other one has no use for the powers
> of H to be precomputed, or space to be allocated for them in the key
> struct. So make the context size dependent on which version is being
> selected, and while at it, use a static key to carry this decision,
> and get rid of the function pointer.
>
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
> arch/arm/crypto/ghash-ce-glue.c | 51 +++++++++-----------
> 1 file changed, 24 insertions(+), 27 deletions(-)
This introduces some new sparse warnings:
../arch/arm/crypto/ghash-ce-glue.c:67:65: warning: incorrect type in argument 4 (different modifiers)
../arch/arm/crypto/ghash-ce-glue.c:67:65: expected unsigned long long const [usertype] ( *h )[2]
../arch/arm/crypto/ghash-ce-glue.c:67:65: got unsigned long long [usertype] ( * )[2]
../arch/arm/crypto/ghash-ce-glue.c:69:64: warning: incorrect type in argument 4 (different modifiers)
../arch/arm/crypto/ghash-ce-glue.c:69:64: expected unsigned long long const [usertype] ( *h )[2]
../arch/arm/crypto/ghash-ce-glue.c:69:64: got unsigned long long [usertype] ( * )[2]
Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Mon, Jun 29, 2020 at 09:39:23AM +0200, Ard Biesheuvel wrote:
> Now that the ghash and gcm drivers are split, we no longer need to allocate
> a key struct for the former that carries powers of H that are only used by
> the latter. Also, take this opportunity to clean up the code a little bit.
>
> Signed-off-by: Ard Biesheuvel <[email protected]>
> ---
> arch/arm64/crypto/ghash-ce-glue.c | 49 +++++++++-----------
> 1 file changed, 21 insertions(+), 28 deletions(-)
sparse doesn't like this patch either.
Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Thu, 9 Jul 2020 at 11:22, Herbert Xu <[email protected]> wrote:
>
> On Mon, Jun 29, 2020 at 09:39:25AM +0200, Ard Biesheuvel wrote:
> > Of the two versions of GHASH that the ARM driver implements, only one
> > performs aggregation, and so the other one has no use for the powers
> > of H to be precomputed, or space to be allocated for them in the key
> > struct. So make the context size dependent on which version is being
> > selected, and while at it, use a static key to carry this decision,
> > and get rid of the function pointer.
> >
> > Signed-off-by: Ard Biesheuvel <[email protected]>
> > ---
> > arch/arm/crypto/ghash-ce-glue.c | 51 +++++++++-----------
> > 1 file changed, 24 insertions(+), 27 deletions(-)
>
> This introduces some new sparse warnings:
>
> ../arch/arm/crypto/ghash-ce-glue.c:67:65: warning: incorrect type in argument 4 (different modifiers)
> ../arch/arm/crypto/ghash-ce-glue.c:67:65: expected unsigned long long const [usertype] ( *h )[2]
> ../arch/arm/crypto/ghash-ce-glue.c:67:65: got unsigned long long [usertype] ( * )[2]
> ../arch/arm/crypto/ghash-ce-glue.c:69:64: warning: incorrect type in argument 4 (different modifiers)
> ../arch/arm/crypto/ghash-ce-glue.c:69:64: expected unsigned long long const [usertype] ( *h )[2]
> ../arch/arm/crypto/ghash-ce-glue.c:69:64: got unsigned long long [usertype] ( * )[2]
>
That looks like a sparse bug to me. Since when is it not allowed to
pass a non-const value as a const parameter?
I.e., you can pass a u64[] to a function that takes a u64 const *,
giving the caller the guarantee that their u64[] will not be modified
during the call, even if it is passed by reference.
Here, we are dealing with u64[][2], but the same reasoning holds. A
const u64[][2] formal parameter (or u64 const (*)[2] which comes down
to the same thing) does not require a const argument, it only tells
the caller that the array will be left untouched. This is why the
compiler is perfectly happy with this arrangement.
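For reference, a self-contained snippet showing the same situation (made-up
names, outside the kernel), which gcc and clang are perfectly happy with:

	typedef unsigned long long u64;

	void consume(u64 const h[][2]);	/* promises not to modify h */

	void produce(void)
	{
		u64 powers[4][2];	/* non-const storage */

		consume(powers);	/* only adds const, so this is fine */
	}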
On Thu, Jul 09, 2020 at 11:51:10AM +0300, Ard Biesheuvel wrote:
>
> That looks like a sparse bug to me. Since when is it not allowed to
> pass a non-const value as a const parameter?
>
> I.e., you can pass a u64[] to a function that takes a u64 const *,
> giving the caller the guarantee that their u64[] will not be modified
> during the call, even if it is passed by reference.
>
> Here, we are dealing with u64[][2], but the same reasoning holds. A
> const u64[][2] formal parameter (or u64 const (*)[2] which comes down
> to the same thing) does not require a const argument, it only tells
> the caller that the array will be left untouched. This is why the
> compiler is perfectly happy with this arrangement.
You're right. Luc, here is the patch that triggers the bogus
warning with sparse.
Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index a00fd329255f..f13401f3e669 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -16,6 +16,7 @@
#include <crypto/gf128mul.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
@@ -27,12 +28,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
- u64 h[2];
- u64 h2[2];
- u64 h3[2];
- u64 h4[2];
-
be128 k;
+ u64 h[][2];
};
struct ghash_desc_ctx {
@@ -46,16 +43,12 @@ struct ghash_async_ctx {
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
static int ghash_init(struct shash_desc *desc)
{
@@ -70,7 +63,10 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
- pmull_ghash_update(blocks, dg, src, key, head);
+ if (static_branch_likely(&use_p64))
+ pmull_ghash_update_p64(blocks, dg, src, key->h, head);
+ else
+ pmull_ghash_update_p8(blocks, dg, src, key->h, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -161,25 +157,26 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
- be128 h;
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
- ghash_reflect(key->h, &key->k);
+ ghash_reflect(key->h[0], &key->k);
- h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h2, &h);
+ if (static_branch_likely(&use_p64)) {
+ be128 h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h3, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[1], &h);
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h4, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[2], &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[3], &h);
+ }
return 0;
}
@@ -195,7 +192,7 @@ static struct shash_alg ghash_alg = {
.base.cra_driver_name = "ghash-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
@@ -354,10 +351,10 @@ static int __init ghash_ce_mod_init(void)
if (!(elf_hwcap & HWCAP_NEON))
return -ENODEV;
- if (elf_hwcap2 & HWCAP2_PMULL)
- pmull_ghash_update = pmull_ghash_update_p64;
- else
- pmull_ghash_update = pmull_ghash_update_p8;
+ if (elf_hwcap2 & HWCAP2_PMULL) {
+ ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
+ static_branch_enable(&use_p64);
+ }
err = crypto_register_shash(&ghash_alg);
if (err)
On Thu, Jul 09, 2020 at 06:22:58PM +1000, Herbert Xu wrote:
> On Mon, Jun 29, 2020 at 09:39:23AM +0200, Ard Biesheuvel wrote:
> > Now that the ghash and gcm drivers are split, we no longer need to allocate
> > a key struct for the former that carries powers of H that are only used by
> > the latter. Also, take this opportunity to clean up the code a little bit.
> >
> > Signed-off-by: Ard Biesheuvel <[email protected]>
> > ---
> > arch/arm64/crypto/ghash-ce-glue.c | 49 +++++++++-----------
> > 1 file changed, 21 insertions(+), 28 deletions(-)
>
> sparse doesn't like this patch either.
And some of these warnings appear to be genuine.
Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Thu, Jul 09, 2020 at 10:12:31PM +1000, Herbert Xu wrote:
> On Thu, Jul 09, 2020 at 06:22:58PM +1000, Herbert Xu wrote:
> > On Mon, Jun 29, 2020 at 09:39:23AM +0200, Ard Biesheuvel wrote:
> > > Now that the ghash and gcm drivers are split, we no longer need to allocate
> > > a key struct for the former that carries powers of H that are only used by
> > > the latter. Also, take this opportunity to clean up the code a little bit.
> > >
> > > Signed-off-by: Ard Biesheuvel <[email protected]>
> > > ---
> > > arch/arm64/crypto/ghash-ce-glue.c | 49 +++++++++-----------
> > > 1 file changed, 21 insertions(+), 28 deletions(-)
> >
> > sparse doesn't like this patch either.
>
> And some of these warnings appear to be genuine.
Never mind, these appear to be preexisting warnings.
Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Mon, Jun 29, 2020 at 09:39:20AM +0200, Ard Biesheuvel wrote:
> Get rid of pointless indirect calls where the target of the call is decided
> at boot and never changes. Also, make the size of the key struct variable,
> and only carry the extra keys needed for aggregation when using a version
> of the algorithm that makes use of them.
>
> Ard Biesheuvel (5):
> crypto: arm64/ghash - drop PMULL based shash
> crypto: arm64/gcm - disentangle ghash and gcm setkey() routines
> crypto: arm64/gcm - use variably sized key struct
> crypto: arm64/gcm - use inline helper to suppress indirect calls
> crypto: arm/ghash - use variably sized key struct
>
> arch/arm/crypto/ghash-ce-glue.c | 51 ++--
> arch/arm64/crypto/ghash-ce-glue.c | 257 +++++++-------------
> 2 files changed, 118 insertions(+), 190 deletions(-)
All applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Thu, Jul 09, 2020 at 10:09:37PM +1000, Herbert Xu wrote:
> On Thu, Jul 09, 2020 at 11:51:10AM +0300, Ard Biesheuvel wrote:
> >
> > That looks like a sparse bug to me. Since when is it not allowed to
> > pass a non-const value as a const parameter?
> >
> > I.e., you can pass a u64[] to a function that takes a u64 const *,
> > giving the caller the guarantee that their u64[] will not be modified
> > during the call, even if it is passed by reference.
> >
> > Here, we are dealing with u64[][2], but the same reasoning holds. A
> > const u64[][2] formal parameter (or u64 const (*)[2] which comes down
> > to the same thing) does not require a const argument, it only tells
> > the caller that the array will be left untouched. This is why the
> > compiler is perfectly happy with this arrangement.
>
> You're right. Luc, here is the patch that triggers the bogus
> warning with sparse.
Thanks for the analysis and the bug report.
A fix is under way and should be upstreamed in a few days.
-- Luc