From: Huang Ying Subject: [PATCH -v3 3/3] crypto: Add AES-NI support for more modes Date: Wed, 18 Mar 2009 11:22:02 +0800 Message-ID: <1237346522.24215.242.camel@yhuang-dev.sh.intel.com> Mime-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="=-HzbMVqR0yr35Bub+Eejo" Cc: linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org To: Herbert Xu , Sebastian Andrzej Siewior Return-path: Received: from mga09.intel.com ([134.134.136.24]:48201 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756849AbZCRDWH (ORCPT ); Tue, 17 Mar 2009 23:22:07 -0400 Sender: linux-crypto-owner@vger.kernel.org List-ID: --=-HzbMVqR0yr35Bub+Eejo Content-Type: text/plain Content-Transfer-Encoding: quoted-printable Because kernel_fpu_begin() and kernel_fpu_end() operations are too slow, the performance gain of general mode implementation + aes-aesni is almost all compensated. The AES-NI support for more modes are implemented as follow: - Add a new AES algorithm implementation named __aes-aesni without kernel_fpu_begin/end() - Use fpu((AES)) to provide kenrel_fpu_begin/end() invoking - Add (AES) ablkcipher, which uses cryptd(fpu((AES))) to defer cryption to cryptd context in soft_irq context. Now the ctr, lrw, pcbc and xts support are added. Performance testing based on dm-crypt shows that cryption time can be reduced to 50% of general mode implementation + aes-aesni implementation. v2: Add description of mode acceleration support in Kconfig v3: Fix some bugs of CTR block size, LRW and XTS min/max key size. Signed-off-by: Huang Ying --- arch/x86/crypto/aesni-intel_glue.c | 267 ++++++++++++++++++++++++++++++++= ++++- crypto/Kconfig | 5=20 2 files changed, 271 insertions(+), 1 deletion(-) --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -21,6 +21,22 @@ #include #include =20 +#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) +#define HAS_CTR +#endif + +#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) +#define HAS_LRW +#endif + +#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) +#define HAS_PCBC +#endif + +#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) +#define HAS_XTS +#endif + struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; @@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg =3D { } }; =20 +static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx =3D aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_enc(ctx, dst, src); +} + +static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx =3D aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_dec(ctx, dst, src); +} + +static struct crypto_alg __aesni_alg =3D { + .cra_name =3D "__aes-aesni", + .cra_driver_name =3D "__driver-aes-aesni", + .cra_priority =3D 0, + .cra_flags =3D CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize =3D AES_BLOCK_SIZE, + .cra_ctxsize =3D sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask =3D 0, + .cra_module =3D THIS_MODULE, + .cra_list =3D LIST_HEAD_INIT(__aesni_alg.cra_list), + .cra_u =3D { + .cipher =3D { + .cia_min_keysize =3D AES_MIN_KEY_SIZE, + .cia_max_keysize =3D AES_MAX_KEY_SIZE, + .cia_setkey =3D aes_set_key, + .cia_encrypt =3D __aes_encrypt, + .cia_decrypt =3D __aes_decrypt + } + } +}; + static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -277,8 +328,16 @@ static int ablk_set_key(struct crypto_ab unsigned int key_len) { struct async_aes_ctx *ctx =3D crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child =3D &ctx->cryptd_tfm->base; + int err; =20 - return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err =3D crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; } =20 static int ablk_encrypt(struct ablkcipher_request *req) @@ -411,6 +470,163 @@ static struct crypto_alg ablk_cbc_alg =3D=20 }, }; =20 +#ifdef HAS_CTR +static int ablk_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm =3D cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ctr_alg =3D { + .cra_name =3D "ctr(aes)", + .cra_driver_name =3D "ctr-aes-aesni", + .cra_priority =3D 400, + .cra_flags =3D CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize =3D 1, + .cra_ctxsize =3D sizeof(struct async_aes_ctx), + .cra_alignmask =3D 0, + .cra_type =3D &crypto_ablkcipher_type, + .cra_module =3D THIS_MODULE, + .cra_list =3D LIST_HEAD_INIT(ablk_ctr_alg.cra_list), + .cra_init =3D ablk_ctr_init, + .cra_exit =3D ablk_exit, + .cra_u =3D { + .ablkcipher =3D { + .min_keysize =3D AES_MIN_KEY_SIZE, + .max_keysize =3D AES_MAX_KEY_SIZE, + .ivsize =3D AES_BLOCK_SIZE, + .setkey =3D ablk_set_key, + .encrypt =3D ablk_encrypt, + .decrypt =3D ablk_decrypt, + .geniv =3D "chainiv", + }, + }, +}; +#endif + +#ifdef HAS_LRW +static int ablk_lrw_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm =3D cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_lrw_alg =3D { + .cra_name =3D "lrw(aes)", + .cra_driver_name =3D "lrw-aes-aesni", + .cra_priority =3D 400, + .cra_flags =3D CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize =3D AES_BLOCK_SIZE, + .cra_ctxsize =3D sizeof(struct async_aes_ctx), + .cra_alignmask =3D 0, + .cra_type =3D &crypto_ablkcipher_type, + .cra_module =3D THIS_MODULE, + .cra_list =3D LIST_HEAD_INIT(ablk_lrw_alg.cra_list), + .cra_init =3D ablk_lrw_init, + .cra_exit =3D ablk_exit, + .cra_u =3D { + .ablkcipher =3D { + .min_keysize =3D AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize =3D AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize =3D AES_BLOCK_SIZE, + .setkey =3D ablk_set_key, + .encrypt =3D ablk_encrypt, + .decrypt =3D ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_PCBC +static int ablk_pcbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm =3D cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_pcbc_alg =3D { + .cra_name =3D "pcbc(aes)", + .cra_driver_name =3D "pcbc-aes-aesni", + .cra_priority =3D 400, + .cra_flags =3D CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize =3D AES_BLOCK_SIZE, + .cra_ctxsize =3D sizeof(struct async_aes_ctx), + .cra_alignmask =3D 0, + .cra_type =3D &crypto_ablkcipher_type, + .cra_module =3D THIS_MODULE, + .cra_list =3D LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), + .cra_init =3D ablk_pcbc_init, + .cra_exit =3D ablk_exit, + .cra_u =3D { + .ablkcipher =3D { + .min_keysize =3D AES_MIN_KEY_SIZE, + .max_keysize =3D AES_MAX_KEY_SIZE, + .ivsize =3D AES_BLOCK_SIZE, + .setkey =3D ablk_set_key, + .encrypt =3D ablk_encrypt, + .decrypt =3D ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_XTS +static int ablk_xts_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm =3D cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_xts_alg =3D { + .cra_name =3D "xts(aes)", + .cra_driver_name =3D "xts-aes-aesni", + .cra_priority =3D 400, + .cra_flags =3D CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize =3D AES_BLOCK_SIZE, + .cra_ctxsize =3D sizeof(struct async_aes_ctx), + .cra_alignmask =3D 0, + .cra_type =3D &crypto_ablkcipher_type, + .cra_module =3D THIS_MODULE, + .cra_list =3D LIST_HEAD_INIT(ablk_xts_alg.cra_list), + .cra_init =3D ablk_xts_init, + .cra_exit =3D ablk_exit, + .cra_u =3D { + .ablkcipher =3D { + .min_keysize =3D 2 * AES_MIN_KEY_SIZE, + .max_keysize =3D 2 * AES_MAX_KEY_SIZE, + .ivsize =3D AES_BLOCK_SIZE, + .setkey =3D ablk_set_key, + .encrypt =3D ablk_encrypt, + .decrypt =3D ablk_decrypt, + }, + }, +}; +#endif + static int __init aesni_init(void) { int err; @@ -421,6 +637,8 @@ static int __init aesni_init(void) } if ((err =3D crypto_register_alg(&aesni_alg))) goto aes_err; + if ((err =3D crypto_register_alg(&__aesni_alg))) + goto __aes_err; if ((err =3D crypto_register_alg(&blk_ecb_alg))) goto blk_ecb_err; if ((err =3D crypto_register_alg(&blk_cbc_alg))) @@ -429,9 +647,41 @@ static int __init aesni_init(void) goto ablk_ecb_err; if ((err =3D crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; +#ifdef HAS_CTR + if ((err =3D crypto_register_alg(&ablk_ctr_alg))) + goto ablk_ctr_err; +#endif +#ifdef HAS_LRW + if ((err =3D crypto_register_alg(&ablk_lrw_alg))) + goto ablk_lrw_err; +#endif +#ifdef HAS_PCBC + if ((err =3D crypto_register_alg(&ablk_pcbc_alg))) + goto ablk_pcbc_err; +#endif +#ifdef HAS_XTS + if ((err =3D crypto_register_alg(&ablk_xts_alg))) + goto ablk_xts_err; +#endif =20 return err; =20 +#ifdef HAS_XTS +ablk_xts_err: +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +ablk_pcbc_err: +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +ablk_lrw_err: +#endif +#ifdef HAS_CTR + crypto_unregister_alg(&ablk_ctr_alg); +ablk_ctr_err: +#endif + crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: @@ -439,6 +689,8 @@ ablk_ecb_err: blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); blk_ecb_err: + crypto_unregister_alg(&__aesni_alg); +__aes_err: crypto_unregister_alg(&aesni_alg); aes_err: return err; @@ -446,10 +698,23 @@ aes_err: =20 static void __exit aesni_exit(void) { +#ifdef HAS_XTS + crypto_unregister_alg(&ablk_xts_alg); +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +#endif +#ifdef HAS_CTR + crypto_unregister_alg(&ablk_ctr_alg); +#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&__aesni_alg); crypto_unregister_alg(&aesni_alg); } =20 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -491,6 +491,7 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_AES_X86_64 select CRYPTO_CRYPTD select CRYPTO_ALGAPI + select CRYPTO_FPU help Use Intel AES-NI instructions for AES algorithm. =20 @@ -510,6 +511,10 @@ config CRYPTO_AES_NI_INTEL =20 See for more information. =20 + In addition to AES cipher algorithm support, the + acceleration for some popular block cipher mode is supported + too, including ECB, CBC, CTR, LRW, PCBC, XTS. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI --=-HzbMVqR0yr35Bub+Eejo Content-Type: application/pgp-signature; name=signature.asc Content-Description: This is a digitally signed message part -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.9 (GNU/Linux) iEYEABECAAYFAknAaNcACgkQKhFGF+eHlpiifQCfdYbJQffEhv4SwSCmqgyAOgjS r2AAn2zFhadqdKCVuILE/h6Ypwz5EEVx =O5z8 -----END PGP SIGNATURE----- --=-HzbMVqR0yr35Bub+Eejo--