Received: by 2002:a25:31c3:0:0:0:0:0 with SMTP id x186csp2437746ybx; Fri, 8 Nov 2019 04:24:26 -0800 (PST) X-Google-Smtp-Source: APXvYqwCD8Ic+PJodx/gEY+K7DHlrvFejAMfHC24Oyad+1frlbmpY5fe8V0gdZDUhSOZ4Ycnc+12 X-Received: by 2002:a17:906:bce5:: with SMTP id op5mr8379815ejb.325.1573215866717; Fri, 08 Nov 2019 04:24:26 -0800 (PST) ARC-Seal: i=1; a=rsa-sha256; t=1573215866; cv=none; d=google.com; s=arc-20160816; b=PA4O2Jzw7GSXszu3VwYVFYtkhp8AZb/gwI/68kRh7XZwmY4bjFyCElUGZkZEgPfCKE /R089vim7njMAsZasEEcf4TpyUgx1p0iJgD9YAP0dwOuYxxZoK1YsTW1he5sFFf9P0Dt NPhtCjlhzyMKK7BBqf6hpSMEBimB7QaKEEe7yRDBsDA6sdZ/BYBpDhbK/QGagzWyO8TW QxNrxyB47fIxo5FYuAK34GMEEwlicCQ1kKVa/wj7n/1y3qCpUS1G+CTjPYjjN1cuoX7f dQnIeU6yWJvJL7qplDWIjAYmvBp2T30fkEbVzKLkTGq7aaFUIraLWwIInsXk+OEg+izY Pj2Q== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:sender:content-transfer-encoding:mime-version :references:in-reply-to:message-id:date:subject:cc:to:from :dkim-signature; bh=42XRjwwTCdCF7ZT8oIqibtiDkGL0zHOfx3XmEy96pdg=; b=QLdNvyv1gaXLYwgad3YfrrPKQjZWX7GP3QMtuzyYDjRPatkm/+PpTcJNN62FQX7emH oCJ4QLYcnsDxhVc3TOepzAQC3z11GVYJn5B4VUkf5I8s6IY5w9W762THfq0RuYeH5w1m PjzUr/tGQj98QMvBnSIuXEx8dI0AMDJ7f1DfF1C78kN06/kesaLDReSI2xgEQrQlDGir Hln5TWFtmkO7aJVloljvz1galy02hXG8/1zuxw0YNcX9L2J1UQSrQnimH6oBp1Y7E07X OLcMAEukuv1Ir37gnR51bIkBDkhLmYpbGnRZWGGhCEY0u/NOy5Y4vH8vN331722Uh3ib 0AWg== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@kernel.org header.s=default header.b=hUaEhsMM; spf=pass (google.com: best guess record for domain of linux-crypto-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) smtp.mailfrom=linux-crypto-owner@vger.kernel.org; dmarc=pass (p=NONE sp=NONE dis=NONE) header.from=kernel.org Return-Path: Received: from vger.kernel.org (vger.kernel.org. [209.132.180.67]) by mx.google.com with ESMTP id i9si2801807ejc.266.2019.11.08.04.24.02; Fri, 08 Nov 2019 04:24:26 -0800 (PST) Received-SPF: pass (google.com: best guess record for domain of linux-crypto-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) client-ip=209.132.180.67; Authentication-Results: mx.google.com; dkim=pass header.i=@kernel.org header.s=default header.b=hUaEhsMM; spf=pass (google.com: best guess record for domain of linux-crypto-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) smtp.mailfrom=linux-crypto-owner@vger.kernel.org; dmarc=pass (p=NONE sp=NONE dis=NONE) header.from=kernel.org Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732241AbfKHMXN (ORCPT + 99 others); Fri, 8 Nov 2019 07:23:13 -0500 Received: from mail.kernel.org ([198.145.29.99]:37734 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727598AbfKHMXN (ORCPT ); Fri, 8 Nov 2019 07:23:13 -0500 Received: from localhost.localdomain (laubervilliers-657-1-83-120.w92-154.abo.wanadoo.fr [92.154.90.120]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id C455C21D7E; Fri, 8 Nov 2019 12:23:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1573215792; bh=RtKVtJGNdQ7rb7Xkl3C5Z4EtLccIsTyEgY2LQ5jU4Ig=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hUaEhsMMx+iDZeup9kzzecHa/u1wSxXoInX0YSj54z+GxhhO/DX7UTtHTKeOvZ+/d G/M2xststMU/zRDzDOAUOPbgpiBhR+8PItkdNvEIFWNSdAWdAduqvPjRDmFFUw3oez DQByUQ53vTYTvx2LMieDtAFm8Dw70+9dElnQxdaE= From: Ard Biesheuvel To: linux-crypto@vger.kernel.org Cc: Ard Biesheuvel , Herbert Xu , David Miller , "Jason A . Donenfeld" , Samuel Neves , Arnd Bergmann , Eric Biggers , Andy Lutomirski , Martin Willi , Rene van Dorst , David Sterba Subject: [PATCH v5 04/34] crypto: x86/chacha - expose SIMD ChaCha routine as library function Date: Fri, 8 Nov 2019 13:22:10 +0100 Message-Id: <20191108122240.28479-5-ardb@kernel.org> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20191108122240.28479-1-ardb@kernel.org> References: <20191108122240.28479-1-ardb@kernel.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-crypto-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-crypto@vger.kernel.org Wire the existing x86 SIMD ChaCha code into the new ChaCha library interface, so that users of the library interface will get the accelerated version when available. Given that calls into the library API will always go through the routines in this module if it is enabled, switch to static keys to select the optimal implementation available (which may be none at all, in which case we defer to the generic implementation for all invocations). Signed-off-by: Ard Biesheuvel --- arch/x86/crypto/chacha_glue.c | 91 ++++++++++++++------ crypto/Kconfig | 1 + include/crypto/chacha.h | 6 ++ 3 files changed, 73 insertions(+), 25 deletions(-) diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 0aabb382edce..b391e13a9e41 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); -#ifdef CONFIG_AS_AVX2 + asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx2; -#ifdef CONFIG_AS_AVX512 + asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, unsigned int len, int nrounds); -static bool chacha_use_avx512vl; -#endif -#endif + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) { @@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, int nrounds) { -#ifdef CONFIG_AS_AVX2 -#ifdef CONFIG_AS_AVX512 - if (chacha_use_avx512vl) { + if (IS_ENABLED(CONFIG_AS_AVX512) && + static_branch_likely(&chacha_use_avx512vl)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx512vl(state, dst, src, bytes, nrounds); @@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif - if (chacha_use_avx2) { + + if (IS_ENABLED(CONFIG_AS_AVX2) && + static_branch_likely(&chacha_use_avx2)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 8; @@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, return; } } -#endif + while (bytes >= CHACHA_BLOCK_SIZE * 4) { chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 4; @@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } } +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { + hchacha_block_generic(state, stream, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, stream, nrounds); + kernel_fpu_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); + + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || + bytes <= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, bytes, nrounds); + kernel_fpu_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + static int chacha_simd_stream_xor(struct skcipher_request *req, const struct chacha_ctx *ctx, const u8 *iv) { @@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - if (!crypto_simd_usable()) { + if (!static_branch_likely(&chacha_use_simd) || + !crypto_simd_usable()) { chacha_crypt_generic(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); @@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return -ENODEV; - -#ifdef CONFIG_AS_AVX2 - chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -#ifdef CONFIG_AS_AVX512 - chacha_use_avx512vl = chacha_use_avx2 && - boot_cpu_has(X86_FEATURE_AVX512VL) && - boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ -#endif -#endif + return 0; + + static_branch_enable(&chacha_use_simd); + + if (IS_ENABLED(CONFIG_AS_AVX2) && + boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (IS_ENABLED(CONFIG_AS_AVX512) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); } diff --git a/crypto/Kconfig b/crypto/Kconfig index 1abca30ed6ae..07762de1237f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1437,6 +1437,7 @@ config CRYPTO_CHACHA20_X86_64 depends on X86 && 64BIT select CRYPTO_SKCIPHER select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA help SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, XChaCha20, and XChaCha12 stream ciphers. diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 5c662f8fecac..2676f4fbd4c1 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -25,6 +25,12 @@ #define CHACHA_BLOCK_SIZE 64 #define CHACHAPOLY_IV_SIZE 12 +#ifdef CONFIG_X86_64 +#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32)) +#else +#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) +#endif + /* 192-bit nonce, then 64-bit stream position */ #define XCHACHA_IV_SIZE 32 -- 2.20.1