From: Daniel Axtens Subject: [PATCH 3/4] crypto: powerpc - Add CRC-T10DIF acceleration Date: Wed, 15 Mar 2017 23:37:36 +1100 Message-ID: <20170315123737.20234-3-dja@axtens.net> References: <20170315123737.20234-1-dja@axtens.net> Cc: anton@samba.org, Daniel Axtens To: linuxppc-dev@lists.ozlabs.org, linux-crypto@vger.kernel.org Return-path: Received: from mail-io0-f194.google.com ([209.85.223.194]:33003 "EHLO mail-io0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751014AbdCOMiZ (ORCPT ); Wed, 15 Mar 2017 08:38:25 -0400 Received: by mail-io0-f194.google.com with SMTP id f84so2839174ioj.0 for ; Wed, 15 Mar 2017 05:38:24 -0700 (PDT) In-Reply-To: <20170315123737.20234-1-dja@axtens.net> Sender: linux-crypto-owner@vger.kernel.org List-ID: T10DIF is a CRC16 used heavily in NVMe. It turns out we can accelerate it with a CRC32 library and a few little tricks. Provide the accelerator based the refactored CRC32 code. Cc: Anton Blanchard Thanks-to: Hong Bo Peng Signed-off-by: Daniel Axtens --- arch/powerpc/crypto/Makefile | 2 + arch/powerpc/crypto/crct10dif-vpmsum_asm.S | 850 ++++++++++++++++++++++++++++ arch/powerpc/crypto/crct10dif-vpmsum_glue.c | 125 ++++ crypto/Kconfig | 9 + 4 files changed, 986 insertions(+) create mode 100644 arch/powerpc/crypto/crct10dif-vpmsum_asm.S create mode 100644 arch/powerpc/crypto/crct10dif-vpmsum_glue.c diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 87f40454bad3..e66aaf19764d 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o @@ -17,3 +18,4 @@ sha1-powerpc-y := sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o +crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S new file mode 100644 index 000000000000..5e3d81a0af1b --- /dev/null +++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S @@ -0,0 +1,850 @@ +/* + * Calculate a CRC T10DIF with vpmsum acceleration + * + * Constants generated by crc32-vpmsum, available at + * https://github.com/antonblanchard/crc32-vpmsum + * + * crc32-vpmsum is + * Copyright (C) 2015 Anton Blanchard , IBM + * and is available under the GPL v2 or later. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +.constants: + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261184 mod p(x), x^261120 mod p(x) */ + .octa 0x0000000056d300000000000052550000 + + /* x^260160 mod p(x), x^260096 mod p(x) */ + .octa 0x00000000ee67000000000000a1e40000 + + /* x^259136 mod p(x), x^259072 mod p(x) */ + .octa 0x0000000060830000000000004ad10000 + + /* x^258112 mod p(x), x^258048 mod p(x) */ + .octa 0x000000008cfe0000000000009ab40000 + + /* x^257088 mod p(x), x^257024 mod p(x) */ + .octa 0x000000003e93000000000000fdb50000 + + /* x^256064 mod p(x), x^256000 mod p(x) */ + .octa 0x000000003c2000000000000045480000 + + /* x^255040 mod p(x), x^254976 mod p(x) */ + .octa 0x00000000b1fc0000000000008d690000 + + /* x^254016 mod p(x), x^253952 mod p(x) */ + .octa 0x00000000f82b00000000000024ad0000 + + /* x^252992 mod p(x), x^252928 mod p(x) */ + .octa 0x0000000044420000000000009f1a0000 + + /* x^251968 mod p(x), x^251904 mod p(x) */ + .octa 0x00000000e88c00000000000066ec0000 + + /* x^250944 mod p(x), x^250880 mod p(x) */ + .octa 0x00000000385c000000000000c87d0000 + + /* x^249920 mod p(x), x^249856 mod p(x) */ + .octa 0x000000003227000000000000c8ff0000 + + /* x^248896 mod p(x), x^248832 mod p(x) */ + .octa 0x00000000a9a900000000000033440000 + + /* x^247872 mod p(x), x^247808 mod p(x) */ + .octa 0x00000000abaa00000000000066eb0000 + + /* x^246848 mod p(x), x^246784 mod p(x) */ + .octa 0x000000001ac3000000000000c4ef0000 + + /* x^245824 mod p(x), x^245760 mod p(x) */ + .octa 0x0000000063f000000000000056f30000 + + /* x^244800 mod p(x), x^244736 mod p(x) */ + .octa 0x0000000032cc00000000000002050000 + + /* x^243776 mod p(x), x^243712 mod p(x) */ + .octa 0x00000000f8b5000000000000568e0000 + + /* x^242752 mod p(x), x^242688 mod p(x) */ + .octa 0x000000008db100000000000064290000 + + /* x^241728 mod p(x), x^241664 mod p(x) */ + .octa 0x0000000059ca0000000000006b660000 + + /* x^240704 mod p(x), x^240640 mod p(x) */ + .octa 0x000000005f5c00000000000018f80000 + + /* x^239680 mod p(x), x^239616 mod p(x) */ + .octa 0x0000000061af000000000000b6090000 + + /* x^238656 mod p(x), x^238592 mod p(x) */ + .octa 0x00000000e29e000000000000099a0000 + + /* x^237632 mod p(x), x^237568 mod p(x) */ + .octa 0x000000000975000000000000a8360000 + + /* x^236608 mod p(x), x^236544 mod p(x) */ + .octa 0x0000000043900000000000004f570000 + + /* x^235584 mod p(x), x^235520 mod p(x) */ + .octa 0x00000000f9cd000000000000134c0000 + + /* x^234560 mod p(x), x^234496 mod p(x) */ + .octa 0x000000007c29000000000000ec380000 + + /* x^233536 mod p(x), x^233472 mod p(x) */ + .octa 0x000000004c6a000000000000b0d10000 + + /* x^232512 mod p(x), x^232448 mod p(x) */ + .octa 0x00000000e7290000000000007d3e0000 + + /* x^231488 mod p(x), x^231424 mod p(x) */ + .octa 0x00000000f1ab000000000000f0b20000 + + /* x^230464 mod p(x), x^230400 mod p(x) */ + .octa 0x0000000039db0000000000009c270000 + + /* x^229440 mod p(x), x^229376 mod p(x) */ + .octa 0x000000005e2800000000000092890000 + + /* x^228416 mod p(x), x^228352 mod p(x) */ + .octa 0x00000000d44e000000000000d5ee0000 + + /* x^227392 mod p(x), x^227328 mod p(x) */ + .octa 0x00000000cd0a00000000000041f50000 + + /* x^226368 mod p(x), x^226304 mod p(x) */ + .octa 0x00000000c5b400000000000010520000 + + /* x^225344 mod p(x), x^225280 mod p(x) */ + .octa 0x00000000fd2100000000000042170000 + + /* x^224320 mod p(x), x^224256 mod p(x) */ + .octa 0x000000002f2500000000000095c20000 + + /* x^223296 mod p(x), x^223232 mod p(x) */ + .octa 0x000000001b0100000000000001ce0000 + + /* x^222272 mod p(x), x^222208 mod p(x) */ + .octa 0x000000000d430000000000002aca0000 + + /* x^221248 mod p(x), x^221184 mod p(x) */ + .octa 0x0000000030a6000000000000385e0000 + + /* x^220224 mod p(x), x^220160 mod p(x) */ + .octa 0x00000000e37b0000000000006f7a0000 + + /* x^219200 mod p(x), x^219136 mod p(x) */ + .octa 0x00000000873600000000000024320000 + + /* x^218176 mod p(x), x^218112 mod p(x) */ + .octa 0x00000000e9fb000000000000bd9c0000 + + /* x^217152 mod p(x), x^217088 mod p(x) */ + .octa 0x000000003b9500000000000054bc0000 + + /* x^216128 mod p(x), x^216064 mod p(x) */ + .octa 0x00000000133e000000000000a4660000 + + /* x^215104 mod p(x), x^215040 mod p(x) */ + .octa 0x00000000784500000000000079930000 + + /* x^214080 mod p(x), x^214016 mod p(x) */ + .octa 0x00000000b9800000000000001bb80000 + + /* x^213056 mod p(x), x^212992 mod p(x) */ + .octa 0x00000000687600000000000024400000 + + /* x^212032 mod p(x), x^211968 mod p(x) */ + .octa 0x00000000aff300000000000029e10000 + + /* x^211008 mod p(x), x^210944 mod p(x) */ + .octa 0x0000000024b50000000000005ded0000 + + /* x^209984 mod p(x), x^209920 mod p(x) */ + .octa 0x0000000017e8000000000000b12e0000 + + /* x^208960 mod p(x), x^208896 mod p(x) */ + .octa 0x00000000128400000000000026d20000 + + /* x^207936 mod p(x), x^207872 mod p(x) */ + .octa 0x000000002115000000000000a32a0000 + + /* x^206912 mod p(x), x^206848 mod p(x) */ + .octa 0x000000009595000000000000a1210000 + + /* x^205888 mod p(x), x^205824 mod p(x) */ + .octa 0x00000000281e000000000000ee8b0000 + + /* x^204864 mod p(x), x^204800 mod p(x) */ + .octa 0x0000000006010000000000003d0d0000 + + /* x^203840 mod p(x), x^203776 mod p(x) */ + .octa 0x00000000e2b600000000000034e90000 + + /* x^202816 mod p(x), x^202752 mod p(x) */ + .octa 0x000000001bd40000000000004cdb0000 + + /* x^201792 mod p(x), x^201728 mod p(x) */ + .octa 0x00000000df2800000000000030e90000 + + /* x^200768 mod p(x), x^200704 mod p(x) */ + .octa 0x0000000049c200000000000042590000 + + /* x^199744 mod p(x), x^199680 mod p(x) */ + .octa 0x000000009b97000000000000df950000 + + /* x^198720 mod p(x), x^198656 mod p(x) */ + .octa 0x000000006184000000000000da7b0000 + + /* x^197696 mod p(x), x^197632 mod p(x) */ + .octa 0x00000000461700000000000012510000 + + /* x^196672 mod p(x), x^196608 mod p(x) */ + .octa 0x000000009b40000000000000f37e0000 + + /* x^195648 mod p(x), x^195584 mod p(x) */ + .octa 0x00000000eeb2000000000000ecf10000 + + /* x^194624 mod p(x), x^194560 mod p(x) */ + .octa 0x00000000b2e800000000000050f20000 + + /* x^193600 mod p(x), x^193536 mod p(x) */ + .octa 0x00000000f59a000000000000e0b30000 + + /* x^192576 mod p(x), x^192512 mod p(x) */ + .octa 0x00000000467f0000000000004d5a0000 + + /* x^191552 mod p(x), x^191488 mod p(x) */ + .octa 0x00000000da92000000000000bb010000 + + /* x^190528 mod p(x), x^190464 mod p(x) */ + .octa 0x000000001e1000000000000022a40000 + + /* x^189504 mod p(x), x^189440 mod p(x) */ + .octa 0x0000000058fe000000000000836f0000 + + /* x^188480 mod p(x), x^188416 mod p(x) */ + .octa 0x00000000b9ce000000000000d78d0000 + + /* x^187456 mod p(x), x^187392 mod p(x) */ + .octa 0x0000000022210000000000004f8d0000 + + /* x^186432 mod p(x), x^186368 mod p(x) */ + .octa 0x00000000744600000000000033760000 + + /* x^185408 mod p(x), x^185344 mod p(x) */ + .octa 0x000000001c2e000000000000a1e50000 + + /* x^184384 mod p(x), x^184320 mod p(x) */ + .octa 0x00000000dcc8000000000000a1a40000 + + /* x^183360 mod p(x), x^183296 mod p(x) */ + .octa 0x00000000910f00000000000019a20000 + + /* x^182336 mod p(x), x^182272 mod p(x) */ + .octa 0x0000000055d5000000000000f6ae0000 + + /* x^181312 mod p(x), x^181248 mod p(x) */ + .octa 0x00000000c8ba000000000000a7ac0000 + + /* x^180288 mod p(x), x^180224 mod p(x) */ + .octa 0x0000000031f8000000000000eea20000 + + /* x^179264 mod p(x), x^179200 mod p(x) */ + .octa 0x000000001966000000000000c4d90000 + + /* x^178240 mod p(x), x^178176 mod p(x) */ + .octa 0x00000000b9810000000000002b470000 + + /* x^177216 mod p(x), x^177152 mod p(x) */ + .octa 0x000000008303000000000000f7cf0000 + + /* x^176192 mod p(x), x^176128 mod p(x) */ + .octa 0x000000002ce500000000000035b30000 + + /* x^175168 mod p(x), x^175104 mod p(x) */ + .octa 0x000000002fae0000000000000c7c0000 + + /* x^174144 mod p(x), x^174080 mod p(x) */ + .octa 0x00000000f50c0000000000009edf0000 + + /* x^173120 mod p(x), x^173056 mod p(x) */ + .octa 0x00000000714f00000000000004cd0000 + + /* x^172096 mod p(x), x^172032 mod p(x) */ + .octa 0x00000000c161000000000000541b0000 + + /* x^171072 mod p(x), x^171008 mod p(x) */ + .octa 0x0000000021c8000000000000e2700000 + + /* x^170048 mod p(x), x^169984 mod p(x) */ + .octa 0x00000000b93d00000000000009a60000 + + /* x^169024 mod p(x), x^168960 mod p(x) */ + .octa 0x00000000fbcf000000000000761c0000 + + /* x^168000 mod p(x), x^167936 mod p(x) */ + .octa 0x0000000026350000000000009db30000 + + /* x^166976 mod p(x), x^166912 mod p(x) */ + .octa 0x00000000b64f0000000000003e9f0000 + + /* x^165952 mod p(x), x^165888 mod p(x) */ + .octa 0x00000000bd0e00000000000078590000 + + /* x^164928 mod p(x), x^164864 mod p(x) */ + .octa 0x00000000d9360000000000008bc80000 + + /* x^163904 mod p(x), x^163840 mod p(x) */ + .octa 0x000000002f140000000000008c9f0000 + + /* x^162880 mod p(x), x^162816 mod p(x) */ + .octa 0x000000006a270000000000006af70000 + + /* x^161856 mod p(x), x^161792 mod p(x) */ + .octa 0x000000006685000000000000e5210000 + + /* x^160832 mod p(x), x^160768 mod p(x) */ + .octa 0x0000000062da00000000000008290000 + + /* x^159808 mod p(x), x^159744 mod p(x) */ + .octa 0x00000000bb4b000000000000e4d00000 + + /* x^158784 mod p(x), x^158720 mod p(x) */ + .octa 0x00000000d2490000000000004ae10000 + + /* x^157760 mod p(x), x^157696 mod p(x) */ + .octa 0x00000000c85b00000000000000e70000 + + /* x^156736 mod p(x), x^156672 mod p(x) */ + .octa 0x00000000c37a00000000000015650000 + + /* x^155712 mod p(x), x^155648 mod p(x) */ + .octa 0x0000000018530000000000001c2f0000 + + /* x^154688 mod p(x), x^154624 mod p(x) */ + .octa 0x00000000b46600000000000037bd0000 + + /* x^153664 mod p(x), x^153600 mod p(x) */ + .octa 0x00000000439b00000000000012190000 + + /* x^152640 mod p(x), x^152576 mod p(x) */ + .octa 0x00000000b1260000000000005ece0000 + + /* x^151616 mod p(x), x^151552 mod p(x) */ + .octa 0x00000000d8110000000000002a5e0000 + + /* x^150592 mod p(x), x^150528 mod p(x) */ + .octa 0x00000000099f00000000000052330000 + + /* x^149568 mod p(x), x^149504 mod p(x) */ + .octa 0x00000000f9f9000000000000f9120000 + + /* x^148544 mod p(x), x^148480 mod p(x) */ + .octa 0x000000005cc00000000000000ddc0000 + + /* x^147520 mod p(x), x^147456 mod p(x) */ + .octa 0x00000000343b00000000000012200000 + + /* x^146496 mod p(x), x^146432 mod p(x) */ + .octa 0x000000009222000000000000d12b0000 + + /* x^145472 mod p(x), x^145408 mod p(x) */ + .octa 0x00000000d781000000000000eb2d0000 + + /* x^144448 mod p(x), x^144384 mod p(x) */ + .octa 0x000000000bf400000000000058970000 + + /* x^143424 mod p(x), x^143360 mod p(x) */ + .octa 0x00000000094200000000000013690000 + + /* x^142400 mod p(x), x^142336 mod p(x) */ + .octa 0x00000000d55100000000000051950000 + + /* x^141376 mod p(x), x^141312 mod p(x) */ + .octa 0x000000008f11000000000000954b0000 + + /* x^140352 mod p(x), x^140288 mod p(x) */ + .octa 0x00000000140f000000000000b29e0000 + + /* x^139328 mod p(x), x^139264 mod p(x) */ + .octa 0x00000000c6db000000000000db5d0000 + + /* x^138304 mod p(x), x^138240 mod p(x) */ + .octa 0x00000000715b000000000000dfaf0000 + + /* x^137280 mod p(x), x^137216 mod p(x) */ + .octa 0x000000000dea000000000000e3b60000 + + /* x^136256 mod p(x), x^136192 mod p(x) */ + .octa 0x000000006f94000000000000ddaf0000 + + /* x^135232 mod p(x), x^135168 mod p(x) */ + .octa 0x0000000024e1000000000000e4f70000 + + /* x^134208 mod p(x), x^134144 mod p(x) */ + .octa 0x000000008810000000000000aa110000 + + /* x^133184 mod p(x), x^133120 mod p(x) */ + .octa 0x0000000030c2000000000000a8e60000 + + /* x^132160 mod p(x), x^132096 mod p(x) */ + .octa 0x00000000e6d0000000000000ccf30000 + + /* x^131136 mod p(x), x^131072 mod p(x) */ + .octa 0x000000004da000000000000079bf0000 + + /* x^130112 mod p(x), x^130048 mod p(x) */ + .octa 0x000000007759000000000000b3a30000 + + /* x^129088 mod p(x), x^129024 mod p(x) */ + .octa 0x00000000597400000000000028790000 + + /* x^128064 mod p(x), x^128000 mod p(x) */ + .octa 0x000000007acd000000000000b5820000 + + /* x^127040 mod p(x), x^126976 mod p(x) */ + .octa 0x00000000e6e400000000000026ad0000 + + /* x^126016 mod p(x), x^125952 mod p(x) */ + .octa 0x000000006d49000000000000985b0000 + + /* x^124992 mod p(x), x^124928 mod p(x) */ + .octa 0x000000000f0800000000000011520000 + + /* x^123968 mod p(x), x^123904 mod p(x) */ + .octa 0x000000002c7f000000000000846c0000 + + /* x^122944 mod p(x), x^122880 mod p(x) */ + .octa 0x000000005ce7000000000000ae1d0000 + + /* x^121920 mod p(x), x^121856 mod p(x) */ + .octa 0x00000000d4cb000000000000e21d0000 + + /* x^120896 mod p(x), x^120832 mod p(x) */ + .octa 0x000000003a2300000000000019bb0000 + + /* x^119872 mod p(x), x^119808 mod p(x) */ + .octa 0x000000000e1700000000000095290000 + + /* x^118848 mod p(x), x^118784 mod p(x) */ + .octa 0x000000006e6400000000000050d20000 + + /* x^117824 mod p(x), x^117760 mod p(x) */ + .octa 0x000000008d5c0000000000000cd10000 + + /* x^116800 mod p(x), x^116736 mod p(x) */ + .octa 0x00000000ef310000000000007b570000 + + /* x^115776 mod p(x), x^115712 mod p(x) */ + .octa 0x00000000645d00000000000053d60000 + + /* x^114752 mod p(x), x^114688 mod p(x) */ + .octa 0x0000000018fc00000000000077510000 + + /* x^113728 mod p(x), x^113664 mod p(x) */ + .octa 0x000000000cb3000000000000a7b70000 + + /* x^112704 mod p(x), x^112640 mod p(x) */ + .octa 0x00000000991b000000000000d0780000 + + /* x^111680 mod p(x), x^111616 mod p(x) */ + .octa 0x00000000845a000000000000be3c0000 + + /* x^110656 mod p(x), x^110592 mod p(x) */ + .octa 0x00000000d3a9000000000000df020000 + + /* x^109632 mod p(x), x^109568 mod p(x) */ + .octa 0x0000000017d7000000000000063e0000 + + /* x^108608 mod p(x), x^108544 mod p(x) */ + .octa 0x000000007a860000000000008ab40000 + + /* x^107584 mod p(x), x^107520 mod p(x) */ + .octa 0x00000000fd7c000000000000c7bd0000 + + /* x^106560 mod p(x), x^106496 mod p(x) */ + .octa 0x00000000a56b000000000000efd60000 + + /* x^105536 mod p(x), x^105472 mod p(x) */ + .octa 0x0000000010e400000000000071380000 + + /* x^104512 mod p(x), x^104448 mod p(x) */ + .octa 0x00000000994500000000000004d30000 + + /* x^103488 mod p(x), x^103424 mod p(x) */ + .octa 0x00000000b83c0000000000003b0e0000 + + /* x^102464 mod p(x), x^102400 mod p(x) */ + .octa 0x00000000d6c10000000000008b020000 + + /* x^101440 mod p(x), x^101376 mod p(x) */ + .octa 0x000000009efc000000000000da940000 + + /* x^100416 mod p(x), x^100352 mod p(x) */ + .octa 0x000000005e87000000000000f9f70000 + + /* x^99392 mod p(x), x^99328 mod p(x) */ + .octa 0x000000006c9b00000000000045e40000 + + /* x^98368 mod p(x), x^98304 mod p(x) */ + .octa 0x00000000178a00000000000083940000 + + /* x^97344 mod p(x), x^97280 mod p(x) */ + .octa 0x00000000f0c8000000000000f0a00000 + + /* x^96320 mod p(x), x^96256 mod p(x) */ + .octa 0x00000000f699000000000000b74b0000 + + /* x^95296 mod p(x), x^95232 mod p(x) */ + .octa 0x00000000316d000000000000c1cf0000 + + /* x^94272 mod p(x), x^94208 mod p(x) */ + .octa 0x00000000987e00000000000072680000 + + /* x^93248 mod p(x), x^93184 mod p(x) */ + .octa 0x00000000acff000000000000e0ab0000 + + /* x^92224 mod p(x), x^92160 mod p(x) */ + .octa 0x00000000a1f6000000000000c5a80000 + + /* x^91200 mod p(x), x^91136 mod p(x) */ + .octa 0x0000000061bd000000000000cf690000 + + /* x^90176 mod p(x), x^90112 mod p(x) */ + .octa 0x00000000c9f2000000000000cbcc0000 + + /* x^89152 mod p(x), x^89088 mod p(x) */ + .octa 0x000000005a33000000000000de050000 + + /* x^88128 mod p(x), x^88064 mod p(x) */ + .octa 0x00000000e416000000000000ccd70000 + + /* x^87104 mod p(x), x^87040 mod p(x) */ + .octa 0x0000000058930000000000002f670000 + + /* x^86080 mod p(x), x^86016 mod p(x) */ + .octa 0x00000000a9d3000000000000152f0000 + + /* x^85056 mod p(x), x^84992 mod p(x) */ + .octa 0x00000000c114000000000000ecc20000 + + /* x^84032 mod p(x), x^83968 mod p(x) */ + .octa 0x00000000b9270000000000007c890000 + + /* x^83008 mod p(x), x^82944 mod p(x) */ + .octa 0x000000002e6000000000000006ee0000 + + /* x^81984 mod p(x), x^81920 mod p(x) */ + .octa 0x00000000dfc600000000000009100000 + + /* x^80960 mod p(x), x^80896 mod p(x) */ + .octa 0x000000004911000000000000ad4e0000 + + /* x^79936 mod p(x), x^79872 mod p(x) */ + .octa 0x00000000ae1b000000000000b04d0000 + + /* x^78912 mod p(x), x^78848 mod p(x) */ + .octa 0x0000000005fa000000000000e9900000 + + /* x^77888 mod p(x), x^77824 mod p(x) */ + .octa 0x0000000004a1000000000000cc6f0000 + + /* x^76864 mod p(x), x^76800 mod p(x) */ + .octa 0x00000000af73000000000000ed110000 + + /* x^75840 mod p(x), x^75776 mod p(x) */ + .octa 0x0000000082530000000000008f7e0000 + + /* x^74816 mod p(x), x^74752 mod p(x) */ + .octa 0x00000000cfdc000000000000594f0000 + + /* x^73792 mod p(x), x^73728 mod p(x) */ + .octa 0x00000000a6b6000000000000a8750000 + + /* x^72768 mod p(x), x^72704 mod p(x) */ + .octa 0x00000000fd76000000000000aa0c0000 + + /* x^71744 mod p(x), x^71680 mod p(x) */ + .octa 0x0000000006f500000000000071db0000 + + /* x^70720 mod p(x), x^70656 mod p(x) */ + .octa 0x0000000037ca000000000000ab0c0000 + + /* x^69696 mod p(x), x^69632 mod p(x) */ + .octa 0x00000000d7ab000000000000b7a00000 + + /* x^68672 mod p(x), x^68608 mod p(x) */ + .octa 0x00000000440800000000000090d30000 + + /* x^67648 mod p(x), x^67584 mod p(x) */ + .octa 0x00000000186100000000000054730000 + + /* x^66624 mod p(x), x^66560 mod p(x) */ + .octa 0x000000007368000000000000a3a20000 + + /* x^65600 mod p(x), x^65536 mod p(x) */ + .octa 0x0000000026d0000000000000f9040000 + + /* x^64576 mod p(x), x^64512 mod p(x) */ + .octa 0x00000000fe770000000000009c0a0000 + + /* x^63552 mod p(x), x^63488 mod p(x) */ + .octa 0x000000002cba000000000000d1e70000 + + /* x^62528 mod p(x), x^62464 mod p(x) */ + .octa 0x00000000f8bd0000000000005ac10000 + + /* x^61504 mod p(x), x^61440 mod p(x) */ + .octa 0x000000007372000000000000d68d0000 + + /* x^60480 mod p(x), x^60416 mod p(x) */ + .octa 0x00000000f37f00000000000089f60000 + + /* x^59456 mod p(x), x^59392 mod p(x) */ + .octa 0x00000000078400000000000008a90000 + + /* x^58432 mod p(x), x^58368 mod p(x) */ + .octa 0x00000000d3e400000000000042360000 + + /* x^57408 mod p(x), x^57344 mod p(x) */ + .octa 0x00000000eba800000000000092d50000 + + /* x^56384 mod p(x), x^56320 mod p(x) */ + .octa 0x00000000afbe000000000000b4d50000 + + /* x^55360 mod p(x), x^55296 mod p(x) */ + .octa 0x00000000d8ca000000000000c9060000 + + /* x^54336 mod p(x), x^54272 mod p(x) */ + .octa 0x00000000c2d00000000000008f4f0000 + + /* x^53312 mod p(x), x^53248 mod p(x) */ + .octa 0x00000000373200000000000028690000 + + /* x^52288 mod p(x), x^52224 mod p(x) */ + .octa 0x0000000046ae000000000000c3b30000 + + /* x^51264 mod p(x), x^51200 mod p(x) */ + .octa 0x00000000b243000000000000f8700000 + + /* x^50240 mod p(x), x^50176 mod p(x) */ + .octa 0x00000000f7f500000000000029eb0000 + + /* x^49216 mod p(x), x^49152 mod p(x) */ + .octa 0x000000000c7e000000000000fe730000 + + /* x^48192 mod p(x), x^48128 mod p(x) */ + .octa 0x00000000c38200000000000096000000 + + /* x^47168 mod p(x), x^47104 mod p(x) */ + .octa 0x000000008956000000000000683c0000 + + /* x^46144 mod p(x), x^46080 mod p(x) */ + .octa 0x00000000422d0000000000005f1e0000 + + /* x^45120 mod p(x), x^45056 mod p(x) */ + .octa 0x00000000ac0f0000000000006f810000 + + /* x^44096 mod p(x), x^44032 mod p(x) */ + .octa 0x00000000ce30000000000000031f0000 + + /* x^43072 mod p(x), x^43008 mod p(x) */ + .octa 0x000000003d43000000000000455a0000 + + /* x^42048 mod p(x), x^41984 mod p(x) */ + .octa 0x000000007ebe000000000000a6050000 + + /* x^41024 mod p(x), x^40960 mod p(x) */ + .octa 0x00000000976e00000000000077eb0000 + + /* x^40000 mod p(x), x^39936 mod p(x) */ + .octa 0x000000000872000000000000389c0000 + + /* x^38976 mod p(x), x^38912 mod p(x) */ + .octa 0x000000008979000000000000c7b20000 + + /* x^37952 mod p(x), x^37888 mod p(x) */ + .octa 0x000000005c1e0000000000001d870000 + + /* x^36928 mod p(x), x^36864 mod p(x) */ + .octa 0x00000000aebb00000000000045810000 + + /* x^35904 mod p(x), x^35840 mod p(x) */ + .octa 0x000000004f7e0000000000006d4a0000 + + /* x^34880 mod p(x), x^34816 mod p(x) */ + .octa 0x00000000ea98000000000000b9200000 + + /* x^33856 mod p(x), x^33792 mod p(x) */ + .octa 0x00000000f39600000000000022f20000 + + /* x^32832 mod p(x), x^32768 mod p(x) */ + .octa 0x000000000bc500000000000041ca0000 + + /* x^31808 mod p(x), x^31744 mod p(x) */ + .octa 0x00000000786400000000000078500000 + + /* x^30784 mod p(x), x^30720 mod p(x) */ + .octa 0x00000000be970000000000009e7e0000 + + /* x^29760 mod p(x), x^29696 mod p(x) */ + .octa 0x00000000dd6d000000000000a53c0000 + + /* x^28736 mod p(x), x^28672 mod p(x) */ + .octa 0x000000004c3f00000000000039340000 + + /* x^27712 mod p(x), x^27648 mod p(x) */ + .octa 0x0000000093a4000000000000b58e0000 + + /* x^26688 mod p(x), x^26624 mod p(x) */ + .octa 0x0000000050fb00000000000062d40000 + + /* x^25664 mod p(x), x^25600 mod p(x) */ + .octa 0x00000000f505000000000000a26f0000 + + /* x^24640 mod p(x), x^24576 mod p(x) */ + .octa 0x0000000064f900000000000065e60000 + + /* x^23616 mod p(x), x^23552 mod p(x) */ + .octa 0x00000000e8c2000000000000aad90000 + + /* x^22592 mod p(x), x^22528 mod p(x) */ + .octa 0x00000000720b000000000000a3b00000 + + /* x^21568 mod p(x), x^21504 mod p(x) */ + .octa 0x00000000e992000000000000d2680000 + + /* x^20544 mod p(x), x^20480 mod p(x) */ + .octa 0x000000009132000000000000cf4c0000 + + /* x^19520 mod p(x), x^19456 mod p(x) */ + .octa 0x00000000608a00000000000076610000 + + /* x^18496 mod p(x), x^18432 mod p(x) */ + .octa 0x000000009948000000000000fb9f0000 + + /* x^17472 mod p(x), x^17408 mod p(x) */ + .octa 0x00000000173000000000000003770000 + + /* x^16448 mod p(x), x^16384 mod p(x) */ + .octa 0x000000006fe300000000000004880000 + + /* x^15424 mod p(x), x^15360 mod p(x) */ + .octa 0x00000000e15300000000000056a70000 + + /* x^14400 mod p(x), x^14336 mod p(x) */ + .octa 0x0000000092d60000000000009dfd0000 + + /* x^13376 mod p(x), x^13312 mod p(x) */ + .octa 0x0000000002fd00000000000074c80000 + + /* x^12352 mod p(x), x^12288 mod p(x) */ + .octa 0x00000000c78b000000000000a3ec0000 + + /* x^11328 mod p(x), x^11264 mod p(x) */ + .octa 0x000000009262000000000000b3530000 + + /* x^10304 mod p(x), x^10240 mod p(x) */ + .octa 0x0000000084f200000000000047bf0000 + + /* x^9280 mod p(x), x^9216 mod p(x) */ + .octa 0x0000000067ee000000000000e97c0000 + + /* x^8256 mod p(x), x^8192 mod p(x) */ + .octa 0x00000000535b00000000000091e10000 + + /* x^7232 mod p(x), x^7168 mod p(x) */ + .octa 0x000000007ebb00000000000055060000 + + /* x^6208 mod p(x), x^6144 mod p(x) */ + .octa 0x00000000c6a1000000000000fd360000 + + /* x^5184 mod p(x), x^5120 mod p(x) */ + .octa 0x000000001be500000000000055860000 + + /* x^4160 mod p(x), x^4096 mod p(x) */ + .octa 0x00000000ae0e0000000000005bd00000 + + /* x^3136 mod p(x), x^3072 mod p(x) */ + .octa 0x0000000022040000000000008db20000 + + /* x^2112 mod p(x), x^2048 mod p(x) */ + .octa 0x00000000c9eb000000000000efe20000 + + /* x^1088 mod p(x), x^1024 mod p(x) */ + .octa 0x0000000039b400000000000051d10000 + +.short_constants: + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + /* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */ + .octa 0xefe20000dccf00009440000033590000 + + /* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */ + .octa 0xee6300002f3f000062180000e0ed0000 + + /* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */ + .octa 0xcf5f000017ef0000ccbe000023d30000 + + /* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), x^1568 mod p(x) */ + .octa 0x6d0c0000a30e00000920000042630000 + + /* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */ + .octa 0x21d30000932b0000a7a00000efcc0000 + + /* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */ + .octa 0x10be00000b310000666f00000d1c0000 + + /* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */ + .octa 0x1f240000ce9e0000caad0000589e0000 + + /* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */ + .octa 0x29610000d02b000039b400007cf50000 + + /* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */ + .octa 0x51d100009d9d00003c0e0000bfd60000 + + /* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */ + .octa 0xda390000ceae000013830000713c0000 + + /* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */ + .octa 0xb67800001e16000085c0000080a60000 + + /* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */ + .octa 0x0db40000f7f90000371d0000e6580000 + + /* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */ + .octa 0x87e70000044c0000aadb0000a4970000 + + /* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */ + .octa 0x1f990000ad180000d8b30000e7b50000 + + /* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */ + .octa 0xbe6c00006ee300004c1a000006df0000 + + /* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */ + .octa 0xfb0b00002d560000136800008bb70000 + + +.barrett_constants: + /* Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */ + /* Barrett constant n */ + .octa 0x0000000000000000000000018bb70000 + +#define CRC_FUNCTION_NAME __crct10dif_vpmsum +#include "crc32-vpmsum_core.S" diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c new file mode 100644 index 000000000000..3ab7ba159c20 --- /dev/null +++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c @@ -0,0 +1,125 @@ +/* + * Calculate a CRC T10-DIF with vpmsum acceleration + * + * Copyright 2017, Daniel Axtens, IBM Corporation. + * [based on crc32c-vpmsum_glue.c] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 64 + +u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len); + +static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + u32 crc = crci; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt()) + return crc_t10dif_generic(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc_t10dif_generic(crc, p, prealign); + len -= prealign; + p += prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + crc <<= 16; + pagefault_disable(); + enable_kernel_altivec(); + crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + pagefault_enable(); + crc >>= 16; + } + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & ~VMX_ALIGN_MASK; + crc = crc_t10dif_generic(crc, p, tail); + } + + return crc & 0xffff; +} + +static int crct10dif_vpmsum_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + return 0; +} + +static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = crct10dif_vpmsum(*crc, data, length); + + return 0; +} + + +static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out) +{ + u16 *crcp = shash_desc_ctx(desc); + + *(u16 *)out = *crcp; + return 0; +} + +static struct shash_alg alg = { + .init = crct10dif_vpmsum_init, + .update = crct10dif_vpmsum_update, + .final = crct10dif_vpmsum_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-vpmsum", + .cra_priority = 200, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_vpmsum_mod_init(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit crct10dif_vpmsum_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init); +module_exit(crct10dif_vpmsum_mod_fini); + +MODULE_AUTHOR("Daniel Axtens "); +MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-vpmsum"); diff --git a/crypto/Kconfig b/crypto/Kconfig index f37e9cca50e1..9cf63dd84364 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -513,6 +513,15 @@ config CRYPTO_CRCT10DIF_PCLMUL 'crct10dif-plcmul' module, which is faster when computing the crct10dif checksum as compared with the generic table implementation. +config CRYPTO_CRCT10DIF_VPMSUM + tristate "CRC32T10DIF powerpc64 hardware acceleration" + depends on PPC64 && ALTIVEC && CRC_T10DIF + select CRYPTO_HASH + help + CRC10T10DIF algorithm implemented using vector polynomial + multiply-sum (vpmsum) instructions, introduced in POWER8. Enable on + POWER8 and newer processors for improved performance. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL -- 2.9.3