Received-SPF: pass (google.com: best guess record for domain of linux-kernel-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) client-ip=209.132.180.67;
Message-ID: <1535411993.3516.4.camel@megha-Z97X-UD7-TH>
Subject: Re: [PATCH] crypto: x86 - remove SHA multibuffer routines and
 mcryptd
From:   Megha Dey <megha.dey@linux.intel.com>
To:     Tim Chen <tim.c.chen@linux.intel.com>
Cc:     Ard Biesheuvel <ard.biesheuvel@linaro.org>,
        linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au,
        linux-m68k@lists.linux-m68k.org, linux-s390@vger.kernel.org,
        linux-kernel@vger.kernel.org, ebiggers@google.com,
        Geert Uytterhoeven <geert@linux-m68k.org>,
        Martin Schwidefsky <schwidefsky@de.ibm.com>,
        Heiko Carstens <heiko.carstens@de.ibm.com>,
        Thomas Gleixner <tglx@linutronix.de>,
        Ingo Molnar <mingo@redhat.com>
Date:   Mon, 27 Aug 2018 16:19:53 -0700
In-Reply-To: <a877e275-4dd7-6580-2351-db8d3a4ba1a2@linux.intel.com>
References: <20180822085144.4872-1-ard.biesheuvel@linaro.org>
         <a877e275-4dd7-6580-2351-db8d3a4ba1a2@linux.intel.com>
Content-Type: text/plain; charset="UTF-8"
Mime-Version: 1.0
Content-Transfer-Encoding: 7bit
Sender: linux-kernel-owner@vger.kernel.org
Precedence: bulk

On Mon, 2018-08-27 at 15:28 -0700, Tim Chen wrote:
> On 08/22/2018 01:51 AM, Ard Biesheuvel wrote:
> > As it turns out, the AVX2 multibuffer SHA routines are currently
> > broken [0], in a way that would have likely been noticed if this
> > code were in wide use. Since the code is too complicated to be
> > maintained by anyone except the original authors, and since the
> > performance benefits for real-world use cases are debatable to
> > begin with, it is better to drop it entirely for the moment.
> > 
> > [0] https://marc.info/?l=linux-crypto-vger&m=153476243825350&w=2
> 
> Sorry I was out of the loop for a while and haven't been following
> the code too closely.
> 
> Megha is maintaining the code now.  Before we pull the code,
> please give us a chance to fix it first.
> 
> Thanks.
> 
> Tim
> 

Hi,

I am working on a fix for the corner cases reported in [0]. If possible,
we would like to fix these issues rather than remove the code altogether.

-Megha
> > 
> > Suggested-by: Eric Biggers <ebiggers@google.com>
> > Cc: Megha Dey <megha.dey@linux.intel.com>
> > Cc: Tim Chen <tim.c.chen@linux.intel.com>
> > Cc: Geert Uytterhoeven <geert@linux-m68k.org>
> > Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> > Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > ---
> >  MAINTAINERS                                   |    8 -
> >  arch/m68k/configs/amiga_defconfig             |    1 -
> >  arch/m68k/configs/apollo_defconfig            |    1 -
> >  arch/m68k/configs/atari_defconfig             |    1 -
> >  arch/m68k/configs/bvme6000_defconfig          |    1 -
> >  arch/m68k/configs/hp300_defconfig             |    1 -
> >  arch/m68k/configs/mac_defconfig               |    1 -
> >  arch/m68k/configs/multi_defconfig             |    1 -
> >  arch/m68k/configs/mvme147_defconfig           |    1 -
> >  arch/m68k/configs/mvme16x_defconfig           |    1 -
> >  arch/m68k/configs/q40_defconfig               |    1 -
> >  arch/m68k/configs/sun3_defconfig              |    1 -
> >  arch/m68k/configs/sun3x_defconfig             |    1 -
> >  arch/s390/configs/debug_defconfig             |    1 -
> >  arch/s390/configs/performance_defconfig       |    1 -
> >  arch/x86/crypto/Makefile                      |    3 -
> >  arch/x86/crypto/sha1-mb/Makefile              |   14 -
> >  arch/x86/crypto/sha1-mb/sha1_mb.c             | 1011 ----------------
> >  arch/x86/crypto/sha1-mb/sha1_mb_ctx.h         |  134 ---
> >  arch/x86/crypto/sha1-mb/sha1_mb_mgr.h         |  110 --
> >  .../crypto/sha1-mb/sha1_mb_mgr_datastruct.S   |  287 -----
> >  .../crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S   |  304 -----
> >  .../crypto/sha1-mb/sha1_mb_mgr_init_avx2.c    |   64 -
> >  .../crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S  |  209 ----
> >  arch/x86/crypto/sha1-mb/sha1_x8_avx2.S        |  492 --------
> >  arch/x86/crypto/sha256-mb/Makefile            |   14 -
> >  arch/x86/crypto/sha256-mb/sha256_mb.c         | 1013 ----------------
> >  arch/x86/crypto/sha256-mb/sha256_mb_ctx.h     |  134 ---
> >  arch/x86/crypto/sha256-mb/sha256_mb_mgr.h     |  108 --
> >  .../sha256-mb/sha256_mb_mgr_datastruct.S      |  304 -----
> >  .../sha256-mb/sha256_mb_mgr_flush_avx2.S      |  307 -----
> >  .../sha256-mb/sha256_mb_mgr_init_avx2.c       |   65 -
> >  .../sha256-mb/sha256_mb_mgr_submit_avx2.S     |  214 ----
> >  arch/x86/crypto/sha256-mb/sha256_x8_avx2.S    |  598 ----------
> >  arch/x86/crypto/sha512-mb/Makefile            |   12 -
> >  arch/x86/crypto/sha512-mb/sha512_mb.c         | 1047 -----------------
> >  arch/x86/crypto/sha512-mb/sha512_mb_ctx.h     |  128 --
> >  arch/x86/crypto/sha512-mb/sha512_mb_mgr.h     |  104 --
> >  .../sha512-mb/sha512_mb_mgr_datastruct.S      |  281 -----
> >  .../sha512-mb/sha512_mb_mgr_flush_avx2.S      |  297 -----
> >  .../sha512-mb/sha512_mb_mgr_init_avx2.c       |   69 --
> >  .../sha512-mb/sha512_mb_mgr_submit_avx2.S     |  224 ----
> >  arch/x86/crypto/sha512-mb/sha512_x4_avx2.S    |  531 ---------
> >  crypto/Kconfig                                |   62 -
> >  crypto/Makefile                               |    1 -
> >  crypto/mcryptd.c                              |  675 -----------
> >  include/crypto/mcryptd.h                      |  114 --
> >  47 files changed, 8952 deletions(-)
> >  delete mode 100644 arch/x86/crypto/sha1-mb/Makefile
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb.c
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha256-mb/Makefile
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb.c
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha512-mb/Makefile
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb.c
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
> >  delete mode 100644 arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
> >  delete mode 100644 crypto/mcryptd.c
> >  delete mode 100644 include/crypto/mcryptd.h
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 24b200d91b30..05747b8ac88e 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -7487,14 +7487,6 @@ S:	Supported
> >  F:	drivers/infiniband/hw/i40iw/
> >  F:	include/uapi/rdma/i40iw-abi.h
> >  
> > -INTEL SHA MULTIBUFFER DRIVER
> > -M:	Megha Dey <megha.dey@linux.intel.com>
> > -R:	Tim Chen <tim.c.chen@linux.intel.com>
> > -L:	linux-crypto@vger.kernel.org
> > -S:	Supported
> > -F:	arch/x86/crypto/sha*-mb/
> > -F:	crypto/mcryptd.c
> > -
> >  INTEL TELEMETRY DRIVER
> >  M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
> >  L:	platform-driver-x86@vger.kernel.org
> > diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
> > index 1d5483f6e457..70b10d712624 100644
> > --- a/arch/m68k/configs/amiga_defconfig
> > +++ b/arch/m68k/configs/amiga_defconfig
> > @@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
> > index 52a0af127951..211eec5859e8 100644
> > --- a/arch/m68k/configs/apollo_defconfig
> > +++ b/arch/m68k/configs/apollo_defconfig
> > @@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
> > index b3103e51268a..0da45c6084f7 100644
> > --- a/arch/m68k/configs/atari_defconfig
> > +++ b/arch/m68k/configs/atari_defconfig
> > @@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
> > index fb7d651a4cab..c09ae7219416 100644
> > --- a/arch/m68k/configs/bvme6000_defconfig
> > +++ b/arch/m68k/configs/bvme6000_defconfig
> > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
> > index 6b37f5537c39..8c4775b30748 100644
> > --- a/arch/m68k/configs/hp300_defconfig
> > +++ b/arch/m68k/configs/hp300_defconfig
> > @@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
> > index c717bf879449..48ad520e2f2d 100644
> > --- a/arch/m68k/configs/mac_defconfig
> > +++ b/arch/m68k/configs/mac_defconfig
> > @@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
> > index 226c994ce794..3a3cccb9f625 100644
> > --- a/arch/m68k/configs/multi_defconfig
> > +++ b/arch/m68k/configs/multi_defconfig
> > @@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
> > index b383327fd77a..63dc311f94ff 100644
> > --- a/arch/m68k/configs/mvme147_defconfig
> > +++ b/arch/m68k/configs/mvme147_defconfig
> > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
> > index 9783d3deb9e9..1ae39d1f9bb5 100644
> > --- a/arch/m68k/configs/mvme16x_defconfig
> > +++ b/arch/m68k/configs/mvme16x_defconfig
> > @@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
> > index a35d10ee10cb..ba2f351811da 100644
> > --- a/arch/m68k/configs/q40_defconfig
> > +++ b/arch/m68k/configs/q40_defconfig
> > @@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
> > index 573bf922d448..544b7475ff6a 100644
> > --- a/arch/m68k/configs/sun3_defconfig
> > +++ b/arch/m68k/configs/sun3_defconfig
> > @@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
> > index efb27a7fcc55..149edafbb9f9 100644
> > --- a/arch/m68k/configs/sun3x_defconfig
> > +++ b/arch/m68k/configs/sun3x_defconfig
> > @@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
> >  CONFIG_CRYPTO_MANAGER=y
> >  CONFIG_CRYPTO_USER=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_AEGIS128=m
> > diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
> > index 941d8cc6c9f5..259d1698ac50 100644
> > --- a/arch/s390/configs/debug_defconfig
> > +++ b/arch/s390/configs/debug_defconfig
> > @@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
> >  # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
> >  CONFIG_CRYPTO_PCRYPT=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_LRW=m
> > diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
> > index eb6f75f24208..37fd60c20e22 100644
> > --- a/arch/s390/configs/performance_defconfig
> > +++ b/arch/s390/configs/performance_defconfig
> > @@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
> >  # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
> >  CONFIG_CRYPTO_PCRYPT=m
> >  CONFIG_CRYPTO_CRYPTD=m
> > -CONFIG_CRYPTO_MCRYPTD=m
> >  CONFIG_CRYPTO_TEST=m
> >  CONFIG_CRYPTO_CHACHA20POLY1305=m
> >  CONFIG_CRYPTO_LRW=m
> > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > index a450ad573dcb..9edfa5469f9f 100644
> > --- a/arch/x86/crypto/Makefile
> > +++ b/arch/x86/crypto/Makefile
> > @@ -60,9 +60,6 @@ endif
> >  ifeq ($(avx2_supported),yes)
> >  	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
> >  	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
> > -	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
> > -	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
> > -	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
> >  
> >  	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
> >  endif
> > diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
> > deleted file mode 100644
> > index 815ded3ba90e..000000000000
> > --- a/arch/x86/crypto/sha1-mb/Makefile
> > +++ /dev/null
> > @@ -1,14 +0,0 @@
> > -# SPDX-License-Identifier: GPL-2.0
> > -#
> > -# Arch-specific CryptoAPI modules.
> > -#
> > -
> > -OBJECT_FILES_NON_STANDARD := y
> > -
> > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
> > -                                $(comma)4)$(comma)%ymm2,yes,no)
> > -ifeq ($(avx2_supported),yes)
> > -	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
> > -	sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
> > -	     sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
> > -endif
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
> > deleted file mode 100644
> > index b93805664c1d..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb.c
> > +++ /dev/null
> > @@ -1,1011 +0,0 @@
> > -/*
> > - * Multi buffer SHA1 algorithm Glue Code
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
> > -
> > -#include <crypto/internal/hash.h>
> > -#include <linux/init.h>
> > -#include <linux/module.h>
> > -#include <linux/mm.h>
> > -#include <linux/cryptohash.h>
> > -#include <linux/types.h>
> > -#include <linux/list.h>
> > -#include <crypto/scatterwalk.h>
> > -#include <crypto/sha.h>
> > -#include <crypto/mcryptd.h>
> > -#include <crypto/crypto_wq.h>
> > -#include <asm/byteorder.h>
> > -#include <linux/hardirq.h>
> > -#include <asm/fpu/api.h>
> > -#include "sha1_mb_ctx.h"
> > -
> > -#define FLUSH_INTERVAL 1000 /* in usec */
> > -
> > -static struct mcryptd_alg_state sha1_mb_alg_state;
> > -
> > -struct sha1_mb_ctx {
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -};
> > -
> > -static inline struct mcryptd_hash_request_ctx
> > -		*cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
> > -{
> > -	struct ahash_request *areq;
> > -
> > -	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
> > -	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -}
> > -
> > -static inline struct ahash_request
> > -		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
> > -{
> > -	return container_of((void *) ctx, struct ahash_request, __ctx);
> > -}
> > -
> > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
> > -				struct ahash_request *areq)
> > -{
> > -	rctx->flag = HASH_UPDATE;
> > -}
> > -
> > -static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
> > -static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
> > -			(struct sha1_mb_mgr *state, struct job_sha1 *job);
> > -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
> > -						(struct sha1_mb_mgr *state);
> > -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
> > -						(struct sha1_mb_mgr *state);
> > -
> > -static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
> > -			 uint64_t total_len)
> > -{
> > -	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
> > -
> > -	memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
> > -	padblock[i] = 0x80;
> > -
> > -	i += ((SHA1_BLOCK_SIZE - 1) &
> > -	      (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
> > -	     + 1 + SHA1_PADLENGTHFIELD_SIZE;
> > -
> > -#if SHA1_PADLENGTHFIELD_SIZE == 16
> > -	*((uint64_t *) &padblock[i - 16]) = 0;
> > -#endif
> > -
> > -	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
> > -
> > -	/* Number of extra blocks to hash */
> > -	return i >> SHA1_LOG2_BLOCK_SIZE;
> > -}
> > -
> > -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
> > -						struct sha1_hash_ctx *ctx)
> > -{
> > -	while (ctx) {
> > -		if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -			/* Clear PROCESSING bit */
> > -			ctx->status = HASH_CTX_STS_COMPLETE;
> > -			return ctx;
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are empty, begin hashing what remains
> > -		 * in the user's buffer.
> > -		 */
> > -		if (ctx->partial_block_buffer_length == 0 &&
> > -		    ctx->incoming_buffer_length) {
> > -
> > -			const void *buffer = ctx->incoming_buffer;
> > -			uint32_t len = ctx->incoming_buffer_length;
> > -			uint32_t copy_len;
> > -
> > -			/*
> > -			 * Only entire blocks can be hashed.
> > -			 * Copy remainder to extra blocks buffer.
> > -			 */
> > -			copy_len = len & (SHA1_BLOCK_SIZE-1);
> > -
> > -			if (copy_len) {
> > -				len -= copy_len;
> > -				memcpy(ctx->partial_block_buffer,
> > -				       ((const char *) buffer + len),
> > -				       copy_len);
> > -				ctx->partial_block_buffer_length = copy_len;
> > -			}
> > -
> > -			ctx->incoming_buffer_length = 0;
> > -
> > -			/* len should be a multiple of the block size now */
> > -			assert((len % SHA1_BLOCK_SIZE) == 0);
> > -
> > -			/* Set len to the number of blocks to be hashed */
> > -			len >>= SHA1_LOG2_BLOCK_SIZE;
> > -
> > -			if (len) {
> > -
> > -				ctx->job.buffer = (uint8_t *) buffer;
> > -				ctx->job.len = len;
> > -				ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
> > -										&ctx->job);
> > -				continue;
> > -			}
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are not empty, then we are
> > -		 * either on the last block(s) or we need more
> > -		 * user input before continuing.
> > -		 */
> > -		if (ctx->status & HASH_CTX_STS_LAST) {
> > -
> > -			uint8_t *buf = ctx->partial_block_buffer;
> > -			uint32_t n_extra_blocks =
> > -					sha1_pad(buf, ctx->total_length);
> > -
> > -			ctx->status = (HASH_CTX_STS_PROCESSING |
> > -				       HASH_CTX_STS_COMPLETE);
> > -			ctx->job.buffer = buf;
> > -			ctx->job.len = (uint32_t) n_extra_blocks;
> > -			ctx = (struct sha1_hash_ctx *)
> > -				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -			continue;
> > -		}
> > -
> > -		ctx->status = HASH_CTX_STS_IDLE;
> > -		return ctx;
> > -	}
> > -
> > -	return NULL;
> > -}
> > -
> > -static struct sha1_hash_ctx
> > -			*sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
> > -{
> > -	/*
> > -	 * If get_comp_job returns NULL, there are no jobs complete.
> > -	 * If get_comp_job returns a job, verify that it is safe to return to
> > -	 * the user.
> > -	 * If it is not ready, resubmit the job to finish processing.
> > -	 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
> > -	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
> > -	 * still need processing.
> > -	 */
> > -	struct sha1_hash_ctx *ctx;
> > -
> > -	ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
> > -	return sha1_ctx_mgr_resubmit(mgr, ctx);
> > -}
> > -
> > -static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
> > -{
> > -	sha1_job_mgr_init(&mgr->mgr);
> > -}
> > -
> > -static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
> > -					  struct sha1_hash_ctx *ctx,
> > -					  const void *buffer,
> > -					  uint32_t len,
> > -					  int flags)
> > -{
> > -	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
> > -		/* User should not pass anything other than UPDATE or LAST */
> > -		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
> > -		return ctx;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_PROCESSING) {
> > -		/* Cannot submit to a currently processing job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
> > -		return ctx;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -		/* Cannot update a finished job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
> > -		return ctx;
> > -	}
> > -
> > -	/*
> > -	 * If we made it here, there were no errors during this call to
> > -	 * submit
> > -	 */
> > -	ctx->error = HASH_CTX_ERROR_NONE;
> > -
> > -	/* Store buffer ptr info from user */
> > -	ctx->incoming_buffer = buffer;
> > -	ctx->incoming_buffer_length = len;
> > -
> > -	/*
> > -	 * Store the user's request flags and mark this ctx as currently
> > -	 * being processed.
> > -	 */
> > -	ctx->status = (flags & HASH_LAST) ?
> > -			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
> > -			HASH_CTX_STS_PROCESSING;
> > -
> > -	/* Advance byte counter */
> > -	ctx->total_length += len;
> > -
> > -	/*
> > -	 * If there is anything currently buffered in the extra blocks,
> > -	 * append to it until it contains a whole block.
> > -	 * Or if the user's buffer contains less than a whole block,
> > -	 * append as much as possible to the extra block.
> > -	 */
> > -	if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
> > -		/*
> > -		 * Compute how many bytes to copy from user buffer into
> > -		 * extra block
> > -		 */
> > -		uint32_t copy_len = SHA1_BLOCK_SIZE -
> > -					ctx->partial_block_buffer_length;
> > -		if (len < copy_len)
> > -			copy_len = len;
> > -
> > -		if (copy_len) {
> > -			/* Copy and update relevant pointers and counters */
> > -			memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
> > -				buffer, copy_len);
> > -
> > -			ctx->partial_block_buffer_length += copy_len;
> > -			ctx->incoming_buffer = (const void *)
> > -					((const char *)buffer + copy_len);
> > -			ctx->incoming_buffer_length = len - copy_len;
> > -		}
> > -
> > -		/*
> > -		 * The extra block should never contain more than 1 block
> > -		 * here
> > -		 */
> > -		assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
> > -
> > -		/*
> > -		 * If the extra block buffer contains exactly 1 block, it can
> > -		 * be hashed.
> > -		 */
> > -		if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
> > -			ctx->partial_block_buffer_length = 0;
> > -
> > -			ctx->job.buffer = ctx->partial_block_buffer;
> > -			ctx->job.len = 1;
> > -			ctx = (struct sha1_hash_ctx *)
> > -				sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -		}
> > -	}
> > -
> > -	return sha1_ctx_mgr_resubmit(mgr, ctx);
> > -}
> > -
> > -static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
> > -{
> > -	struct sha1_hash_ctx *ctx;
> > -
> > -	while (1) {
> > -		ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
> > -
> > -		/* If flush returned 0, there are no more jobs in flight. */
> > -		if (!ctx)
> > -			return NULL;
> > -
> > -		/*
> > -		 * If flush returned a job, resubmit the job to finish
> > -		 * processing.
> > -		 */
> > -		ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
> > -
> > -		/*
> > -		 * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
> > -		 * returned. Otherwise, all jobs currently being managed by the
> > -		 * sha1_ctx_mgr still need processing. Loop.
> > -		 */
> > -		if (ctx)
> > -			return ctx;
> > -	}
> > -}
> > -
> > -static int sha1_mb_init(struct ahash_request *areq)
> > -{
> > -	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	hash_ctx_init(sctx);
> > -	sctx->job.result_digest[0] = SHA1_H0;
> > -	sctx->job.result_digest[1] = SHA1_H1;
> > -	sctx->job.result_digest[2] = SHA1_H2;
> > -	sctx->job.result_digest[3] = SHA1_H3;
> > -	sctx->job.result_digest[4] = SHA1_H4;
> > -	sctx->total_length = 0;
> > -	sctx->partial_block_buffer_length = 0;
> > -	sctx->status = HASH_CTX_STS_IDLE;
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
> > -{
> > -	int	i;
> > -	struct	sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
> > -	__be32	*dst = (__be32 *) rctx->out;
> > -
> > -	for (i = 0; i < 5; ++i)
> > -		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
> > -			struct mcryptd_alg_cstate *cstate, bool flush)
> > -{
> > -	int	flag = HASH_UPDATE;
> > -	int	nbytes, err = 0;
> > -	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
> > -	struct sha1_hash_ctx *sha_ctx;
> > -
> > -	/* more work ? */
> > -	while (!(rctx->flag & HASH_DONE)) {
> > -		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
> > -		if (nbytes < 0) {
> > -			err = nbytes;
> > -			goto out;
> > -		}
> > -		/* check if the walk is done */
> > -		if (crypto_ahash_walk_last(&rctx->walk)) {
> > -			rctx->flag |= HASH_DONE;
> > -			if (rctx->flag & HASH_FINAL)
> > -				flag |= HASH_LAST;
> > -
> > -		}
> > -		sha_ctx = (struct sha1_hash_ctx *)
> > -						ahash_request_ctx(&rctx->areq);
> > -		kernel_fpu_begin();
> > -		sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
> > -						rctx->walk.data, nbytes, flag);
> > -		if (!sha_ctx) {
> > -			if (flush)
> > -				sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
> > -		}
> > -		kernel_fpu_end();
> > -		if (sha_ctx)
> > -			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		else {
> > -			rctx = NULL;
> > -			goto out;
> > -		}
> > -	}
> > -
> > -	/* copy the results */
> > -	if (rctx->flag & HASH_FINAL)
> > -		sha1_mb_set_results(rctx);
> > -
> > -out:
> > -	*ret_rctx = rctx;
> > -	return err;
> > -}
> > -
> > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
> > -			    struct mcryptd_alg_cstate *cstate,
> > -			    int err)
> > -{
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha1_hash_ctx *sha_ctx;
> > -	struct mcryptd_hash_request_ctx *req_ctx;
> > -	int ret;
> > -
> > -	/* remove from work list */
> > -	spin_lock(&cstate->work_lock);
> > -	list_del(&rctx->waiter);
> > -	spin_unlock(&cstate->work_lock);
> > -
> > -	if (irqs_disabled())
> > -		rctx->complete(&req->base, err);
> > -	else {
> > -		local_bh_disable();
> > -		rctx->complete(&req->base, err);
> > -		local_bh_enable();
> > -	}
> > -
> > -	/* check to see if there are other jobs that are done */
> > -	sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
> > -	while (sha_ctx) {
> > -		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		ret = sha_finish_walk(&req_ctx, cstate, false);
> > -		if (req_ctx) {
> > -			spin_lock(&cstate->work_lock);
> > -			list_del(&req_ctx->waiter);
> > -			spin_unlock(&cstate->work_lock);
> > -
> > -			req = cast_mcryptd_ctx_to_req(req_ctx);
> > -			if (irqs_disabled())
> > -				req_ctx->complete(&req->base, ret);
> > -			else {
> > -				local_bh_disable();
> > -				req_ctx->complete(&req->base, ret);
> > -				local_bh_enable();
> > -			}
> > -		}
> > -		sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
> > -			     struct mcryptd_alg_cstate *cstate)
> > -{
> > -	unsigned long next_flush;
> > -	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
> > -
> > -	/* initialize tag */
> > -	rctx->tag.arrival = jiffies;    /* tag the arrival time */
> > -	rctx->tag.seq_num = cstate->next_seq_num++;
> > -	next_flush = rctx->tag.arrival + delay;
> > -	rctx->tag.expire = next_flush;
> > -
> > -	spin_lock(&cstate->work_lock);
> > -	list_add_tail(&rctx->waiter, &cstate->work_list);
> > -	spin_unlock(&cstate->work_lock);
> > -
> > -	mcryptd_arm_flusher(cstate, delay);
> > -}
> > -
> > -static int sha1_mb_update(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -		container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha1_hash_ctx *sha_ctx;
> > -	int ret = 0, nbytes;
> > -
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk))
> > -		rctx->flag |= HASH_DONE;
> > -
> > -	/* submit */
> > -	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
> > -	sha1_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
> > -							nbytes, HASH_UPDATE);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha1_mb_finup(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -		container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha1_hash_ctx *sha_ctx;
> > -	int ret = 0, flag = HASH_UPDATE, nbytes;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk)) {
> > -		rctx->flag |= HASH_DONE;
> > -		flag = HASH_LAST;
> > -	}
> > -
> > -	/* submit */
> > -	rctx->flag |= HASH_FINAL;
> > -	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
> > -	sha1_mb_add_list(rctx, cstate);
> > -
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
> > -								nbytes, flag);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha1_mb_final(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -		container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
> > -
> > -	struct sha1_hash_ctx *sha_ctx;
> > -	int ret = 0;
> > -	u8 data;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	rctx->flag |= HASH_DONE | HASH_FINAL;
> > -
> > -	sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
> > -	/* flag HASH_FINAL and 0 data size */
> > -	sha1_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
> > -								HASH_LAST);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha1_mb_export(struct ahash_request *areq, void *out)
> > -{
> > -	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(out, sctx, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha1_mb_import(struct ahash_request *areq, const void *in)
> > -{
> > -	struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(sctx, in, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -	struct mcryptd_hash_ctx *mctx;
> > -
> > -	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
> > -						CRYPTO_ALG_INTERNAL,
> > -						CRYPTO_ALG_INTERNAL);
> > -	if (IS_ERR(mcryptd_tfm))
> > -		return PTR_ERR(mcryptd_tfm);
> > -	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
> > -	mctx->alg_state = &sha1_mb_alg_state;
> > -	ctx->mcryptd_tfm = mcryptd_tfm;
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				crypto_ahash_reqsize(&mcryptd_tfm->base));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				sizeof(struct sha1_hash_ctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static struct ahash_alg sha1_mb_areq_alg = {
> > -	.init		=	sha1_mb_init,
> > -	.update		=	sha1_mb_update,
> > -	.final		=	sha1_mb_final,
> > -	.finup		=	sha1_mb_finup,
> > -	.export		=	sha1_mb_export,
> > -	.import		=	sha1_mb_import,
> > -	.halg		=	{
> > -		.digestsize	=	SHA1_DIGEST_SIZE,
> > -		.statesize	=	sizeof(struct sha1_hash_ctx),
> > -		.base		=	{
> > -			.cra_name	 = "__sha1-mb",
> > -			.cra_driver_name = "__intel_sha1-mb",
> > -			.cra_priority	 = 100,
> > -			/*
> > -			 * use ASYNC flag as some buffers in multi-buffer
> > -			 * algo may not have completed before hashing thread
> > -			 * sleep
> > -			 */
> > -			.cra_flags	= CRYPTO_ALG_ASYNC |
> > -					  CRYPTO_ALG_INTERNAL,
> > -			.cra_blocksize	= SHA1_BLOCK_SIZE,
> > -			.cra_module	= THIS_MODULE,
> > -			.cra_list	= LIST_HEAD_INIT
> > -					(sha1_mb_areq_alg.halg.base.cra_list),
> > -			.cra_init	= sha1_mb_areq_init_tfm,
> > -			.cra_exit	= sha1_mb_areq_exit_tfm,
> > -			.cra_ctxsize	= sizeof(struct sha1_hash_ctx),
> > -		}
> > -	}
> > -};
> > -
> > -static int sha1_mb_async_init(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_init(mcryptd_req);
> > -}
> > -
> > -static int sha1_mb_async_update(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_update(mcryptd_req);
> > -}
> > -
> > -static int sha1_mb_async_finup(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_finup(mcryptd_req);
> > -}
> > -
> > -static int sha1_mb_async_final(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_final(mcryptd_req);
> > -}
> > -
> > -static int sha1_mb_async_digest(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_digest(mcryptd_req);
> > -}
> > -
> > -static int sha1_mb_async_export(struct ahash_request *req, void *out)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_export(mcryptd_req, out);
> > -}
> > -
> > -static int sha1_mb_async_import(struct ahash_request *req, const void *in)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	struct ahash_request *areq;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	rctx = ahash_request_ctx(mcryptd_req);
> > -	areq = &rctx->areq;
> > -
> > -	ahash_request_set_tfm(areq, child);
> > -	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
> > -					rctx->complete, req);
> > -
> > -	return crypto_ahash_import(mcryptd_req, in);
> > -}
> > -
> > -static struct ahash_alg sha1_mb_async_alg = {
> > -	.init           = sha1_mb_async_init,
> > -	.update         = sha1_mb_async_update,
> > -	.final          = sha1_mb_async_final,
> > -	.finup          = sha1_mb_async_finup,
> > -	.digest         = sha1_mb_async_digest,
> > -	.export		= sha1_mb_async_export,
> > -	.import		= sha1_mb_async_import,
> > -	.halg = {
> > -		.digestsize     = SHA1_DIGEST_SIZE,
> > -		.statesize	= sizeof(struct sha1_hash_ctx),
> > -		.base = {
> > -			.cra_name               = "sha1",
> > -			.cra_driver_name        = "sha1_mb",
> > -			/*
> > -			 * Low priority, since with few concurrent hash requests
> > -			 * this is extremely slow due to the flush delay.  Users
> > -			 * whose workloads would benefit from this can request
> > -			 * it explicitly by driver name, or can increase its
> > -			 * priority at runtime using NETLINK_CRYPTO.
> > -			 */
> > -			.cra_priority           = 50,
> > -			.cra_flags              = CRYPTO_ALG_ASYNC,
> > -			.cra_blocksize          = SHA1_BLOCK_SIZE,
> > -			.cra_module             = THIS_MODULE,
> > -			.cra_list               = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
> > -			.cra_init               = sha1_mb_async_init_tfm,
> > -			.cra_exit               = sha1_mb_async_exit_tfm,
> > -			.cra_ctxsize		= sizeof(struct sha1_mb_ctx),
> > -			.cra_alignmask		= 0,
> > -		},
> > -	},
> > -};
> > -
> > -static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	unsigned long cur_time;
> > -	unsigned long next_flush = 0;
> > -	struct sha1_hash_ctx *sha_ctx;
> > -
> > -
> > -	cur_time = jiffies;
> > -
> > -	while (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		if (time_before(cur_time, rctx->tag.expire))
> > -			break;
> > -		kernel_fpu_begin();
> > -		sha_ctx = (struct sha1_hash_ctx *)
> > -					sha1_ctx_mgr_flush(cstate->mgr);
> > -		kernel_fpu_end();
> > -		if (!sha_ctx) {
> > -			pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
> > -			break;
> > -		}
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		sha_finish_walk(&rctx, cstate, true);
> > -		sha_complete_job(rctx, cstate, 0);
> > -	}
> > -
> > -	if (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		/* get the hash context and then flush time */
> > -		next_flush = rctx->tag.expire;
> > -		mcryptd_arm_flusher(cstate, get_delay(next_flush));
> > -	}
> > -	return next_flush;
> > -}
> > -
> > -static int __init sha1_mb_mod_init(void)
> > -{
> > -
> > -	int cpu;
> > -	int err;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	/* check for dependent cpu features */
> > -	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
> > -	    !boot_cpu_has(X86_FEATURE_BMI2))
> > -		return -ENODEV;
> > -
> > -	/* initialize multibuffer structures */
> > -	sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
> > -
> > -	sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
> > -	sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
> > -	sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
> > -	sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
> > -
> > -	if (!sha1_mb_alg_state.alg_cstate)
> > -		return -ENOMEM;
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
> > -		cpu_state->next_flush = 0;
> > -		cpu_state->next_seq_num = 0;
> > -		cpu_state->flusher_engaged = false;
> > -		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
> > -		cpu_state->cpu = cpu;
> > -		cpu_state->alg_state = &sha1_mb_alg_state;
> > -		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
> > -					GFP_KERNEL);
> > -		if (!cpu_state->mgr)
> > -			goto err2;
> > -		sha1_ctx_mgr_init(cpu_state->mgr);
> > -		INIT_LIST_HEAD(&cpu_state->work_list);
> > -		spin_lock_init(&cpu_state->work_lock);
> > -	}
> > -	sha1_mb_alg_state.flusher = &sha1_mb_flusher;
> > -
> > -	err = crypto_register_ahash(&sha1_mb_areq_alg);
> > -	if (err)
> > -		goto err2;
> > -	err = crypto_register_ahash(&sha1_mb_async_alg);
> > -	if (err)
> > -		goto err1;
> > -
> > -
> > -	return 0;
> > -err1:
> > -	crypto_unregister_ahash(&sha1_mb_areq_alg);
> > -err2:
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha1_mb_alg_state.alg_cstate);
> > -	return -ENODEV;
> > -}
> > -
> > -static void __exit sha1_mb_mod_fini(void)
> > -{
> > -	int cpu;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	crypto_unregister_ahash(&sha1_mb_async_alg);
> > -	crypto_unregister_ahash(&sha1_mb_areq_alg);
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha1_mb_alg_state.alg_cstate);
> > -}
> > -
> > -module_init(sha1_mb_mod_init);
> > -module_exit(sha1_mb_mod_fini);
> > -
> > -MODULE_LICENSE("GPL");
> > -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
> > -
> > -MODULE_ALIAS_CRYPTO("sha1");
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
> > deleted file mode 100644
> > index 9454bd16f9f8..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
> > +++ /dev/null
> > @@ -1,134 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA context
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#ifndef _SHA_MB_CTX_INTERNAL_H
> > -#define _SHA_MB_CTX_INTERNAL_H
> > -
> > -#include "sha1_mb_mgr.h"
> > -
> > -#define HASH_UPDATE          0x00
> > -#define HASH_LAST            0x01
> > -#define HASH_DONE	     0x02
> > -#define HASH_FINAL	     0x04
> > -
> > -#define HASH_CTX_STS_IDLE       0x00
> > -#define HASH_CTX_STS_PROCESSING 0x01
> > -#define HASH_CTX_STS_LAST       0x02
> > -#define HASH_CTX_STS_COMPLETE   0x04
> > -
> > -enum hash_ctx_error {
> > -	HASH_CTX_ERROR_NONE               =  0,
> > -	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
> > -	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
> > -	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
> > -
> > -#ifdef HASH_CTX_DEBUG
> > -	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
> > -#endif
> > -};
> > -
> > -
> > -#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
> > -#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
> > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
> > -#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
> > -#define hash_ctx_status(ctx)     ((ctx)->status)
> > -#define hash_ctx_error(ctx)      ((ctx)->error)
> > -#define hash_ctx_init(ctx) \
> > -	do { \
> > -		(ctx)->error = HASH_CTX_ERROR_NONE; \
> > -		(ctx)->status = HASH_CTX_STS_COMPLETE; \
> > -	} while (0)
> > -
> > -
> > -/* Hash Constants and Typedefs */
> > -#define SHA1_DIGEST_LENGTH          5
> > -#define SHA1_LOG2_BLOCK_SIZE        6
> > -
> > -#define SHA1_PADLENGTHFIELD_SIZE    8
> > -
> > -#ifdef SHA_MB_DEBUG
> > -#define assert(expr) \
> > -do { \
> > -	if (unlikely(!(expr))) { \
> > -		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
> > -		#expr, __FILE__, __func__, __LINE__); \
> > -	} \
> > -} while (0)
> > -#else
> > -#define assert(expr) do {} while (0)
> > -#endif
> > -
> > -struct sha1_ctx_mgr {
> > -	struct sha1_mb_mgr mgr;
> > -};
> > -
> > -/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
> > -
> > -struct sha1_hash_ctx {
> > -	/* Must be at struct offset 0 */
> > -	struct job_sha1       job;
> > -	/* status flag */
> > -	int status;
> > -	/* error flag */
> > -	int error;
> > -
> > -	uint64_t	total_length;
> > -	const void	*incoming_buffer;
> > -	uint32_t	incoming_buffer_length;
> > -	uint8_t		partial_block_buffer[SHA1_BLOCK_SIZE * 2];
> > -	uint32_t	partial_block_buffer_length;
> > -	void		*user_data;
> > -};
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
> > deleted file mode 100644
> > index 08ad1a9acfd7..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
> > +++ /dev/null
> > @@ -1,110 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA1 algorithm manager
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      James Guilford <james.guilford@intel.com>
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -#ifndef __SHA_MB_MGR_H
> > -#define __SHA_MB_MGR_H
> > -
> > -
> > -#include <linux/types.h>
> > -
> > -#define NUM_SHA1_DIGEST_WORDS 5
> > -
> > -enum job_sts {	STS_UNKNOWN = 0,
> > -		STS_BEING_PROCESSED = 1,
> > -		STS_COMPLETED = 2,
> > -		STS_INTERNAL_ERROR = 3,
> > -		STS_ERROR = 4
> > -};
> > -
> > -struct job_sha1 {
> > -	u8	*buffer;
> > -	u32	len;
> > -	u32	result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
> > -	enum	job_sts status;
> > -	void	*user_data;
> > -};
> > -
> > -/* SHA1 out-of-order scheduler */
> > -
> > -/* typedef uint32_t sha1_digest_array[5][8]; */
> > -
> > -struct sha1_args_x8 {
> > -	uint32_t	digest[5][8];
> > -	uint8_t		*data_ptr[8];
> > -};
> > -
> > -struct sha1_lane_data {
> > -	struct job_sha1 *job_in_lane;
> > -};
> > -
> > -struct sha1_mb_mgr {
> > -	struct sha1_args_x8 args;
> > -
> > -	uint32_t lens[8];
> > -
> > -	/* each byte is index (0...7) of unused lanes */
> > -	uint64_t unused_lanes;
> > -	/* byte 4 is set to FF as a flag */
> > -	struct sha1_lane_data ldata[8];
> > -};
> > -
> > -
> > -#define SHA1_MB_MGR_NUM_LANES_AVX2 8
> > -
> > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
> > -struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
> > -					 struct job_sha1 *job);
> > -struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
> > -struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
> > deleted file mode 100644
> > index 86688c6e7a25..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
> > +++ /dev/null
> > @@ -1,287 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA1 algorithm data structure
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      James Guilford <james.guilford@intel.com>
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -# Macros for defining data structures
> > -
> > -# Usage example
> > -
> > -#START_FIELDS	# JOB_AES
> > -###	name		size	align
> > -#FIELD	_plaintext,	8,	8	# pointer to plaintext
> > -#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
> > -#FIELD	_IV,		16,	8	# IV
> > -#FIELD	_keys,		8,	8	# pointer to keys
> > -#FIELD	_len,		4,	4	# length in bytes
> > -#FIELD	_status,	4,	4	# status enumeration
> > -#FIELD	_user_data,	8,	8	# pointer to user data
> > -#UNION  _union,         size1,  align1, \
> > -#	                size2,  align2, \
> > -#	                size3,  align3, \
> > -#	                ...
> > -#END_FIELDS
> > -#%assign _JOB_AES_size	_FIELD_OFFSET
> > -#%assign _JOB_AES_align	_STRUCT_ALIGN
> > -
> > -#########################################################################
> > -
> > -# Alternate "struc-like" syntax:
> > -#	STRUCT job_aes2
> > -#	RES_Q	.plaintext,	1
> > -#	RES_Q	.ciphertext,	1
> > -#	RES_DQ	.IV,		1
> > -#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
> > -#	RES_U	.union,		size1, align1, \
> > -#				size2, align2, \
> > -#				...
> > -#	ENDSTRUCT
> > -#	# Following only needed if nesting
> > -#	%assign job_aes2_size	_FIELD_OFFSET
> > -#	%assign job_aes2_align	_STRUCT_ALIGN
> > -#
> > -# RES_* macros take a name, a count and an optional alignment.
> > -# The count in in terms of the base size of the macro, and the
> > -# default alignment is the base size.
> > -# The macros are:
> > -# Macro    Base size
> > -# RES_B	    1
> > -# RES_W	    2
> > -# RES_D     4
> > -# RES_Q     8
> > -# RES_DQ   16
> > -# RES_Y    32
> > -# RES_Z    64
> > -#
> > -# RES_U defines a union. It's arguments are a name and two or more
> > -# pairs of "size, alignment"
> > -#
> > -# The two assigns are only needed if this structure is being nested
> > -# within another. Even if the assigns are not done, one can still use
> > -# STRUCT_NAME_size as the size of the structure.
> > -#
> > -# Note that for nesting, you still need to assign to STRUCT_NAME_size.
> > -#
> > -# The differences between this and using "struc" directly are that each
> > -# type is implicitly aligned to its natural length (although this can be
> > -# over-ridden with an explicit third parameter), and that the structure
> > -# is padded at the end to its overall alignment.
> > -#
> > -
> > -#########################################################################
> > -
> > -#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
> > -#define _SHA1_MB_MGR_DATASTRUCT_ASM_
> > -
> > -## START_FIELDS
> > -.macro START_FIELDS
> > - _FIELD_OFFSET = 0
> > - _STRUCT_ALIGN = 0
> > -.endm
> > -
> > -## FIELD name size align
> > -.macro FIELD name size align
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
> > - \name	= _FIELD_OFFSET
> > - _FIELD_OFFSET = _FIELD_OFFSET + (\size)
> > -.if (\align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = \align
> > -.endif
> > -.endm
> > -
> > -## END_FIELDS
> > -.macro END_FIELDS
> > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
> > -.endm
> > -
> > -########################################################################
> > -
> > -.macro STRUCT p1
> > -START_FIELDS
> > -.struc \p1
> > -.endm
> > -
> > -.macro ENDSTRUCT
> > - tmp = _FIELD_OFFSET
> > - END_FIELDS
> > - tmp = (_FIELD_OFFSET - %%tmp)
> > -.if (tmp > 0)
> > -	.lcomm	tmp
> > -.endif
> > -.endstruc
> > -.endm
> > -
> > -## RES_int name size align
> > -.macro RES_int p1 p2 p3
> > - name = \p1
> > - size = \p2
> > - align = .\p3
> > -
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
> > -.align align
> > -.lcomm name size
> > - _FIELD_OFFSET = _FIELD_OFFSET + (size)
> > -.if (align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = align
> > -.endif
> > -.endm
> > -
> > -
> > -
> > -# macro RES_B name, size [, align]
> > -.macro RES_B _name, _size, _align=1
> > -RES_int _name _size _align
> > -.endm
> > -
> > -# macro RES_W name, size [, align]
> > -.macro RES_W _name, _size, _align=2
> > -RES_int _name 2*(_size) _align
> > -.endm
> > -
> > -# macro RES_D name, size [, align]
> > -.macro RES_D _name, _size, _align=4
> > -RES_int _name 4*(_size) _align
> > -.endm
> > -
> > -# macro RES_Q name, size [, align]
> > -.macro RES_Q _name, _size, _align=8
> > -RES_int _name 8*(_size) _align
> > -.endm
> > -
> > -# macro RES_DQ name, size [, align]
> > -.macro RES_DQ _name, _size, _align=16
> > -RES_int _name 16*(_size) _align
> > -.endm
> > -
> > -# macro RES_Y name, size [, align]
> > -.macro RES_Y _name, _size, _align=32
> > -RES_int _name 32*(_size) _align
> > -.endm
> > -
> > -# macro RES_Z name, size [, align]
> > -.macro RES_Z _name, _size, _align=64
> > -RES_int _name 64*(_size) _align
> > -.endm
> > -
> > -
> > -#endif
> > -
> > -########################################################################
> > -#### Define constants
> > -########################################################################
> > -
> > -########################################################################
> > -#### Define SHA1 Out Of Order Data Structures
> > -########################################################################
> > -
> > -START_FIELDS    # LANE_DATA
> > -###     name            size    align
> > -FIELD   _job_in_lane,   8,      8       # pointer to job object
> > -END_FIELDS
> > -
> > -_LANE_DATA_size = _FIELD_OFFSET
> > -_LANE_DATA_align = _STRUCT_ALIGN
> > -
> > -########################################################################
> > -
> > -START_FIELDS    # SHA1_ARGS_X8
> > -###     name            size    align
> > -FIELD   _digest,        4*5*8,  16      # transposed digest
> > -FIELD   _data_ptr,      8*8,    8       # array of pointers to data
> > -END_FIELDS
> > -
> > -_SHA1_ARGS_X4_size =     _FIELD_OFFSET
> > -_SHA1_ARGS_X4_align =    _STRUCT_ALIGN
> > -_SHA1_ARGS_X8_size =     _FIELD_OFFSET
> > -_SHA1_ARGS_X8_align =    _STRUCT_ALIGN
> > -
> > -########################################################################
> > -
> > -START_FIELDS    # MB_MGR
> > -###     name            size    align
> > -FIELD   _args,          _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
> > -FIELD   _lens,          4*8,    8
> > -FIELD   _unused_lanes,  8,      8
> > -FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
> > -END_FIELDS
> > -
> > -_MB_MGR_size =   _FIELD_OFFSET
> > -_MB_MGR_align =  _STRUCT_ALIGN
> > -
> > -_args_digest    =     _args + _digest
> > -_args_data_ptr  =     _args + _data_ptr
> > -
> > -
> > -########################################################################
> > -#### Define constants
> > -########################################################################
> > -
> > -#define STS_UNKNOWN             0
> > -#define STS_BEING_PROCESSED     1
> > -#define STS_COMPLETED           2
> > -
> > -########################################################################
> > -#### Define JOB_SHA1 structure
> > -########################################################################
> > -
> > -START_FIELDS    # JOB_SHA1
> > -
> > -###     name                            size    align
> > -FIELD   _buffer,                        8,      8       # pointer to buffer
> > -FIELD   _len,                           4,      4       # length in bytes
> > -FIELD   _result_digest,                 5*4,    32      # Digest (output)
> > -FIELD   _status,                        4,      4
> > -FIELD   _user_data,                     8,      8
> > -END_FIELDS
> > -
> > -_JOB_SHA1_size =  _FIELD_OFFSET
> > -_JOB_SHA1_align = _STRUCT_ALIGN
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
> > deleted file mode 100644
> > index 7cfba738f104..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
> > +++ /dev/null
> > @@ -1,304 +0,0 @@
> > -/*
> > - * Flush routine for SHA1 multibuffer
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      James Guilford <james.guilford@intel.com>
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha1_mb_mgr_datastruct.S"
> > -
> > -
> > -.extern sha1_x8_avx2
> > -
> > -# LINUX register definitions
> > -#define arg1    %rdi
> > -#define arg2    %rsi
> > -
> > -# Common definitions
> > -#define state   arg1
> > -#define job     arg2
> > -#define len2    arg2
> > -
> > -# idx must be a register not clobbered by sha1_x8_avx2
> > -#define idx		%r8
> > -#define DWORD_idx	%r8d
> > -
> > -#define unused_lanes    %rbx
> > -#define lane_data       %rbx
> > -#define tmp2            %rbx
> > -#define tmp2_w		%ebx
> > -
> > -#define job_rax         %rax
> > -#define tmp1            %rax
> > -#define size_offset     %rax
> > -#define tmp             %rax
> > -#define start_offset    %rax
> > -
> > -#define tmp3            %arg1
> > -
> > -#define extra_blocks    %arg2
> > -#define p               %arg2
> > -
> > -.macro LABEL prefix n
> > -\prefix\n\():
> > -.endm
> > -
> > -.macro JNE_SKIP i
> > -jne     skip_\i
> > -.endm
> > -
> > -.altmacro
> > -.macro SET_OFFSET _offset
> > -offset = \_offset
> > -.endm
> > -.noaltmacro
> > -
> > -# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
> > -# arg 1 : rdi : state
> > -ENTRY(sha1_mb_mgr_flush_avx2)
> > -	FRAME_BEGIN
> > -	push	%rbx
> > -
> > -	# If bit (32+3) is set, then all lanes are empty
> > -	mov     _unused_lanes(state), unused_lanes
> > -	bt      $32+3, unused_lanes
> > -	jc      return_null
> > -
> > -	# find a lane with a non-null job
> > -	xor     idx, idx
> > -	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  one(%rip), idx
> > -	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  two(%rip), idx
> > -	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  three(%rip), idx
> > -	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  four(%rip), idx
> > -	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  five(%rip), idx
> > -	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  six(%rip), idx
> > -	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -	cmovne  seven(%rip), idx
> > -
> > -	# copy idx to empty lanes
> > -copy_lane_data:
> > -	offset =  (_args + _data_ptr)
> > -	mov     offset(state,idx,8), tmp
> > -
> > -	I = 0
> > -.rep 8
> > -	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
> > -	cmpq    $0, offset(state)
> > -.altmacro
> > -	JNE_SKIP %I
> > -	offset =  (_args + _data_ptr + 8*I)
> > -	mov     tmp, offset(state)
> > -	offset =  (_lens + 4*I)
> > -	movl    $0xFFFFFFFF, offset(state)
> > -LABEL skip_ %I
> > -	I = (I+1)
> > -.noaltmacro
> > -.endr
> > -
> > -	# Find min length
> > -	vmovdqu _lens+0*16(state), %xmm0
> > -	vmovdqu _lens+1*16(state), %xmm1
> > -
> > -	vpminud %xmm1, %xmm0, %xmm2     # xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
> > -
> > -	vmovd   %xmm2, DWORD_idx
> > -	mov	idx, len2
> > -	and	$0xF, idx
> > -	shr	$4, len2
> > -	jz	len_is_0
> > -
> > -	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
> > -	vpshufd $0, %xmm2, %xmm2
> > -
> > -	vpsubd  %xmm2, %xmm0, %xmm0
> > -	vpsubd  %xmm2, %xmm1, %xmm1
> > -
> > -	vmovdqu %xmm0, _lens+0*16(state)
> > -	vmovdqu %xmm1, _lens+1*16(state)
> > -
> > -	# "state" and "args" are the same address, arg1
> > -	# len is arg2
> > -	call	sha1_x8_avx2
> > -	# state and idx are intact
> > -
> > -
> > -len_is_0:
> > -	# process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -	lea     _ldata(state, lane_data), lane_data
> > -
> > -	mov     _job_in_lane(lane_data), job_rax
> > -	movq    $0, _job_in_lane(lane_data)
> > -	movl    $STS_COMPLETED, _status(job_rax)
> > -	mov     _unused_lanes(state), unused_lanes
> > -	shl     $4, unused_lanes
> > -	or      idx, unused_lanes
> > -	mov     unused_lanes, _unused_lanes(state)
> > -
> > -	movl	$0xFFFFFFFF, _lens(state, idx, 4)
> > -
> > -	vmovd    _args_digest(state , idx, 4) , %xmm0
> > -	vpinsrd  $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd  $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd  $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
> > -	movl    _args_digest+4*32(state, idx, 4), tmp2_w
> > -
> > -	vmovdqu  %xmm0, _result_digest(job_rax)
> > -	offset =  (_result_digest + 1*16)
> > -	mov     tmp2_w, offset(job_rax)
> > -
> > -return:
> > -	pop	%rbx
> > -	FRAME_END
> > -	ret
> > -
> > -return_null:
> > -	xor     job_rax, job_rax
> > -	jmp     return
> > -ENDPROC(sha1_mb_mgr_flush_avx2)
> > -
> > -
> > -#################################################################
> > -
> > -.align 16
> > -ENTRY(sha1_mb_mgr_get_comp_job_avx2)
> > -	push    %rbx
> > -
> > -	## if bit 32+3 is set, then all lanes are empty
> > -	mov     _unused_lanes(state), unused_lanes
> > -	bt      $(32+3), unused_lanes
> > -	jc      .return_null
> > -
> > -	# Find min length
> > -	vmovdqu _lens(state), %xmm0
> > -	vmovdqu _lens+1*16(state), %xmm1
> > -
> > -	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3    # xmm3 has {x,x,x,E}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
> > -
> > -	vmovd   %xmm2, DWORD_idx
> > -	test    $~0xF, idx
> > -	jnz     .return_null
> > -
> > -	# process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -	lea     _ldata(state, lane_data), lane_data
> > -
> > -	mov     _job_in_lane(lane_data), job_rax
> > -	movq    $0,  _job_in_lane(lane_data)
> > -	movl    $STS_COMPLETED, _status(job_rax)
> > -	mov     _unused_lanes(state), unused_lanes
> > -	shl     $4, unused_lanes
> > -	or      idx, unused_lanes
> > -	mov     unused_lanes, _unused_lanes(state)
> > -
> > -	movl    $0xFFFFFFFF, _lens(state,  idx, 4)
> > -
> > -	vmovd   _args_digest(state, idx, 4), %xmm0
> > -	vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
> > -	movl    _args_digest+4*32(state, idx, 4), tmp2_w
> > -
> > -	vmovdqu %xmm0, _result_digest(job_rax)
> > -	movl    tmp2_w, _result_digest+1*16(job_rax)
> > -
> > -	pop     %rbx
> > -
> > -	ret
> > -
> > -.return_null:
> > -	xor     job_rax, job_rax
> > -	pop     %rbx
> > -	ret
> > -ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
> > -
> > -.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
> > -.align 16
> > -clear_low_nibble:
> > -.octa	0x000000000000000000000000FFFFFFF0
> > -
> > -.section	.rodata.cst8, "aM", @progbits, 8
> > -.align 8
> > -one:
> > -.quad  1
> > -two:
> > -.quad  2
> > -three:
> > -.quad  3
> > -four:
> > -.quad  4
> > -five:
> > -.quad  5
> > -six:
> > -.quad  6
> > -seven:
> > -.quad  7
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
> > deleted file mode 100644
> > index d2add0d35f43..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
> > +++ /dev/null
> > @@ -1,64 +0,0 @@
> > -/*
> > - * Initialization code for multi buffer SHA1 algorithm for AVX2
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include "sha1_mb_mgr.h"
> > -
> > -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
> > -{
> > -	unsigned int j;
> > -	state->unused_lanes = 0xF76543210ULL;
> > -	for (j = 0; j < 8; j++) {
> > -		state->lens[j] = 0xFFFFFFFF;
> > -		state->ldata[j].job_in_lane = NULL;
> > -	}
> > -}
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
> > deleted file mode 100644
> > index 7a93b1c0d69a..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
> > +++ /dev/null
> > @@ -1,209 +0,0 @@
> > -/*
> > - * Buffer submit code for multi buffer SHA1 algorithm
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      James Guilford <james.guilford@intel.com>
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha1_mb_mgr_datastruct.S"
> > -
> > -
> > -.extern sha1_x8_avx
> > -
> > -# LINUX register definitions
> > -arg1    = %rdi
> > -arg2    = %rsi
> > -size_offset	= %rcx
> > -tmp2		= %rcx
> > -extra_blocks	= %rdx
> > -
> > -# Common definitions
> > -#define state   arg1
> > -#define job     %rsi
> > -#define len2    arg2
> > -#define p2      arg2
> > -
> > -# idx must be a register not clobbered by sha1_x8_avx2
> > -idx		= %r8
> > -DWORD_idx	= %r8d
> > -last_len	= %r8
> > -
> > -p               = %r11
> > -start_offset    = %r11
> > -
> > -unused_lanes    = %rbx
> > -BYTE_unused_lanes = %bl
> > -
> > -job_rax         = %rax
> > -len             = %rax
> > -DWORD_len	= %eax
> > -
> > -lane            = %r12
> > -tmp3            = %r12
> > -
> > -tmp             = %r9
> > -DWORD_tmp	= %r9d
> > -
> > -lane_data       = %r10
> > -
> > -# JOB* sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
> > -# arg 1 : rdi : state
> > -# arg 2 : rsi : job
> > -ENTRY(sha1_mb_mgr_submit_avx2)
> > -	FRAME_BEGIN
> > -	push	%rbx
> > -	push	%r12
> > -
> > -	mov     _unused_lanes(state), unused_lanes
> > -	mov	unused_lanes, lane
> > -	and	$0xF, lane
> > -	shr     $4, unused_lanes
> > -	imul    $_LANE_DATA_size, lane, lane_data
> > -	movl    $STS_BEING_PROCESSED, _status(job)
> > -	lea     _ldata(state, lane_data), lane_data
> > -	mov     unused_lanes, _unused_lanes(state)
> > -	movl    _len(job),  DWORD_len
> > -
> > -	mov	job, _job_in_lane(lane_data)
> > -	shl	$4, len
> > -	or	lane, len
> > -
> > -	movl    DWORD_len,  _lens(state , lane, 4)
> > -
> > -	# Load digest words from result_digest
> > -	vmovdqu	_result_digest(job), %xmm0
> > -	mov	_result_digest+1*16(job), DWORD_tmp
> > -	vmovd    %xmm0, _args_digest(state, lane, 4)
> > -	vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
> > -	vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
> > -	vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
> > -	movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)
> > -
> > -	mov     _buffer(job), p
> > -	mov     p, _args_data_ptr(state, lane, 8)
> > -
> > -	cmp     $0xF, unused_lanes
> > -	jne     return_null
> > -
> > -start_loop:
> > -	# Find min length
> > -	vmovdqa _lens(state), %xmm0
> > -	vmovdqa _lens+1*16(state), %xmm1
> > -
> > -	vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
> > -	vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
> > -
> > -	vmovd   %xmm2, DWORD_idx
> > -	mov    idx, len2
> > -	and    $0xF, idx
> > -	shr    $4, len2
> > -	jz     len_is_0
> > -
> > -	vpand   clear_low_nibble(%rip), %xmm2, %xmm2
> > -	vpshufd $0, %xmm2, %xmm2
> > -
> > -	vpsubd  %xmm2, %xmm0, %xmm0
> > -	vpsubd  %xmm2, %xmm1, %xmm1
> > -
> > -	vmovdqa %xmm0, _lens + 0*16(state)
> > -	vmovdqa %xmm1, _lens + 1*16(state)
> > -
> > -
> > -	# "state" and "args" are the same address, arg1
> > -	# len is arg2
> > -	call    sha1_x8_avx2
> > -
> > -	# state and idx are intact
> > -
> > -len_is_0:
> > -	# process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -	lea     _ldata(state, lane_data), lane_data
> > -
> > -	mov     _job_in_lane(lane_data), job_rax
> > -	mov     _unused_lanes(state), unused_lanes
> > -	movq    $0, _job_in_lane(lane_data)
> > -	movl    $STS_COMPLETED, _status(job_rax)
> > -	shl     $4, unused_lanes
> > -	or      idx, unused_lanes
> > -	mov     unused_lanes, _unused_lanes(state)
> > -
> > -	movl	$0xFFFFFFFF, _lens(state, idx, 4)
> > -
> > -	vmovd    _args_digest(state, idx, 4), %xmm0
> > -	vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
> > -	vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
> > -	vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
> > -	movl     _args_digest+4*32(state, idx, 4), DWORD_tmp
> > -
> > -	vmovdqu  %xmm0, _result_digest(job_rax)
> > -	movl    DWORD_tmp, _result_digest+1*16(job_rax)
> > -
> > -return:
> > -	pop	%r12
> > -	pop	%rbx
> > -	FRAME_END
> > -	ret
> > -
> > -return_null:
> > -	xor     job_rax, job_rax
> > -	jmp     return
> > -
> > -ENDPROC(sha1_mb_mgr_submit_avx2)
> > -
> > -.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
> > -.align 16
> > -clear_low_nibble:
> > -	.octa	0x000000000000000000000000FFFFFFF0
> > diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
> > deleted file mode 100644
> > index 20f77aa633de..000000000000
> > --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
> > +++ /dev/null
> > @@ -1,492 +0,0 @@
> > -/*
> > - * Multi-buffer SHA1 algorithm hash compute routine
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      James Guilford <james.guilford@intel.com>
> > - *	Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2014 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include "sha1_mb_mgr_datastruct.S"
> > -
> > -## code to compute oct SHA1 using SSE-256
> > -## outer calling routine takes care of save and restore of XMM registers
> > -
> > -## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15# ymm0-15
> > -##
> > -## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
> > -## Linux preserves:                       rdi rbp r8
> > -##
> > -## clobbers ymm0-15
> > -
> > -
> > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
> > -# "transpose" data in {r0...r7} using temps {t0...t1}
> > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
> > -# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
> > -# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
> > -# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
> > -# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
> > -# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
> > -# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
> > -# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
> > -# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
> > -#
> > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
> > -# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
> > -# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
> > -# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
> > -# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
> > -# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
> > -# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
> > -# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
> > -# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
> > -#
> > -
> > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
> > -	# process top half (r0..r3) {a...d}
> > -	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
> > -	vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
> > -	vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
> > -	vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
> > -	vshufps  $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
> > -	vshufps  $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
> > -	vshufps  $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
> > -	vshufps  $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
> > -
> > -	# use r2 in place of t0
> > -	# process bottom half (r4..r7) {e...h}
> > -	vshufps  $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
> > -	vshufps  $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
> > -	vshufps  $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
> > -	vshufps  $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
> > -	vshufps  $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
> > -	vshufps  $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
> > -	vshufps  $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
> > -	vshufps  $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
> > -
> > -	vperm2f128      $0x13, \r1, \r5, \r6  # h6...a6
> > -	vperm2f128      $0x02, \r1, \r5, \r2  # h2...a2
> > -	vperm2f128      $0x13, \r3, \r7, \r5  # h5...a5
> > -	vperm2f128      $0x02, \r3, \r7, \r1  # h1...a1
> > -	vperm2f128      $0x13, \r0, \r4, \r7  # h7...a7
> > -	vperm2f128      $0x02, \r0, \r4, \r3  # h3...a3
> > -	vperm2f128      $0x13, \t0, \t1, \r4  # h4...a4
> > -	vperm2f128      $0x02, \t0, \t1, \r0  # h0...a0
> > -
> > -.endm
> > -##
> > -## Magic functions defined in FIPS 180-1
> > -##
> > -# macro MAGIC_F0 F,B,C,D,T   ## F = (D ^ (B & (C ^ D)))
> > -.macro MAGIC_F0 regF regB regC regD regT
> > -    vpxor \regD, \regC, \regF
> > -    vpand \regB, \regF, \regF
> > -    vpxor \regD, \regF, \regF
> > -.endm
> > -
> > -# macro MAGIC_F1 F,B,C,D,T   ## F = (B ^ C ^ D)
> > -.macro MAGIC_F1 regF regB regC regD regT
> > -    vpxor  \regC, \regD, \regF
> > -    vpxor  \regB, \regF, \regF
> > -.endm
> > -
> > -# macro MAGIC_F2 F,B,C,D,T   ## F = ((B & C) | (B & D) | (C & D))
> > -.macro MAGIC_F2 regF regB regC regD regT
> > -    vpor  \regC, \regB, \regF
> > -    vpand \regC, \regB, \regT
> > -    vpand \regD, \regF, \regF
> > -    vpor  \regT, \regF, \regF
> > -.endm
> > -
> > -# macro MAGIC_F3 F,B,C,D,T   ## F = (B ^ C ^ D)
> > -.macro MAGIC_F3 regF regB regC regD regT
> > -    MAGIC_F1 \regF,\regB,\regC,\regD,\regT
> > -.endm
> > -
> > -# PROLD reg, imm, tmp
> > -.macro PROLD reg imm tmp
> > -	vpsrld  $(32-\imm), \reg, \tmp
> > -	vpslld  $\imm, \reg, \reg
> > -	vpor    \tmp, \reg, \reg
> > -.endm
> > -
> > -.macro PROLD_nd reg imm tmp src
> > -	vpsrld  $(32-\imm), \src, \tmp
> > -	vpslld  $\imm, \src, \reg
> > -	vpor	\tmp, \reg, \reg
> > -.endm
> > -
> > -.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
> > -	vpaddd	\immCNT, \regE, \regE
> > -	vpaddd	\memW*32(%rsp), \regE, \regE
> > -	PROLD_nd \regT, 5, \regF, \regA
> > -	vpaddd	\regT, \regE, \regE
> > -	\MAGIC  \regF, \regB, \regC, \regD, \regT
> > -        PROLD   \regB, 30, \regT
> > -        vpaddd  \regF, \regE, \regE
> > -.endm
> > -
> > -.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
> > -	vpaddd	\immCNT, \regE, \regE
> > -	offset = ((\memW - 14) & 15) * 32
> > -	vmovdqu offset(%rsp), W14
> > -	vpxor	W14, W16, W16
> > -	offset = ((\memW -  8) & 15) * 32
> > -	vpxor	offset(%rsp), W16, W16
> > -	offset = ((\memW -  3) & 15) * 32
> > -	vpxor	offset(%rsp), W16, W16
> > -	vpsrld	$(32-1), W16, \regF
> > -	vpslld	$1, W16, W16
> > -	vpor	W16, \regF, \regF
> > -
> > -	ROTATE_W
> > -
> > -	offset = ((\memW - 0) & 15) * 32
> > -	vmovdqu	\regF, offset(%rsp)
> > -	vpaddd	\regF, \regE, \regE
> > -	PROLD_nd \regT, 5, \regF, \regA
> > -	vpaddd	\regT, \regE, \regE
> > -	\MAGIC \regF,\regB,\regC,\regD,\regT      ## FUN  = MAGIC_Fi(B,C,D)
> > -	PROLD   \regB,30, \regT
> > -	vpaddd  \regF, \regE, \regE
> > -.endm
> > -
> > -########################################################################
> > -########################################################################
> > -########################################################################
> > -
> > -## FRAMESZ plus pushes must be an odd multiple of 8
> > -YMM_SAVE = (15-15)*32
> > -FRAMESZ = 32*16 + YMM_SAVE
> > -_YMM  =   FRAMESZ - YMM_SAVE
> > -
> > -#define VMOVPS   vmovups
> > -
> > -IDX  = %rax
> > -inp0 = %r9
> > -inp1 = %r10
> > -inp2 = %r11
> > -inp3 = %r12
> > -inp4 = %r13
> > -inp5 = %r14
> > -inp6 = %r15
> > -inp7 = %rcx
> > -arg1 = %rdi
> > -arg2 = %rsi
> > -RSP_SAVE = %rdx
> > -
> > -# ymm0 A
> > -# ymm1 B
> > -# ymm2 C
> > -# ymm3 D
> > -# ymm4 E
> > -# ymm5         F       AA
> > -# ymm6         T0      BB
> > -# ymm7         T1      CC
> > -# ymm8         T2      DD
> > -# ymm9         T3      EE
> > -# ymm10                T4      TMP
> > -# ymm11                T5      FUN
> > -# ymm12                T6      K
> > -# ymm13                T7      W14
> > -# ymm14                T8      W15
> > -# ymm15                T9      W16
> > -
> > -
> > -A  =     %ymm0
> > -B  =     %ymm1
> > -C  =     %ymm2
> > -D  =     %ymm3
> > -E  =     %ymm4
> > -F  =     %ymm5
> > -T0 =	 %ymm6
> > -T1 =     %ymm7
> > -T2 =     %ymm8
> > -T3 =     %ymm9
> > -T4 =     %ymm10
> > -T5 =     %ymm11
> > -T6 =     %ymm12
> > -T7 =     %ymm13
> > -T8  =     %ymm14
> > -T9  =     %ymm15
> > -
> > -AA  =     %ymm5
> > -BB  =     %ymm6
> > -CC  =     %ymm7
> > -DD  =     %ymm8
> > -EE  =     %ymm9
> > -TMP =     %ymm10
> > -FUN =     %ymm11
> > -K   =     %ymm12
> > -W14 =     %ymm13
> > -W15 =     %ymm14
> > -W16 =     %ymm15
> > -
> > -.macro ROTATE_ARGS
> > - TMP_ = E
> > - E = D
> > - D = C
> > - C = B
> > - B = A
> > - A = TMP_
> > -.endm
> > -
> > -.macro ROTATE_W
> > -TMP_  = W16
> > -W16  = W15
> > -W15  = W14
> > -W14  = TMP_
> > -.endm
> > -
> > -# 8 streams x 5 32bit words per digest x 4 bytes per word
> > -#define DIGEST_SIZE (8*5*4)
> > -
> > -.align 32
> > -
> > -# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
> > -# arg 1 : pointer to state: 8-lane digests at offset 0, array[8] of input data pointers at _data_ptr
> > -# arg 2 : size (in blocks) ;; assumed to be >= 1
> > -#
> > -ENTRY(sha1_x8_avx2)
> > -
> > -	# save callee-saved clobbered registers to comply with C function ABI
> > -	push	%r12
> > -	push	%r13
> > -	push	%r14
> > -	push	%r15
> > -
> > -	#save rsp
> > -	mov	%rsp, RSP_SAVE
> > -	sub     $FRAMESZ, %rsp
> > -
> > -	#align rsp to 32 Bytes
> > -	and	$~0x1F, %rsp
> > -
> > -	## Initialize digests
> > -	vmovdqu  0*32(arg1), A
> > -	vmovdqu  1*32(arg1), B
> > -	vmovdqu  2*32(arg1), C
> > -	vmovdqu  3*32(arg1), D
> > -	vmovdqu  4*32(arg1), E
> > -
> > -	## transpose input onto stack
> > -	mov     _data_ptr+0*8(arg1),inp0
> > -	mov     _data_ptr+1*8(arg1),inp1
> > -	mov     _data_ptr+2*8(arg1),inp2
> > -	mov     _data_ptr+3*8(arg1),inp3
> > -	mov     _data_ptr+4*8(arg1),inp4
> > -	mov     _data_ptr+5*8(arg1),inp5
> > -	mov     _data_ptr+6*8(arg1),inp6
> > -	mov     _data_ptr+7*8(arg1),inp7
> > -
> > -	xor     IDX, IDX
> > -lloop:
> > -	vmovdqu  PSHUFFLE_BYTE_FLIP_MASK(%rip), F
> > -	I=0
> > -.rep 2
> > -	VMOVPS   (inp0, IDX), T0
> > -	VMOVPS   (inp1, IDX), T1
> > -	VMOVPS   (inp2, IDX), T2
> > -	VMOVPS   (inp3, IDX), T3
> > -	VMOVPS   (inp4, IDX), T4
> > -	VMOVPS   (inp5, IDX), T5
> > -	VMOVPS   (inp6, IDX), T6
> > -	VMOVPS   (inp7, IDX), T7
> > -
> > -	TRANSPOSE8       T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
> > -	vpshufb  F, T0, T0
> > -	vmovdqu  T0, (I*8)*32(%rsp)
> > -	vpshufb  F, T1, T1
> > -	vmovdqu  T1, (I*8+1)*32(%rsp)
> > -	vpshufb  F, T2, T2
> > -	vmovdqu  T2, (I*8+2)*32(%rsp)
> > -	vpshufb  F, T3, T3
> > -	vmovdqu  T3, (I*8+3)*32(%rsp)
> > -	vpshufb  F, T4, T4
> > -	vmovdqu  T4, (I*8+4)*32(%rsp)
> > -	vpshufb  F, T5, T5
> > -	vmovdqu  T5, (I*8+5)*32(%rsp)
> > -	vpshufb  F, T6, T6
> > -	vmovdqu  T6, (I*8+6)*32(%rsp)
> > -	vpshufb  F, T7, T7
> > -	vmovdqu  T7, (I*8+7)*32(%rsp)
> > -	add     $32, IDX
> > -	I = (I+1)
> > -.endr
> > -	# save old digests
> > -	vmovdqu  A,AA
> > -	vmovdqu  B,BB
> > -	vmovdqu  C,CC
> > -	vmovdqu  D,DD
> > -	vmovdqu  E,EE
> > -
> > -##
> > -## perform 0-79 steps
> > -##
> > -	vmovdqu  K00_19(%rip), K
> > -## do rounds 0...15
> > -	I = 0
> > -.rep 16
> > -	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
> > -	ROTATE_ARGS
> > -	I = (I+1)
> > -.endr
> > -
> > -## do rounds 16...19
> > -	vmovdqu  ((16 - 16) & 15) * 32 (%rsp), W16
> > -	vmovdqu  ((16 - 15) & 15) * 32 (%rsp), W15
> > -.rep 4
> > -	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
> > -	ROTATE_ARGS
> > -	I = (I+1)
> > -.endr
> > -
> > -## do rounds 20...39
> > -	vmovdqu  K20_39(%rip), K
> > -.rep 20
> > -	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
> > -	ROTATE_ARGS
> > -	I = (I+1)
> > -.endr
> > -
> > -## do rounds 40...59
> > -	vmovdqu  K40_59(%rip), K
> > -.rep 20
> > -	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
> > -	ROTATE_ARGS
> > -	I = (I+1)
> > -.endr
> > -
> > -## do rounds 60...79
> > -	vmovdqu  K60_79(%rip), K
> > -.rep 20
> > -	SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
> > -	ROTATE_ARGS
> > -	I = (I+1)
> > -.endr
> > -
> > -	vpaddd   AA,A,A
> > -	vpaddd   BB,B,B
> > -	vpaddd   CC,C,C
> > -	vpaddd   DD,D,D
> > -	vpaddd   EE,E,E
> > -
> > -	sub     $1, arg2
> > -	jne     lloop
> > -
> > -	# write out digests
> > -	vmovdqu  A, 0*32(arg1)
> > -	vmovdqu  B, 1*32(arg1)
> > -	vmovdqu  C, 2*32(arg1)
> > -	vmovdqu  D, 3*32(arg1)
> > -	vmovdqu  E, 4*32(arg1)
> > -
> > -	# update input pointers
> > -	add     IDX, inp0
> > -	add     IDX, inp1
> > -	add     IDX, inp2
> > -	add     IDX, inp3
> > -	add     IDX, inp4
> > -	add     IDX, inp5
> > -	add     IDX, inp6
> > -	add     IDX, inp7
> > -	mov     inp0, _data_ptr (arg1)
> > -	mov     inp1, _data_ptr + 1*8(arg1)
> > -	mov     inp2, _data_ptr + 2*8(arg1)
> > -	mov     inp3, _data_ptr + 3*8(arg1)
> > -	mov     inp4, _data_ptr + 4*8(arg1)
> > -	mov     inp5, _data_ptr + 5*8(arg1)
> > -	mov     inp6, _data_ptr + 6*8(arg1)
> > -	mov     inp7, _data_ptr + 7*8(arg1)
> > -
> > -	################
> > -	## Postamble
> > -
> > -	mov     RSP_SAVE, %rsp
> > -
> > -	# restore callee-saved clobbered registers
> > -	pop	%r15
> > -	pop	%r14
> > -	pop	%r13
> > -	pop	%r12
> > -
> > -	ret
> > -ENDPROC(sha1_x8_avx2)
> > -
> > -
> > -.section	.rodata.cst32.K00_19, "aM", @progbits, 32
> > -.align 32
> > -K00_19:
> > -.octa 0x5A8279995A8279995A8279995A827999
> > -.octa 0x5A8279995A8279995A8279995A827999
> > -
> > -.section	.rodata.cst32.K20_39, "aM", @progbits, 32
> > -.align 32
> > -K20_39:
> > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
> > -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
> > -
> > -.section	.rodata.cst32.K40_59, "aM", @progbits, 32
> > -.align 32
> > -K40_59:
> > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
> > -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
> > -
> > -.section	.rodata.cst32.K60_79, "aM", @progbits, 32
> > -.align 32
> > -K60_79:
> > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
> > -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
> > -
> > -.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
> > -.align 32
> > -PSHUFFLE_BYTE_FLIP_MASK:
> > -.octa 0x0c0d0e0f08090a0b0405060700010203
> > -.octa 0x0c0d0e0f08090a0b0405060700010203
> > diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
> > deleted file mode 100644
> > index 53ad6e7db747..000000000000
> > --- a/arch/x86/crypto/sha256-mb/Makefile
> > +++ /dev/null
> > @@ -1,14 +0,0 @@
> > -# SPDX-License-Identifier: GPL-2.0
> > -#
> > -# Arch-specific CryptoAPI modules.
> > -#
> > -
> > -OBJECT_FILES_NON_STANDARD := y
> > -
> > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
> > -                                $(comma)4)$(comma)%ymm2,yes,no)
> > -ifeq ($(avx2_supported),yes)
> > -	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
> > -	sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
> > -	     sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
> > -endif
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
> > deleted file mode 100644
> > index 97c5fc43e115..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb.c
> > +++ /dev/null
> > @@ -1,1013 +0,0 @@
> > -/*
> > - * Multi buffer SHA256 algorithm Glue Code
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
> > -
> > -#include <crypto/internal/hash.h>
> > -#include <linux/init.h>
> > -#include <linux/module.h>
> > -#include <linux/mm.h>
> > -#include <linux/cryptohash.h>
> > -#include <linux/types.h>
> > -#include <linux/list.h>
> > -#include <crypto/scatterwalk.h>
> > -#include <crypto/sha.h>
> > -#include <crypto/mcryptd.h>
> > -#include <crypto/crypto_wq.h>
> > -#include <asm/byteorder.h>
> > -#include <linux/hardirq.h>
> > -#include <asm/fpu/api.h>
> > -#include "sha256_mb_ctx.h"
> > -
> > -#define FLUSH_INTERVAL 1000 /* in usec */
> > -
> > -static struct mcryptd_alg_state sha256_mb_alg_state;
> > -
> > -struct sha256_mb_ctx {
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -};
> > -
> > -static inline struct mcryptd_hash_request_ctx
> > -		*cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
> > -{
> > -	struct ahash_request *areq;
> > -
> > -	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
> > -	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -}
> > -
> > -static inline struct ahash_request
> > -		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
> > -{
> > -	return container_of((void *) ctx, struct ahash_request, __ctx);
> > -}
> > -
> > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
> > -				struct ahash_request *areq)
> > -{
> > -	rctx->flag = HASH_UPDATE;
> > -}
> > -
> > -static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
> > -static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
> > -			(struct sha256_mb_mgr *state, struct job_sha256 *job);
> > -static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
> > -			(struct sha256_mb_mgr *state);
> > -static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
> > -			(struct sha256_mb_mgr *state);
> > -
> > -inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
> > -			 uint64_t total_len)
> > -{
> > -	uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
> > -
> > -	memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
> > -	padblock[i] = 0x80;
> > -
> > -	i += ((SHA256_BLOCK_SIZE - 1) &
> > -	      (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
> > -	     + 1 + SHA256_PADLENGTHFIELD_SIZE;
> > -
> > -#if SHA256_PADLENGTHFIELD_SIZE == 16
> > -	*((uint64_t *) &padblock[i - 16]) = 0;
> > -#endif
> > -
> > -	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
> > -
> > -	/* Number of extra blocks to hash */
> > -	return i >> SHA256_LOG2_BLOCK_SIZE;
> > -}
> > -
> > -static struct sha256_hash_ctx
> > -		*sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
> > -					struct sha256_hash_ctx *ctx)
> > -{
> > -	while (ctx) {
> > -		if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -			/* Clear PROCESSING bit */
> > -			ctx->status = HASH_CTX_STS_COMPLETE;
> > -			return ctx;
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are empty, begin hashing what remains
> > -		 * in the user's buffer.
> > -		 */
> > -		if (ctx->partial_block_buffer_length == 0 &&
> > -		    ctx->incoming_buffer_length) {
> > -
> > -			const void *buffer = ctx->incoming_buffer;
> > -			uint32_t len = ctx->incoming_buffer_length;
> > -			uint32_t copy_len;
> > -
> > -			/*
> > -			 * Only entire blocks can be hashed.
> > -			 * Copy remainder to extra blocks buffer.
> > -			 */
> > -			copy_len = len & (SHA256_BLOCK_SIZE-1);
> > -
> > -			if (copy_len) {
> > -				len -= copy_len;
> > -				memcpy(ctx->partial_block_buffer,
> > -				       ((const char *) buffer + len),
> > -				       copy_len);
> > -				ctx->partial_block_buffer_length = copy_len;
> > -			}
> > -
> > -			ctx->incoming_buffer_length = 0;
> > -
> > -			/* len should be a multiple of the block size now */
> > -			assert((len % SHA256_BLOCK_SIZE) == 0);
> > -
> > -			/* Set len to the number of blocks to be hashed */
> > -			len >>= SHA256_LOG2_BLOCK_SIZE;
> > -
> > -			if (len) {
> > -
> > -				ctx->job.buffer = (uint8_t *) buffer;
> > -				ctx->job.len = len;
> > -				ctx = (struct sha256_hash_ctx *)
> > -				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -				continue;
> > -			}
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are not empty, then we are
> > -		 * either on the last block(s) or we need more
> > -		 * user input before continuing.
> > -		 */
> > -		if (ctx->status & HASH_CTX_STS_LAST) {
> > -
> > -			uint8_t *buf = ctx->partial_block_buffer;
> > -			uint32_t n_extra_blocks =
> > -				sha256_pad(buf, ctx->total_length);
> > -
> > -			ctx->status = (HASH_CTX_STS_PROCESSING |
> > -				       HASH_CTX_STS_COMPLETE);
> > -			ctx->job.buffer = buf;
> > -			ctx->job.len = (uint32_t) n_extra_blocks;
> > -			ctx = (struct sha256_hash_ctx *)
> > -				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -			continue;
> > -		}
> > -
> > -		ctx->status = HASH_CTX_STS_IDLE;
> > -		return ctx;
> > -	}
> > -
> > -	return NULL;
> > -}
> > -
> > -static struct sha256_hash_ctx
> > -		*sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
> > -{
> > -	/*
> > -	 * If get_comp_job returns NULL, there are no jobs complete.
> > -	 * If get_comp_job returns a job, verify that it is safe to return to
> > -	 * the user. If it is not ready, resubmit the job to finish processing.
> > -	 * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
> > -	 * returned. Otherwise, all jobs currently being managed by the
> > -	 * hash_ctx_mgr still need processing.
> > -	 */
> > -	struct sha256_hash_ctx *ctx;
> > -
> > -	ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
> > -	return sha256_ctx_mgr_resubmit(mgr, ctx);
> > -}
> > -
> > -static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
> > -{
> > -	sha256_job_mgr_init(&mgr->mgr);
> > -}
> > -
> > -static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
> > -					  struct sha256_hash_ctx *ctx,
> > -					  const void *buffer,
> > -					  uint32_t len,
> > -					  int flags)
> > -{
> > -	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
> > -		/* User should not pass anything other than UPDATE or LAST */
> > -		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
> > -		return ctx;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_PROCESSING) {
> > -		/* Cannot submit to a currently processing job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
> > -		return ctx;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -		/* Cannot update a finished job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
> > -		return ctx;
> > -	}
> > -
> > -	/* If we made it here, there was no error during this call to submit */
> > -	ctx->error = HASH_CTX_ERROR_NONE;
> > -
> > -	/* Store buffer ptr info from user */
> > -	ctx->incoming_buffer = buffer;
> > -	ctx->incoming_buffer_length = len;
> > -
> > -	/*
> > -	 * Store the user's request flags and mark this ctx as currently
> > -	 * being processed.
> > -	 */
> > -	ctx->status = (flags & HASH_LAST) ?
> > -			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
> > -			HASH_CTX_STS_PROCESSING;
> > -
> > -	/* Advance byte counter */
> > -	ctx->total_length += len;
> > -
> > -	/*
> > -	 * If there is anything currently buffered in the extra blocks,
> > -	 * append to it until it contains a whole block.
> > -	 * Or if the user's buffer contains less than a whole block,
> > -	 * append as much as possible to the extra block.
> > -	 */
> > -	if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
> > -		/*
> > -		 * Compute how many bytes to copy from user buffer into
> > -		 * extra block
> > -		 */
> > -		uint32_t copy_len = SHA256_BLOCK_SIZE -
> > -					ctx->partial_block_buffer_length;
> > -		if (len < copy_len)
> > -			copy_len = len;
> > -
> > -		if (copy_len) {
> > -			/* Copy and update relevant pointers and counters */
> > -			memcpy(
> > -		&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
> > -				buffer, copy_len);
> > -
> > -			ctx->partial_block_buffer_length += copy_len;
> > -			ctx->incoming_buffer = (const void *)
> > -					((const char *)buffer + copy_len);
> > -			ctx->incoming_buffer_length = len - copy_len;
> > -		}
> > -
> > -		/* The extra block should never contain more than 1 block */
> > -		assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
> > -
> > -		/*
> > -		 * If the extra block buffer contains exactly 1 block,
> > -		 * it can be hashed.
> > -		 */
> > -		if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
> > -			ctx->partial_block_buffer_length = 0;
> > -
> > -			ctx->job.buffer = ctx->partial_block_buffer;
> > -			ctx->job.len = 1;
> > -			ctx = (struct sha256_hash_ctx *)
> > -				sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -		}
> > -	}
> > -
> > -	return sha256_ctx_mgr_resubmit(mgr, ctx);
> > -}
> > -
> > -static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
> > -{
> > -	struct sha256_hash_ctx *ctx;
> > -
> > -	while (1) {
> > -		ctx = (struct sha256_hash_ctx *)
> > -					sha256_job_mgr_flush(&mgr->mgr);
> > -
> > -		/* If flush returned 0, there are no more jobs in flight. */
> > -		if (!ctx)
> > -			return NULL;
> > -
> > -		/*
> > -		 * If flush returned a job, resubmit the job to finish
> > -		 * processing.
> > -		 */
> > -		ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
> > -
> > -		/*
> > -		 * If sha256_ctx_mgr_resubmit returned a job, it is ready to
> > -		 * be returned. Otherwise, all jobs currently being managed by
> > -		 * the sha256_ctx_mgr still need processing. Loop.
> > -		 */
> > -		if (ctx)
> > -			return ctx;
> > -	}
> > -}
> > -
> > -static int sha256_mb_init(struct ahash_request *areq)
> > -{
> > -	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	hash_ctx_init(sctx);
> > -	sctx->job.result_digest[0] = SHA256_H0;
> > -	sctx->job.result_digest[1] = SHA256_H1;
> > -	sctx->job.result_digest[2] = SHA256_H2;
> > -	sctx->job.result_digest[3] = SHA256_H3;
> > -	sctx->job.result_digest[4] = SHA256_H4;
> > -	sctx->job.result_digest[5] = SHA256_H5;
> > -	sctx->job.result_digest[6] = SHA256_H6;
> > -	sctx->job.result_digest[7] = SHA256_H7;
> > -	sctx->total_length = 0;
> > -	sctx->partial_block_buffer_length = 0;
> > -	sctx->status = HASH_CTX_STS_IDLE;
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
> > -{
> > -	int	i;
> > -	struct	sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
> > -	__be32	*dst = (__be32 *) rctx->out;
> > -
> > -	for (i = 0; i < 8; ++i)
> > -		dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
> > -			struct mcryptd_alg_cstate *cstate, bool flush)
> > -{
> > -	int	flag = HASH_UPDATE;
> > -	int	nbytes, err = 0;
> > -	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
> > -	struct sha256_hash_ctx *sha_ctx;
> > -
> > -	/* more work ? */
> > -	while (!(rctx->flag & HASH_DONE)) {
> > -		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
> > -		if (nbytes < 0) {
> > -			err = nbytes;
> > -			goto out;
> > -		}
> > -		/* check if the walk is done */
> > -		if (crypto_ahash_walk_last(&rctx->walk)) {
> > -			rctx->flag |= HASH_DONE;
> > -			if (rctx->flag & HASH_FINAL)
> > -				flag |= HASH_LAST;
> > -
> > -		}
> > -		sha_ctx = (struct sha256_hash_ctx *)
> > -						ahash_request_ctx(&rctx->areq);
> > -		kernel_fpu_begin();
> > -		sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
> > -						rctx->walk.data, nbytes, flag);
> > -		if (!sha_ctx) {
> > -			if (flush)
> > -				sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
> > -		}
> > -		kernel_fpu_end();
> > -		if (sha_ctx)
> > -			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		else {
> > -			rctx = NULL;
> > -			goto out;
> > -		}
> > -	}
> > -
> > -	/* copy the results */
> > -	if (rctx->flag & HASH_FINAL)
> > -		sha256_mb_set_results(rctx);
> > -
> > -out:
> > -	*ret_rctx = rctx;
> > -	return err;
> > -}
> > -
> > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
> > -			    struct mcryptd_alg_cstate *cstate,
> > -			    int err)
> > -{
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha256_hash_ctx *sha_ctx;
> > -	struct mcryptd_hash_request_ctx *req_ctx;
> > -	int ret;
> > -
> > -	/* remove from work list */
> > -	spin_lock(&cstate->work_lock);
> > -	list_del(&rctx->waiter);
> > -	spin_unlock(&cstate->work_lock);
> > -
> > -	if (irqs_disabled())
> > -		rctx->complete(&req->base, err);
> > -	else {
> > -		local_bh_disable();
> > -		rctx->complete(&req->base, err);
> > -		local_bh_enable();
> > -	}
> > -
> > -	/* check to see if there are other jobs that are done */
> > -	sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
> > -	while (sha_ctx) {
> > -		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		ret = sha_finish_walk(&req_ctx, cstate, false);
> > -		if (req_ctx) {
> > -			spin_lock(&cstate->work_lock);
> > -			list_del(&req_ctx->waiter);
> > -			spin_unlock(&cstate->work_lock);
> > -
> > -			req = cast_mcryptd_ctx_to_req(req_ctx);
> > -			if (irqs_disabled())
> > -				req_ctx->complete(&req->base, ret);
> > -			else {
> > -				local_bh_disable();
> > -				req_ctx->complete(&req->base, ret);
> > -				local_bh_enable();
> > -			}
> > -		}
> > -		sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
> > -			     struct mcryptd_alg_cstate *cstate)
> > -{
> > -	unsigned long next_flush;
> > -	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
> > -
> > -	/* initialize tag */
> > -	rctx->tag.arrival = jiffies;    /* tag the arrival time */
> > -	rctx->tag.seq_num = cstate->next_seq_num++;
> > -	next_flush = rctx->tag.arrival + delay;
> > -	rctx->tag.expire = next_flush;
> > -
> > -	spin_lock(&cstate->work_lock);
> > -	list_add_tail(&rctx->waiter, &cstate->work_list);
> > -	spin_unlock(&cstate->work_lock);
> > -
> > -	mcryptd_arm_flusher(cstate, delay);
> > -}
> > -
> > -static int sha256_mb_update(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -		container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha256_hash_ctx *sha_ctx;
> > -	int ret = 0, nbytes;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk))
> > -		rctx->flag |= HASH_DONE;
> > -
> > -	/* submit */
> > -	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
> > -	sha256_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
> > -							nbytes, HASH_UPDATE);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha256_mb_finup(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -		container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha256_hash_ctx *sha_ctx;
> > -	int ret = 0, flag = HASH_UPDATE, nbytes;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk)) {
> > -		rctx->flag |= HASH_DONE;
> > -		flag = HASH_LAST;
> > -	}
> > -
> > -	/* submit */
> > -	rctx->flag |= HASH_FINAL;
> > -	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
> > -	sha256_mb_add_list(rctx, cstate);
> > -
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
> > -								nbytes, flag);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha256_mb_final(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -			container_of(areq, struct mcryptd_hash_request_ctx,
> > -			areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
> > -
> > -	struct sha256_hash_ctx *sha_ctx;
> > -	int ret = 0;
> > -	u8 data;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	rctx->flag |= HASH_DONE | HASH_FINAL;
> > -
> > -	sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
> > -	/* flag HASH_FINAL and 0 data size */
> > -	sha256_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
> > -								HASH_LAST);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha256_mb_export(struct ahash_request *areq, void *out)
> > -{
> > -	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(out, sctx, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha256_mb_import(struct ahash_request *areq, const void *in)
> > -{
> > -	struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(sctx, in, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -	struct mcryptd_hash_ctx *mctx;
> > -
> > -	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
> > -						CRYPTO_ALG_INTERNAL,
> > -						CRYPTO_ALG_INTERNAL);
> > -	if (IS_ERR(mcryptd_tfm))
> > -		return PTR_ERR(mcryptd_tfm);
> > -	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
> > -	mctx->alg_state = &sha256_mb_alg_state;
> > -	ctx->mcryptd_tfm = mcryptd_tfm;
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				crypto_ahash_reqsize(&mcryptd_tfm->base));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				sizeof(struct sha256_hash_ctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static struct ahash_alg sha256_mb_areq_alg = {
> > -	.init		=	sha256_mb_init,
> > -	.update		=	sha256_mb_update,
> > -	.final		=	sha256_mb_final,
> > -	.finup		=	sha256_mb_finup,
> > -	.export		=	sha256_mb_export,
> > -	.import		=	sha256_mb_import,
> > -	.halg		=	{
> > -	.digestsize	=	SHA256_DIGEST_SIZE,
> > -	.statesize	=	sizeof(struct sha256_hash_ctx),
> > -		.base		=	{
> > -			.cra_name	 = "__sha256-mb",
> > -			.cra_driver_name = "__intel_sha256-mb",
> > -			.cra_priority	 = 100,
> > -			/*
> > -			 * use ASYNC flag as some buffers in multi-buffer
> > -			 * algo may not have completed before hashing thread
> > -			 * sleep
> > -			 */
> > -			.cra_flags	= CRYPTO_ALG_ASYNC |
> > -					  CRYPTO_ALG_INTERNAL,
> > -			.cra_blocksize	= SHA256_BLOCK_SIZE,
> > -			.cra_module	= THIS_MODULE,
> > -			.cra_list	= LIST_HEAD_INIT
> > -					(sha256_mb_areq_alg.halg.base.cra_list),
> > -			.cra_init	= sha256_mb_areq_init_tfm,
> > -			.cra_exit	= sha256_mb_areq_exit_tfm,
> > -			.cra_ctxsize	= sizeof(struct sha256_hash_ctx),
> > -		}
> > -	}
> > -};
> > -
> > -static int sha256_mb_async_init(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_init(mcryptd_req);
> > -}
> > -
> > -static int sha256_mb_async_update(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_update(mcryptd_req);
> > -}
> > -
> > -static int sha256_mb_async_finup(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_finup(mcryptd_req);
> > -}
> > -
> > -static int sha256_mb_async_final(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_final(mcryptd_req);
> > -}
> > -
> > -static int sha256_mb_async_digest(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_digest(mcryptd_req);
> > -}
> > -
> > -static int sha256_mb_async_export(struct ahash_request *req, void *out)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_export(mcryptd_req, out);
> > -}
> > -
> > -static int sha256_mb_async_import(struct ahash_request *req, const void *in)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	struct ahash_request *areq;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	rctx = ahash_request_ctx(mcryptd_req);
> > -	areq = &rctx->areq;
> > -
> > -	ahash_request_set_tfm(areq, child);
> > -	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
> > -					rctx->complete, req);
> > -
> > -	return crypto_ahash_import(mcryptd_req, in);
> > -}
> > -
> > -static struct ahash_alg sha256_mb_async_alg = {
> > -	.init           = sha256_mb_async_init,
> > -	.update         = sha256_mb_async_update,
> > -	.final          = sha256_mb_async_final,
> > -	.finup          = sha256_mb_async_finup,
> > -	.export         = sha256_mb_async_export,
> > -	.import         = sha256_mb_async_import,
> > -	.digest         = sha256_mb_async_digest,
> > -	.halg = {
> > -		.digestsize     = SHA256_DIGEST_SIZE,
> > -		.statesize      = sizeof(struct sha256_hash_ctx),
> > -		.base = {
> > -			.cra_name               = "sha256",
> > -			.cra_driver_name        = "sha256_mb",
> > -			/*
> > -			 * Low priority, since with few concurrent hash requests
> > -			 * this is extremely slow due to the flush delay.  Users
> > -			 * whose workloads would benefit from this can request
> > -			 * it explicitly by driver name, or can increase its
> > -			 * priority at runtime using NETLINK_CRYPTO.
> > -			 */
> > -			.cra_priority           = 50,
> > -			.cra_flags              = CRYPTO_ALG_ASYNC,
> > -			.cra_blocksize          = SHA256_BLOCK_SIZE,
> > -			.cra_module             = THIS_MODULE,
> > -			.cra_list               = LIST_HEAD_INIT
> > -				(sha256_mb_async_alg.halg.base.cra_list),
> > -			.cra_init               = sha256_mb_async_init_tfm,
> > -			.cra_exit               = sha256_mb_async_exit_tfm,
> > -			.cra_ctxsize		= sizeof(struct sha256_mb_ctx),
> > -			.cra_alignmask		= 0,
> > -		},
> > -	},
> > -};
> > -
> > -static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	unsigned long cur_time;
> > -	unsigned long next_flush = 0;
> > -	struct sha256_hash_ctx *sha_ctx;
> > -
> > -
> > -	cur_time = jiffies;
> > -
> > -	while (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		if (time_before(cur_time, rctx->tag.expire))
> > -			break;
> > -		kernel_fpu_begin();
> > -		sha_ctx = (struct sha256_hash_ctx *)
> > -					sha256_ctx_mgr_flush(cstate->mgr);
> > -		kernel_fpu_end();
> > -		if (!sha_ctx) {
> > -			pr_err("sha256_mb error: nothing got"
> > -					" flushed for non-empty list\n");
> > -			break;
> > -		}
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		sha_finish_walk(&rctx, cstate, true);
> > -		sha_complete_job(rctx, cstate, 0);
> > -	}
> > -
> > -	if (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		/* get the hash context and then flush time */
> > -		next_flush = rctx->tag.expire;
> > -		mcryptd_arm_flusher(cstate, get_delay(next_flush));
> > -	}
> > -	return next_flush;
> > -}
> > -
> > -static int __init sha256_mb_mod_init(void)
> > -{
> > -
> > -	int cpu;
> > -	int err;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	/* check for dependent cpu features */
> > -	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
> > -	    !boot_cpu_has(X86_FEATURE_BMI2))
> > -		return -ENODEV;
> > -
> > -	/* initialize multibuffer structures */
> > -	sha256_mb_alg_state.alg_cstate = alloc_percpu
> > -						(struct mcryptd_alg_cstate);
> > -
> > -	sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
> > -	sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
> > -	sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
> > -	sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
> > -
> > -	if (!sha256_mb_alg_state.alg_cstate)
> > -		return -ENOMEM;
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
> > -		cpu_state->next_flush = 0;
> > -		cpu_state->next_seq_num = 0;
> > -		cpu_state->flusher_engaged = false;
> > -		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
> > -		cpu_state->cpu = cpu;
> > -		cpu_state->alg_state = &sha256_mb_alg_state;
> > -		cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
> > -					GFP_KERNEL);
> > -		if (!cpu_state->mgr)
> > -			goto err2;
> > -		sha256_ctx_mgr_init(cpu_state->mgr);
> > -		INIT_LIST_HEAD(&cpu_state->work_list);
> > -		spin_lock_init(&cpu_state->work_lock);
> > -	}
> > -	sha256_mb_alg_state.flusher = &sha256_mb_flusher;
> > -
> > -	err = crypto_register_ahash(&sha256_mb_areq_alg);
> > -	if (err)
> > -		goto err2;
> > -	err = crypto_register_ahash(&sha256_mb_async_alg);
> > -	if (err)
> > -		goto err1;
> > -
> > -
> > -	return 0;
> > -err1:
> > -	crypto_unregister_ahash(&sha256_mb_areq_alg);
> > -err2:
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha256_mb_alg_state.alg_cstate);
> > -	return -ENODEV;
> > -}
> > -
> > -static void __exit sha256_mb_mod_fini(void)
> > -{
> > -	int cpu;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	crypto_unregister_ahash(&sha256_mb_async_alg);
> > -	crypto_unregister_ahash(&sha256_mb_areq_alg);
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha256_mb_alg_state.alg_cstate);
> > -}
> > -
> > -module_init(sha256_mb_mod_init);
> > -module_exit(sha256_mb_mod_fini);
> > -
> > -MODULE_LICENSE("GPL");
> > -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
> > -
> > -MODULE_ALIAS_CRYPTO("sha256");
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
> > deleted file mode 100644
> > index 7c432543dc7f..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
> > +++ /dev/null
> > @@ -1,134 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA256 context
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#ifndef _SHA_MB_CTX_INTERNAL_H
> > -#define _SHA_MB_CTX_INTERNAL_H
> > -
> > -#include "sha256_mb_mgr.h"
> > -
> > -#define HASH_UPDATE          0x00
> > -#define HASH_LAST            0x01
> > -#define HASH_DONE	     0x02
> > -#define HASH_FINAL	     0x04
> > -
> > -#define HASH_CTX_STS_IDLE       0x00
> > -#define HASH_CTX_STS_PROCESSING 0x01
> > -#define HASH_CTX_STS_LAST       0x02
> > -#define HASH_CTX_STS_COMPLETE   0x04
> > -
> > -enum hash_ctx_error {
> > -	HASH_CTX_ERROR_NONE               =  0,
> > -	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
> > -	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
> > -	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
> > -
> > -#ifdef HASH_CTX_DEBUG
> > -	HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
> > -#endif
> > -};
> > -
> > -
> > -#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
> > -#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
> > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
> > -#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
> > -#define hash_ctx_status(ctx)     ((ctx)->status)
> > -#define hash_ctx_error(ctx)      ((ctx)->error)
> > -#define hash_ctx_init(ctx) \
> > -	do { \
> > -		(ctx)->error = HASH_CTX_ERROR_NONE; \
> > -		(ctx)->status = HASH_CTX_STS_COMPLETE; \
> > -	} while (0)
> > -
> > -
> > -/* Hash Constants and Typedefs */
> > -#define SHA256_DIGEST_LENGTH        8
> > -#define SHA256_LOG2_BLOCK_SIZE        6
> > -
> > -#define SHA256_PADLENGTHFIELD_SIZE    8
> > -
> > -#ifdef SHA_MB_DEBUG
> > -#define assert(expr) \
> > -do { \
> > -	if (unlikely(!(expr))) { \
> > -		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
> > -		#expr, __FILE__, __func__, __LINE__); \
> > -	} \
> > -} while (0)
> > -#else
> > -#define assert(expr) do {} while (0)
> > -#endif
> > -
> > -struct sha256_ctx_mgr {
> > -	struct sha256_mb_mgr mgr;
> > -};
> > -
> > -/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
> > -
> > -struct sha256_hash_ctx {
> > -	/* Must be at struct offset 0 */
> > -	struct job_sha256       job;
> > -	/* status flag */
> > -	int status;
> > -	/* error flag */
> > -	int error;
> > -
> > -	uint64_t	total_length;
> > -	const void	*incoming_buffer;
> > -	uint32_t	incoming_buffer_length;
> > -	uint8_t		partial_block_buffer[SHA256_BLOCK_SIZE * 2];
> > -	uint32_t	partial_block_buffer_length;
> > -	void		*user_data;
> > -};
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
> > deleted file mode 100644
> > index b01ae408c56d..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
> > +++ /dev/null
> > @@ -1,108 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA256 algorithm manager
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -#ifndef __SHA_MB_MGR_H
> > -#define __SHA_MB_MGR_H
> > -
> > -#include <linux/types.h>
> > -
> > -#define NUM_SHA256_DIGEST_WORDS 8
> > -
> > -enum job_sts {	STS_UNKNOWN = 0,
> > -		STS_BEING_PROCESSED = 1,
> > -		STS_COMPLETED = 2,
> > -		STS_INTERNAL_ERROR = 3,
> > -		STS_ERROR = 4
> > -};
> > -
> > -struct job_sha256 {
> > -	u8	*buffer;
> > -	u32	len;
> > -	u32	result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
> > -	enum	job_sts status;
> > -	void	*user_data;
> > -};
> > -
> > -/* SHA256 out-of-order scheduler */
> > -
> > -/* typedef uint32_t sha8_digest_array[8][8]; */
> > -
> > -struct sha256_args_x8 {
> > -	uint32_t	digest[8][8];
> > -	uint8_t		*data_ptr[8];
> > -};
> > -
> > -struct sha256_lane_data {
> > -	struct job_sha256 *job_in_lane;
> > -};
> > -
> > -struct sha256_mb_mgr {
> > -	struct sha256_args_x8 args;
> > -
> > -	uint32_t lens[8];
> > -
> > -	/* each byte is index (0...7) of unused lanes */
> > -	uint64_t unused_lanes;
> > -	/* byte 4 is set to FF as a flag */
> > -	struct sha256_lane_data ldata[8];
> > -};
> > -
> > -
> > -#define SHA256_MB_MGR_NUM_LANES_AVX2 8
> > -
> > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
> > -struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
> > -					 struct job_sha256 *job);
> > -struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
> > -struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
> > deleted file mode 100644
> > index 5c377bac21d0..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
> > +++ /dev/null
> > @@ -1,304 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA256 algorithm data structure
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *     Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -# Macros for defining data structures
> > -
> > -# Usage example
> > -
> > -#START_FIELDS	# JOB_AES
> > -###	name		size	align
> > -#FIELD	_plaintext,	8,	8	# pointer to plaintext
> > -#FIELD	_ciphertext,	8,	8	# pointer to ciphertext
> > -#FIELD	_IV,		16,	8	# IV
> > -#FIELD	_keys,		8,	8	# pointer to keys
> > -#FIELD	_len,		4,	4	# length in bytes
> > -#FIELD	_status,	4,	4	# status enumeration
> > -#FIELD	_user_data,	8,	8	# pointer to user data
> > -#UNION  _union,         size1,  align1, \
> > -#	                size2,  align2, \
> > -#	                size3,  align3, \
> > -#	                ...
> > -#END_FIELDS
> > -#%assign _JOB_AES_size	_FIELD_OFFSET
> > -#%assign _JOB_AES_align	_STRUCT_ALIGN
> > -
> > -#########################################################################
> > -
> > -# Alternate "struc-like" syntax:
> > -#	STRUCT job_aes2
> > -#	RES_Q	.plaintext,	1
> > -#	RES_Q	.ciphertext, 	1
> > -#	RES_DQ	.IV,		1
> > -#	RES_B	.nested,	_JOB_AES_SIZE, _JOB_AES_ALIGN
> > -#	RES_U	.union,		size1, align1, \
> > -#				size2, align2, \
> > -#				...
> > -#	ENDSTRUCT
> > -#	# Following only needed if nesting
> > -#	%assign job_aes2_size	_FIELD_OFFSET
> > -#	%assign job_aes2_align	_STRUCT_ALIGN
> > -#
> > -# RES_* macros take a name, a count and an optional alignment.
> > -# The count in in terms of the base size of the macro, and the
> > -# default alignment is the base size.
> > -# The macros are:
> > -# Macro    Base size
> > -# RES_B	    1
> > -# RES_W	    2
> > -# RES_D     4
> > -# RES_Q     8
> > -# RES_DQ   16
> > -# RES_Y    32
> > -# RES_Z    64
> > -#
> > -# RES_U defines a union. It's arguments are a name and two or more
> > -# pairs of "size, alignment"
> > -#
> > -# The two assigns are only needed if this structure is being nested
> > -# within another. Even if the assigns are not done, one can still use
> > -# STRUCT_NAME_size as the size of the structure.
> > -#
> > -# Note that for nesting, you still need to assign to STRUCT_NAME_size.
> > -#
> > -# The differences between this and using "struc" directly are that each
> > -# type is implicitly aligned to its natural length (although this can be
> > -# over-ridden with an explicit third parameter), and that the structure
> > -# is padded at the end to its overall alignment.
> > -#
> > -
> > -#########################################################################
> > -
> > -#ifndef _DATASTRUCT_ASM_
> > -#define _DATASTRUCT_ASM_
> > -
> > -#define SZ8			8*SHA256_DIGEST_WORD_SIZE
> > -#define ROUNDS			64*SZ8
> > -#define PTR_SZ                  8
> > -#define SHA256_DIGEST_WORD_SIZE 4
> > -#define MAX_SHA256_LANES        8
> > -#define SHA256_DIGEST_WORDS 8
> > -#define SHA256_DIGEST_ROW_SIZE  (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
> > -#define SHA256_DIGEST_SIZE      (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
> > -#define SHA256_BLK_SZ           64
> > -
> > -# START_FIELDS
> > -.macro START_FIELDS
> > - _FIELD_OFFSET = 0
> > - _STRUCT_ALIGN = 0
> > -.endm
> > -
> > -# FIELD name size align
> > -.macro FIELD name size align
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
> > - \name	= _FIELD_OFFSET
> > - _FIELD_OFFSET = _FIELD_OFFSET + (\size)
> > -.if (\align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = \align
> > -.endif
> > -.endm
> > -
> > -# END_FIELDS
> > -.macro END_FIELDS
> > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
> > -.endm
> > -
> > -########################################################################
> > -
> > -.macro STRUCT p1
> > -START_FIELDS
> > -.struc \p1
> > -.endm
> > -
> > -.macro ENDSTRUCT
> > - tmp = _FIELD_OFFSET
> > - END_FIELDS
> > - tmp = (_FIELD_OFFSET - %%tmp)
> > -.if (tmp > 0)
> > -	.lcomm	tmp
> > -.endif
> > -.endstruc
> > -.endm
> > -
> > -## RES_int name size align
> > -.macro RES_int p1 p2 p3
> > - name = \p1
> > - size = \p2
> > - align = .\p3
> > -
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
> > -.align align
> > -.lcomm name size
> > - _FIELD_OFFSET = _FIELD_OFFSET + (size)
> > -.if (align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = align
> > -.endif
> > -.endm
> > -
> > -# macro RES_B name, size [, align]
> > -.macro RES_B _name, _size, _align=1
> > -RES_int _name _size _align
> > -.endm
> > -
> > -# macro RES_W name, size [, align]
> > -.macro RES_W _name, _size, _align=2
> > -RES_int _name 2*(_size) _align
> > -.endm
> > -
> > -# macro RES_D name, size [, align]
> > -.macro RES_D _name, _size, _align=4
> > -RES_int _name 4*(_size) _align
> > -.endm
> > -
> > -# macro RES_Q name, size [, align]
> > -.macro RES_Q _name, _size, _align=8
> > -RES_int _name 8*(_size) _align
> > -.endm
> > -
> > -# macro RES_DQ name, size [, align]
> > -.macro RES_DQ _name, _size, _align=16
> > -RES_int _name 16*(_size) _align
> > -.endm
> > -
> > -# macro RES_Y name, size [, align]
> > -.macro RES_Y _name, _size, _align=32
> > -RES_int _name 32*(_size) _align
> > -.endm
> > -
> > -# macro RES_Z name, size [, align]
> > -.macro RES_Z _name, _size, _align=64
> > -RES_int _name 64*(_size) _align
> > -.endm
> > -
> > -#endif
> > -
> > -
> > -########################################################################
> > -#### Define SHA256 Out Of Order Data Structures
> > -########################################################################
> > -
> > -START_FIELDS    # LANE_DATA
> > -###     name            size    align
> > -FIELD   _job_in_lane,   8,      8       # pointer to job object
> > -END_FIELDS
> > -
> > - _LANE_DATA_size = _FIELD_OFFSET
> > - _LANE_DATA_align = _STRUCT_ALIGN
> > -
> > -########################################################################
> > -
> > -START_FIELDS    # SHA256_ARGS_X4
> > -###     name            size    align
> > -FIELD   _digest,        4*8*8,  4       # transposed digest
> > -FIELD   _data_ptr,      8*8,    8       # array of pointers to data
> > -END_FIELDS
> > -
> > - _SHA256_ARGS_X4_size  =  _FIELD_OFFSET
> > - _SHA256_ARGS_X4_align = _STRUCT_ALIGN
> > - _SHA256_ARGS_X8_size  =	_FIELD_OFFSET
> > - _SHA256_ARGS_X8_align =	_STRUCT_ALIGN
> > -
> > -#######################################################################
> > -
> > -START_FIELDS    # MB_MGR
> > -###     name            size    align
> > -FIELD   _args,          _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
> > -FIELD   _lens,          4*8,    8
> > -FIELD   _unused_lanes,  8,      8
> > -FIELD   _ldata,         _LANE_DATA_size*8, _LANE_DATA_align
> > -END_FIELDS
> > -
> > - _MB_MGR_size  =  _FIELD_OFFSET
> > - _MB_MGR_align =  _STRUCT_ALIGN
> > -
> > -_args_digest   =     _args + _digest
> > -_args_data_ptr =     _args + _data_ptr
> > -
> > -#######################################################################
> > -
> > -START_FIELDS    #STACK_FRAME
> > -###     name            size    align
> > -FIELD   _data,		16*SZ8,   1       # transposed digest
> > -FIELD   _digest,         8*SZ8,   1       # array of pointers to data
> > -FIELD   _ytmp,           4*SZ8,   1
> > -FIELD   _rsp,            8,       1
> > -END_FIELDS
> > -
> > - _STACK_FRAME_size  =  _FIELD_OFFSET
> > - _STACK_FRAME_align =  _STRUCT_ALIGN
> > -
> > -#######################################################################
> > -
> > -########################################################################
> > -#### Define constants
> > -########################################################################
> > -
> > -#define STS_UNKNOWN             0
> > -#define STS_BEING_PROCESSED     1
> > -#define STS_COMPLETED           2
> > -
> > -########################################################################
> > -#### Define JOB_SHA256 structure
> > -########################################################################
> > -
> > -START_FIELDS    # JOB_SHA256
> > -
> > -###     name                            size    align
> > -FIELD   _buffer,                        8,      8       # pointer to buffer
> > -FIELD   _len,                           8,      8       # length in bytes
> > -FIELD   _result_digest,                 8*4,    32      # Digest (output)
> > -FIELD   _status,                        4,      4
> > -FIELD   _user_data,                     8,      8
> > -END_FIELDS
> > -
> > - _JOB_SHA256_size = _FIELD_OFFSET
> > - _JOB_SHA256_align = _STRUCT_ALIGN
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> > deleted file mode 100644
> > index d2364c55bbde..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
> > +++ /dev/null
> > @@ -1,307 +0,0 @@
> > -/*
> > - * Flush routine for SHA256 multibuffer
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha256_mb_mgr_datastruct.S"
> > -
> > -.extern sha256_x8_avx2
> > -
> > -#LINUX register definitions
> > -#define arg1	%rdi
> > -#define arg2	%rsi
> > -
> > -# Common register definitions
> > -#define state	arg1
> > -#define job	arg2
> > -#define len2	arg2
> > -
> > -# idx must be a register not clobberred by sha1_mult
> > -#define idx		%r8
> > -#define DWORD_idx	%r8d
> > -
> > -#define unused_lanes	%rbx
> > -#define lane_data	%rbx
> > -#define tmp2		%rbx
> > -#define tmp2_w		%ebx
> > -
> > -#define job_rax		%rax
> > -#define tmp1		%rax
> > -#define size_offset	%rax
> > -#define tmp		%rax
> > -#define start_offset	%rax
> > -
> > -#define tmp3		%arg1
> > -
> > -#define extra_blocks	%arg2
> > -#define p		%arg2
> > -
> > -.macro LABEL prefix n
> > -\prefix\n\():
> > -.endm
> > -
> > -.macro JNE_SKIP i
> > -jne     skip_\i
> > -.endm
> > -
> > -.altmacro
> > -.macro SET_OFFSET _offset
> > -offset = \_offset
> > -.endm
> > -.noaltmacro
> > -
> > -# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
> > -# arg 1 : rcx : state
> > -ENTRY(sha256_mb_mgr_flush_avx2)
> > -	FRAME_BEGIN
> > -        push    %rbx
> > -
> > -	# If bit (32+3) is set, then all lanes are empty
> > -	mov	_unused_lanes(state), unused_lanes
> > -	bt	$32+3, unused_lanes
> > -	jc	return_null
> > -
> > -	# find a lane with a non-null job
> > -	xor	idx, idx
> > -	offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	one(%rip), idx
> > -	offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	two(%rip), idx
> > -	offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	three(%rip), idx
> > -	offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	four(%rip), idx
> > -	offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	five(%rip), idx
> > -	offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	six(%rip), idx
> > -	offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -	cmovne	seven(%rip), idx
> > -
> > -	# copy idx to empty lanes
> > -copy_lane_data:
> > -	offset =  (_args + _data_ptr)
> > -	mov	offset(state,idx,8), tmp
> > -
> > -	I = 0
> > -.rep 8
> > -	offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
> > -	cmpq	$0, offset(state)
> > -.altmacro
> > -	JNE_SKIP %I
> > -	offset =  (_args + _data_ptr + 8*I)
> > -	mov	tmp, offset(state)
> > -	offset =  (_lens + 4*I)
> > -	movl	$0xFFFFFFFF, offset(state)
> > -LABEL skip_ %I
> > -	I = (I+1)
> > -.noaltmacro
> > -.endr
> > -
> > -	# Find min length
> > -	vmovdqu _lens+0*16(state), %xmm0
> > -	vmovdqu _lens+1*16(state), %xmm1
> > -
> > -	vpminud %xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
> > -	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
> > -	vpminud %xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
> > -
> > -	vmovd	%xmm2, DWORD_idx
> > -	mov	idx, len2
> > -	and	$0xF, idx
> > -	shr	$4, len2
> > -	jz	len_is_0
> > -
> > -	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
> > -	vpshufd	$0, %xmm2, %xmm2
> > -
> > -	vpsubd	%xmm2, %xmm0, %xmm0
> > -	vpsubd	%xmm2, %xmm1, %xmm1
> > -
> > -	vmovdqu	%xmm0, _lens+0*16(state)
> > -	vmovdqu	%xmm1, _lens+1*16(state)
> > -
> > -	# "state" and "args" are the same address, arg1
> > -	# len is arg2
> > -	call	sha256_x8_avx2
> > -	# state and idx are intact
> > -
> > -len_is_0:
> > -	# process completed job "idx"
> > -	imul	$_LANE_DATA_size, idx, lane_data
> > -	lea	_ldata(state, lane_data), lane_data
> > -
> > -	mov	_job_in_lane(lane_data), job_rax
> > -	movq	$0, _job_in_lane(lane_data)
> > -	movl	$STS_COMPLETED, _status(job_rax)
> > -	mov	_unused_lanes(state), unused_lanes
> > -	shl	$4, unused_lanes
> > -	or	idx, unused_lanes
> > -
> > -	mov	unused_lanes, _unused_lanes(state)
> > -	movl	$0xFFFFFFFF, _lens(state,idx,4)
> > -
> > -	vmovd	_args_digest(state , idx, 4) , %xmm0
> > -	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
> > -	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
> > -	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
> > -
> > -	vmovdqu	%xmm0, _result_digest(job_rax)
> > -	offset =  (_result_digest + 1*16)
> > -	vmovdqu	%xmm1, offset(job_rax)
> > -
> > -return:
> > -	pop     %rbx
> > -	FRAME_END
> > -	ret
> > -
> > -return_null:
> > -	xor	job_rax, job_rax
> > -	jmp	return
> > -ENDPROC(sha256_mb_mgr_flush_avx2)
> > -
> > -##############################################################################
> > -
> > -.align 16
> > -ENTRY(sha256_mb_mgr_get_comp_job_avx2)
> > -	push	%rbx
> > -
> > -	## if bit 32+3 is set, then all lanes are empty
> > -	mov	_unused_lanes(state), unused_lanes
> > -	bt	$(32+3), unused_lanes
> > -	jc	.return_null
> > -
> > -	# Find min length
> > -	vmovdqu	_lens(state), %xmm0
> > -	vmovdqu	_lens+1*16(state), %xmm1
> > -
> > -	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
> > -	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
> > -	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
> > -
> > -	vmovd	%xmm2, DWORD_idx
> > -	test	$~0xF, idx
> > -	jnz	.return_null
> > -
> > -	# process completed job "idx"
> > -	imul	$_LANE_DATA_size, idx, lane_data
> > -	lea	_ldata(state, lane_data), lane_data
> > -
> > -	mov	_job_in_lane(lane_data), job_rax
> > -	movq	$0,  _job_in_lane(lane_data)
> > -	movl	$STS_COMPLETED, _status(job_rax)
> > -	mov	_unused_lanes(state), unused_lanes
> > -	shl	$4, unused_lanes
> > -	or	idx, unused_lanes
> > -	mov	unused_lanes, _unused_lanes(state)
> > -
> > -	movl	$0xFFFFFFFF, _lens(state,  idx, 4)
> > -
> > -	vmovd	_args_digest(state, idx, 4), %xmm0
> > -	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
> > -	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
> > -	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
> > -
> > -        vmovdqu %xmm0, _result_digest(job_rax)
> > -        offset =  (_result_digest + 1*16)
> > -        vmovdqu %xmm1, offset(job_rax)
> > -
> > -	pop	%rbx
> > -
> > -	ret
> > -
> > -.return_null:
> > -	xor	job_rax, job_rax
> > -	pop	%rbx
> > -	ret
> > -ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
> > -
> > -.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
> > -.align 16
> > -clear_low_nibble:
> > -.octa	0x000000000000000000000000FFFFFFF0
> > -
> > -.section	.rodata.cst8, "aM", @progbits, 8
> > -.align 8
> > -one:
> > -.quad	1
> > -two:
> > -.quad	2
> > -three:
> > -.quad	3
> > -four:
> > -.quad	4
> > -five:
> > -.quad	5
> > -six:
> > -.quad	6
> > -seven:
> > -.quad  7
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
> > deleted file mode 100644
> > index b0c498371e67..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
> > +++ /dev/null
> > @@ -1,65 +0,0 @@
> > -/*
> > - * Initialization code for multi buffer SHA256 algorithm for AVX2
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include "sha256_mb_mgr.h"
> > -
> > -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
> > -{
> > -	unsigned int j;
> > -
> > -	state->unused_lanes = 0xF76543210ULL;
> > -	for (j = 0; j < 8; j++) {
> > -		state->lens[j] = 0xFFFFFFFF;
> > -		state->ldata[j].job_in_lane = NULL;
> > -	}
> > -}
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
> > deleted file mode 100644
> > index b36ae7454084..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
> > +++ /dev/null
> > @@ -1,214 +0,0 @@
> > -/*
> > - * Buffer submit code for multi buffer SHA256 algorithm
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha256_mb_mgr_datastruct.S"
> > -
> > -.extern sha256_x8_avx2
> > -
> > -# LINUX register definitions
> > -arg1		= %rdi
> > -arg2		= %rsi
> > -size_offset	= %rcx
> > -tmp2		= %rcx
> > -extra_blocks	= %rdx
> > -
> > -# Common definitions
> > -#define state	arg1
> > -#define job	%rsi
> > -#define len2	arg2
> > -#define p2	arg2
> > -
> > -# idx must be a register not clobberred by sha1_x8_avx2
> > -idx		= %r8
> > -DWORD_idx	= %r8d
> > -last_len	= %r8
> > -
> > -p		= %r11
> > -start_offset	= %r11
> > -
> > -unused_lanes	= %rbx
> > -BYTE_unused_lanes = %bl
> > -
> > -job_rax		= %rax
> > -len		= %rax
> > -DWORD_len	= %eax
> > -
> > -lane		= %r12
> > -tmp3		= %r12
> > -
> > -tmp		= %r9
> > -DWORD_tmp	= %r9d
> > -
> > -lane_data	= %r10
> > -
> > -# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
> > -# arg 1 : rcx : state
> > -# arg 2 : rdx : job
> > -ENTRY(sha256_mb_mgr_submit_avx2)
> > -	FRAME_BEGIN
> > -	push	%rbx
> > -	push	%r12
> > -
> > -	mov	_unused_lanes(state), unused_lanes
> > -	mov	unused_lanes, lane
> > -	and	$0xF, lane
> > -	shr	$4, unused_lanes
> > -	imul	$_LANE_DATA_size, lane, lane_data
> > -	movl	$STS_BEING_PROCESSED, _status(job)
> > -	lea	_ldata(state, lane_data), lane_data
> > -	mov	unused_lanes, _unused_lanes(state)
> > -	movl	_len(job),  DWORD_len
> > -
> > -	mov	job, _job_in_lane(lane_data)
> > -	shl	$4, len
> > -	or	lane, len
> > -
> > -	movl	DWORD_len,  _lens(state , lane, 4)
> > -
> > -	# Load digest words from result_digest
> > -	vmovdqu	_result_digest(job), %xmm0
> > -	vmovdqu	_result_digest+1*16(job), %xmm1
> > -	vmovd	%xmm0, _args_digest(state, lane, 4)
> > -	vpextrd	$1, %xmm0, _args_digest+1*32(state , lane, 4)
> > -	vpextrd	$2, %xmm0, _args_digest+2*32(state , lane, 4)
> > -	vpextrd	$3, %xmm0, _args_digest+3*32(state , lane, 4)
> > -	vmovd	%xmm1, _args_digest+4*32(state , lane, 4)
> > -
> > -	vpextrd	$1, %xmm1, _args_digest+5*32(state , lane, 4)
> > -	vpextrd	$2, %xmm1, _args_digest+6*32(state , lane, 4)
> > -	vpextrd	$3, %xmm1, _args_digest+7*32(state , lane, 4)
> > -
> > -	mov	_buffer(job), p
> > -	mov	p, _args_data_ptr(state, lane, 8)
> > -
> > -	cmp	$0xF, unused_lanes
> > -	jne	return_null
> > -
> > -start_loop:
> > -	# Find min length
> > -	vmovdqa	_lens(state), %xmm0
> > -	vmovdqa	_lens+1*16(state), %xmm1
> > -
> > -	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
> > -	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
> > -	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
> > -	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
> > -	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min val in low dword
> > -
> > -	vmovd	%xmm2, DWORD_idx
> > -	mov	idx, len2
> > -	and	$0xF, idx
> > -	shr	$4, len2
> > -	jz	len_is_0
> > -
> > -	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
> > -	vpshufd	$0, %xmm2, %xmm2
> > -
> > -	vpsubd	%xmm2, %xmm0, %xmm0
> > -	vpsubd	%xmm2, %xmm1, %xmm1
> > -
> > -	vmovdqa	%xmm0, _lens + 0*16(state)
> > -	vmovdqa	%xmm1, _lens + 1*16(state)
> > -
> > -	# "state" and "args" are the same address, arg1
> > -	# len is arg2
> > -	call	sha256_x8_avx2
> > -
> > -	# state and idx are intact
> > -
> > -len_is_0:
> > -	# process completed job "idx"
> > -	imul	$_LANE_DATA_size, idx, lane_data
> > -	lea	_ldata(state, lane_data), lane_data
> > -
> > -	mov	_job_in_lane(lane_data), job_rax
> > -	mov	_unused_lanes(state), unused_lanes
> > -	movq	$0, _job_in_lane(lane_data)
> > -	movl	$STS_COMPLETED, _status(job_rax)
> > -	shl	$4, unused_lanes
> > -	or	idx, unused_lanes
> > -	mov	unused_lanes, _unused_lanes(state)
> > -
> > -	movl	$0xFFFFFFFF, _lens(state,idx,4)
> > -
> > -	vmovd	_args_digest(state, idx, 4), %xmm0
> > -	vpinsrd	$1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
> > -	vpinsrd	$3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
> > -	vmovd	_args_digest+4*32(state, idx, 4), %xmm1
> > -
> > -	vpinsrd	$1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
> > -	vpinsrd	$3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
> > -
> > -	vmovdqu	%xmm0, _result_digest(job_rax)
> > -	vmovdqu	%xmm1, _result_digest+1*16(job_rax)
> > -
> > -return:
> > -	pop     %r12
> > -        pop     %rbx
> > -        FRAME_END
> > -	ret
> > -
> > -return_null:
> > -	xor	job_rax, job_rax
> > -	jmp	return
> > -
> > -ENDPROC(sha256_mb_mgr_submit_avx2)
> > -
> > -.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
> > -.align 16
> > -clear_low_nibble:
> > -	.octa	0x000000000000000000000000FFFFFFF0
> > diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
> > deleted file mode 100644
> > index 1687c80c5995..000000000000
> > --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
> > +++ /dev/null
> > @@ -1,598 +0,0 @@
> > -/*
> > - * Multi-buffer SHA256 algorithm hash compute routine
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *	Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include "sha256_mb_mgr_datastruct.S"
> > -
> > -## code to compute oct SHA256 using SSE-256
> > -## outer calling routine takes care of save and restore of XMM registers
> > -## Logic designed/laid out by JDG
> > -
> > -## Function clobbers: rax, rcx, rdx,   rbx, rsi, rdi, r9-r15; %ymm0-15
> > -## Linux clobbers:    rax rbx rcx rdx rsi            r9 r10 r11 r12 r13 r14 r15
> > -## Linux preserves:                       rdi rbp r8
> > -##
> > -## clobbers %ymm0-15
> > -
> > -arg1 = %rdi
> > -arg2 = %rsi
> > -reg3 = %rcx
> > -reg4 = %rdx
> > -
> > -# Common definitions
> > -STATE = arg1
> > -INP_SIZE = arg2
> > -
> > -IDX = %rax
> > -ROUND = %rbx
> > -TBL = reg3
> > -
> > -inp0 = %r9
> > -inp1 = %r10
> > -inp2 = %r11
> > -inp3 = %r12
> > -inp4 = %r13
> > -inp5 = %r14
> > -inp6 = %r15
> > -inp7 = reg4
> > -
> > -a = %ymm0
> > -b = %ymm1
> > -c = %ymm2
> > -d = %ymm3
> > -e = %ymm4
> > -f = %ymm5
> > -g = %ymm6
> > -h = %ymm7
> > -
> > -T1 = %ymm8
> > -
> > -a0 = %ymm12
> > -a1 = %ymm13
> > -a2 = %ymm14
> > -TMP = %ymm15
> > -TMP0 = %ymm6
> > -TMP1 = %ymm7
> > -
> > -TT0 = %ymm8
> > -TT1 = %ymm9
> > -TT2 = %ymm10
> > -TT3 = %ymm11
> > -TT4 = %ymm12
> > -TT5 = %ymm13
> > -TT6 = %ymm14
> > -TT7 = %ymm15
> > -
> > -# Define stack usage
> > -
> > -# Assume stack aligned to 32 bytes before call
> > -# Therefore FRAMESZ mod 32 must be 32-8 = 24
> > -
> > -#define FRAMESZ	0x388
> > -
> > -#define VMOVPS	vmovups
> > -
> > -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
> > -# "transpose" data in {r0...r7} using temps {t0...t1}
> > -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
> > -# r0 = {a7 a6 a5 a4   a3 a2 a1 a0}
> > -# r1 = {b7 b6 b5 b4   b3 b2 b1 b0}
> > -# r2 = {c7 c6 c5 c4   c3 c2 c1 c0}
> > -# r3 = {d7 d6 d5 d4   d3 d2 d1 d0}
> > -# r4 = {e7 e6 e5 e4   e3 e2 e1 e0}
> > -# r5 = {f7 f6 f5 f4   f3 f2 f1 f0}
> > -# r6 = {g7 g6 g5 g4   g3 g2 g1 g0}
> > -# r7 = {h7 h6 h5 h4   h3 h2 h1 h0}
> > -#
> > -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
> > -# r0 = {h0 g0 f0 e0   d0 c0 b0 a0}
> > -# r1 = {h1 g1 f1 e1   d1 c1 b1 a1}
> > -# r2 = {h2 g2 f2 e2   d2 c2 b2 a2}
> > -# r3 = {h3 g3 f3 e3   d3 c3 b3 a3}
> > -# r4 = {h4 g4 f4 e4   d4 c4 b4 a4}
> > -# r5 = {h5 g5 f5 e5   d5 c5 b5 a5}
> > -# r6 = {h6 g6 f6 e6   d6 c6 b6 a6}
> > -# r7 = {h7 g7 f7 e7   d7 c7 b7 a7}
> > -#
> > -
> > -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
> > -	# process top half (r0..r3) {a...d}
> > -	vshufps	$0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
> > -	vshufps	$0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
> > -	vshufps	$0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
> > -	vshufps	$0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
> > -	vshufps	$0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
> > -	vshufps	$0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
> > -	vshufps	$0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
> > -	vshufps	$0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4   d0 c0 b0 a0}
> > -
> > -	# use r2 in place of t0
> > -	# process bottom half (r4..r7) {e...h}
> > -	vshufps	$0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
> > -	vshufps	$0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
> > -	vshufps	$0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
> > -	vshufps	$0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
> > -	vshufps	$0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
> > -	vshufps	$0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
> > -	vshufps	$0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
> > -	vshufps	$0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4   h0 g0 f0 e0}
> > -
> > -	vperm2f128	$0x13, \r1, \r5, \r6  # h6...a6
> > -	vperm2f128	$0x02, \r1, \r5, \r2  # h2...a2
> > -	vperm2f128	$0x13, \r3, \r7, \r5  # h5...a5
> > -	vperm2f128	$0x02, \r3, \r7, \r1  # h1...a1
> > -	vperm2f128	$0x13, \r0, \r4, \r7  # h7...a7
> > -	vperm2f128	$0x02, \r0, \r4, \r3  # h3...a3
> > -	vperm2f128	$0x13, \t0, \t1, \r4  # h4...a4
> > -	vperm2f128	$0x02, \t0, \t1, \r0  # h0...a0
> > -
> > -.endm
> > -
> > -.macro ROTATE_ARGS
> > -TMP_ = h
> > -h = g
> > -g = f
> > -f = e
> > -e = d
> > -d = c
> > -c = b
> > -b = a
> > -a = TMP_
> > -.endm
> > -
> > -.macro _PRORD reg imm tmp
> > -	vpslld	$(32-\imm),\reg,\tmp
> > -	vpsrld	$\imm,\reg, \reg
> > -	vpor	\tmp,\reg, \reg
> > -.endm
> > -
> > -# PRORD_nd reg, imm, tmp, src
> > -.macro _PRORD_nd reg imm tmp src
> > -	vpslld	$(32-\imm), \src, \tmp
> > -	vpsrld	$\imm, \src, \reg
> > -	vpor	\tmp, \reg, \reg
> > -.endm
> > -
> > -# PRORD dst/src, amt
> > -.macro PRORD reg imm
> > -	_PRORD	\reg,\imm,TMP
> > -.endm
> > -
> > -# PRORD_nd dst, src, amt
> > -.macro PRORD_nd reg tmp imm
> > -	_PRORD_nd	\reg, \imm, TMP, \tmp
> > -.endm
> > -
> > -# arguments passed implicitly in preprocessor symbols i, a...h
> > -.macro ROUND_00_15 _T1 i
> > -	PRORD_nd	a0,e,5	# sig1: a0 = (e >> 5)
> > -
> > -	vpxor	g, f, a2	# ch: a2 = f^g
> > -	vpand	e,a2, a2	# ch: a2 = (f^g)&e
> > -	vpxor	g, a2, a2	# a2 = ch
> > -
> > -	PRORD_nd	a1,e,25	# sig1: a1 = (e >> 25)
> > -
> > -	vmovdqu	\_T1,(SZ8*(\i & 0xf))(%rsp)
> > -	vpaddd	(TBL,ROUND,1), \_T1, \_T1	# T1 = W + K
> > -	vpxor	e,a0, a0	# sig1: a0 = e ^ (e >> 5)
> > -	PRORD	a0, 6		# sig1: a0 = (e >> 6) ^ (e >> 11)
> > -	vpaddd	a2, h, h	# h = h + ch
> > -	PRORD_nd	a2,a,11	# sig0: a2 = (a >> 11)
> > -	vpaddd	\_T1,h, h 	# h = h + ch + W + K
> > -	vpxor	a1, a0, a0	# a0 = sigma1
> > -	PRORD_nd	a1,a,22	# sig0: a1 = (a >> 22)
> > -	vpxor	c, a, \_T1	# maj: T1 = a^c
> > -	add	$SZ8, ROUND	# ROUND++
> > -	vpand	b, \_T1, \_T1	# maj: T1 = (a^c)&b
> > -	vpaddd	a0, h, h
> > -	vpaddd	h, d, d
> > -	vpxor	a, a2, a2	# sig0: a2 = a ^ (a >> 11)
> > -	PRORD	a2,2		# sig0: a2 = (a >> 2) ^ (a >> 13)
> > -	vpxor	a1, a2, a2	# a2 = sig0
> > -	vpand	c, a, a1	# maj: a1 = a&c
> > -	vpor	\_T1, a1, a1 	# a1 = maj
> > -	vpaddd	a1, h, h	# h = h + ch + W + K + maj
> > -	vpaddd	a2, h, h	# h = h + ch + W + K + maj + sigma0
> > -	ROTATE_ARGS
> > -.endm
> > -
> > -# arguments passed implicitly in preprocessor symbols i, a...h
> > -.macro ROUND_16_XX _T1 i
> > -	vmovdqu	(SZ8*((\i-15)&0xf))(%rsp), \_T1
> > -	vmovdqu	(SZ8*((\i-2)&0xf))(%rsp), a1
> > -	vmovdqu	\_T1, a0
> > -	PRORD	\_T1,11
> > -	vmovdqu	a1, a2
> > -	PRORD	a1,2
> > -	vpxor	a0, \_T1, \_T1
> > -	PRORD	\_T1, 7
> > -	vpxor	a2, a1, a1
> > -	PRORD	a1, 17
> > -	vpsrld	$3, a0, a0
> > -	vpxor	a0, \_T1, \_T1
> > -	vpsrld	$10, a2, a2
> > -	vpxor	a2, a1, a1
> > -	vpaddd	(SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
> > -	vpaddd	(SZ8*((\i-7)&0xf))(%rsp), a1, a1
> > -	vpaddd	a1, \_T1, \_T1
> > -
> > -	ROUND_00_15 \_T1,\i
> > -.endm
> > -
> > -# SHA256_ARGS:
> > -#   UINT128 digest[8];  // transposed digests
> > -#   UINT8  *data_ptr[4];
> > -
> > -# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
> > -# arg 1 : STATE : pointer to array of pointers to input data
> > -# arg 2 : INP_SIZE  : size of input in blocks
> > -	# general registers preserved in outer calling routine
> > -	# outer calling routine saves all the XMM registers
> > -	# save rsp, allocate 32-byte aligned for local variables
> > -ENTRY(sha256_x8_avx2)
> > -
> > -	# save callee-saved clobbered registers to comply with C function ABI
> > -	push    %r12
> > -	push    %r13
> > -	push    %r14
> > -	push    %r15
> > -
> > -	mov	%rsp, IDX
> > -	sub	$FRAMESZ, %rsp
> > -	and	$~0x1F, %rsp
> > -	mov	IDX, _rsp(%rsp)
> > -
> > -	# Load the pre-transposed incoming digest.
> > -	vmovdqu	0*SHA256_DIGEST_ROW_SIZE(STATE),a
> > -	vmovdqu	1*SHA256_DIGEST_ROW_SIZE(STATE),b
> > -	vmovdqu	2*SHA256_DIGEST_ROW_SIZE(STATE),c
> > -	vmovdqu	3*SHA256_DIGEST_ROW_SIZE(STATE),d
> > -	vmovdqu	4*SHA256_DIGEST_ROW_SIZE(STATE),e
> > -	vmovdqu	5*SHA256_DIGEST_ROW_SIZE(STATE),f
> > -	vmovdqu	6*SHA256_DIGEST_ROW_SIZE(STATE),g
> > -	vmovdqu	7*SHA256_DIGEST_ROW_SIZE(STATE),h
> > -
> > -	lea	K256_8(%rip),TBL
> > -
> > -	# load the address of each of the 4 message lanes
> > -	# getting ready to transpose input onto stack
> > -	mov	_args_data_ptr+0*PTR_SZ(STATE),inp0
> > -	mov	_args_data_ptr+1*PTR_SZ(STATE),inp1
> > -	mov	_args_data_ptr+2*PTR_SZ(STATE),inp2
> > -	mov	_args_data_ptr+3*PTR_SZ(STATE),inp3
> > -	mov	_args_data_ptr+4*PTR_SZ(STATE),inp4
> > -	mov	_args_data_ptr+5*PTR_SZ(STATE),inp5
> > -	mov	_args_data_ptr+6*PTR_SZ(STATE),inp6
> > -	mov	_args_data_ptr+7*PTR_SZ(STATE),inp7
> > -
> > -	xor	IDX, IDX
> > -lloop:
> > -	xor	ROUND, ROUND
> > -
> > -	# save old digest
> > -	vmovdqu	a, _digest(%rsp)
> > -	vmovdqu	b, _digest+1*SZ8(%rsp)
> > -	vmovdqu	c, _digest+2*SZ8(%rsp)
> > -	vmovdqu	d, _digest+3*SZ8(%rsp)
> > -	vmovdqu	e, _digest+4*SZ8(%rsp)
> > -	vmovdqu	f, _digest+5*SZ8(%rsp)
> > -	vmovdqu	g, _digest+6*SZ8(%rsp)
> > -	vmovdqu	h, _digest+7*SZ8(%rsp)
> > -	i = 0
> > -.rep 2
> > -	VMOVPS	i*32(inp0, IDX), TT0
> > -	VMOVPS	i*32(inp1, IDX), TT1
> > -	VMOVPS	i*32(inp2, IDX), TT2
> > -	VMOVPS	i*32(inp3, IDX), TT3
> > -	VMOVPS	i*32(inp4, IDX), TT4
> > -	VMOVPS	i*32(inp5, IDX), TT5
> > -	VMOVPS	i*32(inp6, IDX), TT6
> > -	VMOVPS	i*32(inp7, IDX), TT7
> > -	vmovdqu	g, _ytmp(%rsp)
> > -	vmovdqu	h, _ytmp+1*SZ8(%rsp)
> > -	TRANSPOSE8	TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7,   TMP0, TMP1
> > -	vmovdqu	PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
> > -	vmovdqu	_ytmp(%rsp), g
> > -	vpshufb	TMP1, TT0, TT0
> > -	vpshufb	TMP1, TT1, TT1
> > -	vpshufb	TMP1, TT2, TT2
> > -	vpshufb	TMP1, TT3, TT3
> > -	vpshufb	TMP1, TT4, TT4
> > -	vpshufb	TMP1, TT5, TT5
> > -	vpshufb	TMP1, TT6, TT6
> > -	vpshufb	TMP1, TT7, TT7
> > -	vmovdqu	_ytmp+1*SZ8(%rsp), h
> > -	vmovdqu	TT4, _ytmp(%rsp)
> > -	vmovdqu	TT5, _ytmp+1*SZ8(%rsp)
> > -	vmovdqu	TT6, _ytmp+2*SZ8(%rsp)
> > -	vmovdqu	TT7, _ytmp+3*SZ8(%rsp)
> > -	ROUND_00_15	TT0,(i*8+0)
> > -	vmovdqu	_ytmp(%rsp), TT0
> > -	ROUND_00_15	TT1,(i*8+1)
> > -	vmovdqu	_ytmp+1*SZ8(%rsp), TT1
> > -	ROUND_00_15	TT2,(i*8+2)
> > -	vmovdqu	_ytmp+2*SZ8(%rsp), TT2
> > -	ROUND_00_15	TT3,(i*8+3)
> > -	vmovdqu	_ytmp+3*SZ8(%rsp), TT3
> > -	ROUND_00_15	TT0,(i*8+4)
> > -	ROUND_00_15	TT1,(i*8+5)
> > -	ROUND_00_15	TT2,(i*8+6)
> > -	ROUND_00_15	TT3,(i*8+7)
> > -	i = (i+1)
> > -.endr
> > -	add	$64, IDX
> > -	i = (i*8)
> > -
> > -	jmp	Lrounds_16_xx
> > -.align 16
> > -Lrounds_16_xx:
> > -.rep 16
> > -	ROUND_16_XX	T1, i
> > -	i = (i+1)
> > -.endr
> > -
> > -	cmp	$ROUNDS,ROUND
> > -	jb	Lrounds_16_xx
> > -
> > -	# add old digest
> > -	vpaddd	_digest+0*SZ8(%rsp), a, a
> > -	vpaddd	_digest+1*SZ8(%rsp), b, b
> > -	vpaddd	_digest+2*SZ8(%rsp), c, c
> > -	vpaddd	_digest+3*SZ8(%rsp), d, d
> > -	vpaddd	_digest+4*SZ8(%rsp), e, e
> > -	vpaddd	_digest+5*SZ8(%rsp), f, f
> > -	vpaddd	_digest+6*SZ8(%rsp), g, g
> > -	vpaddd	_digest+7*SZ8(%rsp), h, h
> > -
> > -	sub	$1, INP_SIZE  # unit is blocks
> > -	jne	lloop
> > -
> > -	# write back to memory (state object) the transposed digest
> > -	vmovdqu	a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
> > -	vmovdqu	h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
> > -
> > -	# update input pointers
> > -	add	IDX, inp0
> > -	mov	inp0, _args_data_ptr+0*8(STATE)
> > -	add	IDX, inp1
> > -	mov	inp1, _args_data_ptr+1*8(STATE)
> > -	add	IDX, inp2
> > -	mov	inp2, _args_data_ptr+2*8(STATE)
> > -	add	IDX, inp3
> > -	mov	inp3, _args_data_ptr+3*8(STATE)
> > -	add	IDX, inp4
> > -	mov	inp4, _args_data_ptr+4*8(STATE)
> > -	add	IDX, inp5
> > -	mov	inp5, _args_data_ptr+5*8(STATE)
> > -	add	IDX, inp6
> > -	mov	inp6, _args_data_ptr+6*8(STATE)
> > -	add	IDX, inp7
> > -	mov	inp7, _args_data_ptr+7*8(STATE)
> > -
> > -	# Postamble
> > -	mov	_rsp(%rsp), %rsp
> > -
> > -	# restore callee-saved clobbered registers
> > -	pop     %r15
> > -	pop     %r14
> > -	pop     %r13
> > -	pop     %r12
> > -
> > -	ret
> > -ENDPROC(sha256_x8_avx2)
> > -
> > -.section	.rodata.K256_8, "a", @progbits
> > -.align 64
> > -K256_8:
> > -	.octa	0x428a2f98428a2f98428a2f98428a2f98
> > -	.octa	0x428a2f98428a2f98428a2f98428a2f98
> > -	.octa	0x71374491713744917137449171374491
> > -	.octa	0x71374491713744917137449171374491
> > -	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
> > -	.octa	0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
> > -	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
> > -	.octa	0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
> > -	.octa	0x3956c25b3956c25b3956c25b3956c25b
> > -	.octa	0x3956c25b3956c25b3956c25b3956c25b
> > -	.octa	0x59f111f159f111f159f111f159f111f1
> > -	.octa	0x59f111f159f111f159f111f159f111f1
> > -	.octa	0x923f82a4923f82a4923f82a4923f82a4
> > -	.octa	0x923f82a4923f82a4923f82a4923f82a4
> > -	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
> > -	.octa	0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
> > -	.octa	0xd807aa98d807aa98d807aa98d807aa98
> > -	.octa	0xd807aa98d807aa98d807aa98d807aa98
> > -	.octa	0x12835b0112835b0112835b0112835b01
> > -	.octa	0x12835b0112835b0112835b0112835b01
> > -	.octa	0x243185be243185be243185be243185be
> > -	.octa	0x243185be243185be243185be243185be
> > -	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
> > -	.octa	0x550c7dc3550c7dc3550c7dc3550c7dc3
> > -	.octa	0x72be5d7472be5d7472be5d7472be5d74
> > -	.octa	0x72be5d7472be5d7472be5d7472be5d74
> > -	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
> > -	.octa	0x80deb1fe80deb1fe80deb1fe80deb1fe
> > -	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
> > -	.octa	0x9bdc06a79bdc06a79bdc06a79bdc06a7
> > -	.octa	0xc19bf174c19bf174c19bf174c19bf174
> > -	.octa	0xc19bf174c19bf174c19bf174c19bf174
> > -	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
> > -	.octa	0xe49b69c1e49b69c1e49b69c1e49b69c1
> > -	.octa	0xefbe4786efbe4786efbe4786efbe4786
> > -	.octa	0xefbe4786efbe4786efbe4786efbe4786
> > -	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
> > -	.octa	0x0fc19dc60fc19dc60fc19dc60fc19dc6
> > -	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
> > -	.octa	0x240ca1cc240ca1cc240ca1cc240ca1cc
> > -	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
> > -	.octa	0x2de92c6f2de92c6f2de92c6f2de92c6f
> > -	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
> > -	.octa	0x4a7484aa4a7484aa4a7484aa4a7484aa
> > -	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
> > -	.octa	0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
> > -	.octa	0x76f988da76f988da76f988da76f988da
> > -	.octa	0x76f988da76f988da76f988da76f988da
> > -	.octa	0x983e5152983e5152983e5152983e5152
> > -	.octa	0x983e5152983e5152983e5152983e5152
> > -	.octa	0xa831c66da831c66da831c66da831c66d
> > -	.octa	0xa831c66da831c66da831c66da831c66d
> > -	.octa	0xb00327c8b00327c8b00327c8b00327c8
> > -	.octa	0xb00327c8b00327c8b00327c8b00327c8
> > -	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
> > -	.octa	0xbf597fc7bf597fc7bf597fc7bf597fc7
> > -	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
> > -	.octa	0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
> > -	.octa	0xd5a79147d5a79147d5a79147d5a79147
> > -	.octa	0xd5a79147d5a79147d5a79147d5a79147
> > -	.octa	0x06ca635106ca635106ca635106ca6351
> > -	.octa	0x06ca635106ca635106ca635106ca6351
> > -	.octa	0x14292967142929671429296714292967
> > -	.octa	0x14292967142929671429296714292967
> > -	.octa	0x27b70a8527b70a8527b70a8527b70a85
> > -	.octa	0x27b70a8527b70a8527b70a8527b70a85
> > -	.octa	0x2e1b21382e1b21382e1b21382e1b2138
> > -	.octa	0x2e1b21382e1b21382e1b21382e1b2138
> > -	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
> > -	.octa	0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
> > -	.octa	0x53380d1353380d1353380d1353380d13
> > -	.octa	0x53380d1353380d1353380d1353380d13
> > -	.octa	0x650a7354650a7354650a7354650a7354
> > -	.octa	0x650a7354650a7354650a7354650a7354
> > -	.octa	0x766a0abb766a0abb766a0abb766a0abb
> > -	.octa	0x766a0abb766a0abb766a0abb766a0abb
> > -	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
> > -	.octa	0x81c2c92e81c2c92e81c2c92e81c2c92e
> > -	.octa	0x92722c8592722c8592722c8592722c85
> > -	.octa	0x92722c8592722c8592722c8592722c85
> > -	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
> > -	.octa	0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
> > -	.octa	0xa81a664ba81a664ba81a664ba81a664b
> > -	.octa	0xa81a664ba81a664ba81a664ba81a664b
> > -	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
> > -	.octa	0xc24b8b70c24b8b70c24b8b70c24b8b70
> > -	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
> > -	.octa	0xc76c51a3c76c51a3c76c51a3c76c51a3
> > -	.octa	0xd192e819d192e819d192e819d192e819
> > -	.octa	0xd192e819d192e819d192e819d192e819
> > -	.octa	0xd6990624d6990624d6990624d6990624
> > -	.octa	0xd6990624d6990624d6990624d6990624
> > -	.octa	0xf40e3585f40e3585f40e3585f40e3585
> > -	.octa	0xf40e3585f40e3585f40e3585f40e3585
> > -	.octa	0x106aa070106aa070106aa070106aa070
> > -	.octa	0x106aa070106aa070106aa070106aa070
> > -	.octa	0x19a4c11619a4c11619a4c11619a4c116
> > -	.octa	0x19a4c11619a4c11619a4c11619a4c116
> > -	.octa	0x1e376c081e376c081e376c081e376c08
> > -	.octa	0x1e376c081e376c081e376c081e376c08
> > -	.octa	0x2748774c2748774c2748774c2748774c
> > -	.octa	0x2748774c2748774c2748774c2748774c
> > -	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
> > -	.octa	0x34b0bcb534b0bcb534b0bcb534b0bcb5
> > -	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
> > -	.octa	0x391c0cb3391c0cb3391c0cb3391c0cb3
> > -	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
> > -	.octa	0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
> > -	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
> > -	.octa	0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
> > -	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
> > -	.octa	0x682e6ff3682e6ff3682e6ff3682e6ff3
> > -	.octa	0x748f82ee748f82ee748f82ee748f82ee
> > -	.octa	0x748f82ee748f82ee748f82ee748f82ee
> > -	.octa	0x78a5636f78a5636f78a5636f78a5636f
> > -	.octa	0x78a5636f78a5636f78a5636f78a5636f
> > -	.octa	0x84c8781484c8781484c8781484c87814
> > -	.octa	0x84c8781484c8781484c8781484c87814
> > -	.octa	0x8cc702088cc702088cc702088cc70208
> > -	.octa	0x8cc702088cc702088cc702088cc70208
> > -	.octa	0x90befffa90befffa90befffa90befffa
> > -	.octa	0x90befffa90befffa90befffa90befffa
> > -	.octa	0xa4506ceba4506ceba4506ceba4506ceb
> > -	.octa	0xa4506ceba4506ceba4506ceba4506ceb
> > -	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
> > -	.octa	0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
> > -	.octa	0xc67178f2c67178f2c67178f2c67178f2
> > -	.octa	0xc67178f2c67178f2c67178f2c67178f2
> > -
> > -.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
> > -.align 32
> > -PSHUFFLE_BYTE_FLIP_MASK:
> > -.octa 0x0c0d0e0f08090a0b0405060700010203
> > -.octa 0x0c0d0e0f08090a0b0405060700010203
> > -
> > -.section	.rodata.cst256.K256, "aM", @progbits, 256
> > -.align 64
> > -.global K256
> > -K256:
> > -	.int	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
> > -	.int	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
> > -	.int	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
> > -	.int	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
> > -	.int	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
> > -	.int	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
> > -	.int	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
> > -	.int	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
> > -	.int	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
> > -	.int	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
> > -	.int	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
> > -	.int	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
> > -	.int	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
> > -	.int	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
> > -	.int	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
> > -	.int	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
> > diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
> > deleted file mode 100644
> > index 90f1ef69152e..000000000000
> > --- a/arch/x86/crypto/sha512-mb/Makefile
> > +++ /dev/null
> > @@ -1,12 +0,0 @@
> > -# SPDX-License-Identifier: GPL-2.0
> > -#
> > -# Arch-specific CryptoAPI modules.
> > -#
> > -
> > -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
> > -                                $(comma)4)$(comma)%ymm2,yes,no)
> > -ifeq ($(avx2_supported),yes)
> > -	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
> > -	sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
> > -	     sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
> > -endif
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
> > deleted file mode 100644
> > index 26b85678012d..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb.c
> > +++ /dev/null
> > @@ -1,1047 +0,0 @@
> > -/*
> > - * Multi buffer SHA512 algorithm Glue Code
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *	Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
> > -
> > -#include <crypto/internal/hash.h>
> > -#include <linux/init.h>
> > -#include <linux/module.h>
> > -#include <linux/mm.h>
> > -#include <linux/cryptohash.h>
> > -#include <linux/types.h>
> > -#include <linux/list.h>
> > -#include <crypto/scatterwalk.h>
> > -#include <crypto/sha.h>
> > -#include <crypto/mcryptd.h>
> > -#include <crypto/crypto_wq.h>
> > -#include <asm/byteorder.h>
> > -#include <linux/hardirq.h>
> > -#include <asm/fpu/api.h>
> > -#include "sha512_mb_ctx.h"
> > -
> > -#define FLUSH_INTERVAL 1000 /* in usec */
> > -
> > -static struct mcryptd_alg_state sha512_mb_alg_state;
> > -
> > -struct sha512_mb_ctx {
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -};
> > -
> > -static inline struct mcryptd_hash_request_ctx
> > -		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
> > -{
> > -	struct ahash_request *areq;
> > -
> > -	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
> > -	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
> > -}
> > -
> > -static inline struct ahash_request
> > -		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
> > -{
> > -	return container_of((void *) ctx, struct ahash_request, __ctx);
> > -}
> > -
> > -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
> > -				struct ahash_request *areq)
> > -{
> > -	rctx->flag = HASH_UPDATE;
> > -}
> > -
> > -static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
> > -static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
> > -						(struct sha512_mb_mgr *state,
> > -						struct job_sha512 *job);
> > -static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
> > -						(struct sha512_mb_mgr *state);
> > -static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
> > -						(struct sha512_mb_mgr *state);
> > -
> > -inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
> > -			 uint64_t total_len)
> > -{
> > -	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
> > -
> > -	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
> > -	padblock[i] = 0x80;
> > -
> > -	i += ((SHA512_BLOCK_SIZE - 1) &
> > -	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
> > -	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
> > -
> > -#if SHA512_PADLENGTHFIELD_SIZE == 16
> > -	*((uint64_t *) &padblock[i - 16]) = 0;
> > -#endif
> > -
> > -	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
> > -
> > -	/* Number of extra blocks to hash */
> > -	return i >> SHA512_LOG2_BLOCK_SIZE;
> > -}
> > -
> > -static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
> > -		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
> > -{
> > -	while (ctx) {
> > -		if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -			/* Clear PROCESSING bit */
> > -			ctx->status = HASH_CTX_STS_COMPLETE;
> > -			return ctx;
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are empty, begin hashing what remains
> > -		 * in the user's buffer.
> > -		 */
> > -		if (ctx->partial_block_buffer_length == 0 &&
> > -		    ctx->incoming_buffer_length) {
> > -
> > -			const void *buffer = ctx->incoming_buffer;
> > -			uint32_t len = ctx->incoming_buffer_length;
> > -			uint32_t copy_len;
> > -
> > -			/*
> > -			 * Only entire blocks can be hashed.
> > -			 * Copy remainder to extra blocks buffer.
> > -			 */
> > -			copy_len = len & (SHA512_BLOCK_SIZE-1);
> > -
> > -			if (copy_len) {
> > -				len -= copy_len;
> > -				memcpy(ctx->partial_block_buffer,
> > -				       ((const char *) buffer + len),
> > -				       copy_len);
> > -				ctx->partial_block_buffer_length = copy_len;
> > -			}
> > -
> > -			ctx->incoming_buffer_length = 0;
> > -
> > -			/* len should be a multiple of the block size now */
> > -			assert((len % SHA512_BLOCK_SIZE) == 0);
> > -
> > -			/* Set len to the number of blocks to be hashed */
> > -			len >>= SHA512_LOG2_BLOCK_SIZE;
> > -
> > -			if (len) {
> > -
> > -				ctx->job.buffer = (uint8_t *) buffer;
> > -				ctx->job.len = len;
> > -				ctx = (struct sha512_hash_ctx *)
> > -					sha512_job_mgr_submit(&mgr->mgr,
> > -					&ctx->job);
> > -				continue;
> > -			}
> > -		}
> > -
> > -		/*
> > -		 * If the extra blocks are not empty, then we are
> > -		 * either on the last block(s) or we need more
> > -		 * user input before continuing.
> > -		 */
> > -		if (ctx->status & HASH_CTX_STS_LAST) {
> > -
> > -			uint8_t *buf = ctx->partial_block_buffer;
> > -			uint32_t n_extra_blocks =
> > -					sha512_pad(buf, ctx->total_length);
> > -
> > -			ctx->status = (HASH_CTX_STS_PROCESSING |
> > -				       HASH_CTX_STS_COMPLETE);
> > -			ctx->job.buffer = buf;
> > -			ctx->job.len = (uint32_t) n_extra_blocks;
> > -			ctx = (struct sha512_hash_ctx *)
> > -				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -			continue;
> > -		}
> > -
> > -		if (ctx)
> > -			ctx->status = HASH_CTX_STS_IDLE;
> > -		return ctx;
> > -	}
> > -
> > -	return NULL;
> > -}
> > -
> > -static struct sha512_hash_ctx
> > -		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
> > -{
> > -	/*
> > -	 * If get_comp_job returns NULL, there are no jobs complete.
> > -	 * If get_comp_job returns a job, verify that it is safe to return to
> > -	 * the user.
> > -	 * If it is not ready, resubmit the job to finish processing.
> > -	 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
> > -	 * returned.
> > -	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
> > -	 * still need processing.
> > -	 */
> > -	struct sha512_ctx_mgr *mgr;
> > -	struct sha512_hash_ctx *ctx;
> > -	unsigned long flags;
> > -
> > -	mgr = cstate->mgr;
> > -	spin_lock_irqsave(&cstate->work_lock, flags);
> > -	ctx = (struct sha512_hash_ctx *)
> > -				sha512_job_mgr_get_comp_job(&mgr->mgr);
> > -	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
> > -	spin_unlock_irqrestore(&cstate->work_lock, flags);
> > -	return ctx;
> > -}
> > -
> > -static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
> > -{
> > -	sha512_job_mgr_init(&mgr->mgr);
> > -}
> > -
> > -static struct sha512_hash_ctx
> > -			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
> > -					  struct sha512_hash_ctx *ctx,
> > -					  const void *buffer,
> > -					  uint32_t len,
> > -					  int flags)
> > -{
> > -	struct sha512_ctx_mgr *mgr;
> > -	unsigned long irqflags;
> > -
> > -	mgr = cstate->mgr;
> > -	spin_lock_irqsave(&cstate->work_lock, irqflags);
> > -	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
> > -		/* User should not pass anything other than UPDATE or LAST */
> > -		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
> > -		goto unlock;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_PROCESSING) {
> > -		/* Cannot submit to a currently processing job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
> > -		goto unlock;
> > -	}
> > -
> > -	if (ctx->status & HASH_CTX_STS_COMPLETE) {
> > -		/* Cannot update a finished job. */
> > -		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
> > -		goto unlock;
> > -	}
> > -
> > -	/*
> > -	 * If we made it here, there were no errors during this call to
> > -	 * submit
> > -	 */
> > -	ctx->error = HASH_CTX_ERROR_NONE;
> > -
> > -	/* Store buffer ptr info from user */
> > -	ctx->incoming_buffer = buffer;
> > -	ctx->incoming_buffer_length = len;
> > -
> > -	/*
> > -	 * Store the user's request flags and mark this ctx as currently being
> > -	 * processed.
> > -	 */
> > -	ctx->status = (flags & HASH_LAST) ?
> > -			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
> > -			HASH_CTX_STS_PROCESSING;
> > -
> > -	/* Advance byte counter */
> > -	ctx->total_length += len;
> > -
> > -	/*
> > -	 * If there is anything currently buffered in the extra blocks,
> > -	 * append to it until it contains a whole block.
> > -	 * Or if the user's buffer contains less than a whole block,
> > -	 * append as much as possible to the extra block.
> > -	 */
> > -	if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
> > -		/* Compute how many bytes to copy from user buffer into extra
> > -		 * block
> > -		 */
> > -		uint32_t copy_len = SHA512_BLOCK_SIZE -
> > -					ctx->partial_block_buffer_length;
> > -		if (len < copy_len)
> > -			copy_len = len;
> > -
> > -		if (copy_len) {
> > -			/* Copy and update relevant pointers and counters */
> > -			memcpy
> > -		(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
> > -				buffer, copy_len);
> > -
> > -			ctx->partial_block_buffer_length += copy_len;
> > -			ctx->incoming_buffer = (const void *)
> > -					((const char *)buffer + copy_len);
> > -			ctx->incoming_buffer_length = len - copy_len;
> > -		}
> > -
> > -		/* The extra block should never contain more than 1 block
> > -		 * here
> > -		 */
> > -		assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
> > -
> > -		/* If the extra block buffer contains exactly 1 block, it can
> > -		 * be hashed.
> > -		 */
> > -		if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
> > -			ctx->partial_block_buffer_length = 0;
> > -
> > -			ctx->job.buffer = ctx->partial_block_buffer;
> > -			ctx->job.len = 1;
> > -			ctx = (struct sha512_hash_ctx *)
> > -				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
> > -		}
> > -	}
> > -
> > -	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
> > -unlock:
> > -	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
> > -	return ctx;
> > -}
> > -
> > -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
> > -{
> > -	struct sha512_ctx_mgr *mgr;
> > -	struct sha512_hash_ctx *ctx;
> > -	unsigned long flags;
> > -
> > -	mgr = cstate->mgr;
> > -	spin_lock_irqsave(&cstate->work_lock, flags);
> > -	while (1) {
> > -		ctx = (struct sha512_hash_ctx *)
> > -					sha512_job_mgr_flush(&mgr->mgr);
> > -
> > -		/* If flush returned 0, there are no more jobs in flight. */
> > -		if (!ctx)
> > -			break;
> > -
> > -		/*
> > -		 * If flush returned a job, resubmit the job to finish
> > -		 * processing.
> > -		 */
> > -		ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
> > -
> > -		/*
> > -		 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
> > -		 * be returned. Otherwise, all jobs currently being managed by
> > -		 * the sha512_ctx_mgr still need processing. Loop.
> > -		 */
> > -		if (ctx)
> > -			break;
> > -	}
> > -	spin_unlock_irqrestore(&cstate->work_lock, flags);
> > -	return ctx;
> > -}
> > -
> > -static int sha512_mb_init(struct ahash_request *areq)
> > -{
> > -	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	hash_ctx_init(sctx);
> > -	sctx->job.result_digest[0] = SHA512_H0;
> > -	sctx->job.result_digest[1] = SHA512_H1;
> > -	sctx->job.result_digest[2] = SHA512_H2;
> > -	sctx->job.result_digest[3] = SHA512_H3;
> > -	sctx->job.result_digest[4] = SHA512_H4;
> > -	sctx->job.result_digest[5] = SHA512_H5;
> > -	sctx->job.result_digest[6] = SHA512_H6;
> > -	sctx->job.result_digest[7] = SHA512_H7;
> > -	sctx->total_length = 0;
> > -	sctx->partial_block_buffer_length = 0;
> > -	sctx->status = HASH_CTX_STS_IDLE;
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
> > -{
> > -	int	i;
> > -	struct	sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
> > -	__be64	*dst = (__be64 *) rctx->out;
> > -
> > -	for (i = 0; i < 8; ++i)
> > -		dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
> > -			struct mcryptd_alg_cstate *cstate, bool flush)
> > -{
> > -	int	flag = HASH_UPDATE;
> > -	int	nbytes, err = 0;
> > -	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
> > -	struct sha512_hash_ctx *sha_ctx;
> > -
> > -	/* more work ? */
> > -	while (!(rctx->flag & HASH_DONE)) {
> > -		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
> > -		if (nbytes < 0) {
> > -			err = nbytes;
> > -			goto out;
> > -		}
> > -		/* check if the walk is done */
> > -		if (crypto_ahash_walk_last(&rctx->walk)) {
> > -			rctx->flag |= HASH_DONE;
> > -			if (rctx->flag & HASH_FINAL)
> > -				flag |= HASH_LAST;
> > -
> > -		}
> > -		sha_ctx = (struct sha512_hash_ctx *)
> > -						ahash_request_ctx(&rctx->areq);
> > -		kernel_fpu_begin();
> > -		sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
> > -						rctx->walk.data, nbytes, flag);
> > -		if (!sha_ctx) {
> > -			if (flush)
> > -				sha_ctx = sha512_ctx_mgr_flush(cstate);
> > -		}
> > -		kernel_fpu_end();
> > -		if (sha_ctx)
> > -			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		else {
> > -			rctx = NULL;
> > -			goto out;
> > -		}
> > -	}
> > -
> > -	/* copy the results */
> > -	if (rctx->flag & HASH_FINAL)
> > -		sha512_mb_set_results(rctx);
> > -
> > -out:
> > -	*ret_rctx = rctx;
> > -	return err;
> > -}
> > -
> > -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
> > -			    struct mcryptd_alg_cstate *cstate,
> > -			    int err)
> > -{
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha512_hash_ctx *sha_ctx;
> > -	struct mcryptd_hash_request_ctx *req_ctx;
> > -	int ret;
> > -	unsigned long flags;
> > -
> > -	/* remove from work list */
> > -	spin_lock_irqsave(&cstate->work_lock, flags);
> > -	list_del(&rctx->waiter);
> > -	spin_unlock_irqrestore(&cstate->work_lock, flags);
> > -
> > -	if (irqs_disabled())
> > -		rctx->complete(&req->base, err);
> > -	else {
> > -		local_bh_disable();
> > -		rctx->complete(&req->base, err);
> > -		local_bh_enable();
> > -	}
> > -
> > -	/* check to see if there are other jobs that are done */
> > -	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
> > -	while (sha_ctx) {
> > -		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		ret = sha_finish_walk(&req_ctx, cstate, false);
> > -		if (req_ctx) {
> > -			spin_lock_irqsave(&cstate->work_lock, flags);
> > -			list_del(&req_ctx->waiter);
> > -			spin_unlock_irqrestore(&cstate->work_lock, flags);
> > -
> > -			req = cast_mcryptd_ctx_to_req(req_ctx);
> > -			if (irqs_disabled())
> > -				req_ctx->complete(&req->base, ret);
> > -			else {
> > -				local_bh_disable();
> > -				req_ctx->complete(&req->base, ret);
> > -				local_bh_enable();
> > -			}
> > -		}
> > -		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
> > -			     struct mcryptd_alg_cstate *cstate)
> > -{
> > -	unsigned long next_flush;
> > -	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
> > -	unsigned long flags;
> > -
> > -	/* initialize tag */
> > -	rctx->tag.arrival = jiffies;    /* tag the arrival time */
> > -	rctx->tag.seq_num = cstate->next_seq_num++;
> > -	next_flush = rctx->tag.arrival + delay;
> > -	rctx->tag.expire = next_flush;
> > -
> > -	spin_lock_irqsave(&cstate->work_lock, flags);
> > -	list_add_tail(&rctx->waiter, &cstate->work_list);
> > -	spin_unlock_irqrestore(&cstate->work_lock, flags);
> > -
> > -	mcryptd_arm_flusher(cstate, delay);
> > -}
> > -
> > -static int sha512_mb_update(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -			container_of(areq, struct mcryptd_hash_request_ctx,
> > -									areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha512_hash_ctx *sha_ctx;
> > -	int ret = 0, nbytes;
> > -
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk))
> > -		rctx->flag |= HASH_DONE;
> > -
> > -	/* submit */
> > -	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
> > -	sha512_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
> > -							nbytes, HASH_UPDATE);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha512_mb_finup(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -			container_of(areq, struct mcryptd_hash_request_ctx,
> > -									areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
> > -
> > -	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
> > -	struct sha512_hash_ctx *sha_ctx;
> > -	int ret = 0, flag = HASH_UPDATE, nbytes;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
> > -
> > -	if (nbytes < 0) {
> > -		ret = nbytes;
> > -		goto done;
> > -	}
> > -
> > -	if (crypto_ahash_walk_last(&rctx->walk)) {
> > -		rctx->flag |= HASH_DONE;
> > -		flag = HASH_LAST;
> > -	}
> > -
> > -	/* submit */
> > -	rctx->flag |= HASH_FINAL;
> > -	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
> > -	sha512_mb_add_list(rctx, cstate);
> > -
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
> > -								nbytes, flag);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha512_mb_final(struct ahash_request *areq)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx =
> > -			container_of(areq, struct mcryptd_hash_request_ctx,
> > -									areq);
> > -	struct mcryptd_alg_cstate *cstate =
> > -				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
> > -
> > -	struct sha512_hash_ctx *sha_ctx;
> > -	int ret = 0;
> > -	u8 data;
> > -
> > -	/* sanity check */
> > -	if (rctx->tag.cpu != smp_processor_id()) {
> > -		pr_err("mcryptd error: cpu clash\n");
> > -		goto done;
> > -	}
> > -
> > -	/* need to init context */
> > -	req_ctx_init(rctx, areq);
> > -
> > -	rctx->flag |= HASH_DONE | HASH_FINAL;
> > -
> > -	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
> > -	/* flag HASH_FINAL and 0 data size */
> > -	sha512_mb_add_list(rctx, cstate);
> > -	kernel_fpu_begin();
> > -	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
> > -	kernel_fpu_end();
> > -
> > -	/* check if anything is returned */
> > -	if (!sha_ctx)
> > -		return -EINPROGRESS;
> > -
> > -	if (sha_ctx->error) {
> > -		ret = sha_ctx->error;
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		goto done;
> > -	}
> > -
> > -	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -	ret = sha_finish_walk(&rctx, cstate, false);
> > -	if (!rctx)
> > -		return -EINPROGRESS;
> > -done:
> > -	sha_complete_job(rctx, cstate, ret);
> > -	return ret;
> > -}
> > -
> > -static int sha512_mb_export(struct ahash_request *areq, void *out)
> > -{
> > -	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(out, sctx, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha512_mb_import(struct ahash_request *areq, const void *in)
> > -{
> > -	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
> > -
> > -	memcpy(sctx, in, sizeof(*sctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct mcryptd_ahash *mcryptd_tfm;
> > -	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -	struct mcryptd_hash_ctx *mctx;
> > -
> > -	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
> > -						CRYPTO_ALG_INTERNAL,
> > -						CRYPTO_ALG_INTERNAL);
> > -	if (IS_ERR(mcryptd_tfm))
> > -		return PTR_ERR(mcryptd_tfm);
> > -	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
> > -	mctx->alg_state = &sha512_mb_alg_state;
> > -	ctx->mcryptd_tfm = mcryptd_tfm;
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				crypto_ahash_reqsize(&mcryptd_tfm->base));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				sizeof(struct ahash_request) +
> > -				sizeof(struct sha512_hash_ctx));
> > -
> > -	return 0;
> > -}
> > -
> > -static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	mcryptd_free_ahash(ctx->mcryptd_tfm);
> > -}
> > -
> > -static struct ahash_alg sha512_mb_areq_alg = {
> > -	.init		=	sha512_mb_init,
> > -	.update		=	sha512_mb_update,
> > -	.final		=	sha512_mb_final,
> > -	.finup		=	sha512_mb_finup,
> > -	.export		=	sha512_mb_export,
> > -	.import		=	sha512_mb_import,
> > -	.halg		=	{
> > -	.digestsize	=	SHA512_DIGEST_SIZE,
> > -	.statesize	=	sizeof(struct sha512_hash_ctx),
> > -	.base		=	{
> > -			.cra_name	 = "__sha512-mb",
> > -			.cra_driver_name = "__intel_sha512-mb",
> > -			.cra_priority	 = 100,
> > -			/*
> > -			 * use ASYNC flag as some buffers in multi-buffer
> > -			 * algo may not have completed before hashing thread
> > -			 * sleep
> > -			 */
> > -			.cra_flags	= CRYPTO_ALG_ASYNC |
> > -					  CRYPTO_ALG_INTERNAL,
> > -			.cra_blocksize	= SHA512_BLOCK_SIZE,
> > -			.cra_module	= THIS_MODULE,
> > -			.cra_list	= LIST_HEAD_INIT
> > -					(sha512_mb_areq_alg.halg.base.cra_list),
> > -			.cra_init	= sha512_mb_areq_init_tfm,
> > -			.cra_exit	= sha512_mb_areq_exit_tfm,
> > -			.cra_ctxsize	= sizeof(struct sha512_hash_ctx),
> > -		}
> > -	}
> > -};
> > -
> > -static int sha512_mb_async_init(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_init(mcryptd_req);
> > -}
> > -
> > -static int sha512_mb_async_update(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_update(mcryptd_req);
> > -}
> > -
> > -static int sha512_mb_async_finup(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_finup(mcryptd_req);
> > -}
> > -
> > -static int sha512_mb_async_final(struct ahash_request *req)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_final(mcryptd_req);
> > -}
> > -
> > -static int sha512_mb_async_digest(struct ahash_request *req)
> > -{
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_digest(mcryptd_req);
> > -}
> > -
> > -static int sha512_mb_async_export(struct ahash_request *req, void *out)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	return crypto_ahash_export(mcryptd_req, out);
> > -}
> > -
> > -static int sha512_mb_async_import(struct ahash_request *req, const void *in)
> > -{
> > -	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
> > -	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
> > -	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	struct ahash_request *areq;
> > -
> > -	memcpy(mcryptd_req, req, sizeof(*req));
> > -	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
> > -	rctx = ahash_request_ctx(mcryptd_req);
> > -
> > -	areq = &rctx->areq;
> > -
> > -	ahash_request_set_tfm(areq, child);
> > -	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
> > -					rctx->complete, req);
> > -
> > -	return crypto_ahash_import(mcryptd_req, in);
> > -}
> > -
> > -static struct ahash_alg sha512_mb_async_alg = {
> > -	.init           = sha512_mb_async_init,
> > -	.update         = sha512_mb_async_update,
> > -	.final          = sha512_mb_async_final,
> > -	.finup          = sha512_mb_async_finup,
> > -	.digest         = sha512_mb_async_digest,
> > -	.export		= sha512_mb_async_export,
> > -	.import		= sha512_mb_async_import,
> > -	.halg = {
> > -		.digestsize     = SHA512_DIGEST_SIZE,
> > -		.statesize      = sizeof(struct sha512_hash_ctx),
> > -		.base = {
> > -			.cra_name               = "sha512",
> > -			.cra_driver_name        = "sha512_mb",
> > -			/*
> > -			 * Low priority, since with few concurrent hash requests
> > -			 * this is extremely slow due to the flush delay.  Users
> > -			 * whose workloads would benefit from this can request
> > -			 * it explicitly by driver name, or can increase its
> > -			 * priority at runtime using NETLINK_CRYPTO.
> > -			 */
> > -			.cra_priority           = 50,
> > -			.cra_flags              = CRYPTO_ALG_ASYNC,
> > -			.cra_blocksize          = SHA512_BLOCK_SIZE,
> > -			.cra_module             = THIS_MODULE,
> > -			.cra_list               = LIST_HEAD_INIT
> > -				(sha512_mb_async_alg.halg.base.cra_list),
> > -			.cra_init               = sha512_mb_async_init_tfm,
> > -			.cra_exit               = sha512_mb_async_exit_tfm,
> > -			.cra_ctxsize		= sizeof(struct sha512_mb_ctx),
> > -			.cra_alignmask		= 0,
> > -		},
> > -	},
> > -};
> > -
> > -static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx;
> > -	unsigned long cur_time;
> > -	unsigned long next_flush = 0;
> > -	struct sha512_hash_ctx *sha_ctx;
> > -
> > -
> > -	cur_time = jiffies;
> > -
> > -	while (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		if time_before(cur_time, rctx->tag.expire)
> > -			break;
> > -		kernel_fpu_begin();
> > -		sha_ctx = (struct sha512_hash_ctx *)
> > -					sha512_ctx_mgr_flush(cstate);
> > -		kernel_fpu_end();
> > -		if (!sha_ctx) {
> > -			pr_err("sha512_mb error: nothing got flushed for"
> > -							" non-empty list\n");
> > -			break;
> > -		}
> > -		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
> > -		sha_finish_walk(&rctx, cstate, true);
> > -		sha_complete_job(rctx, cstate, 0);
> > -	}
> > -
> > -	if (!list_empty(&cstate->work_list)) {
> > -		rctx = list_entry(cstate->work_list.next,
> > -				struct mcryptd_hash_request_ctx, waiter);
> > -		/* get the hash context and then flush time */
> > -		next_flush = rctx->tag.expire;
> > -		mcryptd_arm_flusher(cstate, get_delay(next_flush));
> > -	}
> > -	return next_flush;
> > -}
> > -
> > -static int __init sha512_mb_mod_init(void)
> > -{
> > -
> > -	int cpu;
> > -	int err;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	/* check for dependent cpu features */
> > -	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
> > -	    !boot_cpu_has(X86_FEATURE_BMI2))
> > -		return -ENODEV;
> > -
> > -	/* initialize multibuffer structures */
> > -	sha512_mb_alg_state.alg_cstate =
> > -				alloc_percpu(struct mcryptd_alg_cstate);
> > -
> > -	sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
> > -	sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
> > -	sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
> > -	sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
> > -
> > -	if (!sha512_mb_alg_state.alg_cstate)
> > -		return -ENOMEM;
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
> > -		cpu_state->next_flush = 0;
> > -		cpu_state->next_seq_num = 0;
> > -		cpu_state->flusher_engaged = false;
> > -		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
> > -		cpu_state->cpu = cpu;
> > -		cpu_state->alg_state = &sha512_mb_alg_state;
> > -		cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
> > -								GFP_KERNEL);
> > -		if (!cpu_state->mgr)
> > -			goto err2;
> > -		sha512_ctx_mgr_init(cpu_state->mgr);
> > -		INIT_LIST_HEAD(&cpu_state->work_list);
> > -		spin_lock_init(&cpu_state->work_lock);
> > -	}
> > -	sha512_mb_alg_state.flusher = &sha512_mb_flusher;
> > -
> > -	err = crypto_register_ahash(&sha512_mb_areq_alg);
> > -	if (err)
> > -		goto err2;
> > -	err = crypto_register_ahash(&sha512_mb_async_alg);
> > -	if (err)
> > -		goto err1;
> > -
> > -
> > -	return 0;
> > -err1:
> > -	crypto_unregister_ahash(&sha512_mb_areq_alg);
> > -err2:
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha512_mb_alg_state.alg_cstate);
> > -	return -ENODEV;
> > -}
> > -
> > -static void __exit sha512_mb_mod_fini(void)
> > -{
> > -	int cpu;
> > -	struct mcryptd_alg_cstate *cpu_state;
> > -
> > -	crypto_unregister_ahash(&sha512_mb_async_alg);
> > -	crypto_unregister_ahash(&sha512_mb_areq_alg);
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
> > -		kfree(cpu_state->mgr);
> > -	}
> > -	free_percpu(sha512_mb_alg_state.alg_cstate);
> > -}
> > -
> > -module_init(sha512_mb_mod_init);
> > -module_exit(sha512_mb_mod_fini);
> > -
> > -MODULE_LICENSE("GPL");
> > -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
> > -
> > -MODULE_ALIAS("sha512");
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
> > deleted file mode 100644
> > index e5c465bd821e..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
> > +++ /dev/null
> > @@ -1,128 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA512 context
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#ifndef _SHA_MB_CTX_INTERNAL_H
> > -#define _SHA_MB_CTX_INTERNAL_H
> > -
> > -#include "sha512_mb_mgr.h"
> > -
> > -#define HASH_UPDATE          0x00
> > -#define HASH_LAST            0x01
> > -#define HASH_DONE            0x02
> > -#define HASH_FINAL           0x04
> > -
> > -#define HASH_CTX_STS_IDLE       0x00
> > -#define HASH_CTX_STS_PROCESSING 0x01
> > -#define HASH_CTX_STS_LAST       0x02
> > -#define HASH_CTX_STS_COMPLETE   0x04
> > -
> > -enum hash_ctx_error {
> > -	HASH_CTX_ERROR_NONE               =  0,
> > -	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
> > -	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
> > -	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
> > -};
> > -
> > -#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
> > -#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
> > -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
> > -#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
> > -#define hash_ctx_status(ctx)     ((ctx)->status)
> > -#define hash_ctx_error(ctx)      ((ctx)->error)
> > -#define hash_ctx_init(ctx) \
> > -	do { \
> > -		(ctx)->error = HASH_CTX_ERROR_NONE; \
> > -		(ctx)->status = HASH_CTX_STS_COMPLETE; \
> > -	} while (0)
> > -
> > -/* Hash Constants and Typedefs */
> > -#define SHA512_DIGEST_LENGTH          8
> > -#define SHA512_LOG2_BLOCK_SIZE        7
> > -
> > -#define SHA512_PADLENGTHFIELD_SIZE    16
> > -
> > -#ifdef SHA_MB_DEBUG
> > -#define assert(expr) \
> > -do { \
> > -	if (unlikely(!(expr))) { \
> > -		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
> > -		#expr, __FILE__, __func__, __LINE__); \
> > -	} \
> > -} while (0)
> > -#else
> > -#define assert(expr) do {} while (0)
> > -#endif
> > -
> > -struct sha512_ctx_mgr {
> > -	struct sha512_mb_mgr mgr;
> > -};
> > -
> > -/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
> > -
> > -struct sha512_hash_ctx {
> > -	/* Must be at struct offset 0 */
> > -	struct job_sha512       job;
> > -	/* status flag */
> > -	int status;
> > -	/* error flag */
> > -	int error;
> > -
> > -	uint64_t        total_length;
> > -	const void      *incoming_buffer;
> > -	uint32_t        incoming_buffer_length;
> > -	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
> > -	uint32_t        partial_block_buffer_length;
> > -	void            *user_data;
> > -};
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
> > deleted file mode 100644
> > index 178f17eef382..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
> > +++ /dev/null
> > @@ -1,104 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA512 algorithm manager
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#ifndef __SHA_MB_MGR_H
> > -#define __SHA_MB_MGR_H
> > -
> > -#include <linux/types.h>
> > -
> > -#define NUM_SHA512_DIGEST_WORDS 8
> > -
> > -enum job_sts {STS_UNKNOWN = 0,
> > -	STS_BEING_PROCESSED = 1,
> > -	STS_COMPLETED =       2,
> > -	STS_INTERNAL_ERROR = 3,
> > -	STS_ERROR = 4
> > -};
> > -
> > -struct job_sha512 {
> > -	u8  *buffer;
> > -	u64  len;
> > -	u64  result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
> > -	enum job_sts status;
> > -	void   *user_data;
> > -};
> > -
> > -struct sha512_args_x4 {
> > -	uint64_t        digest[8][4];
> > -	uint8_t         *data_ptr[4];
> > -};
> > -
> > -struct sha512_lane_data {
> > -	struct job_sha512 *job_in_lane;
> > -};
> > -
> > -struct sha512_mb_mgr {
> > -	struct sha512_args_x4 args;
> > -
> > -	uint64_t lens[4];
> > -
> > -	/* each byte is index (0...7) of unused lanes */
> > -	uint64_t unused_lanes;
> > -	/* byte 4 is set to FF as a flag */
> > -	struct sha512_lane_data ldata[4];
> > -};
> > -
> > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4
> > -
> > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
> > -struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
> > -						struct job_sha512 *job);
> > -struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
> > -struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
> > -
> > -#endif
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
> > deleted file mode 100644
> > index cf2636d4c9ba..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
> > +++ /dev/null
> > @@ -1,281 +0,0 @@
> > -/*
> > - * Header file for multi buffer SHA256 algorithm data structure
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  This program is free software; you can redistribute it and/or modify
> > - *  it under the terms of version 2 of the GNU General Public License as
> > - *  published by the Free Software Foundation.
> > - *
> > - *  This program is distributed in the hope that it will be useful, but
> > - *  WITHOUT ANY WARRANTY; without even the implied warranty of
> > - *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - *  General Public License for more details.
> > - *
> > - *  Contact Information:
> > - *      Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - *  BSD LICENSE
> > - *
> > - *  Copyright(c) 2016 Intel Corporation.
> > - *
> > - *  Redistribution and use in source and binary forms, with or without
> > - *  modification, are permitted provided that the following conditions
> > - *  are met:
> > - *
> > - *    * Redistributions of source code must retain the above copyright
> > - *      notice, this list of conditions and the following disclaimer.
> > - *    * Redistributions in binary form must reproduce the above copyright
> > - *      notice, this list of conditions and the following disclaimer in
> > - *      the documentation and/or other materials provided with the
> > - *      distribution.
> > - *    * Neither the name of Intel Corporation nor the names of its
> > - *      contributors may be used to endorse or promote products derived
> > - *      from this software without specific prior written permission.
> > - *
> > - *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -# Macros for defining data structures
> > -
> > -# Usage example
> > -
> > -#START_FIELDS   # JOB_AES
> > -###     name            size    align
> > -#FIELD  _plaintext,     8,      8       # pointer to plaintext
> > -#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
> > -#FIELD  _IV,            16,     8       # IV
> > -#FIELD  _keys,          8,      8       # pointer to keys
> > -#FIELD  _len,           4,      4       # length in bytes
> > -#FIELD  _status,        4,      4       # status enumeration
> > -#FIELD  _user_data,     8,      8       # pointer to user data
> > -#UNION  _union,         size1,  align1, \
> > -#                       size2,  align2, \
> > -#                       size3,  align3, \
> > -#                       ...
> > -#END_FIELDS
> > -#%assign _JOB_AES_size  _FIELD_OFFSET
> > -#%assign _JOB_AES_align _STRUCT_ALIGN
> > -
> > -#########################################################################
> > -
> > -# Alternate "struc-like" syntax:
> > -#       STRUCT job_aes2
> > -#       RES_Q   .plaintext,     1
> > -#       RES_Q   .ciphertext,    1
> > -#       RES_DQ  .IV,            1
> > -#       RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
> > -#       RES_U   .union,         size1, align1, \
> > -#                               size2, align2, \
> > -#                               ...
> > -#       ENDSTRUCT
> > -#       # Following only needed if nesting
> > -#       %assign job_aes2_size   _FIELD_OFFSET
> > -#       %assign job_aes2_align  _STRUCT_ALIGN
> > -#
> > -# RES_* macros take a name, a count and an optional alignment.
> > -# The count in in terms of the base size of the macro, and the
> > -# default alignment is the base size.
> > -# The macros are:
> > -# Macro    Base size
> > -# RES_B     1
> > -# RES_W     2
> > -# RES_D     4
> > -# RES_Q     8
> > -# RES_DQ   16
> > -# RES_Y    32
> > -# RES_Z    64
> > -#
> > -# RES_U defines a union. It's arguments are a name and two or more
> > -# pairs of "size, alignment"
> > -#
> > -# The two assigns are only needed if this structure is being nested
> > -# within another. Even if the assigns are not done, one can still use
> > -# STRUCT_NAME_size as the size of the structure.
> > -#
> > -# Note that for nesting, you still need to assign to STRUCT_NAME_size.
> > -#
> > -# The differences between this and using "struc" directly are that each
> > -# type is implicitly aligned to its natural length (although this can be
> > -# over-ridden with an explicit third parameter), and that the structure
> > -# is padded at the end to its overall alignment.
> > -#
> > -
> > -#########################################################################
> > -
> > -#ifndef _DATASTRUCT_ASM_
> > -#define _DATASTRUCT_ASM_
> > -
> > -#define PTR_SZ                  8
> > -#define SHA512_DIGEST_WORD_SIZE 8
> > -#define SHA512_MB_MGR_NUM_LANES_AVX2 4
> > -#define NUM_SHA512_DIGEST_WORDS 8
> > -#define SZ4                     4*SHA512_DIGEST_WORD_SIZE
> > -#define ROUNDS                  80*SZ4
> > -#define SHA512_DIGEST_ROW_SIZE  (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
> > -
> > -# START_FIELDS
> > -.macro START_FIELDS
> > - _FIELD_OFFSET = 0
> > - _STRUCT_ALIGN = 0
> > -.endm
> > -
> > -# FIELD name size align
> > -.macro FIELD name size align
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
> > - \name  = _FIELD_OFFSET
> > - _FIELD_OFFSET = _FIELD_OFFSET + (\size)
> > -.if (\align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = \align
> > -.endif
> > -.endm
> > -
> > -# END_FIELDS
> > -.macro END_FIELDS
> > - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
> > -.endm
> > -
> > -.macro STRUCT p1
> > -START_FIELDS
> > -.struc \p1
> > -.endm
> > -
> > -.macro ENDSTRUCT
> > - tmp = _FIELD_OFFSET
> > - END_FIELDS
> > - tmp = (_FIELD_OFFSET - ##tmp)
> > -.if (tmp > 0)
> > -        .lcomm  tmp
> > -.endm
> > -
> > -## RES_int name size align
> > -.macro RES_int p1 p2 p3
> > - name = \p1
> > - size = \p2
> > - align = .\p3
> > -
> > - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
> > -.align align
> > -.lcomm name size
> > - _FIELD_OFFSET = _FIELD_OFFSET + (size)
> > -.if (align > _STRUCT_ALIGN)
> > - _STRUCT_ALIGN = align
> > -.endif
> > -.endm
> > -
> > -# macro RES_B name, size [, align]
> > -.macro RES_B _name, _size, _align=1
> > -RES_int _name _size _align
> > -.endm
> > -
> > -# macro RES_W name, size [, align]
> > -.macro RES_W _name, _size, _align=2
> > -RES_int _name 2*(_size) _align
> > -.endm
> > -
> > -# macro RES_D name, size [, align]
> > -.macro RES_D _name, _size, _align=4
> > -RES_int _name 4*(_size) _align
> > -.endm
> > -
> > -# macro RES_Q name, size [, align]
> > -.macro RES_Q _name, _size, _align=8
> > -RES_int _name 8*(_size) _align
> > -.endm
> > -
> > -# macro RES_DQ name, size [, align]
> > -.macro RES_DQ _name, _size, _align=16
> > -RES_int _name 16*(_size) _align
> > -.endm
> > -
> > -# macro RES_Y name, size [, align]
> > -.macro RES_Y _name, _size, _align=32
> > -RES_int _name 32*(_size) _align
> > -.endm
> > -
> > -# macro RES_Z name, size [, align]
> > -.macro RES_Z _name, _size, _align=64
> > -RES_int _name 64*(_size) _align
> > -.endm
> > -
> > -#endif
> > -
> > -###################################################################
> > -### Define SHA512 Out Of Order Data Structures
> > -###################################################################
> > -
> > -START_FIELDS    # LANE_DATA
> > -###     name            size    align
> > -FIELD   _job_in_lane,   8,      8       # pointer to job object
> > -END_FIELDS
> > -
> > - _LANE_DATA_size = _FIELD_OFFSET
> > - _LANE_DATA_align = _STRUCT_ALIGN
> > -
> > -####################################################################
> > -
> > -START_FIELDS    # SHA512_ARGS_X4
> > -###     name            size    align
> > -FIELD   _digest,        8*8*4,  4      # transposed digest
> > -FIELD   _data_ptr,      8*4,    8       # array of pointers to data
> > -END_FIELDS
> > -
> > - _SHA512_ARGS_X4_size  =  _FIELD_OFFSET
> > - _SHA512_ARGS_X4_align =  _STRUCT_ALIGN
> > -
> > -#####################################################################
> > -
> > -START_FIELDS    # MB_MGR
> > -###     name            size    align
> > -FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
> > -FIELD   _lens,          8*4,    8
> > -FIELD   _unused_lanes,  8,      8
> > -FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
> > -END_FIELDS
> > -
> > - _MB_MGR_size  =  _FIELD_OFFSET
> > - _MB_MGR_align =  _STRUCT_ALIGN
> > -
> > -_args_digest = _args + _digest
> > -_args_data_ptr = _args + _data_ptr
> > -
> > -#######################################################################
> > -
> > -#######################################################################
> > -#### Define constants
> > -#######################################################################
> > -
> > -#define STS_UNKNOWN             0
> > -#define STS_BEING_PROCESSED     1
> > -#define STS_COMPLETED           2
> > -
> > -#######################################################################
> > -#### Define JOB_SHA512 structure
> > -#######################################################################
> > -
> > -START_FIELDS    # JOB_SHA512
> > -###     name                            size    align
> > -FIELD   _buffer,                        8,      8       # pointer to buffer
> > -FIELD   _len,                           8,      8       # length in bytes
> > -FIELD   _result_digest,                 8*8,    32      # Digest (output)
> > -FIELD   _status,                        4,      4
> > -FIELD   _user_data,                     8,      8
> > -END_FIELDS
> > -
> > - _JOB_SHA512_size = _FIELD_OFFSET
> > - _JOB_SHA512_align = _STRUCT_ALIGN
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
> > deleted file mode 100644
> > index 7c629caebc05..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
> > +++ /dev/null
> > @@ -1,297 +0,0 @@
> > -/*
> > - * Flush routine for SHA512 multibuffer
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *     Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha512_mb_mgr_datastruct.S"
> > -
> > -.extern sha512_x4_avx2
> > -
> > -# LINUX register definitions
> > -#define arg1    %rdi
> > -#define arg2    %rsi
> > -
> > -# idx needs to be other than arg1, arg2, rbx, r12
> > -#define idx     %rdx
> > -
> > -# Common definitions
> > -#define state   arg1
> > -#define job     arg2
> > -#define len2    arg2
> > -
> > -#define unused_lanes    %rbx
> > -#define lane_data       %rbx
> > -#define tmp2            %rbx
> > -
> > -#define job_rax         %rax
> > -#define tmp1            %rax
> > -#define size_offset     %rax
> > -#define tmp             %rax
> > -#define start_offset    %rax
> > -
> > -#define tmp3            arg1
> > -
> > -#define extra_blocks    arg2
> > -#define p               arg2
> > -
> > -#define tmp4            %r8
> > -#define lens0           %r8
> > -
> > -#define lens1           %r9
> > -#define lens2           %r10
> > -#define lens3           %r11
> > -
> > -.macro LABEL prefix n
> > -\prefix\n\():
> > -.endm
> > -
> > -.macro JNE_SKIP i
> > -jne     skip_\i
> > -.endm
> > -
> > -.altmacro
> > -.macro SET_OFFSET _offset
> > -offset = \_offset
> > -.endm
> > -.noaltmacro
> > -
> > -# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
> > -# arg 1 : rdi : state
> > -ENTRY(sha512_mb_mgr_flush_avx2)
> > -	FRAME_BEGIN
> > -	push	%rbx
> > -
> > -	# If bit (32+7) is set, then all lanes are empty
> > -	mov     _unused_lanes(state), unused_lanes
> > -        bt      $32+7, unused_lanes
> > -        jc      return_null
> > -
> > -        # find a lane with a non-null job
> > -	xor     idx, idx
> > -        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
> > -        cmpq    $0, offset(state)
> > -        cmovne  one(%rip), idx
> > -        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
> > -        cmpq    $0, offset(state)
> > -        cmovne  two(%rip), idx
> > -        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
> > -        cmpq    $0, offset(state)
> > -        cmovne  three(%rip), idx
> > -
> > -        # copy idx to empty lanes
> > -copy_lane_data:
> > -	offset =  (_args + _data_ptr)
> > -        mov     offset(state,idx,8), tmp
> > -
> > -        I = 0
> > -.rep 4
> > -	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
> > -        cmpq    $0, offset(state)
> > -.altmacro
> > -        JNE_SKIP %I
> > -        offset =  (_args + _data_ptr + 8*I)
> > -        mov     tmp, offset(state)
> > -        offset =  (_lens + 8*I +4)
> > -        movl    $0xFFFFFFFF, offset(state)
> > -LABEL skip_ %I
> > -        I = (I+1)
> > -.noaltmacro
> > -.endr
> > -
> > -        # Find min length
> > -        mov     _lens + 0*8(state),lens0
> > -        mov     lens0,idx
> > -        mov     _lens + 1*8(state),lens1
> > -        cmp     idx,lens1
> > -        cmovb   lens1,idx
> > -        mov     _lens + 2*8(state),lens2
> > -        cmp     idx,lens2
> > -        cmovb   lens2,idx
> > -        mov     _lens + 3*8(state),lens3
> > -        cmp     idx,lens3
> > -        cmovb   lens3,idx
> > -        mov     idx,len2
> > -        and     $0xF,idx
> > -        and     $~0xFF,len2
> > -	jz      len_is_0
> > -
> > -        sub     len2, lens0
> > -        sub     len2, lens1
> > -        sub     len2, lens2
> > -        sub     len2, lens3
> > -        shr     $32,len2
> > -        mov     lens0, _lens + 0*8(state)
> > -        mov     lens1, _lens + 1*8(state)
> > -        mov     lens2, _lens + 2*8(state)
> > -        mov     lens3, _lens + 3*8(state)
> > -
> > -        # "state" and "args" are the same address, arg1
> > -        # len is arg2
> > -        call    sha512_x4_avx2
> > -        # state and idx are intact
> > -
> > -len_is_0:
> > -        # process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -        lea     _ldata(state, lane_data), lane_data
> > -
> > -        mov     _job_in_lane(lane_data), job_rax
> > -        movq    $0,  _job_in_lane(lane_data)
> > -        movl    $STS_COMPLETED, _status(job_rax)
> > -        mov     _unused_lanes(state), unused_lanes
> > -        shl     $8, unused_lanes
> > -        or      idx, unused_lanes
> > -        mov     unused_lanes, _unused_lanes(state)
> > -
> > -	movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
> > -
> > -	vmovq _args_digest+0*32(state, idx, 8), %xmm0
> > -        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
> > -	vmovq _args_digest+2*32(state, idx, 8), %xmm1
> > -        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
> > -	vmovq _args_digest+4*32(state, idx, 8), %xmm2
> > -        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
> > -	vmovq _args_digest+6*32(state, idx, 8), %xmm3
> > -	vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
> > -
> > -	vmovdqu %xmm0, _result_digest(job_rax)
> > -	vmovdqu %xmm1, _result_digest+1*16(job_rax)
> > -	vmovdqu %xmm2, _result_digest+2*16(job_rax)
> > -	vmovdqu %xmm3, _result_digest+3*16(job_rax)
> > -
> > -return:
> > -	pop	%rbx
> > -	FRAME_END
> > -        ret
> > -
> > -return_null:
> > -        xor     job_rax, job_rax
> > -        jmp     return
> > -ENDPROC(sha512_mb_mgr_flush_avx2)
> > -.align 16
> > -
> > -ENTRY(sha512_mb_mgr_get_comp_job_avx2)
> > -        push    %rbx
> > -
> > -	mov     _unused_lanes(state), unused_lanes
> > -        bt      $(32+7), unused_lanes
> > -        jc      .return_null
> > -
> > -        # Find min length
> > -        mov     _lens(state),lens0
> > -        mov     lens0,idx
> > -        mov     _lens+1*8(state),lens1
> > -        cmp     idx,lens1
> > -        cmovb   lens1,idx
> > -        mov     _lens+2*8(state),lens2
> > -        cmp     idx,lens2
> > -        cmovb   lens2,idx
> > -        mov     _lens+3*8(state),lens3
> > -        cmp     idx,lens3
> > -        cmovb   lens3,idx
> > -        test    $~0xF,idx
> > -        jnz     .return_null
> > -        and     $0xF,idx
> > -
> > -        #process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -        lea     _ldata(state, lane_data), lane_data
> > -
> > -        mov     _job_in_lane(lane_data), job_rax
> > -        movq    $0,  _job_in_lane(lane_data)
> > -        movl    $STS_COMPLETED, _status(job_rax)
> > -        mov     _unused_lanes(state), unused_lanes
> > -        shl     $8, unused_lanes
> > -        or      idx, unused_lanes
> > -        mov     unused_lanes, _unused_lanes(state)
> > -
> > -        movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
> > -
> > -	vmovq   _args_digest(state, idx, 8), %xmm0
> > -        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
> > -	vmovq    _args_digest+2*32(state, idx, 8), %xmm1
> > -        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
> > -	vmovq    _args_digest+4*32(state, idx, 8), %xmm2
> > -        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
> > -        vmovq    _args_digest+6*32(state, idx, 8), %xmm3
> > -        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
> > -
> > -	vmovdqu %xmm0, _result_digest+0*16(job_rax)
> > -	vmovdqu %xmm1, _result_digest+1*16(job_rax)
> > -	vmovdqu %xmm2, _result_digest+2*16(job_rax)
> > -	vmovdqu %xmm3, _result_digest+3*16(job_rax)
> > -
> > -	pop     %rbx
> > -
> > -        ret
> > -
> > -.return_null:
> > -        xor     job_rax, job_rax
> > -	pop     %rbx
> > -        ret
> > -ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
> > -
> > -.section	.rodata.cst8.one, "aM", @progbits, 8
> > -.align 8
> > -one:
> > -.quad  1
> > -
> > -.section	.rodata.cst8.two, "aM", @progbits, 8
> > -.align 8
> > -two:
> > -.quad  2
> > -
> > -.section	.rodata.cst8.three, "aM", @progbits, 8
> > -.align 8
> > -three:
> > -.quad  3
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
> > deleted file mode 100644
> > index d08805032f01..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
> > +++ /dev/null
> > @@ -1,69 +0,0 @@
> > -/*
> > - * Initialization code for multi buffer SHA512 algorithm for AVX2
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *     Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include "sha512_mb_mgr.h"
> > -
> > -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
> > -{
> > -	unsigned int j;
> > -
> > -	/* initially all lanes are unused */
> > -	state->lens[0] = 0xFFFFFFFF00000000;
> > -	state->lens[1] = 0xFFFFFFFF00000001;
> > -	state->lens[2] = 0xFFFFFFFF00000002;
> > -	state->lens[3] = 0xFFFFFFFF00000003;
> > -
> > -	state->unused_lanes = 0xFF03020100;
> > -	for (j = 0; j < 4; j++)
> > -		state->ldata[j].job_in_lane = NULL;
> > -}
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
> > deleted file mode 100644
> > index 4ba709ba78e5..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
> > +++ /dev/null
> > @@ -1,224 +0,0 @@
> > -/*
> > - * Buffer submit code for multi buffer SHA512 algorithm
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *     Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -#include <linux/linkage.h>
> > -#include <asm/frame.h>
> > -#include "sha512_mb_mgr_datastruct.S"
> > -
> > -.extern sha512_x4_avx2
> > -
> > -#define arg1    %rdi
> > -#define arg2    %rsi
> > -
> > -#define idx             %rdx
> > -#define last_len        %rdx
> > -
> > -#define size_offset     %rcx
> > -#define tmp2            %rcx
> > -
> > -# Common definitions
> > -#define state   arg1
> > -#define job     arg2
> > -#define len2    arg2
> > -#define p2      arg2
> > -
> > -#define p               %r11
> > -#define start_offset    %r11
> > -
> > -#define unused_lanes    %rbx
> > -
> > -#define job_rax         %rax
> > -#define len             %rax
> > -
> > -#define lane            %r12
> > -#define tmp3            %r12
> > -#define lens3           %r12
> > -
> > -#define extra_blocks    %r8
> > -#define lens0           %r8
> > -
> > -#define tmp             %r9
> > -#define lens1           %r9
> > -
> > -#define lane_data       %r10
> > -#define lens2           %r10
> > -
> > -#define DWORD_len %eax
> > -
> > -# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
> > -# arg 1 : rdi : state
> > -# arg 2 : rsi : job
> > -ENTRY(sha512_mb_mgr_submit_avx2)
> > -	FRAME_BEGIN
> > -	push	%rbx
> > -	push	%r12
> > -
> > -        mov     _unused_lanes(state), unused_lanes
> > -        movzb     %bl,lane
> > -        shr     $8, unused_lanes
> > -        imul    $_LANE_DATA_size, lane,lane_data
> > -        movl    $STS_BEING_PROCESSED, _status(job)
> > -	lea     _ldata(state, lane_data), lane_data
> > -        mov     unused_lanes, _unused_lanes(state)
> > -        movl    _len(job),  DWORD_len
> > -
> > -	mov     job, _job_in_lane(lane_data)
> > -        movl    DWORD_len,_lens+4(state , lane, 8)
> > -
> > -	# Load digest words from result_digest
> > -	vmovdqu	_result_digest+0*16(job), %xmm0
> > -	vmovdqu _result_digest+1*16(job), %xmm1
> > -	vmovdqu	_result_digest+2*16(job), %xmm2
> > -        vmovdqu	_result_digest+3*16(job), %xmm3
> > -
> > -	vmovq    %xmm0, _args_digest(state, lane, 8)
> > -	vpextrq  $1, %xmm0, _args_digest+1*32(state , lane, 8)
> > -	vmovq    %xmm1, _args_digest+2*32(state , lane, 8)
> > -	vpextrq  $1, %xmm1, _args_digest+3*32(state , lane, 8)
> > -	vmovq    %xmm2, _args_digest+4*32(state , lane, 8)
> > -	vpextrq  $1, %xmm2, _args_digest+5*32(state , lane, 8)
> > -	vmovq    %xmm3, _args_digest+6*32(state , lane, 8)
> > -	vpextrq  $1, %xmm3, _args_digest+7*32(state , lane, 8)
> > -
> > -	mov     _buffer(job), p
> > -	mov     p, _args_data_ptr(state, lane, 8)
> > -
> > -	cmp     $0xFF, unused_lanes
> > -	jne     return_null
> > -
> > -start_loop:
> > -
> > -	# Find min length
> > -	mov     _lens+0*8(state),lens0
> > -	mov     lens0,idx
> > -	mov     _lens+1*8(state),lens1
> > -	cmp     idx,lens1
> > -	cmovb   lens1, idx
> > -	mov     _lens+2*8(state),lens2
> > -	cmp     idx,lens2
> > -	cmovb   lens2,idx
> > -	mov     _lens+3*8(state),lens3
> > -	cmp     idx,lens3
> > -	cmovb   lens3,idx
> > -	mov     idx,len2
> > -	and     $0xF,idx
> > -	and     $~0xFF,len2
> > -	jz      len_is_0
> > -
> > -	sub     len2,lens0
> > -	sub     len2,lens1
> > -	sub     len2,lens2
> > -	sub     len2,lens3
> > -	shr     $32,len2
> > -	mov     lens0, _lens + 0*8(state)
> > -	mov     lens1, _lens + 1*8(state)
> > -	mov     lens2, _lens + 2*8(state)
> > -	mov     lens3, _lens + 3*8(state)
> > -
> > -	# "state" and "args" are the same address, arg1
> > -	# len is arg2
> > -	call    sha512_x4_avx2
> > -	# state and idx are intact
> > -
> > -len_is_0:
> > -
> > -	# process completed job "idx"
> > -	imul    $_LANE_DATA_size, idx, lane_data
> > -	lea     _ldata(state, lane_data), lane_data
> > -
> > -	mov     _job_in_lane(lane_data), job_rax
> > -	mov     _unused_lanes(state), unused_lanes
> > -	movq    $0, _job_in_lane(lane_data)
> > -	movl    $STS_COMPLETED, _status(job_rax)
> > -	shl     $8, unused_lanes
> > -	or      idx, unused_lanes
> > -	mov     unused_lanes, _unused_lanes(state)
> > -
> > -	movl	$0xFFFFFFFF,_lens+4(state,idx,8)
> > -	vmovq    _args_digest+0*32(state , idx, 8), %xmm0
> > -	vpinsrq  $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
> > -	vmovq    _args_digest+2*32(state , idx, 8), %xmm1
> > -	vpinsrq  $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
> > -	vmovq    _args_digest+4*32(state , idx, 8), %xmm2
> > -	vpinsrq  $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
> > -	vmovq    _args_digest+6*32(state , idx, 8), %xmm3
> > -	vpinsrq  $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
> > -
> > -	vmovdqu  %xmm0, _result_digest + 0*16(job_rax)
> > -	vmovdqu  %xmm1, _result_digest + 1*16(job_rax)
> > -	vmovdqu  %xmm2, _result_digest + 2*16(job_rax)
> > -	vmovdqu  %xmm3, _result_digest + 3*16(job_rax)
> > -
> > -return:
> > -	pop	%r12
> > -	pop	%rbx
> > -	FRAME_END
> > -	ret
> > -
> > -return_null:
> > -	xor     job_rax, job_rax
> > -	jmp     return
> > -ENDPROC(sha512_mb_mgr_submit_avx2)
> > -
> > -/* UNUSED?
> > -.section	.rodata.cst16, "aM", @progbits, 16
> > -.align 16
> > -H0:     .int  0x6a09e667
> > -H1:     .int  0xbb67ae85
> > -H2:     .int  0x3c6ef372
> > -H3:     .int  0xa54ff53a
> > -H4:     .int  0x510e527f
> > -H5:     .int  0x9b05688c
> > -H6:     .int  0x1f83d9ab
> > -H7:     .int  0x5be0cd19
> > -*/
> > diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
> > deleted file mode 100644
> > index e22e907643a6..000000000000
> > --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
> > +++ /dev/null
> > @@ -1,531 +0,0 @@
> > -/*
> > - * Multi-buffer SHA512 algorithm hash compute routine
> > - *
> > - * This file is provided under a dual BSD/GPLv2 license.  When using or
> > - * redistributing this file, you may do so under either license.
> > - *
> > - * GPL LICENSE SUMMARY
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * This program is free software; you can redistribute it and/or modify
> > - * it under the terms of version 2 of the GNU General Public License as
> > - * published by the Free Software Foundation.
> > - *
> > - * This program is distributed in the hope that it will be useful, but
> > - * WITHOUT ANY WARRANTY; without even the implied warranty of
> > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > - * General Public License for more details.
> > - *
> > - * Contact Information:
> > - *     Megha Dey <megha.dey@linux.intel.com>
> > - *
> > - * BSD LICENSE
> > - *
> > - * Copyright(c) 2016 Intel Corporation.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - *
> > - *   * Redistributions of source code must retain the above copyright
> > - *     notice, this list of conditions and the following disclaimer.
> > - *   * Redistributions in binary form must reproduce the above copyright
> > - *     notice, this list of conditions and the following disclaimer in
> > - *     the documentation and/or other materials provided with the
> > - *     distribution.
> > - *   * Neither the name of Intel Corporation nor the names of its
> > - *     contributors may be used to endorse or promote products derived
> > - *     from this software without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> > - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> > - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> > - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> > - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> > - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> > - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> > - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> > - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> > - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> > - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> > - */
> > -
> > -# code to compute quad SHA512 using AVX2
> > -# use YMMs to tackle the larger digest size
> > -# outer calling routine takes care of save and restore of XMM registers
> > -# Logic designed/laid out by JDG
> > -
> > -# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
> > -# Stack must be aligned to 32 bytes before call
> > -# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
> > -# Linux preserves: rcx rdx rdi rbp r13 r14 r15
> > -# clobbers ymm0-15
> > -
> > -#include <linux/linkage.h>
> > -#include "sha512_mb_mgr_datastruct.S"
> > -
> > -arg1 = %rdi
> > -arg2 = %rsi
> > -
> > -# Common definitions
> > -STATE = arg1
> > -INP_SIZE = arg2
> > -
> > -IDX = %rax
> > -ROUND = %rbx
> > -TBL = %r8
> > -
> > -inp0 = %r9
> > -inp1 = %r10
> > -inp2 = %r11
> > -inp3 = %r12
> > -
> > -a = %ymm0
> > -b = %ymm1
> > -c = %ymm2
> > -d = %ymm3
> > -e = %ymm4
> > -f = %ymm5
> > -g = %ymm6
> > -h = %ymm7
> > -
> > -a0 = %ymm8
> > -a1 = %ymm9
> > -a2 = %ymm10
> > -
> > -TT0 = %ymm14
> > -TT1 = %ymm13
> > -TT2 = %ymm12
> > -TT3 = %ymm11
> > -TT4 = %ymm10
> > -TT5 = %ymm9
> > -
> > -T1 = %ymm14
> > -TMP = %ymm15
> > -
> > -# Define stack usage
> > -STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
> > -
> > -#define VMOVPD	vmovupd
> > -_digest = SZ4*16
> > -
> > -# transpose r0, r1, r2, r3, t0, t1
> > -# "transpose" data in {r0..r3} using temps {t0..t1}
> > -# Input looks like: {r0 r1 r2 r3}
> > -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
> > -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
> > -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
> > -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
> > -#
> > -# output looks like: {t0 r1 r0 r3}
> > -# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
> > -# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
> > -# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
> > -# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
> > -
> > -.macro TRANSPOSE r0 r1 r2 r3 t0 t1
> > -	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
> > -        vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
> > -        vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
> > -        vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
> > -
> > -	vperm2f128      $0x20, \r2, \r0, \r1  # h6...a6
> > -        vperm2f128      $0x31, \r2, \r0, \r3  # h2...a2
> > -        vperm2f128      $0x31, \t1, \t0, \r0  # h5...a5
> > -        vperm2f128      $0x20, \t1, \t0, \t0  # h1...a1
> > -.endm
> > -
> > -.macro ROTATE_ARGS
> > -TMP_ = h
> > -h = g
> > -g = f
> > -f = e
> > -e = d
> > -d = c
> > -c = b
> > -b = a
> > -a = TMP_
> > -.endm
> > -
> > -# PRORQ reg, imm, tmp
> > -# packed-rotate-right-double
> > -# does a rotate by doing two shifts and an or
> > -.macro _PRORQ reg imm tmp
> > -	vpsllq	$(64-\imm),\reg,\tmp
> > -	vpsrlq	$\imm,\reg, \reg
> > -	vpor	\tmp,\reg, \reg
> > -.endm
> > -
> > -# non-destructive
> > -# PRORQ_nd reg, imm, tmp, src
> > -.macro _PRORQ_nd reg imm tmp src
> > -	vpsllq	$(64-\imm), \src, \tmp
> > -	vpsrlq	$\imm, \src, \reg
> > -	vpor	\tmp, \reg, \reg
> > -.endm
> > -
> > -# PRORQ dst/src, amt
> > -.macro PRORQ reg imm
> > -	_PRORQ	\reg, \imm, TMP
> > -.endm
> > -
> > -# PRORQ_nd dst, src, amt
> > -.macro PRORQ_nd reg tmp imm
> > -	_PRORQ_nd	\reg, \imm, TMP, \tmp
> > -.endm
> > -
> > -#; arguments passed implicitly in preprocessor symbols i, a...h
> > -.macro ROUND_00_15 _T1 i
> > -	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e >> 4)
> > -
> > -	vpxor   g, f, a2        # ch: a2 = f^g
> > -        vpand   e,a2, a2                # ch: a2 = (f^g)&e
> > -        vpxor   g, a2, a2               # a2 = ch
> > -
> > -        PRORQ_nd        a1,e,41         # sig1: a1 = (e >> 25)
> > -
> > -        offset = SZ4*(\i & 0xf)
> > -        vmovdqu \_T1,offset(%rsp)
> > -        vpaddq  (TBL,ROUND,1), \_T1, \_T1       # T1 = W + K
> > -        vpxor   e,a0, a0        # sig1: a0 = e ^ (e >> 5)
> > -        PRORQ   a0, 14           # sig1: a0 = (e >> 6) ^ (e >> 11)
> > -        vpaddq  a2, h, h        # h = h + ch
> > -        PRORQ_nd        a2,a,6  # sig0: a2 = (a >> 11)
> > -        vpaddq  \_T1,h, h       # h = h + ch + W + K
> > -        vpxor   a1, a0, a0      # a0 = sigma1
> > -	vmovdqu a,\_T1
> > -        PRORQ_nd        a1,a,39 # sig0: a1 = (a >> 22)
> > -        vpxor   c, \_T1, \_T1      # maj: T1 = a^c
> > -        add     $SZ4, ROUND     # ROUND++
> > -        vpand   b, \_T1, \_T1   # maj: T1 = (a^c)&b
> > -        vpaddq  a0, h, h
> > -        vpaddq  h, d, d
> > -        vpxor   a, a2, a2       # sig0: a2 = a ^ (a >> 11)
> > -        PRORQ   a2,28            # sig0: a2 = (a >> 2) ^ (a >> 13)
> > -        vpxor   a1, a2, a2      # a2 = sig0
> > -        vpand   c, a, a1        # maj: a1 = a&c
> > -        vpor    \_T1, a1, a1    # a1 = maj
> > -        vpaddq  a1, h, h        # h = h + ch + W + K + maj
> > -        vpaddq  a2, h, h        # h = h + ch + W + K + maj + sigma0
> > -        ROTATE_ARGS
> > -.endm
> > -
> > -
> > -#; arguments passed implicitly in preprocessor symbols i, a...h
> > -.macro ROUND_16_XX _T1 i
> > -	vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
> > -        vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
> > -        vmovdqu \_T1, a0
> > -        PRORQ   \_T1,7
> > -        vmovdqu a1, a2
> > -        PRORQ   a1,42
> > -        vpxor   a0, \_T1, \_T1
> > -        PRORQ   \_T1, 1
> > -        vpxor   a2, a1, a1
> > -        PRORQ   a1, 19
> > -        vpsrlq  $7, a0, a0
> > -        vpxor   a0, \_T1, \_T1
> > -        vpsrlq  $6, a2, a2
> > -        vpxor   a2, a1, a1
> > -        vpaddq  SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
> > -        vpaddq  SZ4*((\i-7)&0xf)(%rsp), a1, a1
> > -        vpaddq  a1, \_T1, \_T1
> > -
> > -        ROUND_00_15 \_T1,\i
> > -.endm
> > -
> > -
> > -# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
> > -# arg 1 : STATE    : pointer to input data
> > -# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
> > -ENTRY(sha512_x4_avx2)
> > -	# general registers preserved in outer calling routine
> > -	# outer calling routine saves all the XMM registers
> > -	# save callee-saved clobbered registers to comply with C function ABI
> > -	push    %r12
> > -	push    %r13
> > -	push    %r14
> > -	push    %r15
> > -
> > -	sub     $STACK_SPACE1, %rsp
> > -
> > -        # Load the pre-transposed incoming digest.
> > -        vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
> > -        vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
> > -        vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
> > -        vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
> > -        vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
> > -        vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
> > -        vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
> > -        vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
> > -
> > -        lea     K512_4(%rip),TBL
> > -
> > -        # load the address of each of the 4 message lanes
> > -        # getting ready to transpose input onto stack
> > -        mov     _data_ptr+0*PTR_SZ(STATE),inp0
> > -        mov     _data_ptr+1*PTR_SZ(STATE),inp1
> > -        mov     _data_ptr+2*PTR_SZ(STATE),inp2
> > -        mov     _data_ptr+3*PTR_SZ(STATE),inp3
> > -
> > -        xor     IDX, IDX
> > -lloop:
> > -        xor     ROUND, ROUND
> > -
> > -	# save old digest
> > -        vmovdqu a, _digest(%rsp)
> > -        vmovdqu b, _digest+1*SZ4(%rsp)
> > -        vmovdqu c, _digest+2*SZ4(%rsp)
> > -        vmovdqu d, _digest+3*SZ4(%rsp)
> > -        vmovdqu e, _digest+4*SZ4(%rsp)
> > -        vmovdqu f, _digest+5*SZ4(%rsp)
> > -        vmovdqu g, _digest+6*SZ4(%rsp)
> > -        vmovdqu h, _digest+7*SZ4(%rsp)
> > -        i = 0
> > -.rep 4
> > -	vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
> > -        VMOVPD  i*32(inp0, IDX), TT2
> > -        VMOVPD  i*32(inp1, IDX), TT1
> > -        VMOVPD  i*32(inp2, IDX), TT4
> > -        VMOVPD  i*32(inp3, IDX), TT3
> > -	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
> > -	vpshufb	TMP, TT0, TT0
> > -	vpshufb	TMP, TT1, TT1
> > -	vpshufb	TMP, TT2, TT2
> > -	vpshufb	TMP, TT3, TT3
> > -	ROUND_00_15	TT0,(i*4+0)
> > -	ROUND_00_15	TT1,(i*4+1)
> > -	ROUND_00_15	TT2,(i*4+2)
> > -	ROUND_00_15	TT3,(i*4+3)
> > -	i = (i+1)
> > -.endr
> > -        add     $128, IDX
> > -
> > -        i = (i*4)
> > -
> > -        jmp     Lrounds_16_xx
> > -.align 16
> > -Lrounds_16_xx:
> > -.rep 16
> > -        ROUND_16_XX     T1, i
> > -        i = (i+1)
> > -.endr
> > -        cmp     $0xa00,ROUND
> > -        jb      Lrounds_16_xx
> > -
> > -	# add old digest
> > -        vpaddq  _digest(%rsp), a, a
> > -        vpaddq  _digest+1*SZ4(%rsp), b, b
> > -        vpaddq  _digest+2*SZ4(%rsp), c, c
> > -        vpaddq  _digest+3*SZ4(%rsp), d, d
> > -        vpaddq  _digest+4*SZ4(%rsp), e, e
> > -        vpaddq  _digest+5*SZ4(%rsp), f, f
> > -        vpaddq  _digest+6*SZ4(%rsp), g, g
> > -        vpaddq  _digest+7*SZ4(%rsp), h, h
> > -
> > -        sub     $1, INP_SIZE  # unit is blocks
> > -        jne     lloop
> > -
> > -        # write back to memory (state object) the transposed digest
> > -        vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
> > -        vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
> > -
> > -	# update input data pointers
> > -	add     IDX, inp0
> > -        mov     inp0, _data_ptr+0*PTR_SZ(STATE)
> > -        add     IDX, inp1
> > -        mov     inp1, _data_ptr+1*PTR_SZ(STATE)
> > -        add     IDX, inp2
> > -        mov     inp2, _data_ptr+2*PTR_SZ(STATE)
> > -        add     IDX, inp3
> > -        mov     inp3, _data_ptr+3*PTR_SZ(STATE)
> > -
> > -	#;;;;;;;;;;;;;;;
> > -	#; Postamble
> > -	add $STACK_SPACE1, %rsp
> > -	# restore callee-saved clobbered registers
> > -
> > -	pop     %r15
> > -	pop     %r14
> > -	pop     %r13
> > -	pop     %r12
> > -
> > -	# outer calling routine restores XMM and other GP registers
> > -	ret
> > -ENDPROC(sha512_x4_avx2)
> > -
> > -.section	.rodata.K512_4, "a", @progbits
> > -.align 64
> > -K512_4:
> > -	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
> > -		0x428a2f98d728ae22428a2f98d728ae22
> > -	.octa 0x7137449123ef65cd7137449123ef65cd,\
> > -		0x7137449123ef65cd7137449123ef65cd
> > -	.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
> > -		0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
> > -	.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
> > -		0xe9b5dba58189dbbce9b5dba58189dbbc
> > -	.octa 0x3956c25bf348b5383956c25bf348b538,\
> > -		0x3956c25bf348b5383956c25bf348b538
> > -	.octa 0x59f111f1b605d01959f111f1b605d019,\
> > -		0x59f111f1b605d01959f111f1b605d019
> > -	.octa 0x923f82a4af194f9b923f82a4af194f9b,\
> > -		0x923f82a4af194f9b923f82a4af194f9b
> > -	.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
> > -		0xab1c5ed5da6d8118ab1c5ed5da6d8118
> > -	.octa 0xd807aa98a3030242d807aa98a3030242,\
> > -		0xd807aa98a3030242d807aa98a3030242
> > -	.octa 0x12835b0145706fbe12835b0145706fbe,\
> > -		0x12835b0145706fbe12835b0145706fbe
> > -	.octa 0x243185be4ee4b28c243185be4ee4b28c,\
> > -		0x243185be4ee4b28c243185be4ee4b28c
> > -	.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
> > -		0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
> > -	.octa 0x72be5d74f27b896f72be5d74f27b896f,\
> > -		0x72be5d74f27b896f72be5d74f27b896f
> > -	.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
> > -		0x80deb1fe3b1696b180deb1fe3b1696b1
> > -	.octa 0x9bdc06a725c712359bdc06a725c71235,\
> > -		0x9bdc06a725c712359bdc06a725c71235
> > -	.octa 0xc19bf174cf692694c19bf174cf692694,\
> > -		0xc19bf174cf692694c19bf174cf692694
> > -	.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
> > -		0xe49b69c19ef14ad2e49b69c19ef14ad2
> > -	.octa 0xefbe4786384f25e3efbe4786384f25e3,\
> > -		0xefbe4786384f25e3efbe4786384f25e3
> > -	.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
> > -		0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
> > -	.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
> > -		0x240ca1cc77ac9c65240ca1cc77ac9c65
> > -	.octa 0x2de92c6f592b02752de92c6f592b0275,\
> > -		0x2de92c6f592b02752de92c6f592b0275
> > -	.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
> > -		0x4a7484aa6ea6e4834a7484aa6ea6e483
> > -	.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
> > -		0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
> > -	.octa 0x76f988da831153b576f988da831153b5,\
> > -		0x76f988da831153b576f988da831153b5
> > -	.octa 0x983e5152ee66dfab983e5152ee66dfab,\
> > -		0x983e5152ee66dfab983e5152ee66dfab
> > -	.octa 0xa831c66d2db43210a831c66d2db43210,\
> > -		0xa831c66d2db43210a831c66d2db43210
> > -	.octa 0xb00327c898fb213fb00327c898fb213f,\
> > -		0xb00327c898fb213fb00327c898fb213f
> > -	.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
> > -		0xbf597fc7beef0ee4bf597fc7beef0ee4
> > -	.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
> > -		0xc6e00bf33da88fc2c6e00bf33da88fc2
> > -	.octa 0xd5a79147930aa725d5a79147930aa725,\
> > -		0xd5a79147930aa725d5a79147930aa725
> > -	.octa 0x06ca6351e003826f06ca6351e003826f,\
> > -		0x06ca6351e003826f06ca6351e003826f
> > -	.octa 0x142929670a0e6e70142929670a0e6e70,\
> > -		0x142929670a0e6e70142929670a0e6e70
> > -	.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
> > -		0x27b70a8546d22ffc27b70a8546d22ffc
> > -	.octa 0x2e1b21385c26c9262e1b21385c26c926,\
> > -		0x2e1b21385c26c9262e1b21385c26c926
> > -	.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
> > -		0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
> > -	.octa 0x53380d139d95b3df53380d139d95b3df,\
> > -		0x53380d139d95b3df53380d139d95b3df
> > -	.octa 0x650a73548baf63de650a73548baf63de,\
> > -		0x650a73548baf63de650a73548baf63de
> > -	.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
> > -		0x766a0abb3c77b2a8766a0abb3c77b2a8
> > -	.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
> > -		0x81c2c92e47edaee681c2c92e47edaee6
> > -	.octa 0x92722c851482353b92722c851482353b,\
> > -		0x92722c851482353b92722c851482353b
> > -	.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
> > -		0xa2bfe8a14cf10364a2bfe8a14cf10364
> > -	.octa 0xa81a664bbc423001a81a664bbc423001,\
> > -		0xa81a664bbc423001a81a664bbc423001
> > -	.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
> > -		0xc24b8b70d0f89791c24b8b70d0f89791
> > -	.octa 0xc76c51a30654be30c76c51a30654be30,\
> > -		0xc76c51a30654be30c76c51a30654be30
> > -	.octa 0xd192e819d6ef5218d192e819d6ef5218,\
> > -		0xd192e819d6ef5218d192e819d6ef5218
> > -	.octa 0xd69906245565a910d69906245565a910,\
> > -		0xd69906245565a910d69906245565a910
> > -	.octa 0xf40e35855771202af40e35855771202a,\
> > -		0xf40e35855771202af40e35855771202a
> > -	.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
> > -		0x106aa07032bbd1b8106aa07032bbd1b8
> > -	.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
> > -		0x19a4c116b8d2d0c819a4c116b8d2d0c8
> > -	.octa 0x1e376c085141ab531e376c085141ab53,\
> > -		0x1e376c085141ab531e376c085141ab53
> > -	.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
> > -		0x2748774cdf8eeb992748774cdf8eeb99
> > -	.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
> > -		0x34b0bcb5e19b48a834b0bcb5e19b48a8
> > -	.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
> > -		0x391c0cb3c5c95a63391c0cb3c5c95a63
> > -	.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
> > -		0x4ed8aa4ae3418acb4ed8aa4ae3418acb
> > -	.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
> > -		0x5b9cca4f7763e3735b9cca4f7763e373
> > -	.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
> > -		0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
> > -	.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
> > -		0x748f82ee5defb2fc748f82ee5defb2fc
> > -	.octa 0x78a5636f43172f6078a5636f43172f60,\
> > -		0x78a5636f43172f6078a5636f43172f60
> > -	.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
> > -		0x84c87814a1f0ab7284c87814a1f0ab72
> > -	.octa 0x8cc702081a6439ec8cc702081a6439ec,\
> > -		0x8cc702081a6439ec8cc702081a6439ec
> > -	.octa 0x90befffa23631e2890befffa23631e28,\
> > -		0x90befffa23631e2890befffa23631e28
> > -	.octa 0xa4506cebde82bde9a4506cebde82bde9,\
> > -		0xa4506cebde82bde9a4506cebde82bde9
> > -	.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
> > -		0xbef9a3f7b2c67915bef9a3f7b2c67915
> > -	.octa 0xc67178f2e372532bc67178f2e372532b,\
> > -		0xc67178f2e372532bc67178f2e372532b
> > -	.octa 0xca273eceea26619cca273eceea26619c,\
> > -		0xca273eceea26619cca273eceea26619c
> > -	.octa 0xd186b8c721c0c207d186b8c721c0c207,\
> > -		0xd186b8c721c0c207d186b8c721c0c207
> > -	.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
> > -		0xeada7dd6cde0eb1eeada7dd6cde0eb1e
> > -	.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
> > -		0xf57d4f7fee6ed178f57d4f7fee6ed178
> > -	.octa 0x06f067aa72176fba06f067aa72176fba,\
> > -		0x06f067aa72176fba06f067aa72176fba
> > -	.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
> > -		0x0a637dc5a2c898a60a637dc5a2c898a6
> > -	.octa 0x113f9804bef90dae113f9804bef90dae,\
> > -		0x113f9804bef90dae113f9804bef90dae
> > -	.octa 0x1b710b35131c471b1b710b35131c471b,\
> > -		0x1b710b35131c471b1b710b35131c471b
> > -	.octa 0x28db77f523047d8428db77f523047d84,\
> > -		0x28db77f523047d8428db77f523047d84
> > -	.octa 0x32caab7b40c7249332caab7b40c72493,\
> > -		0x32caab7b40c7249332caab7b40c72493
> > -	.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
> > -		0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
> > -	.octa 0x431d67c49c100d4c431d67c49c100d4c,\
> > -		0x431d67c49c100d4c431d67c49c100d4c
> > -	.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
> > -		0x4cc5d4becb3e42b64cc5d4becb3e42b6
> > -	.octa 0x597f299cfc657e2a597f299cfc657e2a,\
> > -		0x597f299cfc657e2a597f299cfc657e2a
> > -	.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
> > -		0x5fcb6fab3ad6faec5fcb6fab3ad6faec
> > -	.octa 0x6c44198c4a4758176c44198c4a475817,\
> > -		0x6c44198c4a4758176c44198c4a475817
> > -
> > -.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
> > -.align 32
> > -PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
> > -                         .octa 0x18191a1b1c1d1e1f1011121314151617
> > diff --git a/crypto/Kconfig b/crypto/Kconfig
> > index f3e40ac56d93..4ee600bdefdb 100644
> > --- a/crypto/Kconfig
> > +++ b/crypto/Kconfig
> > @@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
> >  	  converts an arbitrary synchronous software crypto algorithm
> >  	  into an asynchronous algorithm that executes in a kernel thread.
> >  
> > -config CRYPTO_MCRYPTD
> > -	tristate "Software async multi-buffer crypto daemon"
> > -	select CRYPTO_BLKCIPHER
> > -	select CRYPTO_HASH
> > -	select CRYPTO_MANAGER
> > -	select CRYPTO_WORKQUEUE
> > -	help
> > -	  This is a generic software asynchronous crypto daemon that
> > -	  provides the kernel thread to assist multi-buffer crypto
> > -	  algorithms for submitting jobs and flushing jobs in multi-buffer
> > -	  crypto algorithms.  Multi-buffer crypto algorithms are executed
> > -	  in the context of this kernel thread and drivers can post
> > -	  their crypto request asynchronously to be processed by this daemon.
> > -
> >  config CRYPTO_AUTHENC
> >  	tristate "Authenc support"
> >  	select CRYPTO_AEAD
> > @@ -848,54 +834,6 @@ config CRYPTO_SHA1_PPC_SPE
> >  	  SHA-1 secure hash standard (DFIPS 180-4) implemented
> >  	  using powerpc SPE SIMD instruction set.
> >  
> > -config CRYPTO_SHA1_MB
> > -	tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
> > -	depends on X86 && 64BIT
> > -	select CRYPTO_SHA1
> > -	select CRYPTO_HASH
> > -	select CRYPTO_MCRYPTD
> > -	help
> > -	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
> > -	  using multi-buffer technique.  This algorithm computes on
> > -	  multiple data lanes concurrently with SIMD instructions for
> > -	  better throughput.  It should not be enabled by default but
> > -	  used when there is significant amount of work to keep the keep
> > -	  the data lanes filled to get performance benefit.  If the data
> > -	  lanes remain unfilled, a flush operation will be initiated to
> > -	  process the crypto jobs, adding a slight latency.
> > -
> > -config CRYPTO_SHA256_MB
> > -	tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
> > -	depends on X86 && 64BIT
> > -	select CRYPTO_SHA256
> > -	select CRYPTO_HASH
> > -	select CRYPTO_MCRYPTD
> > -	help
> > -	  SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
> > -	  using multi-buffer technique.  This algorithm computes on
> > -	  multiple data lanes concurrently with SIMD instructions for
> > -	  better throughput.  It should not be enabled by default but
> > -	  used when there is significant amount of work to keep the keep
> > -	  the data lanes filled to get performance benefit.  If the data
> > -	  lanes remain unfilled, a flush operation will be initiated to
> > -	  process the crypto jobs, adding a slight latency.
> > -
> > -config CRYPTO_SHA512_MB
> > -        tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
> > -        depends on X86 && 64BIT
> > -        select CRYPTO_SHA512
> > -        select CRYPTO_HASH
> > -        select CRYPTO_MCRYPTD
> > -        help
> > -          SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
> > -          using multi-buffer technique.  This algorithm computes on
> > -          multiple data lanes concurrently with SIMD instructions for
> > -          better throughput.  It should not be enabled by default but
> > -          used when there is significant amount of work to keep the keep
> > -          the data lanes filled to get performance benefit.  If the data
> > -          lanes remain unfilled, a flush operation will be initiated to
> > -          process the crypto jobs, adding a slight latency.
> > -
> >  config CRYPTO_SHA256
> >  	tristate "SHA224 and SHA256 digest algorithm"
> >  	select CRYPTO_HASH
> > diff --git a/crypto/Makefile b/crypto/Makefile
> > index 6d1d40eeb964..80e3da755cbf 100644
> > --- a/crypto/Makefile
> > +++ b/crypto/Makefile
> > @@ -93,7 +93,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
> >  obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
> >  obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
> >  obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
> > -obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
> >  obj-$(CONFIG_CRYPTO_DES) += des_generic.o
> >  obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
> >  obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
> > diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
> > deleted file mode 100644
> > index f14152147ce8..000000000000
> > --- a/crypto/mcryptd.c
> > +++ /dev/null
> > @@ -1,675 +0,0 @@
> > -/*
> > - * Software multibuffer async crypto daemon.
> > - *
> > - * Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - * Adapted from crypto daemon.
> > - *
> > - * This program is free software; you can redistribute it and/or modify it
> > - * under the terms of the GNU General Public License as published by the Free
> > - * Software Foundation; either version 2 of the License, or (at your option)
> > - * any later version.
> > - *
> > - */
> > -
> > -#include <crypto/algapi.h>
> > -#include <crypto/internal/hash.h>
> > -#include <crypto/internal/aead.h>
> > -#include <crypto/mcryptd.h>
> > -#include <crypto/crypto_wq.h>
> > -#include <linux/err.h>
> > -#include <linux/init.h>
> > -#include <linux/kernel.h>
> > -#include <linux/list.h>
> > -#include <linux/module.h>
> > -#include <linux/scatterlist.h>
> > -#include <linux/sched.h>
> > -#include <linux/sched/stat.h>
> > -#include <linux/slab.h>
> > -
> > -#define MCRYPTD_MAX_CPU_QLEN 100
> > -#define MCRYPTD_BATCH 9
> > -
> > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
> > -				   unsigned int tail);
> > -
> > -struct mcryptd_flush_list {
> > -	struct list_head list;
> > -	struct mutex lock;
> > -};
> > -
> > -static struct mcryptd_flush_list __percpu *mcryptd_flist;
> > -
> > -struct hashd_instance_ctx {
> > -	struct crypto_ahash_spawn spawn;
> > -	struct mcryptd_queue *queue;
> > -};
> > -
> > -static void mcryptd_queue_worker(struct work_struct *work);
> > -
> > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
> > -{
> > -	struct mcryptd_flush_list *flist;
> > -
> > -	if (!cstate->flusher_engaged) {
> > -		/* put the flusher on the flush list */
> > -		flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
> > -		mutex_lock(&flist->lock);
> > -		list_add_tail(&cstate->flush_list, &flist->list);
> > -		cstate->flusher_engaged = true;
> > -		cstate->next_flush = jiffies + delay;
> > -		queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
> > -			&cstate->flush, delay);
> > -		mutex_unlock(&flist->lock);
> > -	}
> > -}
> > -EXPORT_SYMBOL(mcryptd_arm_flusher);
> > -
> > -static int mcryptd_init_queue(struct mcryptd_queue *queue,
> > -			     unsigned int max_cpu_qlen)
> > -{
> > -	int cpu;
> > -	struct mcryptd_cpu_queue *cpu_queue;
> > -
> > -	queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
> > -	pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
> > -	if (!queue->cpu_queue)
> > -		return -ENOMEM;
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
> > -		pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
> > -		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
> > -		INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
> > -		spin_lock_init(&cpu_queue->q_lock);
> > -	}
> > -	return 0;
> > -}
> > -
> > -static void mcryptd_fini_queue(struct mcryptd_queue *queue)
> > -{
> > -	int cpu;
> > -	struct mcryptd_cpu_queue *cpu_queue;
> > -
> > -	for_each_possible_cpu(cpu) {
> > -		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
> > -		BUG_ON(cpu_queue->queue.qlen);
> > -	}
> > -	free_percpu(queue->cpu_queue);
> > -}
> > -
> > -static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
> > -				  struct crypto_async_request *request,
> > -				  struct mcryptd_hash_request_ctx *rctx)
> > -{
> > -	int cpu, err;
> > -	struct mcryptd_cpu_queue *cpu_queue;
> > -
> > -	cpu_queue = raw_cpu_ptr(queue->cpu_queue);
> > -	spin_lock(&cpu_queue->q_lock);
> > -	cpu = smp_processor_id();
> > -	rctx->tag.cpu = smp_processor_id();
> > -
> > -	err = crypto_enqueue_request(&cpu_queue->queue, request);
> > -	pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
> > -		 cpu, cpu_queue, request);
> > -	spin_unlock(&cpu_queue->q_lock);
> > -	queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
> > -
> > -	return err;
> > -}
> > -
> > -/*
> > - * Try to opportunisticlly flush the partially completed jobs if
> > - * crypto daemon is the only task running.
> > - */
> > -static void mcryptd_opportunistic_flush(void)
> > -{
> > -	struct mcryptd_flush_list *flist;
> > -	struct mcryptd_alg_cstate *cstate;
> > -
> > -	flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
> > -	while (single_task_running()) {
> > -		mutex_lock(&flist->lock);
> > -		cstate = list_first_entry_or_null(&flist->list,
> > -				struct mcryptd_alg_cstate, flush_list);
> > -		if (!cstate || !cstate->flusher_engaged) {
> > -			mutex_unlock(&flist->lock);
> > -			return;
> > -		}
> > -		list_del(&cstate->flush_list);
> > -		cstate->flusher_engaged = false;
> > -		mutex_unlock(&flist->lock);
> > -		cstate->alg_state->flusher(cstate);
> > -	}
> > -}
> > -
> > -/*
> > - * Called in workqueue context, do one real cryption work (via
> > - * req->complete) and reschedule itself if there are more work to
> > - * do.
> > - */
> > -static void mcryptd_queue_worker(struct work_struct *work)
> > -{
> > -	struct mcryptd_cpu_queue *cpu_queue;
> > -	struct crypto_async_request *req, *backlog;
> > -	int i;
> > -
> > -	/*
> > -	 * Need to loop through more than once for multi-buffer to
> > -	 * be effective.
> > -	 */
> > -
> > -	cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
> > -	i = 0;
> > -	while (i < MCRYPTD_BATCH || single_task_running()) {
> > -
> > -		spin_lock_bh(&cpu_queue->q_lock);
> > -		backlog = crypto_get_backlog(&cpu_queue->queue);
> > -		req = crypto_dequeue_request(&cpu_queue->queue);
> > -		spin_unlock_bh(&cpu_queue->q_lock);
> > -
> > -		if (!req) {
> > -			mcryptd_opportunistic_flush();
> > -			return;
> > -		}
> > -
> > -		if (backlog)
> > -			backlog->complete(backlog, -EINPROGRESS);
> > -		req->complete(req, 0);
> > -		if (!cpu_queue->queue.qlen)
> > -			return;
> > -		++i;
> > -	}
> > -	if (cpu_queue->queue.qlen)
> > -		queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
> > -}
> > -
> > -void mcryptd_flusher(struct work_struct *__work)
> > -{
> > -	struct	mcryptd_alg_cstate	*alg_cpu_state;
> > -	struct	mcryptd_alg_state	*alg_state;
> > -	struct	mcryptd_flush_list	*flist;
> > -	int	cpu;
> > -
> > -	cpu = smp_processor_id();
> > -	alg_cpu_state = container_of(to_delayed_work(__work),
> > -				     struct mcryptd_alg_cstate, flush);
> > -	alg_state = alg_cpu_state->alg_state;
> > -	if (alg_cpu_state->cpu != cpu)
> > -		pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
> > -				cpu, alg_cpu_state->cpu);
> > -
> > -	if (alg_cpu_state->flusher_engaged) {
> > -		flist = per_cpu_ptr(mcryptd_flist, cpu);
> > -		mutex_lock(&flist->lock);
> > -		list_del(&alg_cpu_state->flush_list);
> > -		alg_cpu_state->flusher_engaged = false;
> > -		mutex_unlock(&flist->lock);
> > -		alg_state->flusher(alg_cpu_state);
> > -	}
> > -}
> > -EXPORT_SYMBOL_GPL(mcryptd_flusher);
> > -
> > -static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
> > -{
> > -	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
> > -	struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
> > -
> > -	return ictx->queue;
> > -}
> > -
> > -static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
> > -				   unsigned int tail)
> > -{
> > -	char *p;
> > -	struct crypto_instance *inst;
> > -	int err;
> > -
> > -	p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
> > -	if (!p)
> > -		return ERR_PTR(-ENOMEM);
> > -
> > -	inst = (void *)(p + head);
> > -
> > -	err = -ENAMETOOLONG;
> > -	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
> > -		    "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
> > -		goto out_free_inst;
> > -
> > -	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
> > -
> > -	inst->alg.cra_priority = alg->cra_priority + 50;
> > -	inst->alg.cra_blocksize = alg->cra_blocksize;
> > -	inst->alg.cra_alignmask = alg->cra_alignmask;
> > -
> > -out:
> > -	return p;
> > -
> > -out_free_inst:
> > -	kfree(p);
> > -	p = ERR_PTR(err);
> > -	goto out;
> > -}
> > -
> > -static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
> > -					  u32 *mask)
> > -{
> > -	struct crypto_attr_type *algt;
> > -
> > -	algt = crypto_get_attr_type(tb);
> > -	if (IS_ERR(algt))
> > -		return false;
> > -
> > -	*type |= algt->type & CRYPTO_ALG_INTERNAL;
> > -	*mask |= algt->mask & CRYPTO_ALG_INTERNAL;
> > -
> > -	if (*type & *mask & CRYPTO_ALG_INTERNAL)
> > -		return true;
> > -	else
> > -		return false;
> > -}
> > -
> > -static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
> > -	struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
> > -	struct crypto_ahash_spawn *spawn = &ictx->spawn;
> > -	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> > -	struct crypto_ahash *hash;
> > -
> > -	hash = crypto_spawn_ahash(spawn);
> > -	if (IS_ERR(hash))
> > -		return PTR_ERR(hash);
> > -
> > -	ctx->child = hash;
> > -	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > -				 sizeof(struct mcryptd_hash_request_ctx) +
> > -				 crypto_ahash_reqsize(hash));
> > -	return 0;
> > -}
> > -
> > -static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
> > -{
> > -	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> > -
> > -	crypto_free_ahash(ctx->child);
> > -}
> > -
> > -static int mcryptd_hash_setkey(struct crypto_ahash *parent,
> > -				   const u8 *key, unsigned int keylen)
> > -{
> > -	struct mcryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
> > -	struct crypto_ahash *child = ctx->child;
> > -	int err;
> > -
> > -	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
> > -	crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
> > -				      CRYPTO_TFM_REQ_MASK);
> > -	err = crypto_ahash_setkey(child, key, keylen);
> > -	crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
> > -				       CRYPTO_TFM_RES_MASK);
> > -	return err;
> > -}
> > -
> > -static int mcryptd_hash_enqueue(struct ahash_request *req,
> > -				crypto_completion_t complete)
> > -{
> > -	int ret;
> > -
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > -	struct mcryptd_queue *queue =
> > -		mcryptd_get_queue(crypto_ahash_tfm(tfm));
> > -
> > -	rctx->complete = req->base.complete;
> > -	req->base.complete = complete;
> > -
> > -	ret = mcryptd_enqueue_request(queue, &req->base, rctx);
> > -
> > -	return ret;
> > -}
> > -
> > -static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
> > -{
> > -	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
> > -	struct crypto_ahash *child = ctx->child;
> > -	struct ahash_request *req = ahash_request_cast(req_async);
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -	struct ahash_request *desc = &rctx->areq;
> > -
> > -	if (unlikely(err == -EINPROGRESS))
> > -		goto out;
> > -
> > -	ahash_request_set_tfm(desc, child);
> > -	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
> > -						rctx->complete, req_async);
> > -
> > -	rctx->out = req->result;
> > -	err = crypto_ahash_init(desc);
> > -
> > -out:
> > -	local_bh_disable();
> > -	rctx->complete(&req->base, err);
> > -	local_bh_enable();
> > -}
> > -
> > -static int mcryptd_hash_init_enqueue(struct ahash_request *req)
> > -{
> > -	return mcryptd_hash_enqueue(req, mcryptd_hash_init);
> > -}
> > -
> > -static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
> > -{
> > -	struct ahash_request *req = ahash_request_cast(req_async);
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -
> > -	if (unlikely(err == -EINPROGRESS))
> > -		goto out;
> > -
> > -	rctx->out = req->result;
> > -	err = crypto_ahash_update(&rctx->areq);
> > -	if (err) {
> > -		req->base.complete = rctx->complete;
> > -		goto out;
> > -	}
> > -
> > -	return;
> > -out:
> > -	local_bh_disable();
> > -	rctx->complete(&req->base, err);
> > -	local_bh_enable();
> > -}
> > -
> > -static int mcryptd_hash_update_enqueue(struct ahash_request *req)
> > -{
> > -	return mcryptd_hash_enqueue(req, mcryptd_hash_update);
> > -}
> > -
> > -static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
> > -{
> > -	struct ahash_request *req = ahash_request_cast(req_async);
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -
> > -	if (unlikely(err == -EINPROGRESS))
> > -		goto out;
> > -
> > -	rctx->out = req->result;
> > -	err = crypto_ahash_final(&rctx->areq);
> > -	if (err) {
> > -		req->base.complete = rctx->complete;
> > -		goto out;
> > -	}
> > -
> > -	return;
> > -out:
> > -	local_bh_disable();
> > -	rctx->complete(&req->base, err);
> > -	local_bh_enable();
> > -}
> > -
> > -static int mcryptd_hash_final_enqueue(struct ahash_request *req)
> > -{
> > -	return mcryptd_hash_enqueue(req, mcryptd_hash_final);
> > -}
> > -
> > -static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
> > -{
> > -	struct ahash_request *req = ahash_request_cast(req_async);
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -
> > -	if (unlikely(err == -EINPROGRESS))
> > -		goto out;
> > -	rctx->out = req->result;
> > -	err = crypto_ahash_finup(&rctx->areq);
> > -
> > -	if (err) {
> > -		req->base.complete = rctx->complete;
> > -		goto out;
> > -	}
> > -
> > -	return;
> > -out:
> > -	local_bh_disable();
> > -	rctx->complete(&req->base, err);
> > -	local_bh_enable();
> > -}
> > -
> > -static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
> > -{
> > -	return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
> > -}
> > -
> > -static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
> > -{
> > -	struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
> > -	struct crypto_ahash *child = ctx->child;
> > -	struct ahash_request *req = ahash_request_cast(req_async);
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -	struct ahash_request *desc = &rctx->areq;
> > -
> > -	if (unlikely(err == -EINPROGRESS))
> > -		goto out;
> > -
> > -	ahash_request_set_tfm(desc, child);
> > -	ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
> > -						rctx->complete, req_async);
> > -
> > -	rctx->out = req->result;
> > -	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
> > -
> > -out:
> > -	local_bh_disable();
> > -	rctx->complete(&req->base, err);
> > -	local_bh_enable();
> > -}
> > -
> > -static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
> > -{
> > -	return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
> > -}
> > -
> > -static int mcryptd_hash_export(struct ahash_request *req, void *out)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -
> > -	return crypto_ahash_export(&rctx->areq, out);
> > -}
> > -
> > -static int mcryptd_hash_import(struct ahash_request *req, const void *in)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -
> > -	return crypto_ahash_import(&rctx->areq, in);
> > -}
> > -
> > -static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
> > -			      struct mcryptd_queue *queue)
> > -{
> > -	struct hashd_instance_ctx *ctx;
> > -	struct ahash_instance *inst;
> > -	struct hash_alg_common *halg;
> > -	struct crypto_alg *alg;
> > -	u32 type = 0;
> > -	u32 mask = 0;
> > -	int err;
> > -
> > -	if (!mcryptd_check_internal(tb, &type, &mask))
> > -		return -EINVAL;
> > -
> > -	halg = ahash_attr_alg(tb[1], type, mask);
> > -	if (IS_ERR(halg))
> > -		return PTR_ERR(halg);
> > -
> > -	alg = &halg->base;
> > -	pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
> > -	inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
> > -					sizeof(*ctx));
> > -	err = PTR_ERR(inst);
> > -	if (IS_ERR(inst))
> > -		goto out_put_alg;
> > -
> > -	ctx = ahash_instance_ctx(inst);
> > -	ctx->queue = queue;
> > -
> > -	err = crypto_init_ahash_spawn(&ctx->spawn, halg,
> > -				      ahash_crypto_instance(inst));
> > -	if (err)
> > -		goto out_free_inst;
> > -
> > -	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
> > -		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
> > -				   CRYPTO_ALG_OPTIONAL_KEY));
> > -
> > -	inst->alg.halg.digestsize = halg->digestsize;
> > -	inst->alg.halg.statesize = halg->statesize;
> > -	inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
> > -
> > -	inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
> > -	inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
> > -
> > -	inst->alg.init   = mcryptd_hash_init_enqueue;
> > -	inst->alg.update = mcryptd_hash_update_enqueue;
> > -	inst->alg.final  = mcryptd_hash_final_enqueue;
> > -	inst->alg.finup  = mcryptd_hash_finup_enqueue;
> > -	inst->alg.export = mcryptd_hash_export;
> > -	inst->alg.import = mcryptd_hash_import;
> > -	if (crypto_hash_alg_has_setkey(halg))
> > -		inst->alg.setkey = mcryptd_hash_setkey;
> > -	inst->alg.digest = mcryptd_hash_digest_enqueue;
> > -
> > -	err = ahash_register_instance(tmpl, inst);
> > -	if (err) {
> > -		crypto_drop_ahash(&ctx->spawn);
> > -out_free_inst:
> > -		kfree(inst);
> > -	}
> > -
> > -out_put_alg:
> > -	crypto_mod_put(alg);
> > -	return err;
> > -}
> > -
> > -static struct mcryptd_queue mqueue;
> > -
> > -static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
> > -{
> > -	struct crypto_attr_type *algt;
> > -
> > -	algt = crypto_get_attr_type(tb);
> > -	if (IS_ERR(algt))
> > -		return PTR_ERR(algt);
> > -
> > -	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
> > -	case CRYPTO_ALG_TYPE_DIGEST:
> > -		return mcryptd_create_hash(tmpl, tb, &mqueue);
> > -	break;
> > -	}
> > -
> > -	return -EINVAL;
> > -}
> > -
> > -static void mcryptd_free(struct crypto_instance *inst)
> > -{
> > -	struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
> > -	struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
> > -
> > -	switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
> > -	case CRYPTO_ALG_TYPE_AHASH:
> > -		crypto_drop_ahash(&hctx->spawn);
> > -		kfree(ahash_instance(inst));
> > -		return;
> > -	default:
> > -		crypto_drop_spawn(&ctx->spawn);
> > -		kfree(inst);
> > -	}
> > -}
> > -
> > -static struct crypto_template mcryptd_tmpl = {
> > -	.name = "mcryptd",
> > -	.create = mcryptd_create,
> > -	.free = mcryptd_free,
> > -	.module = THIS_MODULE,
> > -};
> > -
> > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
> > -					u32 type, u32 mask)
> > -{
> > -	char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
> > -	struct crypto_ahash *tfm;
> > -
> > -	if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
> > -		     "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
> > -		return ERR_PTR(-EINVAL);
> > -	tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
> > -	if (IS_ERR(tfm))
> > -		return ERR_CAST(tfm);
> > -	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
> > -		crypto_free_ahash(tfm);
> > -		return ERR_PTR(-EINVAL);
> > -	}
> > -
> > -	return __mcryptd_ahash_cast(tfm);
> > -}
> > -EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
> > -
> > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
> > -{
> > -	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
> > -
> > -	return ctx->child;
> > -}
> > -EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
> > -
> > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
> > -{
> > -	struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
> > -	return &rctx->areq;
> > -}
> > -EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
> > -
> > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
> > -{
> > -	crypto_free_ahash(&tfm->base);
> > -}
> > -EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
> > -
> > -static int __init mcryptd_init(void)
> > -{
> > -	int err, cpu;
> > -	struct mcryptd_flush_list *flist;
> > -
> > -	mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
> > -	for_each_possible_cpu(cpu) {
> > -		flist = per_cpu_ptr(mcryptd_flist, cpu);
> > -		INIT_LIST_HEAD(&flist->list);
> > -		mutex_init(&flist->lock);
> > -	}
> > -
> > -	err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
> > -	if (err) {
> > -		free_percpu(mcryptd_flist);
> > -		return err;
> > -	}
> > -
> > -	err = crypto_register_template(&mcryptd_tmpl);
> > -	if (err) {
> > -		mcryptd_fini_queue(&mqueue);
> > -		free_percpu(mcryptd_flist);
> > -	}
> > -
> > -	return err;
> > -}
> > -
> > -static void __exit mcryptd_exit(void)
> > -{
> > -	mcryptd_fini_queue(&mqueue);
> > -	crypto_unregister_template(&mcryptd_tmpl);
> > -	free_percpu(mcryptd_flist);
> > -}
> > -
> > -subsys_initcall(mcryptd_init);
> > -module_exit(mcryptd_exit);
> > -
> > -MODULE_LICENSE("GPL");
> > -MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
> > -MODULE_ALIAS_CRYPTO("mcryptd");
> > diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
> > deleted file mode 100644
> > index b67404fc4b34..000000000000
> > --- a/include/crypto/mcryptd.h
> > +++ /dev/null
> > @@ -1,114 +0,0 @@
> > -/* SPDX-License-Identifier: GPL-2.0 */
> > -/*
> > - * Software async multibuffer crypto daemon headers
> > - *
> > - *    Author:
> > - *             Tim Chen <tim.c.chen@linux.intel.com>
> > - *
> > - *    Copyright (c) 2014, Intel Corporation.
> > - */
> > -
> > -#ifndef _CRYPTO_MCRYPT_H
> > -#define _CRYPTO_MCRYPT_H
> > -
> > -#include <linux/crypto.h>
> > -#include <linux/kernel.h>
> > -#include <crypto/hash.h>
> > -
> > -struct mcryptd_ahash {
> > -	struct crypto_ahash base;
> > -};
> > -
> > -static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
> > -	struct crypto_ahash *tfm)
> > -{
> > -	return (struct mcryptd_ahash *)tfm;
> > -}
> > -
> > -struct mcryptd_cpu_queue {
> > -	struct crypto_queue queue;
> > -	spinlock_t q_lock;
> > -	struct work_struct work;
> > -};
> > -
> > -struct mcryptd_queue {
> > -	struct mcryptd_cpu_queue __percpu *cpu_queue;
> > -};
> > -
> > -struct mcryptd_instance_ctx {
> > -	struct crypto_spawn spawn;
> > -	struct mcryptd_queue *queue;
> > -};
> > -
> > -struct mcryptd_hash_ctx {
> > -	struct crypto_ahash *child;
> > -	struct mcryptd_alg_state *alg_state;
> > -};
> > -
> > -struct mcryptd_tag {
> > -	/* seq number of request */
> > -	unsigned seq_num;
> > -	/* arrival time of request */
> > -	unsigned long arrival;
> > -	unsigned long expire;
> > -	int	cpu;
> > -};
> > -
> > -struct mcryptd_hash_request_ctx {
> > -	struct list_head waiter;
> > -	crypto_completion_t complete;
> > -	struct mcryptd_tag tag;
> > -	struct crypto_hash_walk walk;
> > -	u8 *out;
> > -	int flag;
> > -	struct ahash_request areq;
> > -};
> > -
> > -struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
> > -					u32 type, u32 mask);
> > -struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm);
> > -struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req);
> > -void mcryptd_free_ahash(struct mcryptd_ahash *tfm);
> > -void mcryptd_flusher(struct work_struct *work);
> > -
> > -enum mcryptd_req_type {
> > -	MCRYPTD_NONE,
> > -	MCRYPTD_UPDATE,
> > -	MCRYPTD_FINUP,
> > -	MCRYPTD_DIGEST,
> > -	MCRYPTD_FINAL
> > -};
> > -
> > -struct mcryptd_alg_cstate {
> > -	unsigned long next_flush;
> > -	unsigned next_seq_num;
> > -	bool	flusher_engaged;
> > -	struct  delayed_work flush;
> > -	int	cpu;
> > -	struct  mcryptd_alg_state *alg_state;
> > -	void	*mgr;
> > -	spinlock_t work_lock;
> > -	struct list_head work_list;
> > -	struct list_head flush_list;
> > -};
> > -
> > -struct mcryptd_alg_state {
> > -	struct mcryptd_alg_cstate __percpu *alg_cstate;
> > -	unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate);
> > -};
> > -
> > -/* return delay in jiffies from current time */
> > -static inline unsigned long get_delay(unsigned long t)
> > -{
> > -	long delay;
> > -
> > -	delay = (long) t - (long) jiffies;
> > -	if (delay <= 0)
> > -		return 0;
> > -	else
> > -		return (unsigned long) delay;
> > -}
> > -
> > -void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay);
> > -
> > -#endif
> > 
>