From: Vladimir Zapolskiy Subject: Re: [PATCH v5 4/4] crypto: Add Allwinner Security System crypto accelerator Date: Tue, 21 Oct 2014 02:28:37 +0300 Message-ID: <54459AA5.2030705@mleia.com> References: <1413728182-13569-1-git-send-email-clabbe.montjoie@gmail.com> <1413728182-13569-5-git-send-email-clabbe.montjoie@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Cc: devicetree@vger.kernel.org, linux-kernel@vger.kernel.org, linux-sunxi@googlegroups.com, linux-crypto@vger.kernel.org, linux-arm-kernel@lists.infradead.org To: LABBE Corentin , robh+dt@kernel.org, pawel.moll@arm.com, mark.rutland@arm.com, ijc+devicetree@hellion.org.uk, galak@codeaurora.org, maxime.ripard@free-electrons.com, linux@arm.linux.org.uk, herbert@gondor.apana.org.au, davem@davemloft.net, grant.likely@linaro.org, akpm@linux-foundation.org, gregkh@linuxfoundation.org, joe@perches.com, mchehab@osg.samsung.com, crope@iki.fi Return-path: In-Reply-To: <1413728182-13569-5-git-send-email-clabbe.montjoie@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-crypto.vger.kernel.org Hello LABBE, On 19.10.2014 17:16, LABBE Corentin wrote: > Add support for the Security System included in Allwinner SoC A20. > The Security System is a hardware cryptographic accelerator that support AES/MD5/SHA1/DES/3DES/PRNG algorithms. 
> > Signed-off-by: LABBE Corentin > --- > drivers/crypto/Kconfig | 17 ++ > drivers/crypto/Makefile | 1 + > drivers/crypto/sunxi-ss/Makefile | 2 + > drivers/crypto/sunxi-ss/sunxi-ss-cipher.c | 489 ++++++++++++++++++++++++++++++ > drivers/crypto/sunxi-ss/sunxi-ss-core.c | 318 +++++++++++++++++++ > drivers/crypto/sunxi-ss/sunxi-ss-hash.c | 445 +++++++++++++++++++++++++++ > drivers/crypto/sunxi-ss/sunxi-ss.h | 193 ++++++++++++ > 7 files changed, 1465 insertions(+) > create mode 100644 drivers/crypto/sunxi-ss/Makefile > create mode 100644 drivers/crypto/sunxi-ss/sunxi-ss-cipher.c > create mode 100644 drivers/crypto/sunxi-ss/sunxi-ss-core.c > create mode 100644 drivers/crypto/sunxi-ss/sunxi-ss-hash.c > create mode 100644 drivers/crypto/sunxi-ss/sunxi-ss.h > > diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig > index 2fb0fdf..9ba9759 100644 > --- a/drivers/crypto/Kconfig > +++ b/drivers/crypto/Kconfig > @@ -436,4 +436,21 @@ config CRYPTO_DEV_QCE > hardware. To compile this driver as a module, choose M here. The > module will be called qcrypto. > > +config CRYPTO_DEV_SUNXI_SS > + tristate "Support for Allwinner Security System cryptographic accelerator" > + depends on ARCH_SUNXI > + select CRYPTO_MD5 > + select CRYPTO_SHA1 > + select CRYPTO_AES > + select CRYPTO_DES > + select CRYPTO_BLKCIPHER > + help > + Some Allwinner SoC have a crypto accelerator named > + Security System. Select this if you want to use it. > + The Security System handle AES/DES/3DES ciphers in CBC mode > + and SHA1 and MD5 hash algorithms. > + > + To compile this driver as a module, choose M here: the module > + will be called sunxi-ss. 
> + > endif # CRYPTO_HW > diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile > index 3924f93..856545c 100644 > --- a/drivers/crypto/Makefile > +++ b/drivers/crypto/Makefile > @@ -25,3 +25,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o > obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ > obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ > obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ > +obj-$(CONFIG_CRYPTO_DEV_SUNXI_SS) += sunxi-ss/ > diff --git a/drivers/crypto/sunxi-ss/Makefile b/drivers/crypto/sunxi-ss/Makefile > new file mode 100644 > index 0000000..8bb287d > --- /dev/null > +++ b/drivers/crypto/sunxi-ss/Makefile > @@ -0,0 +1,2 @@ > +obj-$(CONFIG_CRYPTO_DEV_SUNXI_SS) += sunxi-ss.o > +sunxi-ss-y += sunxi-ss-core.o sunxi-ss-hash.o sunxi-ss-cipher.o > diff --git a/drivers/crypto/sunxi-ss/sunxi-ss-cipher.c b/drivers/crypto/sunxi-ss/sunxi-ss-cipher.c > new file mode 100644 > index 0000000..8d0416e > --- /dev/null > +++ b/drivers/crypto/sunxi-ss/sunxi-ss-cipher.c > @@ -0,0 +1,489 @@ > +/* > + * sunxi-ss-cipher.c - hardware cryptographic accelerator for Allwinner A20 SoC > + * > + * Copyright (C) 2013-2014 Corentin LABBE > + * > + * This file add support for AES cipher with 128,192,256 bits > + * keysize in CBC mode. > + * Add support also for DES and 3DES in CBC mode. > + * > + * You could find the datasheet in Documentation/arm/sunxi/README > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + */ > +#include "sunxi-ss.h" > + > +extern struct sunxi_ss_ctx *ss; > + > +static int sunxi_ss_cipher(struct ablkcipher_request *areq, u32 mode) > +{ > + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq); > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + const char *cipher_type; > + > + if (areq->nbytes == 0) > + return 0; > + > + if (areq->info == NULL) { > + dev_err(ss->dev, "ERROR: Empty IV\n"); > + return -EINVAL; > + } > + > + if (areq->src == NULL || areq->dst == NULL) { > + dev_err(ss->dev, "ERROR: Some SGs are NULL\n"); > + return -EINVAL; > + } > + > + cipher_type = crypto_tfm_alg_name(crypto_ablkcipher_tfm(tfm)); > + > + if (strcmp("cbc(aes)", cipher_type) == 0) { > + mode |= SS_OP_AES | SS_CBC | SS_ENABLED | op->keymode; > + return sunxi_ss_aes_poll(areq, mode); > + } > + > + if (strcmp("cbc(des)", cipher_type) == 0) { > + mode |= SS_OP_DES | SS_CBC | SS_ENABLED | op->keymode; > + return sunxi_ss_des_poll(areq, mode); > + } > + > + if (strcmp("cbc(des3_ede)", cipher_type) == 0) { > + mode |= SS_OP_3DES | SS_CBC | SS_ENABLED | op->keymode; > + return sunxi_ss_des_poll(areq, mode); > + } > + > + dev_err(ss->dev, "ERROR: Cipher %s not handled\n", cipher_type); > + return -EINVAL; > +} > + > +int sunxi_ss_cipher_encrypt(struct ablkcipher_request *areq) > +{ > + return sunxi_ss_cipher(areq, SS_ENCRYPTION); > +} > + > +int sunxi_ss_cipher_decrypt(struct ablkcipher_request *areq) > +{ > + return sunxi_ss_cipher(areq, SS_DECRYPTION); > +} > + > +int sunxi_ss_cipher_init(struct crypto_tfm *tfm) > +{ > + struct sunxi_tfm_ctx *op = crypto_tfm_ctx(tfm); > + > + memset(op, 0, sizeof(struct sunxi_tfm_ctx)); > + return 0; > +} > + > +/* > + * Optimized function for the case where we have only one SG, > + * so we can use kmap_atomic > + */ > +static int sunxi_ss_aes_poll_atomic(struct ablkcipher_request *areq) > +{ > + u32 spaces; > + struct scatterlist *in_sg = areq->src; > + struct scatterlist *out_sg = areq->dst; > + void *src_addr; > + 
void *dst_addr; > + unsigned int ileft = areq->nbytes; > + unsigned int oleft = areq->nbytes; > + unsigned int todo; > + u32 *src32; > + u32 *dst32; > + u32 rx_cnt = 32; > + u32 tx_cnt = 0; > + int i; > + > + src_addr = kmap_atomic(sg_page(in_sg)) + in_sg->offset; > + if (src_addr == NULL) { > + dev_err(ss->dev, "kmap_atomic error for src SG\n"); > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + > + dst_addr = kmap_atomic(sg_page(out_sg)) + out_sg->offset; > + if (dst_addr == NULL) { > + dev_err(ss->dev, "kmap_atomic error for dst SG\n"); > + writel(0, ss->base + SS_CTL); > + kunmap_atomic(src_addr); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + > + src32 = (u32 *)src_addr; > + dst32 = (u32 *)dst_addr; > + ileft = areq->nbytes / 4; > + oleft = areq->nbytes / 4; > + i = 0; > + do { > + if (ileft > 0 && rx_cnt > 0) { > + todo = min(rx_cnt, ileft); > + ileft -= todo; > + do { > + writel_relaxed(*src32++, > + ss->base + > + SS_RXFIFO); > + todo--; > + } while (todo > 0); > + } > + if (tx_cnt > 0) { > + todo = min(tx_cnt, oleft); > + oleft -= todo; > + do { > + *dst32++ = readl_relaxed(ss->base + > + SS_TXFIFO); > + todo--; > + } while (todo > 0); > + } > + spaces = readl_relaxed(ss->base + SS_FCSR); > + rx_cnt = SS_RXFIFO_SPACES(spaces); > + tx_cnt = SS_TXFIFO_SPACES(spaces); > + } while (oleft > 0); > + writel(0, ss->base + SS_CTL); > + kunmap_atomic(src_addr); > + kunmap_atomic(dst_addr); > + mutex_unlock(&ss->lock); > + return 0; > +} > + > +int sunxi_ss_aes_poll(struct ablkcipher_request *areq, u32 mode) > +{ > + u32 spaces; > + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq); > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); > + /* when activating SS, the default FIFO space is 32 */ > + u32 rx_cnt = 32; > + u32 tx_cnt = 0; > + u32 v; > + int i; > + struct scatterlist *in_sg = areq->src; > + struct scatterlist *out_sg = areq->dst; 
> + void *src_addr; > + void *dst_addr; > + unsigned int ileft = areq->nbytes; > + unsigned int oleft = areq->nbytes; > + unsigned int sgileft = areq->src->length; > + unsigned int sgoleft = areq->dst->length; > + unsigned int todo; > + u32 *src32; > + u32 *dst32; > + > + mutex_lock(&ss->lock); > + > + for (i = 0; i < op->keylen; i += 4) > + writel(*(op->key + i/4), ss->base + SS_KEY0 + i); > + > + if (areq->info != NULL) { > + for (i = 0; i < 4 && i < ivsize / 4; i++) { > + v = *(u32 *)(areq->info + i * 4); > + writel(v, ss->base + SS_IV0 + i * 4); > + } > + } > + writel(mode, ss->base + SS_CTL); > + > + /* If we have only one SG, we can use kmap_atomic */ > + if (sg_next(in_sg) == NULL && sg_next(out_sg) == NULL) > + return sunxi_ss_aes_poll_atomic(areq); For clarity, it might be better to move all the "mutex_unlock(&ss->lock)" calls out of the sunxi_ss_aes_poll_atomic() body to right here, so that the lock is taken and released in the same function. > + > + /* > + * If we have more than one SG, we cannot use kmap_atomic since > + * we hold the mapping too long > + */ > + src_addr = kmap(sg_page(in_sg)) + in_sg->offset; > + if (src_addr == NULL) { > + dev_err(ss->dev, "KMAP error for src SG\n"); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + dst_addr = kmap(sg_page(out_sg)) + out_sg->offset; > + if (dst_addr == NULL) { > + kunmap(sg_page(in_sg)); > + dev_err(ss->dev, "KMAP error for dst SG\n"); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + src32 = (u32 *)src_addr; > + dst32 = (u32 *)dst_addr; > + ileft = areq->nbytes / 4; > + oleft = areq->nbytes / 4; > + sgileft = in_sg->length / 4; > + sgoleft = out_sg->length / 4; > + do { > + spaces = readl_relaxed(ss->base + SS_FCSR); > + rx_cnt = SS_RXFIFO_SPACES(spaces); > + tx_cnt = SS_TXFIFO_SPACES(spaces); > + todo = min3(rx_cnt, ileft, sgileft); > + if (todo > 0) { > + ileft -= todo; > + sgileft -= todo; > + } > + while (todo > 0) { > + writel_relaxed(*src32++, ss->base + SS_RXFIFO); > + todo--; > + } > + if (in_sg != NULL && sgileft == 0 && ileft > 0) { > + 
kunmap(sg_page(in_sg)); > + in_sg = sg_next(in_sg); > + while (in_sg != NULL && in_sg->length == 0) > + in_sg = sg_next(in_sg); > + if (in_sg != NULL && ileft > 0) { > + src_addr = kmap(sg_page(in_sg)) + in_sg->offset; > + if (src_addr == NULL) { > + dev_err(ss->dev, "ERROR: KMAP for src SG\n"); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + src32 = src_addr; > + sgileft = in_sg->length / 4; > + } > + } > + /* do not test oleft since when oleft == 0 we have finished */ > + todo = min3(tx_cnt, oleft, sgoleft); > + if (todo > 0) { > + oleft -= todo; > + sgoleft -= todo; > + } > + while (todo > 0) { > + *dst32++ = readl_relaxed(ss->base + SS_TXFIFO); > + todo--; > + } > + if (out_sg != NULL && sgoleft == 0 && oleft >= 0) { > + kunmap(sg_page(out_sg)); > + out_sg = sg_next(out_sg); > + while (out_sg != NULL && out_sg->length == 0) > + out_sg = sg_next(out_sg); > + if (out_sg != NULL && oleft > 0) { > + dst_addr = kmap(sg_page(out_sg)) + > + out_sg->offset; > + if (dst_addr == NULL) { > + dev_err(ss->dev, "KMAP error\n"); > + mutex_unlock(&ss->lock); > + return -EINVAL; > + } > + dst32 = dst_addr; > + sgoleft = out_sg->length / 4; > + } > + } > + } while (oleft > 0); > + > + writel_relaxed(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return 0; > +} > + > +/* > + * Pure CPU way of doing DES/3DES with SS > + * Since DES and 3DES SGs could be smaller than 4 bytes, I use sg_copy_to_buffer > + * for "linearize" them. 
> + * The problem with that is that I alloc (2 x areq->nbytes) for buf_in/buf_out > + * TODO: change this system, I need to support other mode than CBC where len > + * is not a multiple of 4 and the hack of linearize use too much memory > + * SGsrc -> buf_in -> SS -> buf_out -> SGdst > + */ > +int sunxi_ss_des_poll(struct ablkcipher_request *areq, u32 mode) > +{ > + u32 value, spaces; > + size_t nb_in_sg_tx, nb_in_sg_rx; > + size_t ir, it; > + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq); > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + unsigned int ivsize = crypto_ablkcipher_ivsize(tfm); > + u32 tx_cnt = 0; > + u32 rx_cnt = 0; > + u32 v; > + int i; > + int no_chunk = 1; > + struct scatterlist *in_sg = areq->src; > + struct scatterlist *out_sg = areq->dst; > + > + /* > + * if we have only SGs with size multiple of 4, > + * we can use the SS AES function > + */ > + while (in_sg != NULL && no_chunk == 1) { > + if ((in_sg->length % 4) != 0) > + no_chunk = 0; > + in_sg = sg_next(in_sg); > + } > + while (out_sg != NULL && no_chunk == 1) { > + if ((out_sg->length % 4) != 0) > + no_chunk = 0; > + out_sg = sg_next(out_sg); > + } > + > + if (no_chunk == 1) > + return sunxi_ss_aes_poll(areq, mode); > + > + in_sg = areq->src; > + out_sg = areq->dst; > + > + nb_in_sg_rx = sg_nents(in_sg); > + nb_in_sg_tx = sg_nents(out_sg); > + > + /* > + * buf_in and buf_out are allocated only one time > + * then we keep the buffer until driver end > + * the allocation can only grow more > + * we do not reduce it for simplification > + */ > + mutex_lock(&ss->bufin_lock); > + if (ss->buf_in == NULL) { > + ss->buf_in = kmalloc(areq->nbytes, GFP_KERNEL); > + ss->buf_in_size = areq->nbytes; > + } else { > + if (areq->nbytes > ss->buf_in_size) { > + kfree(ss->buf_in); > + ss->buf_in = kmalloc(areq->nbytes, GFP_KERNEL); > + ss->buf_in_size = areq->nbytes; > + } > + } > + if (ss->buf_in == NULL) { > + ss->buf_in_size = 0; > + mutex_unlock(&ss->bufin_lock); > + 
dev_err(ss->dev, "Unable to allocate pages.\n"); > + return -ENOMEM; > + } > + mutex_lock(&ss->bufout_lock); > + if (ss->buf_out == NULL) { > + ss->buf_out = kmalloc(areq->nbytes, GFP_KERNEL); > + if (ss->buf_out == NULL) { > + ss->buf_out_size = 0; > + mutex_unlock(&ss->bufin_lock); > + mutex_unlock(&ss->bufout_lock); > + dev_err(ss->dev, "Unable to allocate pages.\n"); > + return -ENOMEM; > + } > + ss->buf_out_size = areq->nbytes; > + } else { > + if (areq->nbytes > ss->buf_out_size) { > + kfree(ss->buf_out); > + ss->buf_out = kmalloc(areq->nbytes, GFP_KERNEL); > + if (ss->buf_out == NULL) { > + ss->buf_out_size = 0; > + mutex_unlock(&ss->bufin_lock); > + mutex_unlock(&ss->bufout_lock); > + dev_err(ss->dev, "Unable to allocate pages.\n"); > + return -ENOMEM; > + } > + ss->buf_out_size = areq->nbytes; > + } > + } > + > + sg_copy_to_buffer(areq->src, nb_in_sg_rx, ss->buf_in, areq->nbytes); > + > + ir = 0; > + it = 0; > + mutex_lock(&ss->lock); > + > + for (i = 0; i < op->keylen; i += 4) > + writel(*(op->key + i/4), ss->base + SS_KEY0 + i); > + if (areq->info != NULL) { > + for (i = 0; i < 4 && i < ivsize / 4; i++) { > + v = *(u32 *)(areq->info + i * 4); > + writel(v, ss->base + SS_IV0 + i * 4); > + } > + } > + writel(mode, ss->base + SS_CTL); > + > + do { > + if (rx_cnt == 0 || tx_cnt == 0) { > + spaces = readl(ss->base + SS_FCSR); > + rx_cnt = SS_RXFIFO_SPACES(spaces); > + tx_cnt = SS_TXFIFO_SPACES(spaces); > + } > + if (rx_cnt > 0 && ir < areq->nbytes) { > + do { > + value = *(u32 *)(ss->buf_in + ir); > + writel(value, ss->base + SS_RXFIFO); > + ir += 4; > + rx_cnt--; > + } while (rx_cnt > 0 && ir < areq->nbytes); > + } > + if (tx_cnt > 0 && it < areq->nbytes) { > + do { > + value = readl(ss->base + SS_TXFIFO); > + *(u32 *)(ss->buf_out + it) = value; > + it += 4; > + tx_cnt--; > + } while (tx_cnt > 0 && it < areq->nbytes); > + } > + if (ir == areq->nbytes) { > + mutex_unlock(&ss->bufin_lock); > + ir++; > + } > + } while (it < areq->nbytes); > + > + writel(0, 
ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + > + /* > + * a simple optimization, since we dont need the hardware for this copy > + * we release the lock and do the copy. With that we gain 5/10% perf > + */ > + sg_copy_from_buffer(areq->dst, nb_in_sg_tx, ss->buf_out, areq->nbytes); > + > + mutex_unlock(&ss->bufout_lock); > + return 0; > +} > + > +/* check and set the AES key, prepare the mode to be used */ > +int sunxi_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen) > +{ > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + > + switch (keylen) { > + case 128 / 8: > + op->keymode = SS_AES_128BITS; > + break; > + case 192 / 8: > + op->keymode = SS_AES_192BITS; > + break; > + case 256 / 8: > + op->keymode = SS_AES_256BITS; > + break; > + default: > + dev_err(ss->dev, "ERROR: Invalid keylen %u\n", keylen); > + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); > + return -EINVAL; > + } > + op->keylen = keylen; > + memcpy(op->key, key, keylen); > + return 0; > +} > + > +/* check and set the DES key, prepare the mode to be used */ > +int sunxi_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen) > +{ > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + > + if (keylen != DES_KEY_SIZE) { > + dev_err(ss->dev, "Invalid keylen %u\n", keylen); > + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); > + return -EINVAL; > + } > + op->keylen = keylen; > + memcpy(op->key, key, keylen); > + return 0; > +} > + > +/* check and set the 3DES key, prepare the mode to be used */ > +int sunxi_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen) > +{ > + struct sunxi_tfm_ctx *op = crypto_ablkcipher_ctx(tfm); > + > + if (keylen != 3 * DES_KEY_SIZE) { > + dev_err(ss->dev, "Invalid keylen %u\n", keylen); > + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); > + return -EINVAL; > + } > + op->keylen = keylen; > + memcpy(op->key, key, 
keylen); > + return 0; > +} > diff --git a/drivers/crypto/sunxi-ss/sunxi-ss-core.c b/drivers/crypto/sunxi-ss/sunxi-ss-core.c > new file mode 100644 > index 0000000..e66d7e2 > --- /dev/null > +++ b/drivers/crypto/sunxi-ss/sunxi-ss-core.c > @@ -0,0 +1,318 @@ > +/* > + * sunxi-ss-core.c - hardware cryptographic accelerator for Allwinner A20 SoC > + * > + * Copyright (C) 2013-2014 Corentin LABBE > + * > + * Core file which registers crypto algorithms supported by the SS. > + * > + * You could find a link for the datasheet in Documentation/arm/sunxi/README > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + */ > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include "sunxi-ss.h" > + > +struct sunxi_ss_ctx *ss; > + > +/* > + * General notes for whole driver: > + * > + * After each request the device must be disabled with a write of 0 in SS_CTL > + * > + * For performance reason, we use writel_relaxed/read_relaxed for all > + * operations on RX and TX FIFO and also SS_FCSR. > + * Excepts for the last write on TX FIFO. > + * For all other registers, we use writel/readl. 
> + * See http://permalink.gmane.org/gmane.linux.ports.arm.kernel/117644 > + * and http://permalink.gmane.org/gmane.linux.ports.arm.kernel/117640 > + */ > + > +static struct ahash_alg sunxi_md5_alg = { > + .init = sunxi_hash_init, > + .update = sunxi_hash_update, > + .final = sunxi_hash_final, > + .finup = sunxi_hash_finup, > + .digest = sunxi_hash_digest, > + .halg = { > + .digestsize = MD5_DIGEST_SIZE, > + .base = { > + .cra_name = "md5", > + .cra_driver_name = "md5-sunxi-ss", > + .cra_priority = 300, > + .cra_alignmask = 3, > + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, > + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, > + .cra_ctxsize = sizeof(struct sunxi_req_ctx), > + .cra_module = THIS_MODULE, > + .cra_type = &crypto_ahash_type, > + .cra_init = sunxi_hash_crainit > + } > + } > +}; > + > +static struct ahash_alg sunxi_sha1_alg = { > + .init = sunxi_hash_init, > + .update = sunxi_hash_update, > + .final = sunxi_hash_final, > + .finup = sunxi_hash_finup, > + .digest = sunxi_hash_digest, > + .halg = { > + .digestsize = SHA1_DIGEST_SIZE, > + .base = { > + .cra_name = "sha1", > + .cra_driver_name = "sha1-sunxi-ss", > + .cra_priority = 300, > + .cra_alignmask = 3, > + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, > + .cra_blocksize = SHA1_BLOCK_SIZE, > + .cra_ctxsize = sizeof(struct sunxi_req_ctx), > + .cra_module = THIS_MODULE, > + .cra_type = &crypto_ahash_type, > + .cra_init = sunxi_hash_crainit > + } > + } > +}; > + > +static struct crypto_alg sunxi_cipher_algs[] = { > +{ > + .cra_name = "cbc(aes)", > + .cra_driver_name = "cbc-aes-sunxi-ss", > + .cra_priority = 300, > + .cra_blocksize = AES_BLOCK_SIZE, > + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER, > + .cra_ctxsize = sizeof(struct sunxi_tfm_ctx), > + .cra_module = THIS_MODULE, > + .cra_alignmask = 3, > + .cra_type = &crypto_ablkcipher_type, > + .cra_init = sunxi_ss_cipher_init, > + .cra_u = { > + .ablkcipher = { > + .min_keysize = AES_MIN_KEY_SIZE, > + .max_keysize = AES_MAX_KEY_SIZE, > + .ivsize = 
AES_BLOCK_SIZE, > + .setkey = sunxi_ss_aes_setkey, > + .encrypt = sunxi_ss_cipher_encrypt, > + .decrypt = sunxi_ss_cipher_decrypt, > + } > + } > +}, { > + .cra_name = "cbc(des)", > + .cra_driver_name = "cbc-des-sunxi-ss", > + .cra_priority = 300, > + .cra_blocksize = DES_BLOCK_SIZE, > + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER, > + .cra_ctxsize = sizeof(struct sunxi_req_ctx), > + .cra_module = THIS_MODULE, > + .cra_alignmask = 3, > + .cra_type = &crypto_ablkcipher_type, > + .cra_init = sunxi_ss_cipher_init, > + .cra_u.ablkcipher = { > + .min_keysize = DES_KEY_SIZE, > + .max_keysize = DES_KEY_SIZE, > + .ivsize = DES_BLOCK_SIZE, > + .setkey = sunxi_ss_des_setkey, > + .encrypt = sunxi_ss_cipher_encrypt, > + .decrypt = sunxi_ss_cipher_decrypt, > + } > +}, { > + .cra_name = "cbc(des3_ede)", > + .cra_driver_name = "cbc-des3-sunxi-ss", > + .cra_priority = 300, > + .cra_blocksize = DES3_EDE_BLOCK_SIZE, > + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER, > + .cra_ctxsize = sizeof(struct sunxi_req_ctx), > + .cra_module = THIS_MODULE, > + .cra_alignmask = 3, > + .cra_type = &crypto_ablkcipher_type, > + .cra_init = sunxi_ss_cipher_init, > + .cra_u.ablkcipher = { > + .min_keysize = DES3_EDE_KEY_SIZE, > + .max_keysize = DES3_EDE_KEY_SIZE, > + .ivsize = DES3_EDE_BLOCK_SIZE, > + .setkey = sunxi_ss_des3_setkey, > + .encrypt = sunxi_ss_cipher_encrypt, > + .decrypt = sunxi_ss_cipher_decrypt, > + } > +} > +}; > + > +static int sunxi_ss_probe(struct platform_device *pdev) > +{ > + struct resource *res; > + u32 v; > + int err; > + unsigned long cr; > + const unsigned long cr_ahb = 24 * 1000 * 1000; > + const unsigned long cr_mod = 150 * 1000 * 1000; > + > + if (!pdev->dev.of_node) > + return -ENODEV; > + > + ss = devm_kzalloc(&pdev->dev, sizeof(*ss), GFP_KERNEL); > + if (ss == NULL) > + return -ENOMEM; Why do you dynamically allocate memory for "struct sunxi_ss_ctx *ss"? Since you have a single global pointer, it makes sense to declare "struct sunxi_ss_ctx ss" statically instead. 
An even better solution is to remove the single global pointer altogether. > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + ss->base = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(ss->base)) { > + dev_err(&pdev->dev, "Cannot request MMIO\n"); > + return PTR_ERR(ss->base); > + } > + > + ss->ssclk = devm_clk_get(&pdev->dev, "mod"); > + if (IS_ERR(ss->ssclk)) { > + err = PTR_ERR(ss->ssclk); > + dev_err(&pdev->dev, "Cannot get SS clock err=%d\n", err); > + return err; > + } > + dev_dbg(&pdev->dev, "clock ss acquired\n"); > + > + ss->busclk = devm_clk_get(&pdev->dev, "ahb"); > + if (IS_ERR(ss->busclk)) { > + err = PTR_ERR(ss->busclk); > + dev_err(&pdev->dev, "Cannot get AHB SS clock err=%d\n", err); > + return err; > + } > + dev_dbg(&pdev->dev, "clock ahb_ss acquired\n"); > + > + /* Enable both clocks */ > + err = clk_prepare_enable(ss->busclk); > + if (err != 0) { > + dev_err(&pdev->dev, "Cannot prepare_enable busclk\n"); > + return err; > + } > + err = clk_prepare_enable(ss->ssclk); > + if (err != 0) { > + dev_err(&pdev->dev, "Cannot prepare_enable ssclk\n"); > + clk_disable_unprepare(ss->busclk); Why not goto a cleanup label at the end of the function? > + return err; > + } > + > + /* > + * Check that clock have the correct rates gived in the datasheet > + * Try to set the clock to the maximum allowed > + */ > + err = clk_set_rate(ss->ssclk, cr_mod); > + if (err != 0) { > + dev_err(&pdev->dev, "Cannot set clock rate to ssclk\n"); > + clk_disable_unprepare(ss->ssclk); > + clk_disable_unprepare(ss->busclk); Why not goto "error_md5" here? > + return err; > + } > + > + cr = clk_get_rate(ss->busclk); > + if (cr >= cr_ahb) > + dev_dbg(&pdev->dev, "Clock bus %lu (%lu MHz) (must be >= %lu)\n", > + cr, cr / 1000000, cr_ahb); > + else > + dev_warn(&pdev->dev, "Clock bus %lu (%lu MHz) (must be >= %lu)\n", > + cr, cr / 1000000, cr_ahb); See the next comment. 
> + cr = clk_get_rate(ss->ssclk); > + if (cr <= cr_mod) > + if (cr < cr_mod) > + dev_info(&pdev->dev, "Clock ss %lu (%lu MHz) (must be <= %lu)\n", > + cr, cr / 1000000, cr_mod); > + else > + dev_dbg(&pdev->dev, "Clock ss %lu (%lu MHz) (must be <= %lu)\n", > + cr, cr / 1000000, cr_mod); > + else > + dev_warn(&pdev->dev, "Clock ss is at %lu (%lu MHz) (must be <= %lu)\n", > + cr, cr / 1000000, cr_mod); The handling of kernel log levels looks rather strange. As far as I understand, none of these clock rates is an error condition, so I'd recommend keeping only one informational message. > + /* > + * Datasheet named it "Die Bonding ID" > + * I expect to be a sort of Security System Revision number. > + * Since the A80 seems to have an other version of SS > + * this info could be useful > + */ > + writel(SS_ENABLED, ss->base + SS_CTL); > + v = readl(ss->base + SS_CTL); > + v >>= 16; > + v &= 0x07; > + dev_info(&pdev->dev, "Die ID %d\n", v); > + writel(0, ss->base + SS_CTL); > + > + ss->dev = &pdev->dev; > + > + mutex_init(&ss->lock); > + mutex_init(&ss->bufin_lock); > + mutex_init(&ss->bufout_lock); > + > + err = crypto_register_ahash(&sunxi_md5_alg); > + if (err) > + goto error_md5; > + err = crypto_register_ahash(&sunxi_sha1_alg); > + if (err) > + goto error_sha1; > + err = crypto_register_algs(sunxi_cipher_algs, > + ARRAY_SIZE(sunxi_cipher_algs)); > + if (err) > + goto error_ciphers; > + > + return 0; > +error_ciphers: > + crypto_unregister_ahash(&sunxi_sha1_alg); > +error_sha1: > + crypto_unregister_ahash(&sunxi_md5_alg); > +error_md5: > + clk_disable_unprepare(ss->ssclk); > + clk_disable_unprepare(ss->busclk); > + return err; > +} > + > +static int __exit sunxi_ss_remove(struct platform_device *pdev) > +{ > + if (!pdev->dev.of_node) > + return 0; This check is redundant. 
> + > + crypto_unregister_ahash(&sunxi_md5_alg); > + crypto_unregister_ahash(&sunxi_sha1_alg); > + crypto_unregister_algs(sunxi_cipher_algs, > + ARRAY_SIZE(sunxi_cipher_algs)); > + > + if (ss->buf_in != NULL) > + kfree(ss->buf_in); > + if (ss->buf_out != NULL) > + kfree(ss->buf_out); > + > + writel(0, ss->base + SS_CTL); > + clk_disable_unprepare(ss->busclk); > + clk_disable_unprepare(ss->ssclk); > + return 0; > +} > + > +static const struct of_device_id a20ss_crypto_of_match_table[] = { > + { .compatible = "allwinner,sun7i-a20-crypto" }, > + {} > +}; > +MODULE_DEVICE_TABLE(of, a20ss_crypto_of_match_table); > + > +static struct platform_driver sunxi_ss_driver = { > + .probe = sunxi_ss_probe, > + .remove = __exit_p(sunxi_ss_remove), > + .driver = { > + .owner = THIS_MODULE, > + .name = "sunxi-ss", > + .of_match_table = a20ss_crypto_of_match_table, > + }, > +}; > + > +module_platform_driver(sunxi_ss_driver); > + > +MODULE_DESCRIPTION("Allwinner Security System cryptographic accelerator"); > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Corentin LABBE "); > diff --git a/drivers/crypto/sunxi-ss/sunxi-ss-hash.c b/drivers/crypto/sunxi-ss/sunxi-ss-hash.c > new file mode 100644 > index 0000000..ec8758f > --- /dev/null > +++ b/drivers/crypto/sunxi-ss/sunxi-ss-hash.c > @@ -0,0 +1,445 @@ > +/* > + * sunxi-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC > + * > + * Copyright (C) 2013-2014 Corentin LABBE > + * > + * This file add support for MD5 and SHA1. > + * > + * You could find the datasheet in Documentation/arm/sunxi/README > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + */ > +#include "sunxi-ss.h" > + > +/* This is a totaly arbitrary value */ > +#define SS_TIMEOUT 100 > + > +extern struct sunxi_ss_ctx *ss; > + > +int sunxi_hash_crainit(struct crypto_tfm *tfm) > +{ > + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), > + sizeof(struct sunxi_req_ctx)); > + return 0; > +} > + > +/* sunxi_hash_init: initialize request context */ > +int sunxi_hash_init(struct ahash_request *areq) > +{ > + const char *hash_type; > + struct sunxi_req_ctx *op = ahash_request_ctx(areq); > + > + memset(op, 0, sizeof(struct sunxi_req_ctx)); > + > + hash_type = crypto_tfm_alg_name(areq->base.tfm); > + > + if (strcmp(hash_type, "sha1") == 0) > + op->mode = SS_OP_SHA1; > + if (strcmp(hash_type, "md5") == 0) > + op->mode = SS_OP_MD5; > + if (op->mode == 0) > + return -EINVAL; > + > + return 0; > +} > + > +static u32 rx_cnt; > + > +inline void ss_writer(const u32 v) > +{ > + u32 spaces; > + > + writel(v, ss->base + SS_RXFIFO); > + rx_cnt--; > + while (rx_cnt == 0) { > + spaces = readl_relaxed(ss->base + SS_FCSR); > + rx_cnt = SS_RXFIFO_SPACES(spaces); > + } > +} > + > +inline void ss_writer_relaxed(const u32 v) > +{ > + u32 spaces; > + > + writel_relaxed(v, ss->base + SS_RXFIFO); > + rx_cnt--; > + while (rx_cnt == 0) { > + spaces = readl_relaxed(ss->base + SS_FCSR); > + rx_cnt = SS_RXFIFO_SPACES(spaces); > + } > +} > + > +/* > + * sunxi_hash_update: update hash engine > + * > + * Could be used for both SHA1 and MD5 > + * Write data by step of 32bits and put then in the SS. > + * > + * Since we cannot leave partial data and hash state in the engine, > + * we need to get the hash state at the end of this function. 
> + * After some work, I have found that we can get the hash state every 64o > + * > + * So the first work is to get the number of bytes to write to SS modulo 64 > + * The extra bytes will go to two different destination: > + * op->wait for full 32bits word > + * op->wb (waiting bytes) for partial 32 bits word > + * So we can have up to (64/4)-1 op->wait words and 0/1/2/3 bytes in wb > + * > + * So at the begin of update() > + * if op->nwait * 4 + areq->nbytes < 64 > + * => all data writed to wait buffers and end=0 > + * if not write all nwait to the device and position end to complete to 64o > + * > + * example 1: > + * update1 60o => nwait=15 > + * update2 60o => need one more word to have 64o > + * end=4 > + * so write all data in op->wait and one word of SGs > + * write remaining data in op->wait > + * final state op->nwait=14 > + */ > +int sunxi_hash_update(struct ahash_request *areq) > +{ > + u32 v, ivmode = 0; > + unsigned int i = 0; > + /* > + * i is the total bytes read from SGs, to be compared to areq->nbytes > + * i is important because we cannot rely on SG length since the sum of > + * SG->length could be greater than areq->nbytes > + */ > + > + struct sunxi_req_ctx *op = ahash_request_ctx(areq); > + struct scatterlist *in_sg; > + unsigned int in_i = 0; /* advancement in the current SG */ > + u64 end; > + /* > + * end is the position when we need to stop writing to the device, > + * to be compared to i > + */ > + int in_r; > + void *src_addr; > + > + dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x bw=%u ww=%u", > + __func__, crypto_tfm_alg_name(areq->base.tfm), > + op->byte_count, areq->nbytes, op->mode, > + op->nbw, op->nwait); > + > + if (areq->nbytes == 0) > + return 0; > + > + end = ((areq->nbytes + op->nwait * 4 + op->nbw) / 64) * 64 > + - op->nbw - op->nwait * 4; > + > + if (end > areq->nbytes || areq->nbytes - end > 63) { > + dev_err(ss->dev, "ERROR: Bound error %llu %u\n", > + end, areq->nbytes); > + return -EINVAL; > + } > + > + if (op->nwait > 
0 && end > 0) { > + /* a precedent update was done */ > + for (i = 0; i < op->nwait; i++) { > + ss_writer(op->wait[i]); > + op->byte_count += 4; > + } > + op->nwait = 0; > + } > + > + mutex_lock(&ss->lock); > + /* > + * if some data have been processed before, > + * we need to restore the partial hash state > + */ > + if (op->byte_count > 0) { > + ivmode = SS_IV_ARBITRARY; > + for (i = 0; i < 5; i++) > + writel(op->hash[i], ss->base + SS_IV0 + i * 4); > + } > + /* Enable the device */ > + writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL); > + > + rx_cnt = 0; > + i = 0; > + > + in_sg = areq->src; > + src_addr = kmap(sg_page(in_sg)) + in_sg->offset; > + if (src_addr == NULL) { > + mutex_unlock(&ss->lock); > + dev_err(ss->dev, "ERROR: Cannot kmap source buffer\n"); > + return -EFAULT; > + } > + do { > + /* > + * step 1, if some bytes remains from last SG, > + * try to complete them to 4 and send that word > + */ > + if (op->nbw > 0) { > + while (op->nbw < 4 && i < areq->nbytes && > + in_i < in_sg->length) { > + op->wb |= (*(u8 *)(src_addr + in_i)) > + << (8 * op->nbw); > + dev_dbg(ss->dev, "%s Complete w=%d wb=%x\n", > + __func__, op->nbw, op->wb); > + i++; > + in_i++; > + op->nbw++; > + } > + if (op->nbw == 4) { > + if (i <= end) { > + ss_writer(op->wb); > + op->byte_count += 4; > + } else { > + op->wait[op->nwait] = op->wb; > + op->nwait++; > + dev_dbg(ss->dev, "%s Keep %u bytes after %llu\n", > + __func__, op->nwait, end); > + } > + op->nbw = 0; > + op->wb = 0; > + } > + } > + /* step 2, main loop, read data 4bytes at a time */ > + while (i < areq->nbytes && in_i < in_sg->length) { > + /* how many bytes we can read, (we need 4) */ > + in_r = min(in_sg->length - in_i, areq->nbytes - i); > + if (in_r < 4) { > + /* Not enough data to write to the device */ > + op->wb = 0; > + while (in_r > 0) { > + op->wb |= (*(u8 *)(src_addr + in_i)) > + << (8 * op->nbw); > + dev_dbg(ss->dev, "%s ending bw=%d wb=%x\n", > + __func__, op->nbw, op->wb); > + in_r--; > + i++; > + 
in_i++; > + op->nbw++; > + } > + goto nextsg; > + } > + v = *(u32 *)(src_addr + in_i); > + if (i < end) { > + /* last write must be done without relaxed */ > + if (i + 4 >= end) > + ss_writer(v); > + else > + ss_writer_relaxed(v); > + i += 4; > + op->byte_count += 4; > + in_i += 4; > + } else { > + op->wait[op->nwait] = v; > + i += 4; > + in_i += 4; > + op->nwait++; > + dev_dbg(ss->dev, "%s Keep word ww=%u after %llu\n", > + __func__, op->nwait, end); > + if (op->nwait > 15) { > + dev_err(ss->dev, "FATAL: Cannot enqueue more, bug?\n"); > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return -EIO; > + } > + } > + } > +nextsg: > + /* Nothing more to read in this SG */ > + if (in_i == in_sg->length) { > + kunmap(sg_page(in_sg)); > + do { > + in_sg = sg_next(in_sg); > + } while (in_sg != NULL && in_sg->length == 0); > + in_i = 0; > + if (in_sg != NULL) { > + src_addr = kmap(sg_page(in_sg)) + in_sg->offset; > + if (src_addr == NULL) { > + mutex_unlock(&ss->lock); > + dev_err(ss->dev, "ERROR: Cannot kmap source buffer\n"); > + return -EFAULT; > + } > + } > + } > + } while (in_sg != NULL && i < areq->nbytes); > + > + /* ask the device to finish the hashing */ > + writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL); > + i = 0; > + do { > + v = readl(ss->base + SS_CTL); > + i++; > + } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0); > + if (i >= SS_TIMEOUT) { > + dev_err(ss->dev, "ERROR: %s hash end timeout after %d loop, CTL=%x\n", > + __func__, i, v); > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return -EIO; > + } > + > + /* get the partial hash */ > + if (op->mode == SS_OP_SHA1) { > + for (i = 0; i < 5; i++) > + op->hash[i] = readl(ss->base + SS_MD0 + i * 4); > + } else { > + for (i = 0; i < 4; i++) > + op->hash[i] = readl(ss->base + SS_MD0 + i * 4); > + } > + > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return 0; > +} > + > +/* > + * sunxi_hash_final: finalize hashing operation > + * > + * If we 
have some remaining bytes, we write them. > + * Then ask the SS to finalize the hashing operation > + */ > +int sunxi_hash_final(struct ahash_request *areq) > +{ > + u32 v, ivmode = 0; > + unsigned int i; > + int zeros; > + unsigned int index, padlen; > + __be64 bits; > + struct sunxi_req_ctx *op = ahash_request_ctx(areq); > + > + dev_dbg(ss->dev, "%s byte=%llu len=%u mode=%x bw=%u %x h=%x ww=%u", > + __func__, op->byte_count, areq->nbytes, op->mode, > + op->nbw, op->wb, op->hash[0], op->nwait); > + > + mutex_lock(&ss->lock); > + rx_cnt = 0; > + > + /* > + * if we have already written something, > + * restore the partial hash state > + */ > + if (op->byte_count > 0) { > + ivmode = SS_IV_ARBITRARY; > + for (i = 0; i < 5; i++) > + writel(op->hash[i], ss->base + SS_IV0 + i * 4); > + } > + writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL); > + > + /* write the remaining words of the wait buffer */ > + if (op->nwait > 0) { > + for (i = 0; i < op->nwait; i++) { > + v = op->wait[i]; > + ss_writer(v); > + op->byte_count += 4; > + dev_dbg(ss->dev, "%s write %llu i=%u %x\n", > + __func__, op->byte_count, i, v); > + } > + op->nwait = 0; > + } > + > + /* write the remaining bytes of the nbw buffer */ > + if (op->nbw > 0) { > + op->wb |= ((1 << 7) << (op->nbw * 8)); > + ss_writer(op->wb); > + } else { > + ss_writer((1 << 7)); > + } > + > + /* > + * amount of padding needed to reach 64 bytes minus 8 (size) minus 4 (final 1) > + * I take the operations from other md5/sha1 implementations > + */ > + > + /* we have already sent 4 more bytes, of which nbw are data */ > + if (op->mode == SS_OP_MD5) { > + index = (op->byte_count + 4) & 0x3f; > + op->byte_count += op->nbw; > + if (index > 56) > + zeros = (120 - index) / 4; > + else > + zeros = (56 - index) / 4; > + } else { > + op->byte_count += op->nbw; > + index = op->byte_count & 0x3f; > + padlen = (index < 56) ?
(56 - index) : ((64+56) - index); > + zeros = (padlen - 1) / 4; > + } > + for (i = 0; i < zeros; i++) > + ss_writer(0); > + > + /* write the length of data */ > + if (op->mode == SS_OP_SHA1) { > + bits = cpu_to_be64(op->byte_count << 3); > + ss_writer(bits & 0xffffffff); > + ss_writer((bits >> 32) & 0xffffffff); > + } else { > + ss_writer((op->byte_count << 3) & 0xffffffff); > + ss_writer((op->byte_count >> 29) & 0xffffffff); > + } > + > + /* Tell the SS to stop the hashing */ > + writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL); > + > + /* > + * Wait for SS to finish the hash. > + * The timeout can happen only in case of bad overclocking > + * or driver bug. > + */ > + i = 0; > + do { > + v = readl(ss->base + SS_CTL); > + i++; > + } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0); > + if (i >= SS_TIMEOUT) { > + dev_err(ss->dev, "ERROR: hash end timeout %d>%d ctl=%x len=%u\n", > + i, SS_TIMEOUT, v, areq->nbytes); > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return -EIO; > + } > + > + /* Get the hash from the device */ > + if (op->mode == SS_OP_SHA1) { > + for (i = 0; i < 5; i++) { > + v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4)); > + memcpy(areq->result + i * 4, &v, 4); > + } > + } else { > + for (i = 0; i < 4; i++) { > + v = readl(ss->base + SS_MD0 + i * 4); > + memcpy(areq->result + i * 4, &v, 4); > + } > + } > + writel(0, ss->base + SS_CTL); > + mutex_unlock(&ss->lock); > + return 0; > +} > + > +/* sunxi_hash_finup: finalize hashing operation after an update */ > +int sunxi_hash_finup(struct ahash_request *areq) > +{ > + int err; > + > + err = sunxi_hash_update(areq); > + if (err != 0) > + return err; > + > + return sunxi_hash_final(areq); > +} > + > +/* combo of init/update/final functions */ > +int sunxi_hash_digest(struct ahash_request *areq) > +{ > + int err; > + > + err = sunxi_hash_init(areq); > + if (err != 0) > + return err; > + > + err = sunxi_hash_update(areq); > + if (err != 0) > + return err; > + > + return
sunxi_hash_final(areq); > +} > diff --git a/drivers/crypto/sunxi-ss/sunxi-ss.h b/drivers/crypto/sunxi-ss/sunxi-ss.h > new file mode 100644 > index 0000000..331e75b > --- /dev/null > +++ b/drivers/crypto/sunxi-ss/sunxi-ss.h > @@ -0,0 +1,193 @@ > +/* > + * sunxi-ss.c - hardware cryptographic accelerator for Allwinner A20 SoC > + * > + * Copyright (C) 2013-2014 Corentin LABBE > + * > + * Support AES cipher with 128,192,256 bits keysize. > + * Support MD5 and SHA1 hash algorithms. > + * Support DES and 3DES > + * > + * You could find the datasheet in Documentation/arm/sunxi/README > + * > + * Licensed under the GPL-2. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#define SS_CTL 0x00 > +#define SS_KEY0 0x04 > +#define SS_KEY1 0x08 > +#define SS_KEY2 0x0C > +#define SS_KEY3 0x10 > +#define SS_KEY4 0x14 > +#define SS_KEY5 0x18 > +#define SS_KEY6 0x1C > +#define SS_KEY7 0x20 > + > +#define SS_IV0 0x24 > +#define SS_IV1 0x28 > +#define SS_IV2 0x2C > +#define SS_IV3 0x30 > + > +#define SS_CNT0 0x34 > +#define SS_CNT1 0x38 > +#define SS_CNT2 0x3C > +#define SS_CNT3 0x40 > + > +#define SS_FCSR 0x44 > +#define SS_ICSR 0x48 > + > +#define SS_MD0 0x4C > +#define SS_MD1 0x50 > +#define SS_MD2 0x54 > +#define SS_MD3 0x58 > +#define SS_MD4 0x5C > + > +#define SS_RXFIFO 0x200 > +#define SS_TXFIFO 0x204 > + > +/* SS_CTL configuration values */ > + > +/* PRNG generator mode - bit 15 */ > +#define SS_PRNG_ONESHOT (0 << 15) > +#define SS_PRNG_CONTINUE (1 << 15) > + > +/* IV mode for hash */ > +#define SS_IV_ARBITRARY (1 << 14) > + > +/* SS operation mode - bits 12-13 */ > +#define SS_ECB (0 << 12) > +#define SS_CBC (1 << 12) > +#define SS_CNT (2 << 12) > + > +/* Counter width for CNT mode - bits 10-11 */ > +#define SS_CNT_16BITS (0 << 10) > +#define SS_CNT_32BITS (1 << 10) > +#define SS_CNT_64BITS (2 << 10) > + > 
+/* Key size for AES - bits 8-9 */ > +#define SS_AES_128BITS (0 << 8) > +#define SS_AES_192BITS (1 << 8) > +#define SS_AES_256BITS (2 << 8) > + > +/* Operation direction - bit 7 */ > +#define SS_ENCRYPTION (0 << 7) > +#define SS_DECRYPTION (1 << 7) > + > +/* SS Method - bits 4-6 */ > +#define SS_OP_AES (0 << 4) > +#define SS_OP_DES (1 << 4) > +#define SS_OP_3DES (2 << 4) > +#define SS_OP_SHA1 (3 << 4) > +#define SS_OP_MD5 (4 << 4) > +#define SS_OP_PRNG (5 << 4) > + > +/* Data end bit - bit 2 */ > +#define SS_DATA_END (1 << 2) > + > +/* PRNG start bit - bit 1 */ > +#define SS_PRNG_START (1 << 1) > + > +/* SS Enable bit - bit 0 */ > +#define SS_DISABLED (0 << 0) > +#define SS_ENABLED (1 << 0) > + > +/* SS_FCSR configuration values */ > +/* RX FIFO status - bit 30 */ > +#define SS_RXFIFO_FREE (1 << 30) > + > +/* RX FIFO empty spaces - bits 24-29 */ > +#define SS_RXFIFO_SPACES(val) (((val) >> 24) & 0x3f) > + > +/* TX FIFO status - bit 22 */ > +#define SS_TXFIFO_AVAILABLE (1 << 22) > + > +/* TX FIFO available spaces - bits 16-21 */ > +#define SS_TXFIFO_SPACES(val) (((val) >> 16) & 0x3f) > + > +#define SS_RXFIFO_EMP_INT_PENDING (1 << 10) > +#define SS_TXFIFO_AVA_INT_PENDING (1 << 8) > +#define SS_RXFIFO_EMP_INT_ENABLE (1 << 2) > +#define SS_TXFIFO_AVA_INT_ENABLE (1 << 0) > + > +/* SS_ICSR configuration values */ > +#define SS_ICS_DRQ_ENABLE (1 << 4) > + > +struct sunxi_ss_ctx { > + void __iomem *base; > + int irq; > + struct clk *busclk; > + struct clk *ssclk; > + struct device *dev; > + struct resource *res; > + void *buf_in; /* pointer to data to be uploaded to the device */ > + size_t buf_in_size; /* size of buf_in */ > + void *buf_out; > + size_t buf_out_size; > + struct mutex lock; /* control the use of the device */ > + struct mutex bufout_lock; /* control the use of buf_out*/ > + struct mutex bufin_lock; /* control the use of buf_in*/ > +}; > + > +struct sunxi_tfm_ctx { > + u32 key[AES_MAX_KEY_SIZE / 4];/* divided by sizeof(u32) */ > + u32 keylen; > + u32 keymode;
> +}; > + > +struct sunxi_req_ctx { > + u32 mode; > + u64 byte_count; /* number of bytes "uploaded" to the device */ > + u32 wb; /* a partial word waiting to be completed and > + uploaded to the device */ > + /* number of bytes to be uploaded in the wb word */ > + unsigned int nbw; > + u32 hash[5]; > + u32 wait[64]; > + unsigned int nwait; > +}; > + > +#define SS_SEED_LEN (192/8) > +#define SS_DATA_LEN (160/8) > + > +struct prng_context { > + u32 seed[SS_SEED_LEN/4]; > + unsigned int slen; > +}; > + > +int sunxi_hash_crainit(struct crypto_tfm *tfm); > +int sunxi_hash_init(struct ahash_request *areq); > +int sunxi_hash_update(struct ahash_request *areq); > +int sunxi_hash_final(struct ahash_request *areq); > +int sunxi_hash_finup(struct ahash_request *areq); > +int sunxi_hash_digest(struct ahash_request *areq); > +int sunxi_hash_export(struct ahash_request *areq, void *out); > +int sunxi_hash_import(struct ahash_request *areq, const void *in); > + > +int sunxi_ss_aes_poll(struct ablkcipher_request *areq, u32 mode); > +int sunxi_ss_des_poll(struct ablkcipher_request *areq, u32 mode); > +int sunxi_ss_cipher_init(struct crypto_tfm *tfm); > +int sunxi_ss_cipher_encrypt(struct ablkcipher_request *areq); > +int sunxi_ss_cipher_decrypt(struct ablkcipher_request *areq); > +int sunxi_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen); > +int sunxi_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen); > +int sunxi_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key, > + unsigned int keylen); > -- With best wishes, Vladimir