This adds support for the Imagination Technologies hash
accelerator which provides hardware acceleration for
SHA1 SHA244 SHA256 and MD5 hashes.
Tested on silicon, using testmgr.
Changes from V2:
* This hardware does not support importing a partial hash state,
so the init, update, final and finup have been reworked to use
a fallback driver; only digest remains as hardware accelerated.
* Simplified the driver as a result of the above rework
Changes from V1:
* Addressed review comments from Andrew Bresticker and
Vladimir Zapolskiy
* rebased to current linux-next
James Hartley (2):
crypto: Add Imagination Technologies hw hash accelerator
Documentation: crypto: Add DT binding info for the img hw hash
accelerator
.../devicetree/bindings/crypto/img-hash.txt | 27 +
drivers/crypto/Kconfig | 14 +
drivers/crypto/Makefile | 2 +
drivers/crypto/img-hash.c | 1037 ++++++++++++++++++++
4 files changed, 1080 insertions(+)
create mode 100644 Documentation/devicetree/bindings/crypto/img-hash.txt
create mode 100644 drivers/crypto/img-hash.c
--
1.7.9.5
This adds support for the Imagination Technologies hash
accelerator which provides hardware acceleration for
SHA1 SHA224 SHA256 and MD5 hashes.
Signed-off-by: James Hartley <[email protected]>
---
drivers/crypto/Kconfig | 14 +
drivers/crypto/Makefile | 2 +
drivers/crypto/img-hash.c | 1037 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1053 insertions(+)
create mode 100644 drivers/crypto/img-hash.c
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdf..c72223e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -436,4 +436,18 @@ config CRYPTO_DEV_QCE
hardware. To compile this driver as a module, choose M here. The
module will be called qcrypto.
+config CRYPTO_DEV_IMGTEC_HASH
+ depends on MIPS || COMPILE_TEST
+ tristate "Imagination Technologies hardware hash accelerator"
+ select CRYPTO_ALG_API
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_SHA224
+ select CRYPTO_SHA256
+ select CRYPTO_HASH
+ help
+ This driver interfaces with the Imagination Technologies
+ hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
+ hashing algorithms.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3924f93..fb84be7 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
new file mode 100644
index 0000000..94a3a6f
--- /dev/null
+++ b/drivers/crypto/img-hash.c
@@ -0,0 +1,1037 @@
+/*
+ * Copyright (c) 2014 Imagination Technologies
+ * Authors: Will Thomas, James Hartley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Interface structure taken from omap-sham driver
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#define CR_RESET 0
+#define CR_RESET_SET 1
+#define CR_RESET_UNSET 0
+
+#define CR_MESSAGE_LENGTH_H 0x4
+#define CR_MESSAGE_LENGTH_L 0x8
+
+#define CR_CONTROL 0xc
+#define CR_CONTROL_BYTE_ORDER_3210 0
+#define CR_CONTROL_BYTE_ORDER_0123 1
+#define CR_CONTROL_BYTE_ORDER_2310 2
+#define CR_CONTROL_BYTE_ORDER_1032 3
+#define CR_CONTROL_BYTE_ORDER_SHIFT 8
+#define CR_CONTROL_ALGO_MD5 0
+#define CR_CONTROL_ALGO_SHA1 1
+#define CR_CONTROL_ALGO_SHA224 2
+#define CR_CONTROL_ALGO_SHA256 3
+
+#define CR_INTSTAT 0x10
+#define CR_INTENAB 0x14
+#define CR_INTCLEAR 0x18
+#define CR_INT_RESULTS_AVAILABLE BIT(0)
+#define CR_INT_NEW_RESULTS_SET BIT(1)
+#define CR_INT_RESULT_READ_ERR BIT(2)
+#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
+#define CR_INT_STATUS BIT(8)
+
+#define CR_RESULT_QUEUE 0x1c
+#define CR_RSD0 0x40
+#define CR_CORE_REV 0x50
+#define CR_CORE_DES1 0x60
+#define CR_CORE_DES2 0x70
+
+#define DRIVER_FLAGS_BUSY BIT(0)
+#define DRIVER_FLAGS_FINAL BIT(1)
+#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
+#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
+#define DRIVER_FLAGS_INIT BIT(4)
+#define DRIVER_FLAGS_CPU BIT(5)
+#define DRIVER_FLAGS_DMA_READY BIT(6)
+#define DRIVER_FLAGS_ERROR BIT(7)
+#define DRIVER_FLAGS_SG BIT(8)
+#define DRIVER_FLAGS_SHA1 BIT(18)
+#define DRIVER_FLAGS_SHA224 BIT(19)
+#define DRIVER_FLAGS_SHA256 BIT(20)
+#define DRIVER_FLAGS_MD5 BIT(21)
+
+#define IMG_HASH_QUEUE_LENGTH 20
+#define IMG_HASH_DMA_THRESHOLD 64
+
+#ifdef __LITTLE_ENDIAN
+#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_3210)
+#else
+#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_0123)
+#endif
+
+struct img_hash_dev;
+
+struct img_hash_request_ctx {
+ struct img_hash_dev *hdev;
+ u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+ unsigned long flags;
+ size_t digsize;
+
+ dma_addr_t dma_addr;
+ size_t dma_ct;
+
+ /* sg root */
+ struct scatterlist *sgfirst;
+ /* walk state */
+ struct scatterlist *sg;
+ size_t nents;
+ size_t offset;
+ unsigned int total;
+ size_t sent;
+
+ unsigned long op;
+
+ size_t bufcnt;
+ u8 buffer[0] __aligned(sizeof(u32));
+ struct ahash_request fallback_req;
+};
+
+struct img_hash_ctx {
+ struct img_hash_dev *hdev;
+ unsigned long flags;
+ struct crypto_ahash *fallback;
+};
+
+struct img_hash_dev {
+ struct list_head list;
+ struct device *dev;
+ struct clk *iclk;
+ struct clk *fclk;
+ int irq;
+ void __iomem *io_base;
+
+ phys_addr_t bus_addr;
+ void __iomem *cpu_addr;
+
+ spinlock_t lock;
+ int err;
+ struct tasklet_struct done_task;
+ struct tasklet_struct dma_task;
+
+ unsigned long flags;
+ struct crypto_queue queue;
+ struct ahash_request *req;
+
+ struct dma_chan *dma_lch;
+};
+
+struct img_hash_drv {
+ struct list_head dev_list;
+ spinlock_t lock;
+};
+
+static struct img_hash_drv img_hash = {
+ .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
+ .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
+};
+
+static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
+{
+ return readl_relaxed(hdev->io_base + offset);
+}
+
+static inline void img_hash_write(struct img_hash_dev *hdev,
+ u32 offset, u32 value)
+{
+ writel_relaxed(value, hdev->io_base + offset);
+}
+
+static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
+{
+ return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
+}
+
+static void img_hash_start(struct img_hash_dev *hdev, bool dma)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+ u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
+
+ if (ctx->flags & DRIVER_FLAGS_MD5)
+ cr |= CR_CONTROL_ALGO_MD5;
+ else if (ctx->flags & DRIVER_FLAGS_SHA1)
+ cr |= CR_CONTROL_ALGO_SHA1;
+ else if (ctx->flags & DRIVER_FLAGS_SHA224)
+ cr |= CR_CONTROL_ALGO_SHA224;
+ else if (ctx->flags & DRIVER_FLAGS_SHA256)
+ cr |= CR_CONTROL_ALGO_SHA256;
+ dev_dbg(hdev->dev, "Starting hash process\n");
+ img_hash_write(hdev, CR_CONTROL, cr);
+
+ /*
+ * The hardware block requires two cycles between writing the control
+ * register and writing the first word of data in non DMA mode, to
+ * ensure the first data write is not grouped in burst with the control
+ * register write a read is issued to 'flush' the bus.
+ */
+ if (!dma)
+ img_hash_read(hdev, CR_CONTROL);
+}
+
+static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
+ size_t length, int final)
+{
+ u32 count, len32;
+ const u32 *buffer = (const u32 *)buf;
+
+ dev_dbg(hdev->dev, "xmit_cpu: length: %u bytes\n", length);
+
+ if (final)
+ hdev->flags |= DRIVER_FLAGS_FINAL;
+
+ len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+ for (count = 0; count < len32; count++)
+ writel_relaxed(buffer[count], hdev->cpu_addr);
+
+ return -EINPROGRESS;
+}
+
+static void img_hash_dma_callback(void *data)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ if (ctx->bufcnt) {
+ img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
+ ctx->bufcnt = 0;
+ }
+ if (ctx->sg)
+ tasklet_schedule(&hdev->dma_task);
+}
+
+static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
+{
+ struct dma_async_tx_descriptor *desc;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+ if (ctx->dma_ct == 0) {
+ dev_err(hdev->dev, "Invalid DMA sg\n");
+ hdev->err = -EINVAL;
+ return -EINVAL;
+ }
+
+ desc = dmaengine_prep_slave_sg(hdev->dma_lch,
+ sg,
+ ctx->dma_ct,
+ DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!desc) {
+ dev_err(hdev->dev, "Null DMA descriptor\n");
+ hdev->err = -EINVAL;
+ dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+ return -EINVAL;
+ }
+ desc->callback = img_hash_dma_callback;
+ desc->callback_param = hdev;
+ dmaengine_submit(desc);
+ dma_async_issue_pending(hdev->dma_lch);
+
+ return 0;
+}
+
+static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+ int ret = 0;
+
+ ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
+ ctx->buffer, hdev->req->nbytes);
+
+ ctx->total = hdev->req->nbytes;
+ ctx->bufcnt = 0;
+
+ hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
+
+ img_hash_start(hdev, false);
+
+ if (ctx->total)
+ ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static int img_hash_finish(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+
+ if (!req->result)
+ return -EINVAL;
+
+ memcpy(req->result, ctx->digest, ctx->digsize);
+
+ return 0;
+}
+
+static void img_hash_copy_hash(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ u32 *hash = (u32 *)ctx->digest;
+ int i;
+
+ for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
+ hash[i] = img_hash_read_result_queue(ctx->hdev);
+}
+
+static void img_hash_finish_req(struct ahash_request *req, int err)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ struct img_hash_dev *hdev = ctx->hdev;
+
+ if (!err) {
+ img_hash_copy_hash(req);
+ if (DRIVER_FLAGS_FINAL & hdev->flags)
+ err = img_hash_finish(req);
+ } else {
+ dev_warn(hdev->dev, "Hash failed with error %d\n", err);
+ ctx->flags |= DRIVER_FLAGS_ERROR;
+ }
+
+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
+ DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
+
+ if (req->base.complete)
+ req->base.complete(&req->base, err);
+}
+
+static int img_hash_write_via_dma(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ img_hash_start(hdev, true);
+
+ dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
+
+ if (!ctx->total)
+ hdev->flags |= DRIVER_FLAGS_FINAL;
+
+ hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
+
+ tasklet_schedule(&hdev->dma_task);
+
+ return -EINPROGRESS;
+}
+
+static int img_hash_dma_init(struct img_hash_dev *hdev)
+{
+ struct dma_slave_config dma_conf;
+ int err = -EINVAL;
+
+ hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
+ if (!hdev->dma_lch) {
+ dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
+ return -EBUSY;
+ }
+ dma_conf.direction = DMA_MEM_TO_DEV;
+ dma_conf.dst_addr = hdev->bus_addr;
+ dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_conf.dst_maxburst = 16;
+ dma_conf.device_fc = false;
+
+ err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
+
+ if (err) {
+ dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
+ dma_release_channel(hdev->dma_lch);
+ return err;
+ }
+ return 0;
+}
+
+static void img_hash_dma_task(unsigned long d)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)d;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+ u8 *addr;
+ size_t nbytes, bleft, wsend, len, tbc;
+ struct scatterlist tsg;
+
+ if (!ctx->sg)
+ return;
+
+ addr = sg_virt(ctx->sg);
+ nbytes = ctx->sg->length - ctx->offset;
+
+ /* The hash accelerator does not support a data valid mask. This means
+ * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
+ * padding bytes in the last word written by that dma would erroneously
+ * be included in the hash. To avoid this we round down the transfer,
+ * and add the excess to the start of the next dma. It does not matter
+ * that the final dma may not be a multiple of 4 bytes as the hashing
+ * block is programmed to accept the correct number of bytes.
+ */
+
+ bleft = nbytes % 4;
+ wsend = (nbytes / 4);
+
+ if (wsend) {
+ sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
+ if (img_hash_xmit_dma(hdev, &tsg)) {
+ dev_err(hdev->dev, "DMA failed, falling back to CPU");
+ ctx->flags |= DRIVER_FLAGS_CPU;
+ hdev->err = 0;
+ img_hash_xmit_cpu(hdev, addr + ctx->offset,
+ wsend * 4, 0);
+ ctx->sent += wsend * 4;
+ wsend = 0;
+ } else {
+ ctx->sent += wsend * 4;
+ }
+ }
+
+ if (bleft) {
+ ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+ ctx->buffer, bleft, ctx->sent);
+ tbc = 0;
+ ctx->sg = sg_next(ctx->sg);
+ while (ctx->sg && (ctx->bufcnt < 4)) {
+ len = ctx->sg->length;
+ if (likely(len > (4 - ctx->bufcnt)))
+ len = 4 - ctx->bufcnt;
+ tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+ ctx->buffer + ctx->bufcnt, len,
+ ctx->sent + ctx->bufcnt);
+ ctx->bufcnt += tbc;
+ if (tbc >= ctx->sg->length) {
+ ctx->sg = sg_next(ctx->sg);
+ tbc = 0;
+ }
+ }
+
+ ctx->sent += ctx->bufcnt;
+ ctx->offset = tbc;
+
+ if (!wsend)
+ img_hash_dma_callback(hdev);
+ } else {
+ ctx->offset = 0;
+ ctx->sg = sg_next(ctx->sg);
+ }
+}
+
+static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
+{
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+ if (ctx->flags & DRIVER_FLAGS_SG)
+ dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
+
+ return 0;
+}
+
+static int img_hash_process_data(struct img_hash_dev *hdev)
+{
+ struct ahash_request *req = hdev->req;
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ int err = 0;
+
+ ctx->bufcnt = 0;
+
+ if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
+ dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n",
+ req->nbytes);
+ err = img_hash_write_via_dma(hdev);
+ } else {
+ dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n",
+ req->nbytes);
+ err = img_hash_write_via_cpu(hdev);
+ }
+ return err;
+}
+
+static int img_hash_hw_init(struct img_hash_dev *hdev)
+{
+ unsigned long long nbits;
+ u32 u, l;
+ int ret;
+
+ ret = clk_prepare_enable(hdev->iclk);
+ if (ret)
+ return ret;
+
+ img_hash_write(hdev, CR_RESET, CR_RESET_SET);
+ img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
+ img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
+
+ nbits = (hdev->req->nbytes << 3);
+ u = nbits >> 32;
+ l = nbits;
+ img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
+ img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
+
+ if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
+ hdev->flags |= DRIVER_FLAGS_INIT;
+ hdev->err = 0;
+ }
+ dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
+ return 0;
+}
+
+static int img_hash_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_init(&rctx->fallback_req);
+}
+
+static int img_hash_handle_queue(struct img_hash_dev *hdev,
+ struct ahash_request *req)
+{
+ struct crypto_async_request *async_req, *backlog;
+ struct img_hash_request_ctx *ctx;
+ unsigned long flags;
+ int err = 0, res = 0;
+
+ spin_lock_irqsave(&hdev->lock, flags);
+
+ if (req)
+ res = ahash_enqueue_request(&hdev->queue, req);
+
+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
+ spin_unlock_irqrestore(&hdev->lock, flags);
+ return res;
+ }
+
+ backlog = crypto_get_backlog(&hdev->queue);
+ async_req = crypto_dequeue_request(&hdev->queue);
+ if (async_req)
+ hdev->flags |= DRIVER_FLAGS_BUSY;
+
+ spin_unlock_irqrestore(&hdev->lock, flags);
+
+ if (!async_req)
+ return res;
+
+ if (backlog)
+ backlog->complete(backlog, -EINPROGRESS);
+
+ req = ahash_request_cast(async_req);
+ hdev->req = req;
+
+ ctx = ahash_request_ctx(req);
+
+ dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
+ ctx->op, req->nbytes);
+
+ err = img_hash_hw_init(hdev);
+
+ if (!err)
+ err = img_hash_process_data(hdev);
+
+ if (err != -EINPROGRESS) {
+ /* done_task will not finish so do it here */
+ img_hash_finish_req(req, err);
+ }
+ return res;
+}
+
+static int img_hash_update(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+
+ return crypto_ahash_update(&rctx->fallback_req);
+}
+
+static int img_hash_final(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_final(&rctx->fallback_req);
+}
+
+static int img_hash_finup(struct ahash_request *req)
+{
+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+ rctx->fallback_req.base.flags = req->base.flags
+ & CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_finup(&rctx->fallback_req);
+}
+
+static int img_hash_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+ struct img_hash_dev *hdev = NULL;
+ struct img_hash_dev *tmp;
+ int err;
+
+ spin_lock_bh(&img_hash.lock);
+ if (!tctx->hdev) {
+ list_for_each_entry(tmp, &img_hash.dev_list, list) {
+ hdev = tmp;
+ break;
+ }
+ tctx->hdev = hdev;
+
+ } else {
+ hdev = tctx->hdev;
+ }
+
+ spin_unlock_bh(&img_hash.lock);
+ ctx->hdev = hdev;
+ ctx->flags = 0;
+ ctx->digsize = crypto_ahash_digestsize(tfm);
+
+ switch (ctx->digsize) {
+ case SHA1_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA1;
+ break;
+ case SHA256_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA256;
+ break;
+ case SHA224_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_SHA224;
+ break;
+ case MD5_DIGEST_SIZE:
+ ctx->flags |= DRIVER_FLAGS_MD5;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctx->bufcnt = 0;
+ ctx->offset = 0;
+ ctx->sent = 0;
+ ctx->total = req->nbytes;
+ ctx->sg = req->src;
+ ctx->sgfirst = req->src;
+ ctx->nents = sg_nents(ctx->sg);
+
+ err = img_hash_handle_queue(tctx->hdev, req);
+
+ return err;
+}
+
+static int img_hash_cra_init(struct crypto_tfm *tfm)
+{
+ struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+ const char *alg_name = crypto_tfm_alg_name(tfm);
+ int err = -ENOMEM;
+
+ ctx->fallback = crypto_alloc_ahash(alg_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->fallback)) {
+ pr_err("img_hash: Could not load fallback driver.\n");
+ err = PTR_ERR(ctx->fallback);
+ goto err;
+ }
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct img_hash_request_ctx) +
+ IMG_HASH_DMA_THRESHOLD);
+
+ return 0;
+
+err:
+ return err;
+}
+
+static void img_hash_cra_exit(struct crypto_tfm *tfm)
+{
+ struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_ahash(tctx->fallback);
+}
+
+static irqreturn_t img_irq_handler(int irq, void *dev_id)
+{
+ struct img_hash_dev *hdev = dev_id;
+ u32 reg;
+
+ reg = img_hash_read(hdev, CR_INTSTAT);
+ img_hash_write(hdev, CR_INTCLEAR, reg);
+
+ if (reg & CR_INT_NEW_RESULTS_SET) {
+ dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
+ hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
+ if (!(DRIVER_FLAGS_CPU & hdev->flags))
+ hdev->flags |= DRIVER_FLAGS_DMA_READY;
+ tasklet_schedule(&hdev->done_task);
+ } else {
+ dev_warn(hdev->dev,
+ "HASH interrupt when no active requests.\n");
+ }
+ } else if (reg & CR_INT_RESULTS_AVAILABLE) {
+ dev_warn(hdev->dev,
+ "IRQ triggered before the hash had completed\n");
+ } else if (reg & CR_INT_RESULT_READ_ERR) {
+ dev_warn(hdev->dev,
+ "Attempt to read from an empty result queue\n");
+ } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
+ dev_warn(hdev->dev,
+ "Data written before the hardware was configured\n");
+ }
+ return IRQ_HANDLED;
+}
+
+static struct ahash_alg img_algs[] = {
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = MD5_DIGEST_SIZE,
+ .base = {
+ .cra_name = "md5",
+ .cra_driver_name = "img-md5",
+ .cra_priority = 301,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "img-sha1",
+ .cra_priority = 3000,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "img-sha224",
+ .cra_priority = 3000,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ },
+ {
+ .init = img_hash_init,
+ .update = img_hash_update,
+ .final = img_hash_final,
+ .finup = img_hash_finup,
+ .digest = img_hash_digest,
+ .halg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "img-sha256",
+ .cra_priority = 301,
+ .cra_flags =
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct img_hash_ctx),
+ .cra_init = img_hash_cra_init,
+ .cra_exit = img_hash_cra_exit,
+ .cra_module = THIS_MODULE,
+ }
+ }
+ }
+};
+
+static int img_register_algs(struct img_hash_dev *hdev)
+{
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
+ err = crypto_register_ahash(&img_algs[i]);
+ if (err)
+ goto err_reg;
+ }
+ return 0;
+
+err_reg:
+ for (; i--; )
+ crypto_unregister_ahash(&img_algs[i]);
+
+ return err;
+}
+
+static int img_unregister_algs(struct img_hash_dev *hdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(img_algs); i++)
+ crypto_unregister_ahash(&img_algs[i]);
+ return 0;
+}
+
+static void img_hash_done_task(unsigned long data)
+{
+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+ int err = 0;
+
+ if (hdev->err == -EINVAL) {
+ err = hdev->err;
+ goto finish;
+ }
+
+ if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
+ img_hash_handle_queue(hdev, NULL);
+ return;
+ }
+
+ if (DRIVER_FLAGS_CPU & hdev->flags) {
+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+ hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
+ goto finish;
+ }
+ } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
+ if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
+ hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
+ img_hash_write_via_dma_stop(hdev);
+ if (hdev->err) {
+ err = hdev->err;
+ goto finish;
+ }
+ }
+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
+ DRIVER_FLAGS_OUTPUT_READY);
+ goto finish;
+ }
+ }
+ return;
+
+finish:
+ img_hash_finish_req(hdev->req, err);
+}
+
+static const struct of_device_id img_hash_match[] = {
+ { .compatible = "img,hash-accelerator" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, img_hash_match)
+
+static int img_hash_probe(struct platform_device *pdev)
+{
+ struct img_hash_dev *hdev;
+ struct device *dev = &pdev->dev;
+ struct resource *hash_res;
+ int err;
+
+ hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
+ if (hdev == NULL) {
+ err = -ENOMEM;
+ goto sha_dev_err;
+ }
+ spin_lock_init(&hdev->lock);
+
+ hdev->dev = dev;
+
+ platform_set_drvdata(pdev, hdev);
+
+ INIT_LIST_HEAD(&hdev->list);
+
+ tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
+ tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
+
+ crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
+
+ /* Register bank */
+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ hdev->io_base = devm_ioremap_resource(dev, hash_res);
+ if (IS_ERR(hdev->io_base)) {
+ dev_err(dev, "can't ioremap\n");
+ err = PTR_ERR(hdev->io_base);
+ goto hash_io_err;
+ }
+
+ /* Write port (DMA or CPU) */
+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!hash_res) {
+ dev_err(dev, "no write port resource info\n");
+ err = -ENODEV;
+ goto res_err;
+ }
+ hdev->bus_addr = hash_res->start;
+ hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
+ if (IS_ERR(hdev->cpu_addr)) {
+ dev_err(dev, "can't ioremap write port\n");
+ err = PTR_ERR(hdev->cpu_addr);
+ goto res_err;
+ }
+
+ hdev->irq = platform_get_irq(pdev, 0);
+ if (hdev->irq < 0) {
+ dev_err(dev, "no IRQ resource info\n");
+ err = hdev->irq;
+ goto res_err;
+ }
+
+ err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
+ dev_name(dev), hdev);
+ if (err) {
+ dev_err(dev, "unable to request %s irq\n", dev_name(dev));
+ goto res_err;
+ }
+ dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
+
+ hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
+ if (IS_ERR(hdev->iclk)) {
+ dev_err(dev, "clock initialization failed.\n");
+ err = PTR_ERR(hdev->iclk);
+ goto res_err;
+ }
+
+ hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
+ if (IS_ERR(hdev->fclk)) {
+ dev_err(dev, "clock initialization failed.\n");
+ err = PTR_ERR(hdev->fclk);
+ goto res_err;
+ }
+
+ err = img_hash_dma_init(hdev);
+ if (err)
+ goto err_dma;
+
+ dev_dbg(dev, "using %s for DMA transfers\n",
+ dma_chan_name(hdev->dma_lch));
+
+ spin_lock(&img_hash.lock);
+ list_add_tail(&hdev->list, &img_hash.dev_list);
+ spin_unlock(&img_hash.lock);
+
+ err = img_register_algs(hdev);
+ if (err)
+ goto err_algs;
+ dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr, 0x4);
+ dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
+
+ return 0;
+
+err_algs:
+ spin_lock(&img_hash.lock);
+ list_del(&hdev->list);
+ spin_unlock(&img_hash.lock);
+ dma_release_channel(hdev->dma_lch);
+err_dma:
+hash_io_err:
+ devm_clk_put(dev, hdev->iclk);
+ devm_clk_put(dev, hdev->fclk);
+res_err:
+ tasklet_kill(&hdev->done_task);
+ tasklet_kill(&hdev->dma_task);
+sha_dev_err:
+ dev_err(dev, "initialization failed.\n");
+ return err;
+}
+
+static int img_hash_remove(struct platform_device *pdev)
+{
+ static struct img_hash_dev *hdev;
+
+ hdev = platform_get_drvdata(pdev);
+ spin_lock(&img_hash.lock);
+ list_del(&hdev->list);
+ spin_unlock(&img_hash.lock);
+
+ img_unregister_algs(hdev);
+
+ tasklet_kill(&hdev->done_task);
+ tasklet_kill(&hdev->dma_task);
+
+ dma_release_channel(hdev->dma_lch);
+
+ clk_disable_unprepare(hdev->iclk);
+ clk_disable_unprepare(hdev->fclk);
+ return 0;
+}
+
+static struct platform_driver img_hash_driver = {
+ .probe = img_hash_probe,
+ .remove = img_hash_remove,
+ .driver = {
+ .name = "img-hash-accelerator",
+ .of_match_table = of_match_ptr(img_hash_match),
+ }
+};
+module_platform_driver(img_hash_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
+MODULE_AUTHOR("Will Thomas.");
+MODULE_AUTHOR("James Hartley.");
--
1.7.9.5
This adds the binding documentation for the Imagination Technologies
hash accelerator that provides hardware acceleration for
SHA1/SHA224/SHA256/MD5 hashes. This hardware will be present in
the upcoming pistachio SoC.
Signed-off-by: James Hartley <[email protected]>
---
.../devicetree/bindings/crypto/img-hash.txt | 27 ++++++++++++++++++++
1 file changed, 27 insertions(+)
create mode 100644 Documentation/devicetree/bindings/crypto/img-hash.txt
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 0000000..7adc519
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
+Imagination Technologies hardware hash accelerator
+
+The hash accelerator provides hardware hashing acceleration for
+SHA1, SHA224, SHA256 and MD5 hashes
+
+Required properties:
+
+- compatible : "img,hash-accelerator"
+- reg : Offset and length of the register set for the module, and the DMA port
+- interrupts : The designated IRQ line for the hashing module.
+- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : Should be "tx"
+- clocks : Clock specifiers
+- clock-names : "hash_clk" Used to clock data through the accelerator
+ "hash_reg_clk" Used to clock the hash block registers
+
+Example:
+
+ hash: hash@18149600 {
+ compatible = "img,hash-accelerator";
+ reg = <0x18149600 0x100, 0x18101100 0x4>;
+ interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 8 0xffffffff>;
+ dma-names = "tx";
+ clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+ clock-names = "hash_clk, hash_reg_clk";
+ };
--
1.7.9.5
Hi James,
On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <[email protected]> wrote:
> This adds the binding documentation for the Imagination Technologies
> hash accelerator that provides hardware acceleration for
> SHA1/SHA224/SHA256/MD5 hashes. This hardware will be present in
> the upcoming pistachio SoC.
>
> Signed-off-by: James Hartley <[email protected]>
> diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
> new file mode 100644
> index 0000000..7adc519
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
> @@ -0,0 +1,27 @@
> +Imagination Technologies hardware hash accelerator
> +
> +The hash accelerator provides hardware hashing acceleration for
> +SHA1, SHA224, SHA256 and MD5 hashes
> +
> +Required properties:
> +
> +- compatible : "img,hash-accelerator"
> +- reg : Offset and length of the register set for the module, and the DMA port
> +- interrupts : The designated IRQ line for the hashing module.
> +- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
> +- dma-names : Should be "tx"
> +- clocks : Clock specifiers
> +- clock-names : "hash_clk" Used to clock data through the accelerator
> + "hash_reg_clk" Used to clock the hash block registers
For the other IMG drivers that have been submitted, we've been using
"sys" as the name for the register gate clock. Maybe we should do
that here too to be consistent?
> +Example:
> +
> + hash: hash@18149600 {
> + compatible = "img,hash-accelerator";
> + reg = <0x18149600 0x100, 0x18101100 0x4>;
> + interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
> + dmas = <&dma 8 0xffffffff>;
> + dma-names = "tx";
> + clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
> + clock-names = "hash_clk, hash_reg_clk";
I think these are flipped (and you're missing some quotation marks).
Otherwise this looks good to me.
-Andrew
Hi James,
On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <[email protected]> wrote:
> This adds support for the Imagination Technologies hash
> accelerator which provides hardware acceleration for
> SHA1 SHA224 SHA256 and MD5 hashes.
>
> Signed-off-by: James Hartley <[email protected]>
Some general comments below, I'll leave review of the crypto-specific
stuff to Herbert.
> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> index 3924f93..fb84be7 100644
> --- a/drivers/crypto/Makefile
> +++ b/drivers/crypto/Makefile
> @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
> +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
> @@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
> +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
Unrelated change - perhaps a bad merge conflict resolution?
> diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
> new file mode 100644
> index 0000000..94a3a6f
> --- /dev/null
> +++ b/drivers/crypto/img-hash.c
> @@ -0,0 +1,1037 @@
> +/*
> + * Copyright (c) 2014 Imagination Technologies
> + * Authors: Will Thomas, James Hartley
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + * Interface structure taken from omap-sham driver
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/dmaengine.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/of_device.h>
> +#include <linux/platform_device.h>
> +#include <linux/scatterlist.h>
> +
> +#include <crypto/internal/hash.h>
> +#include <crypto/md5.h>
> +#include <crypto/sha.h>
> +
> +#define CR_RESET 0
> +#define CR_RESET_SET 1
> +#define CR_RESET_UNSET 0
> +
> +#define CR_MESSAGE_LENGTH_H 0x4
> +#define CR_MESSAGE_LENGTH_L 0x8
> +
> +#define CR_CONTROL 0xc
> +#define CR_CONTROL_BYTE_ORDER_3210 0
> +#define CR_CONTROL_BYTE_ORDER_0123 1
> +#define CR_CONTROL_BYTE_ORDER_2310 2
> +#define CR_CONTROL_BYTE_ORDER_1032 3
> +#define CR_CONTROL_BYTE_ORDER_SHIFT 8
> +#define CR_CONTROL_ALGO_MD5 0
> +#define CR_CONTROL_ALGO_SHA1 1
> +#define CR_CONTROL_ALGO_SHA224 2
> +#define CR_CONTROL_ALGO_SHA256 3
> +
> +#define CR_INTSTAT 0x10
> +#define CR_INTENAB 0x14
> +#define CR_INTCLEAR 0x18
> +#define CR_INT_RESULTS_AVAILABLE BIT(0)
> +#define CR_INT_NEW_RESULTS_SET BIT(1)
> +#define CR_INT_RESULT_READ_ERR BIT(2)
> +#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
> +#define CR_INT_STATUS BIT(8)
> +
> +#define CR_RESULT_QUEUE 0x1c
> +#define CR_RSD0 0x40
> +#define CR_CORE_REV 0x50
> +#define CR_CORE_DES1 0x60
> +#define CR_CORE_DES2 0x70
> +
> +#define DRIVER_FLAGS_BUSY BIT(0)
> +#define DRIVER_FLAGS_FINAL BIT(1)
> +#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
> +#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
> +#define DRIVER_FLAGS_INIT BIT(4)
> +#define DRIVER_FLAGS_CPU BIT(5)
> +#define DRIVER_FLAGS_DMA_READY BIT(6)
> +#define DRIVER_FLAGS_ERROR BIT(7)
> +#define DRIVER_FLAGS_SG BIT(8)
> +#define DRIVER_FLAGS_SHA1 BIT(18)
> +#define DRIVER_FLAGS_SHA224 BIT(19)
> +#define DRIVER_FLAGS_SHA256 BIT(20)
> +#define DRIVER_FLAGS_MD5 BIT(21)
> +
> +#define IMG_HASH_QUEUE_LENGTH 20
> +#define IMG_HASH_DMA_THRESHOLD 64
> +
> +#ifdef __LITTLE_ENDIAN
> +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_3210)
> +#else
> +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_0123)
> +#endif
Unnecessary parenthesis.
> +struct img_hash_dev;
> +
> +struct img_hash_request_ctx {
> + struct img_hash_dev *hdev;
> + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
> + unsigned long flags;
> + size_t digsize;
> +
> + dma_addr_t dma_addr;
> + size_t dma_ct;
> +
> + /* sg root */
> + struct scatterlist *sgfirst;
> + /* walk state */
> + struct scatterlist *sg;
> + size_t nents;
> + size_t offset;
> + unsigned int total;
> + size_t sent;
> +
> + unsigned long op;
> +
> + size_t bufcnt;
> + u8 buffer[0] __aligned(sizeof(u32));
> + struct ahash_request fallback_req;
> +};
> +
> +struct img_hash_ctx {
> + struct img_hash_dev *hdev;
> + unsigned long flags;
> + struct crypto_ahash *fallback;
> +};
> +
> +struct img_hash_dev {
> + struct list_head list;
> + struct device *dev;
> + struct clk *iclk;
> + struct clk *fclk;
Maybe make these names a little more obvious so it's clear which clock is which.
> + int irq;
This isn't used outside of probe(), so it need not be carried around
in this struct.
> + void __iomem *io_base;
> +
> + phys_addr_t bus_addr;
> + void __iomem *cpu_addr;
> +
> + spinlock_t lock;
> + int err;
> + struct tasklet_struct done_task;
> + struct tasklet_struct dma_task;
> +
> + unsigned long flags;
> + struct crypto_queue queue;
> + struct ahash_request *req;
> +
> + struct dma_chan *dma_lch;
> +};
> +
> +struct img_hash_drv {
> + struct list_head dev_list;
> + spinlock_t lock;
> +};
> +
> +static struct img_hash_drv img_hash = {
> + .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
> + .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
> +};
> +
> +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
> +{
> + return readl_relaxed(hdev->io_base + offset);
> +}
> +
> +static inline void img_hash_write(struct img_hash_dev *hdev,
> + u32 offset, u32 value)
> +{
> + writel_relaxed(value, hdev->io_base + offset);
> +}
> +
> +static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
> +{
> + return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
> +}
> +
> +static void img_hash_start(struct img_hash_dev *hdev, bool dma)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> + u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
> +
> + if (ctx->flags & DRIVER_FLAGS_MD5)
> + cr |= CR_CONTROL_ALGO_MD5;
> + else if (ctx->flags & DRIVER_FLAGS_SHA1)
> + cr |= CR_CONTROL_ALGO_SHA1;
> + else if (ctx->flags & DRIVER_FLAGS_SHA224)
> + cr |= CR_CONTROL_ALGO_SHA224;
> + else if (ctx->flags & DRIVER_FLAGS_SHA256)
> + cr |= CR_CONTROL_ALGO_SHA256;
> + dev_dbg(hdev->dev, "Starting hash process\n");
> + img_hash_write(hdev, CR_CONTROL, cr);
> +
> + /*
> + * The hardware block requires two cycles between writing the control
> + * register and writing the first word of data in non DMA mode, to
> + * ensure the first data write is not grouped in burst with the control
> + * register write a read is issued to 'flush' the bus.
> + */
> + if (!dma)
> + img_hash_read(hdev, CR_CONTROL);
> +}
> +
> +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
> + size_t length, int final)
> +{
> + u32 count, len32;
> + const u32 *buffer = (const u32 *)buf;
> +
> + dev_dbg(hdev->dev, "xmit_cpu: length: %u bytes\n", length);
> +
> + if (final)
> + hdev->flags |= DRIVER_FLAGS_FINAL;
> +
> + len32 = DIV_ROUND_UP(length, sizeof(u32));
> +
> + for (count = 0; count < len32; count++)
> + writel_relaxed(buffer[count], hdev->cpu_addr);
> +
> + return -EINPROGRESS;
> +}
> +
> +static void img_hash_dma_callback(void *data)
> +{
> + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> + if (ctx->bufcnt) {
> + img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
> + ctx->bufcnt = 0;
> + }
> + if (ctx->sg)
> + tasklet_schedule(&hdev->dma_task);
> +}
> +
> +static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
> +{
> + struct dma_async_tx_descriptor *desc;
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> + if (ctx->dma_ct == 0) {
> + dev_err(hdev->dev, "Invalid DMA sg\n");
> + hdev->err = -EINVAL;
> + return -EINVAL;
> + }
> +
> + desc = dmaengine_prep_slave_sg(hdev->dma_lch,
> + sg,
Line this up (and the rest of this statement) with the open paren of
dmaengine_prep_slave_sg().
> + ctx->dma_ct,
> + DMA_MEM_TO_DEV,
> + DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> + if (!desc) {
> + dev_err(hdev->dev, "Null DMA descriptor\n");
> + hdev->err = -EINVAL;
> + dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> + return -EINVAL;
> + }
> + desc->callback = img_hash_dma_callback;
> + desc->callback_param = hdev;
> + dmaengine_submit(desc);
> + dma_async_issue_pending(hdev->dma_lch);
> +
> + return 0;
> +}
> +
> +static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> + int ret = 0;
> +
> + ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
> + ctx->buffer, hdev->req->nbytes);
> +
> + ctx->total = hdev->req->nbytes;
> + ctx->bufcnt = 0;
> +
> + hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
> +
> + img_hash_start(hdev, false);
> +
> + if (ctx->total)
> + ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
> + else
> + ret = 0;
The else block here (and possibly the entire check) seems unnecessary.
img_hash_xmit_cpu() won't do anything if the length is 0.
> +
> + return ret;
> +}
> +
> +static int img_hash_finish(struct ahash_request *req)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +
> + if (!req->result)
> + return -EINVAL;
> +
> + memcpy(req->result, ctx->digest, ctx->digsize);
> +
> + return 0;
> +}
> +
> +static void img_hash_copy_hash(struct ahash_request *req)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> + u32 *hash = (u32 *)ctx->digest;
> + int i;
> +
> + for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
> + hash[i] = img_hash_read_result_queue(ctx->hdev);
> +}
> +
> +static void img_hash_finish_req(struct ahash_request *req, int err)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> + struct img_hash_dev *hdev = ctx->hdev;
> +
> + if (!err) {
> + img_hash_copy_hash(req);
> + if (DRIVER_FLAGS_FINAL & hdev->flags)
> + err = img_hash_finish(req);
> + } else {
> + dev_warn(hdev->dev, "Hash failed with error %d\n", err);
> + ctx->flags |= DRIVER_FLAGS_ERROR;
> + }
> +
> + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
> + DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
> +
> + if (req->base.complete)
> + req->base.complete(&req->base, err);
> +}
> +
> +static int img_hash_write_via_dma(struct img_hash_dev *hdev)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> + img_hash_start(hdev, true);
> +
> + dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
> +
> + if (!ctx->total)
> + hdev->flags |= DRIVER_FLAGS_FINAL;
> +
> + hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
> +
> + tasklet_schedule(&hdev->dma_task);
> +
> + return -EINPROGRESS;
> +}
> +
> +static int img_hash_dma_init(struct img_hash_dev *hdev)
> +{
> + struct dma_slave_config dma_conf;
> + int err = -EINVAL;
> +
> + hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
> + if (!hdev->dma_lch) {
> + dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
> + return -EBUSY;
> + }
> + dma_conf.direction = DMA_MEM_TO_DEV;
> + dma_conf.dst_addr = hdev->bus_addr;
> + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> + dma_conf.dst_maxburst = 16;
> + dma_conf.device_fc = false;
> +
> + err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
> +
> + if (err) {
> + dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
> + dma_release_channel(hdev->dma_lch);
> + return err;
> + }
> + return 0;
nit: I would expect there to be a newline before the return rather
than before the if().
> +}
> +
> +static void img_hash_dma_task(unsigned long d)
> +{
> + struct img_hash_dev *hdev = (struct img_hash_dev *)d;
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> + u8 *addr;
> + size_t nbytes, bleft, wsend, len, tbc;
> + struct scatterlist tsg;
> +
> + if (!ctx->sg)
> + return;
> +
> + addr = sg_virt(ctx->sg);
> + nbytes = ctx->sg->length - ctx->offset;
> +
> + /* The hash accelerator does not support a data valid mask. This means
> + * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
> + * padding bytes in the last word written by that dma would erroneously
> + * be included in the hash. To avoid this we round down the transfer,
> + * and add the excess to the start of the next dma. It does not matter
> + * that the final dma may not be a multiple of 4 bytes as the hashing
> + * block is programmed to accept the correct number of bytes.
> + */
nit: Multi-line comment style is:
/*
* blah blah blah
*/
> +
> + bleft = nbytes % 4;
> + wsend = (nbytes / 4);
> +
> + if (wsend) {
> + sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
> + if (img_hash_xmit_dma(hdev, &tsg)) {
> + dev_err(hdev->dev, "DMA failed, falling back to CPU");
> + ctx->flags |= DRIVER_FLAGS_CPU;
> + hdev->err = 0;
> + img_hash_xmit_cpu(hdev, addr + ctx->offset,
> + wsend * 4, 0);
> + ctx->sent += wsend * 4;
> + wsend = 0;
> + } else {
> + ctx->sent += wsend * 4;
> + }
> + }
> +
> + if (bleft) {
> + ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> + ctx->buffer, bleft, ctx->sent);
> + tbc = 0;
> + ctx->sg = sg_next(ctx->sg);
> + while (ctx->sg && (ctx->bufcnt < 4)) {
> + len = ctx->sg->length;
> + if (likely(len > (4 - ctx->bufcnt)))
> + len = 4 - ctx->bufcnt;
> + tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> + ctx->buffer + ctx->bufcnt, len,
> + ctx->sent + ctx->bufcnt);
> + ctx->bufcnt += tbc;
> + if (tbc >= ctx->sg->length) {
> + ctx->sg = sg_next(ctx->sg);
> + tbc = 0;
> + }
> + }
> +
> + ctx->sent += ctx->bufcnt;
> + ctx->offset = tbc;
> +
> + if (!wsend)
> + img_hash_dma_callback(hdev);
> + } else {
> + ctx->offset = 0;
> + ctx->sg = sg_next(ctx->sg);
> + }
> +}
> +
> +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
> +{
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> + if (ctx->flags & DRIVER_FLAGS_SG)
> + dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
> +
> + return 0;
> +}
> +
> +static int img_hash_process_data(struct img_hash_dev *hdev)
> +{
> + struct ahash_request *req = hdev->req;
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> + int err = 0;
> +
> + ctx->bufcnt = 0;
> +
> + if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
> + dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n",
> + req->nbytes);
> + err = img_hash_write_via_dma(hdev);
> + } else {
> + dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n",
> + req->nbytes);
> + err = img_hash_write_via_cpu(hdev);
> + }
> + return err;
> +}
> +
> +static int img_hash_hw_init(struct img_hash_dev *hdev)
> +{
> + unsigned long long nbits;
> + u32 u, l;
> + int ret;
> +
> + ret = clk_prepare_enable(hdev->iclk);
It looks like there's no corresponding clk_disable_unprepare() to
this, so the prepare/enable counts will just keep increasing. Perhaps
it would be better to manage enabling/disabling the clocks with
runtime PM?
> + if (ret)
> + return ret;
> +
> + img_hash_write(hdev, CR_RESET, CR_RESET_SET);
> + img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
> + img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
> +
> + nbits = (hdev->req->nbytes << 3);
> + u = nbits >> 32;
> + l = nbits;
> + img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
> + img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
> +
> + if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
> + hdev->flags |= DRIVER_FLAGS_INIT;
> + hdev->err = 0;
> + }
> + dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
> + return 0;
> +}
> +
> +static int img_hash_init(struct ahash_request *req)
> +{
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> + rctx->fallback_req.base.flags = req->base.flags
> + & CRYPTO_TFM_REQ_MAY_SLEEP;
> +
> + return crypto_ahash_init(&rctx->fallback_req);
> +}
> +
> +static int img_hash_handle_queue(struct img_hash_dev *hdev,
> + struct ahash_request *req)
> +{
> + struct crypto_async_request *async_req, *backlog;
> + struct img_hash_request_ctx *ctx;
> + unsigned long flags;
> + int err = 0, res = 0;
> +
> + spin_lock_irqsave(&hdev->lock, flags);
> +
> + if (req)
> + res = ahash_enqueue_request(&hdev->queue, req);
> +
> + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> + spin_unlock_irqrestore(&hdev->lock, flags);
> + return res;
> + }
> +
> + backlog = crypto_get_backlog(&hdev->queue);
> + async_req = crypto_dequeue_request(&hdev->queue);
> + if (async_req)
> + hdev->flags |= DRIVER_FLAGS_BUSY;
> +
> + spin_unlock_irqrestore(&hdev->lock, flags);
> +
> + if (!async_req)
> + return res;
> +
> + if (backlog)
> + backlog->complete(backlog, -EINPROGRESS);
> +
> + req = ahash_request_cast(async_req);
> + hdev->req = req;
> +
> + ctx = ahash_request_ctx(req);
> +
> + dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
> + ctx->op, req->nbytes);
> +
> + err = img_hash_hw_init(hdev);
> +
> + if (!err)
> + err = img_hash_process_data(hdev);
> +
> + if (err != -EINPROGRESS) {
> + /* done_task will not finish so do it here */
> + img_hash_finish_req(req, err);
> + }
> + return res;
> +}
> +
> +static int img_hash_update(struct ahash_request *req)
> +{
> + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> + rctx->fallback_req.base.flags = req->base.flags
> + & CRYPTO_TFM_REQ_MAY_SLEEP;
> + rctx->fallback_req.nbytes = req->nbytes;
> + rctx->fallback_req.src = req->src;
> +
> + return crypto_ahash_update(&rctx->fallback_req);
> +}
> +
> +static int img_hash_final(struct ahash_request *req)
> +{
> + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> + rctx->fallback_req.base.flags = req->base.flags
> + & CRYPTO_TFM_REQ_MAY_SLEEP;
> + rctx->fallback_req.result = req->result;
> +
> + return crypto_ahash_final(&rctx->fallback_req);
> +}
> +
> +static int img_hash_finup(struct ahash_request *req)
> +{
> + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> + rctx->fallback_req.base.flags = req->base.flags
> + & CRYPTO_TFM_REQ_MAY_SLEEP;
> + rctx->fallback_req.nbytes = req->nbytes;
> + rctx->fallback_req.src = req->src;
> + rctx->fallback_req.result = req->result;
> +
> + return crypto_ahash_finup(&rctx->fallback_req);
> +}
> +
> +static int img_hash_digest(struct ahash_request *req)
> +{
> + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> + struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
> + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> + struct img_hash_dev *hdev = NULL;
> + struct img_hash_dev *tmp;
> + int err;
> +
> + spin_lock_bh(&img_hash.lock);
Why spin_{lock,unlock}_bh() here?
> + if (!tctx->hdev) {
> + list_for_each_entry(tmp, &img_hash.dev_list, list) {
> + hdev = tmp;
> + break;
> + }
> + tctx->hdev = hdev;
> +
> + } else {
> + hdev = tctx->hdev;
> + }
> +
> + spin_unlock_bh(&img_hash.lock);
> + ctx->hdev = hdev;
> + ctx->flags = 0;
> + ctx->digsize = crypto_ahash_digestsize(tfm);
> +
> + switch (ctx->digsize) {
> + case SHA1_DIGEST_SIZE:
> + ctx->flags |= DRIVER_FLAGS_SHA1;
> + break;
> + case SHA256_DIGEST_SIZE:
> + ctx->flags |= DRIVER_FLAGS_SHA256;
> + break;
> + case SHA224_DIGEST_SIZE:
> + ctx->flags |= DRIVER_FLAGS_SHA224;
> + break;
> + case MD5_DIGEST_SIZE:
> + ctx->flags |= DRIVER_FLAGS_MD5;
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + ctx->bufcnt = 0;
> + ctx->offset = 0;
> + ctx->sent = 0;
> + ctx->total = req->nbytes;
> + ctx->sg = req->src;
> + ctx->sgfirst = req->src;
> + ctx->nents = sg_nents(ctx->sg);
> +
> + err = img_hash_handle_queue(tctx->hdev, req);
> +
> + return err;
> +}
> +
> +static int img_hash_cra_init(struct crypto_tfm *tfm)
> +{
> + struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> + const char *alg_name = crypto_tfm_alg_name(tfm);
> + int err = -ENOMEM;
> +
> + ctx->fallback = crypto_alloc_ahash(alg_name, 0,
> + CRYPTO_ALG_NEED_FALLBACK);
> + if (IS_ERR(ctx->fallback)) {
> + pr_err("img_hash: Could not load fallback driver.\n");
> + err = PTR_ERR(ctx->fallback);
> + goto err;
> + }
> + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> + sizeof(struct img_hash_request_ctx) +
> + IMG_HASH_DMA_THRESHOLD);
> +
> + return 0;
> +
> +err:
> + return err;
> +}
> +
> +static void img_hash_cra_exit(struct crypto_tfm *tfm)
> +{
> + struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
> +
> + crypto_free_ahash(tctx->fallback);
> +}
> +
> +static irqreturn_t img_irq_handler(int irq, void *dev_id)
> +{
> + struct img_hash_dev *hdev = dev_id;
> + u32 reg;
> +
> + reg = img_hash_read(hdev, CR_INTSTAT);
> + img_hash_write(hdev, CR_INTCLEAR, reg);
> +
> + if (reg & CR_INT_NEW_RESULTS_SET) {
> + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> + if (!(DRIVER_FLAGS_CPU & hdev->flags))
> + hdev->flags |= DRIVER_FLAGS_DMA_READY;
> + tasklet_schedule(&hdev->done_task);
Since this done_task only gets scheduled from here, why not make this
a threaded IRQ handler and just do the work here instead of separating
it between a hard IRQ handler and a tasklet?
> + } else {
> + dev_warn(hdev->dev,
> + "HASH interrupt when no active requests.\n");
> + }
> + } else if (reg & CR_INT_RESULTS_AVAILABLE) {
> + dev_warn(hdev->dev,
> + "IRQ triggered before the hash had completed\n");
> + } else if (reg & CR_INT_RESULT_READ_ERR) {
> + dev_warn(hdev->dev,
> + "Attempt to read from an empty result queue\n");
> + } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
> + dev_warn(hdev->dev,
> + "Data written before the hardware was configured\n");
> + }
> + return IRQ_HANDLED;
> +}
> +
> +static struct ahash_alg img_algs[] = {
> + {
> + .init = img_hash_init,
> + .update = img_hash_update,
> + .final = img_hash_final,
> + .finup = img_hash_finup,
> + .digest = img_hash_digest,
> + .halg = {
> + .digestsize = MD5_DIGEST_SIZE,
> + .base = {
> + .cra_name = "md5",
> + .cra_driver_name = "img-md5",
> + .cra_priority = 301,
> + .cra_flags =
> + CRYPTO_ALG_ASYNC |
> + CRYPTO_ALG_NEED_FALLBACK,
> + .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
> + .cra_ctxsize = sizeof(struct img_hash_ctx),
> + .cra_init = img_hash_cra_init,
> + .cra_exit = img_hash_cra_exit,
> + .cra_module = THIS_MODULE,
> + }
> + }
> + },
> + {
> + .init = img_hash_init,
> + .update = img_hash_update,
> + .final = img_hash_final,
> + .finup = img_hash_finup,
> + .digest = img_hash_digest,
> + .halg = {
> + .digestsize = SHA1_DIGEST_SIZE,
> + .base = {
> + .cra_name = "sha1",
> + .cra_driver_name = "img-sha1",
> + .cra_priority = 3000,
> + .cra_flags =
> + CRYPTO_ALG_ASYNC |
> + CRYPTO_ALG_NEED_FALLBACK,
> + .cra_blocksize = SHA1_BLOCK_SIZE,
> + .cra_ctxsize = sizeof(struct img_hash_ctx),
> + .cra_init = img_hash_cra_init,
> + .cra_exit = img_hash_cra_exit,
> + .cra_module = THIS_MODULE,
> + }
> + }
> + },
> + {
> + .init = img_hash_init,
> + .update = img_hash_update,
> + .final = img_hash_final,
> + .finup = img_hash_finup,
> + .digest = img_hash_digest,
> + .halg = {
> + .digestsize = SHA224_DIGEST_SIZE,
> + .base = {
> + .cra_name = "sha224",
> + .cra_driver_name = "img-sha224",
> + .cra_priority = 3000,
> + .cra_flags =
> + CRYPTO_ALG_ASYNC |
> + CRYPTO_ALG_NEED_FALLBACK,
> + .cra_blocksize = SHA224_BLOCK_SIZE,
> + .cra_ctxsize = sizeof(struct img_hash_ctx),
> + .cra_init = img_hash_cra_init,
> + .cra_exit = img_hash_cra_exit,
> + .cra_module = THIS_MODULE,
> + }
> + }
> + },
> + {
> + .init = img_hash_init,
> + .update = img_hash_update,
> + .final = img_hash_final,
> + .finup = img_hash_finup,
> + .digest = img_hash_digest,
> + .halg = {
> + .digestsize = SHA256_DIGEST_SIZE,
> + .base = {
> + .cra_name = "sha256",
> + .cra_driver_name = "img-sha256",
> + .cra_priority = 301,
> + .cra_flags =
> + CRYPTO_ALG_ASYNC |
> + CRYPTO_ALG_NEED_FALLBACK,
> + .cra_blocksize = SHA256_BLOCK_SIZE,
> + .cra_ctxsize = sizeof(struct img_hash_ctx),
> + .cra_init = img_hash_cra_init,
> + .cra_exit = img_hash_cra_exit,
> + .cra_module = THIS_MODULE,
> + }
> + }
> + }
> +};
> +
> +static int img_register_algs(struct img_hash_dev *hdev)
> +{
> + int i, err;
> +
> + for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
> + err = crypto_register_ahash(&img_algs[i]);
> + if (err)
> + goto err_reg;
> + }
> + return 0;
> +
> +err_reg:
> + for (; i--; )
> + crypto_unregister_ahash(&img_algs[i]);
> +
> + return err;
> +}
> +
> +static int img_unregister_algs(struct img_hash_dev *hdev)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(img_algs); i++)
> + crypto_unregister_ahash(&img_algs[i]);
> + return 0;
> +}
> +
> +static void img_hash_done_task(unsigned long data)
> +{
> + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> + int err = 0;
> +
> + if (hdev->err == -EINVAL) {
> + err = hdev->err;
> + goto finish;
> + }
> +
> + if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
> + img_hash_handle_queue(hdev, NULL);
> + return;
> + }
> +
> + if (DRIVER_FLAGS_CPU & hdev->flags) {
> + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> + hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
> + goto finish;
> + }
> + } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
> + if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
> + hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
> + img_hash_write_via_dma_stop(hdev);
> + if (hdev->err) {
> + err = hdev->err;
> + goto finish;
> + }
> + }
> + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> + DRIVER_FLAGS_OUTPUT_READY);
> + goto finish;
> + }
> + }
> + return;
> +
> +finish:
> + img_hash_finish_req(hdev->req, err);
> +}
> +
> +static const struct of_device_id img_hash_match[] = {
> + { .compatible = "img,hash-accelerator" },
> + {}
> +};
> +MODULE_DEVICE_TABLE(of, img_hash_match)
> +
> +static int img_hash_probe(struct platform_device *pdev)
> +{
> + struct img_hash_dev *hdev;
> + struct device *dev = &pdev->dev;
> + struct resource *hash_res;
> + int err;
> +
> + hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
> + if (hdev == NULL) {
> + err = -ENOMEM;
> + goto sha_dev_err;
Why not just "return -ENOMEM" here? There should be no cleanup
necessary at this point?
> + }
> + spin_lock_init(&hdev->lock);
> +
> + hdev->dev = dev;
> +
> + platform_set_drvdata(pdev, hdev);
> +
> + INIT_LIST_HEAD(&hdev->list);
> +
> + tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
> + tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
> +
> + crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
> +
> + /* Register bank */
> + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +
> + hdev->io_base = devm_ioremap_resource(dev, hash_res);
> + if (IS_ERR(hdev->io_base)) {
> + dev_err(dev, "can't ioremap\n");
nit: When printing error messages, it's helpful to print the error code as well.
> + err = PTR_ERR(hdev->io_base);
> + goto hash_io_err;
> + }
> +
> + /* Write port (DMA or CPU) */
> + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> + if (!hash_res) {
> + dev_err(dev, "no write port resource info\n");
> + err = -ENODEV;
> + goto res_err;
> + }
> + hdev->bus_addr = hash_res->start;
Maybe set this after devm_ioremap_resource() succeeds and avoid the
extra error check?
> + hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
> + if (IS_ERR(hdev->cpu_addr)) {
> + dev_err(dev, "can't ioremap write port\n");
> + err = PTR_ERR(hdev->cpu_addr);
> + goto res_err;
> + }
> +
> + hdev->irq = platform_get_irq(pdev, 0);
> + if (hdev->irq < 0) {
> + dev_err(dev, "no IRQ resource info\n");
> + err = hdev->irq;
> + goto res_err;
> + }
> +
> + err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
> + dev_name(dev), hdev);
> + if (err) {
> + dev_err(dev, "unable to request %s irq\n", dev_name(dev));
dev_* already prints dev_name().
> + goto res_err;
> + }
> + dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
> +
> + hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
> + if (IS_ERR(hdev->iclk)) {
> + dev_err(dev, "clock initialization failed.\n");
> + err = PTR_ERR(hdev->iclk);
> + goto res_err;
> + }
> +
> + hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
I don't see this clock getting enabled anywhere.
> + if (IS_ERR(hdev->fclk)) {
> + dev_err(dev, "clock initialization failed.\n");
> + err = PTR_ERR(hdev->fclk);
> + goto res_err;
> + }
> +
> + err = img_hash_dma_init(hdev);
> + if (err)
> + goto err_dma;
> +
> + dev_dbg(dev, "using %s for DMA transfers\n",
> + dma_chan_name(hdev->dma_lch));
> +
> + spin_lock(&img_hash.lock);
> + list_add_tail(&hdev->list, &img_hash.dev_list);
> + spin_unlock(&img_hash.lock);
> +
> + err = img_register_algs(hdev);
> + if (err)
> + goto err_algs;
> + dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr, 0x4);
dev_dbg()? (or maybe not at all)
> + dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
> +
> + return 0;
> +
> +err_algs:
> + spin_lock(&img_hash.lock);
> + list_del(&hdev->list);
> + spin_unlock(&img_hash.lock);
> + dma_release_channel(hdev->dma_lch);
> +err_dma:
> +hash_io_err:
> + devm_clk_put(dev, hdev->iclk);
> + devm_clk_put(dev, hdev->fclk);
Since the clocks were acquired via the devm_* API, there's no need to
explicitly put() them.
> +res_err:
> + tasklet_kill(&hdev->done_task);
> + tasklet_kill(&hdev->dma_task);
> +sha_dev_err:
> + dev_err(dev, "initialization failed.\n");
The driver core will print an error message if a device probe fails
(for reasons other than -ENODEV, at least), so there's no need to
print here.
> + return err;
> +}
> +
> +static int img_hash_remove(struct platform_device *pdev)
> +{
> + static struct img_hash_dev *hdev;
> +
> + hdev = platform_get_drvdata(pdev);
> + spin_lock(&img_hash.lock);
> + list_del(&hdev->list);
> + spin_unlock(&img_hash.lock);
> +
> + img_unregister_algs(hdev);
> +
> + tasklet_kill(&hdev->done_task);
> + tasklet_kill(&hdev->dma_task);
> +
> + dma_release_channel(hdev->dma_lch);
> +
> + clk_disable_unprepare(hdev->iclk);
> + clk_disable_unprepare(hdev->fclk);
Unbalanced prepare_enable()/disable_unprepare().
> + return 0;
> +}
> +
> +static struct platform_driver img_hash_driver = {
> + .probe = img_hash_probe,
> + .remove = img_hash_remove,
> + .driver = {
> + .name = "img-hash-accelerator",
> + .of_match_table = of_match_ptr(img_hash_match),
> + }
> +};
> +module_platform_driver(img_hash_driver);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
> +MODULE_AUTHOR("Will Thomas.");
> +MODULE_AUTHOR("James Hartley.");
It's helpful to include email addresses in the MODULE_AUTHOR tags.
Am Freitag, 6. M?rz 2015, 02:58:26 schrieb James Hartley:
Hi James,
>This adds support for the Imagination Technologies hash
>accelerator which provides hardware acceleration for
>SHA1 SHA224 SHA256 and MD5 hashes.
>
>Signed-off-by: James Hartley <[email protected]>
>---
> drivers/crypto/Kconfig | 14 +
> drivers/crypto/Makefile | 2 +
> drivers/crypto/img-hash.c | 1037
>+++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1053
>insertions(+)
> create mode 100644 drivers/crypto/img-hash.c
>
>diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
>index 2fb0fdf..c72223e 100644
>--- a/drivers/crypto/Kconfig
>+++ b/drivers/crypto/Kconfig
>@@ -436,4 +436,18 @@ config CRYPTO_DEV_QCE
> hardware. To compile this driver as a module, choose M here.
The
> module will be called qcrypto.
>
>+config CRYPTO_DEV_IMGTEC_HASH
>+ depends on MIPS || COMPILE_TEST
>+ tristate "Imagination Technologies hardware hash accelerator"
>+ select CRYPTO_ALG_API
>+ select CRYPTO_MD5
>+ select CRYPTO_SHA1
>+ select CRYPTO_SHA224
>+ select CRYPTO_SHA256
>+ select CRYPTO_HASH
>+ help
>+ This driver interfaces with the Imagination Technologies
>+ hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
>+ hashing algorithms.
>+
> endif # CRYPTO_HW
>diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
>index 3924f93..fb84be7 100644
>--- a/drivers/crypto/Makefile
>+++ b/drivers/crypto/Makefile
>@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
>+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
>@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
>+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
>new file mode 100644
>index 0000000..94a3a6f
>--- /dev/null
>+++ b/drivers/crypto/img-hash.c
>@@ -0,0 +1,1037 @@
>+/*
>+ * Copyright (c) 2014 Imagination Technologies
>+ * Authors: Will Thomas, James Hartley
>+ *
>+ * This program is free software; you can redistribute it and/or
>modify + * it under the terms of the GNU General Public License
>version 2 as published + * by the Free Software Foundation.
>+ *
>+ * Interface structure taken from omap-sham driver
>+ */
>+
>+#include <linux/clk.h>
>+#include <linux/dmaengine.h>
>+#include <linux/interrupt.h>
>+#include <linux/io.h>
>+#include <linux/kernel.h>
>+#include <linux/module.h>
>+#include <linux/of_device.h>
>+#include <linux/platform_device.h>
>+#include <linux/scatterlist.h>
>+
>+#include <crypto/internal/hash.h>
>+#include <crypto/md5.h>
>+#include <crypto/sha.h>
>+
>+#define CR_RESET 0
>+#define CR_RESET_SET 1
>+#define CR_RESET_UNSET 0
>+
>+#define CR_MESSAGE_LENGTH_H 0x4
>+#define CR_MESSAGE_LENGTH_L 0x8
>+
>+#define CR_CONTROL 0xc
>+#define CR_CONTROL_BYTE_ORDER_3210 0
>+#define CR_CONTROL_BYTE_ORDER_0123 1
>+#define CR_CONTROL_BYTE_ORDER_2310 2
>+#define CR_CONTROL_BYTE_ORDER_1032 3
>+#define CR_CONTROL_BYTE_ORDER_SHIFT 8
>+#define CR_CONTROL_ALGO_MD5 0
>+#define CR_CONTROL_ALGO_SHA1 1
>+#define CR_CONTROL_ALGO_SHA224 2
>+#define CR_CONTROL_ALGO_SHA256 3
>+
>+#define CR_INTSTAT 0x10
>+#define CR_INTENAB 0x14
>+#define CR_INTCLEAR 0x18
>+#define CR_INT_RESULTS_AVAILABLE BIT(0)
>+#define CR_INT_NEW_RESULTS_SET BIT(1)
>+#define CR_INT_RESULT_READ_ERR BIT(2)
>+#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
>+#define CR_INT_STATUS BIT(8)
>+
>+#define CR_RESULT_QUEUE 0x1c
>+#define CR_RSD0 0x40
>+#define CR_CORE_REV 0x50
>+#define CR_CORE_DES1 0x60
>+#define CR_CORE_DES2 0x70
>+
>+#define DRIVER_FLAGS_BUSY BIT(0)
>+#define DRIVER_FLAGS_FINAL BIT(1)
>+#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
>+#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
>+#define DRIVER_FLAGS_INIT BIT(4)
>+#define DRIVER_FLAGS_CPU BIT(5)
>+#define DRIVER_FLAGS_DMA_READY BIT(6)
>+#define DRIVER_FLAGS_ERROR BIT(7)
>+#define DRIVER_FLAGS_SG BIT(8)
>+#define DRIVER_FLAGS_SHA1 BIT(18)
>+#define DRIVER_FLAGS_SHA224 BIT(19)
>+#define DRIVER_FLAGS_SHA256 BIT(20)
>+#define DRIVER_FLAGS_MD5 BIT(21)
>+
>+#define IMG_HASH_QUEUE_LENGTH 20
>+#define IMG_HASH_DMA_THRESHOLD 64
>+
>+#ifdef __LITTLE_ENDIAN
>+#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_3210)
>+#else
>+#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_0123)
>+#endif
>+
>+struct img_hash_dev;
>+
>+struct img_hash_request_ctx {
>+ struct img_hash_dev *hdev;
>+ u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
>+ unsigned long flags;
>+ size_t digsize;
>+
>+ dma_addr_t dma_addr;
>+ size_t dma_ct;
>+
>+ /* sg root */
>+ struct scatterlist *sgfirst;
>+ /* walk state */
>+ struct scatterlist *sg;
>+ size_t nents;
>+ size_t offset;
>+ unsigned int total;
>+ size_t sent;
>+
>+ unsigned long op;
>+
>+ size_t bufcnt;
>+ u8 buffer[0] __aligned(sizeof(u32));
>+ struct ahash_request fallback_req;
>+};
>+
>+struct img_hash_ctx {
>+ struct img_hash_dev *hdev;
>+ unsigned long flags;
>+ struct crypto_ahash *fallback;
>+};
>+
>+struct img_hash_dev {
>+ struct list_head list;
>+ struct device *dev;
>+ struct clk *iclk;
>+ struct clk *fclk;
>+ int irq;
>+ void __iomem *io_base;
>+
>+ phys_addr_t bus_addr;
>+ void __iomem *cpu_addr;
>+
>+ spinlock_t lock;
>+ int err;
>+ struct tasklet_struct done_task;
>+ struct tasklet_struct dma_task;
>+
>+ unsigned long flags;
>+ struct crypto_queue queue;
>+ struct ahash_request *req;
>+
>+ struct dma_chan *dma_lch;
>+};
>+
>+struct img_hash_drv {
>+ struct list_head dev_list;
>+ spinlock_t lock;
>+};
>+
>+static struct img_hash_drv img_hash = {
>+ .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
>+ .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
>+};
>+
>+static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
>+{
>+ return readl_relaxed(hdev->io_base + offset);
>+}
>+
>+static inline void img_hash_write(struct img_hash_dev *hdev,
>+ u32 offset, u32 value)
>+{
>+ writel_relaxed(value, hdev->io_base + offset);
>+}
>+
>+static inline u32 img_hash_read_result_queue(struct img_hash_dev
>*hdev) +{
>+ return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
>+}
>+
>+static void img_hash_start(struct img_hash_dev *hdev, bool dma)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+ u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
>+
>+ if (ctx->flags & DRIVER_FLAGS_MD5)
>+ cr |= CR_CONTROL_ALGO_MD5;
>+ else if (ctx->flags & DRIVER_FLAGS_SHA1)
>+ cr |= CR_CONTROL_ALGO_SHA1;
>+ else if (ctx->flags & DRIVER_FLAGS_SHA224)
>+ cr |= CR_CONTROL_ALGO_SHA224;
>+ else if (ctx->flags & DRIVER_FLAGS_SHA256)
>+ cr |= CR_CONTROL_ALGO_SHA256;
>+ dev_dbg(hdev->dev, "Starting hash process\n");
>+ img_hash_write(hdev, CR_CONTROL, cr);
>+
>+ /*
>+ * The hardware block requires two cycles between writing the
control
>+ * register and writing the first word of data in non DMA mode,
to
>+ * ensure the first data write is not grouped in burst with the
>control + * register write a read is issued to 'flush' the bus.
>+ */
>+ if (!dma)
>+ img_hash_read(hdev, CR_CONTROL);
>+}
>+
>+static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
>+ size_t length, int final)
>+{
>+ u32 count, len32;
>+ const u32 *buffer = (const u32 *)buf;
>+
>+ dev_dbg(hdev->dev, "xmit_cpu: length: %u bytes\n", length);
>+
>+ if (final)
>+ hdev->flags |= DRIVER_FLAGS_FINAL;
>+
>+ len32 = DIV_ROUND_UP(length, sizeof(u32));
>+
>+ for (count = 0; count < len32; count++)
>+ writel_relaxed(buffer[count], hdev->cpu_addr);
>+
>+ return -EINPROGRESS;
>+}
>+
>+static void img_hash_dma_callback(void *data)
>+{
>+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+ if (ctx->bufcnt) {
>+ img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
>+ ctx->bufcnt = 0;
>+ }
>+ if (ctx->sg)
>+ tasklet_schedule(&hdev->dma_task);
>+}
>+
>+static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct
>scatterlist *sg) +{
>+ struct dma_async_tx_descriptor *desc;
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+ ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
>+ if (ctx->dma_ct == 0) {
>+ dev_err(hdev->dev, "Invalid DMA sg\n");
>+ hdev->err = -EINVAL;
>+ return -EINVAL;
>+ }
>+
>+ desc = dmaengine_prep_slave_sg(hdev->dma_lch,
>+ sg,
>+ ctx->dma_ct,
>+ DMA_MEM_TO_DEV,
>+ DMA_PREP_INTERRUPT |
DMA_CTRL_ACK);
>+ if (!desc) {
>+ dev_err(hdev->dev, "Null DMA descriptor\n");
>+ hdev->err = -EINVAL;
>+ dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
>+ return -EINVAL;
>+ }
>+ desc->callback = img_hash_dma_callback;
>+ desc->callback_param = hdev;
>+ dmaengine_submit(desc);
>+ dma_async_issue_pending(hdev->dma_lch);
>+
>+ return 0;
>+}
>+
>+static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+ int ret = 0;
>+
>+ ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx-
>sg),
>+ ctx->buffer, hdev->req->nbytes);
>+
>+ ctx->total = hdev->req->nbytes;
>+ ctx->bufcnt = 0;
>+
>+ hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
>+
>+ img_hash_start(hdev, false);
>+
>+ if (ctx->total)
>+ ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total,
1);
>+ else
>+ ret = 0;
>+
>+ return ret;
>+}
>+
>+static int img_hash_finish(struct ahash_request *req)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+
>+ if (!req->result)
>+ return -EINVAL;
>+
>+ memcpy(req->result, ctx->digest, ctx->digsize);
>+
>+ return 0;
>+}
>+
>+static void img_hash_copy_hash(struct ahash_request *req)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+ u32 *hash = (u32 *)ctx->digest;
>+ int i;
>+
>+ for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
>+ hash[i] = img_hash_read_result_queue(ctx->hdev);
>+}
>+
>+static void img_hash_finish_req(struct ahash_request *req, int err)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+ struct img_hash_dev *hdev = ctx->hdev;
>+
>+ if (!err) {
>+ img_hash_copy_hash(req);
>+ if (DRIVER_FLAGS_FINAL & hdev->flags)
>+ err = img_hash_finish(req);
>+ } else {
>+ dev_warn(hdev->dev, "Hash failed with error %d\n", err);
>+ ctx->flags |= DRIVER_FLAGS_ERROR;
>+ }
>+
>+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
DRIVER_FLAGS_OUTPUT_READY |
>+ DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY |
DRIVER_FLAGS_FINAL); +
>+ if (req->base.complete)
>+ req->base.complete(&req->base, err);
>+}
>+
>+static int img_hash_write_via_dma(struct img_hash_dev *hdev)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+ img_hash_start(hdev, true);
>+
>+ dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
>+
>+ if (!ctx->total)
>+ hdev->flags |= DRIVER_FLAGS_FINAL;
>+
>+ hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
>+
>+ tasklet_schedule(&hdev->dma_task);
>+
>+ return -EINPROGRESS;
>+}
>+
>+static int img_hash_dma_init(struct img_hash_dev *hdev)
>+{
>+ struct dma_slave_config dma_conf;
>+ int err = -EINVAL;
>+
>+ hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
>+ if (!hdev->dma_lch) {
>+ dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.
\n");
>+ return -EBUSY;
>+ }
>+ dma_conf.direction = DMA_MEM_TO_DEV;
>+ dma_conf.dst_addr = hdev->bus_addr;
>+ dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>+ dma_conf.dst_maxburst = 16;
>+ dma_conf.device_fc = false;
>+
>+ err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
>+
>+ if (err) {
>+ dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
>+ dma_release_channel(hdev->dma_lch);
>+ return err;
>+ }
>+ return 0;
>+}
>+
>+static void img_hash_dma_task(unsigned long d)
>+{
>+ struct img_hash_dev *hdev = (struct img_hash_dev *)d;
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+ u8 *addr;
>+ size_t nbytes, bleft, wsend, len, tbc;
>+ struct scatterlist tsg;
>+
>+ if (!ctx->sg)
>+ return;
>+
>+ addr = sg_virt(ctx->sg);
>+ nbytes = ctx->sg->length - ctx->offset;
>+
>+ /* The hash accelerator does not support a data valid mask. This
>means + * that if each dma (i.e. per page) is not a multiple of
4
>bytes, the + * padding bytes in the last word written by that dma
>would erroneously + * be included in the hash. To avoid this we
round
>down the transfer, + * and add the excess to the start of the next
>dma. It does not matter + * that the final dma may not be a
multiple
>of 4 bytes as the hashing + * block is programmed to accept the
>correct number of bytes. + */
>+
>+ bleft = nbytes % 4;
>+ wsend = (nbytes / 4);
>+
>+ if (wsend) {
>+ sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
>+ if (img_hash_xmit_dma(hdev, &tsg)) {
>+ dev_err(hdev->dev, "DMA failed, falling back to
CPU");
>+ ctx->flags |= DRIVER_FLAGS_CPU;
>+ hdev->err = 0;
>+ img_hash_xmit_cpu(hdev, addr + ctx->offset,
>+ wsend * 4, 0);
>+ ctx->sent += wsend * 4;
>+ wsend = 0;
>+ } else {
>+ ctx->sent += wsend * 4;
>+ }
>+ }
>+
>+ if (bleft) {
>+ ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx-
>nents,
>+ ctx->buffer, bleft,
ctx->sent);
>+ tbc = 0;
>+ ctx->sg = sg_next(ctx->sg);
>+ while (ctx->sg && (ctx->bufcnt < 4)) {
>+ len = ctx->sg->length;
>+ if (likely(len > (4 - ctx->bufcnt)))
>+ len = 4 - ctx->bufcnt;
>+ tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx-
>nents,
>+ ctx->buffer + ctx-
>bufcnt, len,
>+ ctx->sent + ctx->bufcnt);
>+ ctx->bufcnt += tbc;
>+ if (tbc >= ctx->sg->length) {
>+ ctx->sg = sg_next(ctx->sg);
>+ tbc = 0;
>+ }
>+ }
>+
>+ ctx->sent += ctx->bufcnt;
>+ ctx->offset = tbc;
>+
>+ if (!wsend)
>+ img_hash_dma_callback(hdev);
>+ } else {
>+ ctx->offset = 0;
>+ ctx->sg = sg_next(ctx->sg);
>+ }
>+}
>+
>+static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
>+{
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+ if (ctx->flags & DRIVER_FLAGS_SG)
>+ dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct,
DMA_TO_DEVICE);
>+
>+ return 0;
>+}
>+
>+static int img_hash_process_data(struct img_hash_dev *hdev)
>+{
>+ struct ahash_request *req = hdev->req;
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+ int err = 0;
>+
>+ ctx->bufcnt = 0;
>+
>+ if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
>+ dev_dbg(hdev->dev, "process data request(%d bytes) using
DMA\n",
>+ req->nbytes);
>+ err = img_hash_write_via_dma(hdev);
>+ } else {
>+ dev_dbg(hdev->dev, "process data request(%d bytes) using
CPU\n",
>+ req->nbytes);
>+ err = img_hash_write_via_cpu(hdev);
>+ }
>+ return err;
>+}
>+
>+static int img_hash_hw_init(struct img_hash_dev *hdev)
>+{
>+ unsigned long long nbits;
>+ u32 u, l;
>+ int ret;
>+
>+ ret = clk_prepare_enable(hdev->iclk);
>+ if (ret)
>+ return ret;
>+
>+ img_hash_write(hdev, CR_RESET, CR_RESET_SET);
>+ img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
>+ img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
>+
>+ nbits = (hdev->req->nbytes << 3);
>+ u = nbits >> 32;
>+ l = nbits;
>+ img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
>+ img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
>+
>+ if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
>+ hdev->flags |= DRIVER_FLAGS_INIT;
>+ hdev->err = 0;
>+ }
>+ dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
>+ return 0;
>+}
>+
>+static int img_hash_init(struct ahash_request *req)
>+{
>+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+ rctx->fallback_req.base.flags = req->base.flags
>+ & CRYPTO_TFM_REQ_MAY_SLEEP;
>+
>+ return crypto_ahash_init(&rctx->fallback_req);
>+}
>+
>+static int img_hash_handle_queue(struct img_hash_dev *hdev,
>+ struct ahash_request *req)
>+{
>+ struct crypto_async_request *async_req, *backlog;
>+ struct img_hash_request_ctx *ctx;
>+ unsigned long flags;
>+ int err = 0, res = 0;
>+
>+ spin_lock_irqsave(&hdev->lock, flags);
>+
>+ if (req)
>+ res = ahash_enqueue_request(&hdev->queue, req);
>+
>+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
>+ spin_unlock_irqrestore(&hdev->lock, flags);
>+ return res;
>+ }
>+
>+ backlog = crypto_get_backlog(&hdev->queue);
>+ async_req = crypto_dequeue_request(&hdev->queue);
>+ if (async_req)
>+ hdev->flags |= DRIVER_FLAGS_BUSY;
>+
>+ spin_unlock_irqrestore(&hdev->lock, flags);
>+
>+ if (!async_req)
>+ return res;
>+
>+ if (backlog)
>+ backlog->complete(backlog, -EINPROGRESS);
>+
>+ req = ahash_request_cast(async_req);
>+ hdev->req = req;
>+
>+ ctx = ahash_request_ctx(req);
>+
>+ dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
>+ ctx->op, req->nbytes);
>+
>+ err = img_hash_hw_init(hdev);
>+
>+ if (!err)
>+ err = img_hash_process_data(hdev);
>+
>+ if (err != -EINPROGRESS) {
>+ /* done_task will not finish so do it here */
>+ img_hash_finish_req(req, err);
>+ }
>+ return res;
>+}
>+
>+static int img_hash_update(struct ahash_request *req)
>+{
>+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+ rctx->fallback_req.base.flags = req->base.flags
>+ & CRYPTO_TFM_REQ_MAY_SLEEP;
>+ rctx->fallback_req.nbytes = req->nbytes;
>+ rctx->fallback_req.src = req->src;
>+
>+ return crypto_ahash_update(&rctx->fallback_req);
>+}
>+
>+static int img_hash_final(struct ahash_request *req)
>+{
>+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+ rctx->fallback_req.base.flags = req->base.flags
>+ & CRYPTO_TFM_REQ_MAY_SLEEP;
>+ rctx->fallback_req.result = req->result;
>+
>+ return crypto_ahash_final(&rctx->fallback_req);
>+}
>+
>+static int img_hash_finup(struct ahash_request *req)
>+{
>+ struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+ struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+ rctx->fallback_req.base.flags = req->base.flags
>+ & CRYPTO_TFM_REQ_MAY_SLEEP;
>+ rctx->fallback_req.nbytes = req->nbytes;
>+ rctx->fallback_req.src = req->src;
>+ rctx->fallback_req.result = req->result;
>+
>+ return crypto_ahash_finup(&rctx->fallback_req);
>+}
>+
>+static int img_hash_digest(struct ahash_request *req)
>+{
>+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+ struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
>+ struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+ struct img_hash_dev *hdev = NULL;
>+ struct img_hash_dev *tmp;
>+ int err;
>+
>+ spin_lock_bh(&img_hash.lock);
>+ if (!tctx->hdev) {
>+ list_for_each_entry(tmp, &img_hash.dev_list, list) {
>+ hdev = tmp;
>+ break;
>+ }
>+ tctx->hdev = hdev;
>+
>+ } else {
>+ hdev = tctx->hdev;
>+ }
>+
>+ spin_unlock_bh(&img_hash.lock);
>+ ctx->hdev = hdev;
>+ ctx->flags = 0;
>+ ctx->digsize = crypto_ahash_digestsize(tfm);
>+
>+ switch (ctx->digsize) {
>+ case SHA1_DIGEST_SIZE:
>+ ctx->flags |= DRIVER_FLAGS_SHA1;
>+ break;
>+ case SHA256_DIGEST_SIZE:
>+ ctx->flags |= DRIVER_FLAGS_SHA256;
>+ break;
>+ case SHA224_DIGEST_SIZE:
>+ ctx->flags |= DRIVER_FLAGS_SHA224;
>+ break;
>+ case MD5_DIGEST_SIZE:
>+ ctx->flags |= DRIVER_FLAGS_MD5;
>+ break;
>+ default:
>+ return -EINVAL;
>+ }
>+
>+ ctx->bufcnt = 0;
>+ ctx->offset = 0;
>+ ctx->sent = 0;
>+ ctx->total = req->nbytes;
>+ ctx->sg = req->src;
>+ ctx->sgfirst = req->src;
>+ ctx->nents = sg_nents(ctx->sg);
>+
>+ err = img_hash_handle_queue(tctx->hdev, req);
>+
>+ return err;
>+}
>+
>+static int img_hash_cra_init(struct crypto_tfm *tfm)
>+{
>+ struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
>+ const char *alg_name = crypto_tfm_alg_name(tfm);
>+ int err = -ENOMEM;
>+
>+ ctx->fallback = crypto_alloc_ahash(alg_name, 0,
>+ CRYPTO_ALG_NEED_FALLBACK);
>+ if (IS_ERR(ctx->fallback)) {
>+ pr_err("img_hash: Could not load fallback driver.\n");
>+ err = PTR_ERR(ctx->fallback);
>+ goto err;
>+ }
>+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
>+ sizeof(struct img_hash_request_ctx) +
>+ IMG_HASH_DMA_THRESHOLD);
>+
>+ return 0;
>+
>+err:
>+ return err;
>+}
>+
>+static void img_hash_cra_exit(struct crypto_tfm *tfm)
>+{
>+ struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
>+
>+ crypto_free_ahash(tctx->fallback);
>+}
>+
>+static irqreturn_t img_irq_handler(int irq, void *dev_id)
>+{
>+ struct img_hash_dev *hdev = dev_id;
>+ u32 reg;
>+
>+ reg = img_hash_read(hdev, CR_INTSTAT);
>+ img_hash_write(hdev, CR_INTCLEAR, reg);
>+
>+ if (reg & CR_INT_NEW_RESULTS_SET) {
>+ dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
>+ if (DRIVER_FLAGS_BUSY & hdev->flags) {
>+ hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
>+ if (!(DRIVER_FLAGS_CPU & hdev->flags))
>+ hdev->flags |= DRIVER_FLAGS_DMA_READY;
>+ tasklet_schedule(&hdev->done_task);
>+ } else {
>+ dev_warn(hdev->dev,
>+ "HASH interrupt when no active
requests.\n");
>+ }
>+ } else if (reg & CR_INT_RESULTS_AVAILABLE) {
>+ dev_warn(hdev->dev,
>+ "IRQ triggered before the hash had
completed\n");
>+ } else if (reg & CR_INT_RESULT_READ_ERR) {
>+ dev_warn(hdev->dev,
>+ "Attempt to read from an empty result
queue\n");
>+ } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
>+ dev_warn(hdev->dev,
>+ "Data written before the hardware was
configured\n");
>+ }
>+ return IRQ_HANDLED;
>+}
>+
>+static struct ahash_alg img_algs[] = {
>+ {
>+ .init = img_hash_init,
>+ .update = img_hash_update,
>+ .final = img_hash_final,
>+ .finup = img_hash_finup,
>+ .digest = img_hash_digest,
>+ .halg = {
>+ .digestsize = MD5_DIGEST_SIZE,
>+ .base = {
>+ .cra_name = "md5",
>+ .cra_driver_name = "img-md5",
>+ .cra_priority = 301,
Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
all you need is a priority of more than 100 to "beat" the generic C
prios. Maybe you also need to beat the standard assembler
implementations which are routinely at 200 for hashes. So, a prio of 300
should suffice, should it not?
>+ .cra_flags =
>+ CRYPTO_ALG_ASYNC |
>+ CRYPTO_ALG_NEED_FALLBACK,
>+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
>+ .cra_ctxsize = sizeof(struct
img_hash_ctx),
>+ .cra_init = img_hash_cra_init,
>+ .cra_exit = img_hash_cra_exit,
>+ .cra_module = THIS_MODULE,
>+ }
>+ }
>+ },
>+ {
>+ .init = img_hash_init,
>+ .update = img_hash_update,
>+ .final = img_hash_final,
>+ .finup = img_hash_finup,
>+ .digest = img_hash_digest,
>+ .halg = {
>+ .digestsize = SHA1_DIGEST_SIZE,
>+ .base = {
>+ .cra_name = "sha1",
>+ .cra_driver_name = "img-sha1",
>+ .cra_priority = 3000,
>+ .cra_flags =
>+ CRYPTO_ALG_ASYNC |
>+ CRYPTO_ALG_NEED_FALLBACK,
>+ .cra_blocksize = SHA1_BLOCK_SIZE,
>+ .cra_ctxsize = sizeof(struct
img_hash_ctx),
>+ .cra_init = img_hash_cra_init,
>+ .cra_exit = img_hash_cra_exit,
>+ .cra_module = THIS_MODULE,
>+ }
>+ }
>+ },
>+ {
>+ .init = img_hash_init,
>+ .update = img_hash_update,
>+ .final = img_hash_final,
>+ .finup = img_hash_finup,
>+ .digest = img_hash_digest,
>+ .halg = {
>+ .digestsize = SHA224_DIGEST_SIZE,
>+ .base = {
>+ .cra_name = "sha224",
>+ .cra_driver_name = "img-sha224",
>+ .cra_priority = 3000,
>+ .cra_flags =
>+ CRYPTO_ALG_ASYNC |
>+ CRYPTO_ALG_NEED_FALLBACK,
>+ .cra_blocksize = SHA224_BLOCK_SIZE,
>+ .cra_ctxsize = sizeof(struct
img_hash_ctx),
>+ .cra_init = img_hash_cra_init,
>+ .cra_exit = img_hash_cra_exit,
>+ .cra_module = THIS_MODULE,
>+ }
>+ }
>+ },
>+ {
>+ .init = img_hash_init,
>+ .update = img_hash_update,
>+ .final = img_hash_final,
>+ .finup = img_hash_finup,
>+ .digest = img_hash_digest,
>+ .halg = {
>+ .digestsize = SHA256_DIGEST_SIZE,
>+ .base = {
>+ .cra_name = "sha256",
>+ .cra_driver_name = "img-sha256",
>+ .cra_priority = 301,
>+ .cra_flags =
>+ CRYPTO_ALG_ASYNC |
>+ CRYPTO_ALG_NEED_FALLBACK,
>+ .cra_blocksize = SHA256_BLOCK_SIZE,
>+ .cra_ctxsize = sizeof(struct
img_hash_ctx),
>+ .cra_init = img_hash_cra_init,
>+ .cra_exit = img_hash_cra_exit,
>+ .cra_module = THIS_MODULE,
>+ }
>+ }
>+ }
>+};
>+
>+static int img_register_algs(struct img_hash_dev *hdev)
>+{
>+ int i, err;
>+
>+ for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
>+ err = crypto_register_ahash(&img_algs[i]);
>+ if (err)
>+ goto err_reg;
>+ }
>+ return 0;
>+
>+err_reg:
>+ for (; i--; )
>+ crypto_unregister_ahash(&img_algs[i]);
>+
>+ return err;
>+}
>+
>+static int img_unregister_algs(struct img_hash_dev *hdev)
>+{
>+ int i;
>+
>+ for (i = 0; i < ARRAY_SIZE(img_algs); i++)
>+ crypto_unregister_ahash(&img_algs[i]);
>+ return 0;
>+}
>+
>+static void img_hash_done_task(unsigned long data)
>+{
>+ struct img_hash_dev *hdev = (struct img_hash_dev *)data;
>+ int err = 0;
>+
>+ if (hdev->err == -EINVAL) {
>+ err = hdev->err;
>+ goto finish;
>+ }
>+
>+ if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
>+ img_hash_handle_queue(hdev, NULL);
>+ return;
>+ }
>+
>+ if (DRIVER_FLAGS_CPU & hdev->flags) {
>+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
>+ hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
>+ goto finish;
>+ }
>+ } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
>+ if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
>+ hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
>+ img_hash_write_via_dma_stop(hdev);
>+ if (hdev->err) {
>+ err = hdev->err;
>+ goto finish;
>+ }
>+ }
>+ if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
>+ hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
>+ DRIVER_FLAGS_OUTPUT_READY);
>+ goto finish;
>+ }
>+ }
>+ return;
>+
>+finish:
>+ img_hash_finish_req(hdev->req, err);
>+}
>+
>+static const struct of_device_id img_hash_match[] = {
>+ { .compatible = "img,hash-accelerator" },
>+ {}
>+};
>+MODULE_DEVICE_TABLE(of, img_hash_match)
>+
>+static int img_hash_probe(struct platform_device *pdev)
>+{
>+ struct img_hash_dev *hdev;
>+ struct device *dev = &pdev->dev;
>+ struct resource *hash_res;
>+ int err;
>+
>+ hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
>+ if (hdev == NULL) {
>+ err = -ENOMEM;
>+ goto sha_dev_err;
>+ }
>+ spin_lock_init(&hdev->lock);
>+
>+ hdev->dev = dev;
>+
>+ platform_set_drvdata(pdev, hdev);
>+
>+ INIT_LIST_HEAD(&hdev->list);
>+
>+ tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned
>long)hdev); + tasklet_init(&hdev->dma_task, img_hash_dma_task,
>(unsigned long)hdev); +
>+ crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
>+
>+ /* Register bank */
>+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>+
>+ hdev->io_base = devm_ioremap_resource(dev, hash_res);
>+ if (IS_ERR(hdev->io_base)) {
>+ dev_err(dev, "can't ioremap\n");
>+ err = PTR_ERR(hdev->io_base);
>+ goto hash_io_err;
>+ }
>+
>+ /* Write port (DMA or CPU) */
>+ hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
>+ if (!hash_res) {
>+ dev_err(dev, "no write port resource info\n");
>+ err = -ENODEV;
>+ goto res_err;
>+ }
>+ hdev->bus_addr = hash_res->start;
>+ hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
>+ if (IS_ERR(hdev->cpu_addr)) {
>+ dev_err(dev, "can't ioremap write port\n");
>+ err = PTR_ERR(hdev->cpu_addr);
>+ goto res_err;
>+ }
>+
>+ hdev->irq = platform_get_irq(pdev, 0);
>+ if (hdev->irq < 0) {
>+ dev_err(dev, "no IRQ resource info\n");
>+ err = hdev->irq;
>+ goto res_err;
>+ }
>+
>+ err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
>+ dev_name(dev), hdev);
>+ if (err) {
>+ dev_err(dev, "unable to request %s irq\n",
dev_name(dev));
>+ goto res_err;
>+ }
>+ dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
>+
>+ hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
>+ if (IS_ERR(hdev->iclk)) {
>+ dev_err(dev, "clock initialization failed.\n");
>+ err = PTR_ERR(hdev->iclk);
>+ goto res_err;
>+ }
>+
>+ hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
>+ if (IS_ERR(hdev->fclk)) {
>+ dev_err(dev, "clock initialization failed.\n");
>+ err = PTR_ERR(hdev->fclk);
>+ goto res_err;
>+ }
>+
>+ err = img_hash_dma_init(hdev);
>+ if (err)
>+ goto err_dma;
>+
>+ dev_dbg(dev, "using %s for DMA transfers\n",
>+ dma_chan_name(hdev->dma_lch));
>+
>+ spin_lock(&img_hash.lock);
>+ list_add_tail(&hdev->list, &img_hash.dev_list);
>+ spin_unlock(&img_hash.lock);
>+
>+ err = img_register_algs(hdev);
>+ if (err)
>+ goto err_algs;
>+ dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr,
0x4);
>+ dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator
>initialized\n"); +
>+ return 0;
>+
>+err_algs:
>+ spin_lock(&img_hash.lock);
>+ list_del(&hdev->list);
>+ spin_unlock(&img_hash.lock);
>+ dma_release_channel(hdev->dma_lch);
>+err_dma:
>+hash_io_err:
>+ devm_clk_put(dev, hdev->iclk);
>+ devm_clk_put(dev, hdev->fclk);
>+res_err:
>+ tasklet_kill(&hdev->done_task);
>+ tasklet_kill(&hdev->dma_task);
>+sha_dev_err:
>+ dev_err(dev, "initialization failed.\n");
>+ return err;
>+}
>+
>+static int img_hash_remove(struct platform_device *pdev)
>+{
>+ static struct img_hash_dev *hdev;
>+
>+ hdev = platform_get_drvdata(pdev);
>+ spin_lock(&img_hash.lock);
>+ list_del(&hdev->list);
>+ spin_unlock(&img_hash.lock);
>+
>+ img_unregister_algs(hdev);
>+
>+ tasklet_kill(&hdev->done_task);
>+ tasklet_kill(&hdev->dma_task);
>+
>+ dma_release_channel(hdev->dma_lch);
>+
>+ clk_disable_unprepare(hdev->iclk);
>+ clk_disable_unprepare(hdev->fclk);
>+ return 0;
>+}
>+
>+static struct platform_driver img_hash_driver = {
>+ .probe = img_hash_probe,
>+ .remove = img_hash_remove,
>+ .driver = {
>+ .name = "img-hash-accelerator",
>+ .of_match_table = of_match_ptr(img_hash_match),
>+ }
>+};
>+module_platform_driver(img_hash_driver);
>+
>+MODULE_LICENSE("GPL v2");
>+MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
>+MODULE_AUTHOR("Will Thomas.");
>+MODULE_AUTHOR("James Hartley.");
Ciao
Stephan
On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
>
> >+static struct ahash_alg img_algs[] = {
> >+ {
> >+ .init = img_hash_init,
> >+ .update = img_hash_update,
> >+ .final = img_hash_final,
> >+ .finup = img_hash_finup,
> >+ .digest = img_hash_digest,
> >+ .halg = {
> >+ .digestsize = MD5_DIGEST_SIZE,
> >+ .base = {
> >+ .cra_name = "md5",
> >+ .cra_driver_name = "img-md5",
> >+ .cra_priority = 301,
>
> Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
> all you need is a priority of more than 100 to "beat" the generic C
> prios. Maybe you also need to beat the standard assembler
> implementations which are routinely at 200 for hashes. So, a prio of 300
> should suffice, should it not?
James, can you answer Stephan's question please?
Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> -----Original Message-----
> From: Herbert Xu [mailto:[email protected]]
> Sent: 10 March 2015 09:37
> To: Stephan Mueller
> Cc: James Hartley; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; Ezequiel Garcia; linux-
> [email protected]
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
> On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
> >
> > >+static struct ahash_alg img_algs[] = {
> > >+ {
> > >+ .init = img_hash_init,
> > >+ .update = img_hash_update,
> > >+ .final = img_hash_final,
> > >+ .finup = img_hash_finup,
> > >+ .digest = img_hash_digest,
> > >+ .halg = {
> > >+ .digestsize = MD5_DIGEST_SIZE,
> > >+ .base = {
> > >+ .cra_name = "md5",
> > >+ .cra_driver_name = "img-md5",
> > >+ .cra_priority = 301,
> >
> > Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
> > all you need is a priority of more than 100 to "beat" the generic C
> > prios. Maybe you also need to beat the standard assembler
> > implementations which are routinely at 200 for hashes. So, a prio of
> > 300 should suffice, should it not?
>
> James, can you answer Stephan's question please?
Hi Herbert, and Stephan,
The difficulty here is that the driver was written by a summer placement student who has since left the company, and despite searching our internal commit logs I'm unable to find any reason why 301 and 3000 are used. I am happy to set them to 300 if that is a sensible figure to use.
Thanks for the review Stephan!
>
> Thanks,
> --
> Email: Herbert Xu <[email protected]> Home Page:
> http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
James.
Resend with correct email address for Andrew Bresticker.
> -----Original Message-----
> From: James Hartley
> Sent: 10 March 2015 09:53
> To: 'Herbert Xu'; Stephan Mueller
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Ezequiel Garcia;
> [email protected]
> Subject: RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
>
> > -----Original Message-----
> > From: Herbert Xu [mailto:[email protected]]
> > Sent: 10 March 2015 09:37
> > To: Stephan Mueller
> > Cc: James Hartley; [email protected]; [email protected];
> > [email protected]; [email protected];
> > [email protected]; Ezequiel Garcia; linux-
> > [email protected]
> > Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> > accelerator
> >
> > On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
> > >
> > > >+static struct ahash_alg img_algs[] = {
> > > >+ {
> > > >+ .init = img_hash_init,
> > > >+ .update = img_hash_update,
> > > >+ .final = img_hash_final,
> > > >+ .finup = img_hash_finup,
> > > >+ .digest = img_hash_digest,
> > > >+ .halg = {
> > > >+ .digestsize = MD5_DIGEST_SIZE,
> > > >+ .base = {
> > > >+ .cra_name = "md5",
> > > >+ .cra_driver_name = "img-md5",
> > > >+ .cra_priority = 301,
> > >
> > > Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
> > > all you need is a priority of more than 100 to "beat" the generic C
> > > prios. Maybe you also need to beat the standard assembler
> > > implementations which are routinely at 200 for hashes. So, a prio of
> > > 300 should suffice, should it not?
> >
> > James, can you answer Stephan's question please?
>
> Hi Herbert, and Stephan,
>
> The difficulty here is that the driver was written by a summer placement
> student who has since left the company, and despite searching our internal
> commit logs I'm unable to find any reason why 301 and 3000 are used. I am
> happy to set them to 300 if that is a sensible figure to use.
>
> Thanks for the review Stephan!
>
> >
> > Thanks,
> > --
> > Email: Herbert Xu <[email protected]> Home Page:
> > http://gondor.apana.org.au/~herbert/
> > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
>
> James.
On Tue, Mar 10, 2015 at 09:54:49AM +0000, James Hartley wrote:
>
> > Hi Herbert, and Stephan,
> >
> > The difficulty here is that the driver was written by a summer placement
> > student who has since left the company, and despite searching our internal
> > commit logs I'm unable to find any reason why 301 and 3000 are used. I am
> > happy to set them to 300 if that is a sensible figure to use.
OK please resend the patches with that change James.
Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> -----Original Message-----
> From: [email protected] [mailto:linux-crypto-
> [email protected]] On Behalf Of Herbert Xu
> Sent: 10 March 2015 09:56
> To: James Hartley
> Cc: Stephan Mueller; [email protected]; [email protected];
> [email protected]; [email protected]; Andrew Bresticker
> ([email protected]); Ezequiel Garcia; [email protected]
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
> On Tue, Mar 10, 2015 at 09:54:49AM +0000, James Hartley wrote:
> >
> > > Hi Herbert, and Stephan,
> > >
> > > The difficulty here is that the driver was written by a summer
> > > placement student who has since left the company, and despite
> > > searching our internal commit logs I'm unable to find any reason why
> > > 301 and 3000 are used. I am happy to set them to 300 if that is a
> sensible figure to use.
>
> OK please resend the patches with that change James.
Will do, I want to address Andrew Bresticker's review comments first though.
Thanks,
James
>
> Thanks,
> --
> Email: Herbert Xu <[email protected]> Home Page:
> http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> --
> To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the
> body of a message to [email protected] More majordomo info at
> http://vger.kernel.org/majordomo-info.html
Hi Andrew
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of
> Andrew Bresticker
> Sent: 09 March 2015 05:42
> To: James Hartley
> Cc: [email protected]
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
> Hi James,
>
> On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <[email protected]>
> wrote:
> > This adds support for the Imagination Technologies hash accelerator
> > which provides hardware acceleration for
> > SHA1 SHA224 SHA256 and MD5 hashes.
> >
> > Signed-off-by: James Hartley <[email protected]>
>
> Some general comments below, I'll leave review of the crypto-specific stuff to
> Herbert.
>
> > diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index
> > 3924f93..fb84be7 100644
> > --- a/drivers/crypto/Makefile
> > +++ b/drivers/crypto/Makefile
> > @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> > obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> > obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> > obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
> > +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> > obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> > obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> > obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o @@ -25,3 +26,4 @@
> > obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> > obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> > obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> > obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
> > +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>
> Unrelated change - perhaps a bad merge conflict resolution?
Yep, now resolved, thanks.
>
> > diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c new
> > file mode 100644 index 0000000..94a3a6f
> > --- /dev/null
> > +++ b/drivers/crypto/img-hash.c
> > @@ -0,0 +1,1037 @@
> > +/*
> > + * Copyright (c) 2014 Imagination Technologies
> > + * Authors: Will Thomas, James Hartley
> > + *
> > + * This program is free software; you can redistribute it and/or
> > +modify
> > + * it under the terms of the GNU General Public License version 2 as
> > +published
> > + * by the Free Software Foundation.
> > + *
> > + * Interface structure taken from omap-sham driver
> > + */
> > +
> > +#include <linux/clk.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/of_device.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/scatterlist.h>
> > +
> > +#include <crypto/internal/hash.h>
> > +#include <crypto/md5.h>
> > +#include <crypto/sha.h>
> > +
> > +#define CR_RESET 0
> > +#define CR_RESET_SET 1
> > +#define CR_RESET_UNSET 0
> > +
> > +#define CR_MESSAGE_LENGTH_H 0x4
> > +#define CR_MESSAGE_LENGTH_L 0x8
> > +
> > +#define CR_CONTROL 0xc
> > +#define CR_CONTROL_BYTE_ORDER_3210 0
> > +#define CR_CONTROL_BYTE_ORDER_0123 1
> > +#define CR_CONTROL_BYTE_ORDER_2310 2
> > +#define CR_CONTROL_BYTE_ORDER_1032 3
> > +#define CR_CONTROL_BYTE_ORDER_SHIFT 8
> > +#define CR_CONTROL_ALGO_MD5 0
> > +#define CR_CONTROL_ALGO_SHA1 1
> > +#define CR_CONTROL_ALGO_SHA224 2
> > +#define CR_CONTROL_ALGO_SHA256 3
> > +
> > +#define CR_INTSTAT 0x10
> > +#define CR_INTENAB 0x14
> > +#define CR_INTCLEAR 0x18
> > +#define CR_INT_RESULTS_AVAILABLE BIT(0)
> > +#define CR_INT_NEW_RESULTS_SET BIT(1)
> > +#define CR_INT_RESULT_READ_ERR BIT(2)
> > +#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
> > +#define CR_INT_STATUS BIT(8)
> > +
> > +#define CR_RESULT_QUEUE 0x1c
> > +#define CR_RSD0 0x40
> > +#define CR_CORE_REV 0x50
> > +#define CR_CORE_DES1 0x60
> > +#define CR_CORE_DES2 0x70
> > +
> > +#define DRIVER_FLAGS_BUSY BIT(0)
> > +#define DRIVER_FLAGS_FINAL BIT(1)
> > +#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
> > +#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
> > +#define DRIVER_FLAGS_INIT BIT(4)
> > +#define DRIVER_FLAGS_CPU BIT(5)
> > +#define DRIVER_FLAGS_DMA_READY BIT(6)
> > +#define DRIVER_FLAGS_ERROR BIT(7)
> > +#define DRIVER_FLAGS_SG BIT(8)
> > +#define DRIVER_FLAGS_SHA1 BIT(18)
> > +#define DRIVER_FLAGS_SHA224 BIT(19)
> > +#define DRIVER_FLAGS_SHA256 BIT(20)
> > +#define DRIVER_FLAGS_MD5 BIT(21)
> > +
> > +#define IMG_HASH_QUEUE_LENGTH 20
> > +#define IMG_HASH_DMA_THRESHOLD 64
> > +
> > +#ifdef __LITTLE_ENDIAN
> > +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_3210)
> > +#else
> > +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_0123)
> > +#endif
>
> Unnecessary parenthesis.
Fixed
>
> > +struct img_hash_dev;
> > +
> > +struct img_hash_request_ctx {
> > + struct img_hash_dev *hdev;
> > + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
> > + unsigned long flags;
> > + size_t digsize;
> > +
> > + dma_addr_t dma_addr;
> > + size_t dma_ct;
> > +
> > + /* sg root */
> > + struct scatterlist *sgfirst;
> > + /* walk state */
> > + struct scatterlist *sg;
> > + size_t nents;
> > + size_t offset;
> > + unsigned int total;
> > + size_t sent;
> > +
> > + unsigned long op;
> > +
> > + size_t bufcnt;
> > + u8 buffer[0] __aligned(sizeof(u32));
> > + struct ahash_request fallback_req;
> > +};
> > +
> > +struct img_hash_ctx {
> > + struct img_hash_dev *hdev;
> > + unsigned long flags;
> > + struct crypto_ahash *fallback;
> > +};
> > +
> > +struct img_hash_dev {
> > + struct list_head list;
> > + struct device *dev;
> > + struct clk *iclk;
> > + struct clk *fclk;
>
> Maybe make these names a little more obvious so it's clear which clock is
> which.
Agreed - done
>
> > + int irq;
>
> This isn't used outside of probe(), so it need not be carried around in this
> struct.
Done
>
> > + void __iomem *io_base;
> > +
> > + phys_addr_t bus_addr;
> > + void __iomem *cpu_addr;
> > +
> > + spinlock_t lock;
> > + int err;
> > + struct tasklet_struct done_task;
> > + struct tasklet_struct dma_task;
> > +
> > + unsigned long flags;
> > + struct crypto_queue queue;
> > + struct ahash_request *req;
> > +
> > + struct dma_chan *dma_lch;
> > +};
> > +
> > +struct img_hash_drv {
> > + struct list_head dev_list;
> > + spinlock_t lock;
> > +};
> > +
> > +static struct img_hash_drv img_hash = {
> > + .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
> > + .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
> > +};
> > +
> > +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32
> > +offset) {
> > + return readl_relaxed(hdev->io_base + offset); }
> > +
> > +static inline void img_hash_write(struct img_hash_dev *hdev,
> > + u32 offset, u32 value) {
> > + writel_relaxed(value, hdev->io_base + offset); }
> > +
> > +static inline u32 img_hash_read_result_queue(struct img_hash_dev
> > +*hdev) {
> > + return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); }
> > +
> > +static void img_hash_start(struct img_hash_dev *hdev, bool dma) {
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
> > +
> > + if (ctx->flags & DRIVER_FLAGS_MD5)
> > + cr |= CR_CONTROL_ALGO_MD5;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA1)
> > + cr |= CR_CONTROL_ALGO_SHA1;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA224)
> > + cr |= CR_CONTROL_ALGO_SHA224;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA256)
> > + cr |= CR_CONTROL_ALGO_SHA256;
> > + dev_dbg(hdev->dev, "Starting hash process\n");
> > + img_hash_write(hdev, CR_CONTROL, cr);
> > +
> > + /*
> > + * The hardware block requires two cycles between writing the control
> > + * register and writing the first word of data in non DMA mode, to
> > + * ensure the first data write is not grouped in burst with the control
> > + * register write a read is issued to 'flush' the bus.
> > + */
> > + if (!dma)
> > + img_hash_read(hdev, CR_CONTROL); }
> > +
> > +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
> > + size_t length, int final) {
> > + u32 count, len32;
> > + const u32 *buffer = (const u32 *)buf;
> > +
> > + dev_dbg(hdev->dev, "xmit_cpu: length: %u bytes\n", length);
> > +
> > + if (final)
> > + hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > + len32 = DIV_ROUND_UP(length, sizeof(u32));
> > +
> > + for (count = 0; count < len32; count++)
> > + writel_relaxed(buffer[count], hdev->cpu_addr);
> > +
> > + return -EINPROGRESS;
> > +}
> > +
> > +static void img_hash_dma_callback(void *data) {
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > + struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > + if (ctx->bufcnt) {
> > + img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
> > + ctx->bufcnt = 0;
> > + }
> > + if (ctx->sg)
> > + tasklet_schedule(&hdev->dma_task);
> > +}
> > +
> > +static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct
> > +scatterlist *sg) {
> > + struct dma_async_tx_descriptor *desc;
> > + struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > + if (ctx->dma_ct == 0) {
> > + dev_err(hdev->dev, "Invalid DMA sg\n");
> > + hdev->err = -EINVAL;
> > + return -EINVAL;
> > + }
> > +
> > + desc = dmaengine_prep_slave_sg(hdev->dma_lch,
> > + sg,
>
> Line this up (and the rest of this statement) with the open paren of
> dmaengine_prep_slave_sg().
Done
>
> > + ctx->dma_ct,
> > + DMA_MEM_TO_DEV,
> > + DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> > + if (!desc) {
> > + dev_err(hdev->dev, "Null DMA descriptor\n");
> > + hdev->err = -EINVAL;
> > + dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > + return -EINVAL;
> > + }
> > + desc->callback = img_hash_dma_callback;
> > + desc->callback_param = hdev;
> > + dmaengine_submit(desc);
> > + dma_async_issue_pending(hdev->dma_lch);
> > +
> > + return 0;
> > +}
> > +
> > +static int img_hash_write_via_cpu(struct img_hash_dev *hdev) {
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + int ret = 0;
> > +
> > + ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
> > + ctx->buffer,
> > + hdev->req->nbytes);
> > +
> > + ctx->total = hdev->req->nbytes;
> > + ctx->bufcnt = 0;
> > +
> > + hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
> > +
> > + img_hash_start(hdev, false);
> > +
> > + if (ctx->total)
> > + ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
> > + else
> > + ret = 0;
>
> The else block here (and possibly the entire check) seems unnecessary.
> img_hash_xmit_cpu() won't do anything if the length is 0.
Yes you're right, I've removed the check, and the temporary variable.
>
> > +
> > + return ret;
> > +}
> > +
> > +static int img_hash_finish(struct ahash_request *req) {
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +
> > + if (!req->result)
> > + return -EINVAL;
> > +
> > + memcpy(req->result, ctx->digest, ctx->digsize);
> > +
> > + return 0;
> > +}
> > +
> > +static void img_hash_copy_hash(struct ahash_request *req) {
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + u32 *hash = (u32 *)ctx->digest;
> > + int i;
> > +
> > + for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
> > + hash[i] = img_hash_read_result_queue(ctx->hdev);
> > +}
> > +
> > +static void img_hash_finish_req(struct ahash_request *req, int err) {
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + struct img_hash_dev *hdev = ctx->hdev;
> > +
> > + if (!err) {
> > + img_hash_copy_hash(req);
> > + if (DRIVER_FLAGS_FINAL & hdev->flags)
> > + err = img_hash_finish(req);
> > + } else {
> > + dev_warn(hdev->dev, "Hash failed with error %d\n", err);
> > + ctx->flags |= DRIVER_FLAGS_ERROR;
> > + }
> > +
> > + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> DRIVER_FLAGS_OUTPUT_READY |
> > + DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY |
> > + DRIVER_FLAGS_FINAL);
> > +
> > + if (req->base.complete)
> > + req->base.complete(&req->base, err); }
> > +
> > +static int img_hash_write_via_dma(struct img_hash_dev *hdev) {
> > + struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > + img_hash_start(hdev, true);
> > +
> > + dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
> > +
> > + if (!ctx->total)
> > + hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > + hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
> > +
> > + tasklet_schedule(&hdev->dma_task);
> > +
> > + return -EINPROGRESS;
> > +}
> > +
> > +static int img_hash_dma_init(struct img_hash_dev *hdev) {
> > + struct dma_slave_config dma_conf;
> > + int err = -EINVAL;
> > +
> > + hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
> > + if (!hdev->dma_lch) {
> > + dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
> > + return -EBUSY;
> > + }
> > + dma_conf.direction = DMA_MEM_TO_DEV;
> > + dma_conf.dst_addr = hdev->bus_addr;
> > + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > + dma_conf.dst_maxburst = 16;
> > + dma_conf.device_fc = false;
> > +
> > + err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
> > +
> > + if (err) {
> > + dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
> > + dma_release_channel(hdev->dma_lch);
> > + return err;
> > + }
> > + return 0;
>
> nit: I would expect there to be a newline before the return rather than before
> the if().
Done
>
> > +}
> > +
> > +static void img_hash_dma_task(unsigned long d) {
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)d;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + u8 *addr;
> > + size_t nbytes, bleft, wsend, len, tbc;
> > + struct scatterlist tsg;
> > +
> > + if (!ctx->sg)
> > + return;
> > +
> > + addr = sg_virt(ctx->sg);
> > + nbytes = ctx->sg->length - ctx->offset;
> > +
> > + /* The hash accelerator does not support a data valid mask. This
> means
> > + * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
> > + * padding bytes in the last word written by that dma would
> erroneously
> > + * be included in the hash. To avoid this we round down the transfer,
> > + * and add the excess to the start of the next dma. It does not matter
> > + * that the final dma may not be a multiple of 4 bytes as the hashing
> > + * block is programmed to accept the correct number of bytes.
> > + */
>
> nit: Multi-line comment style is:
>
> /*
> * blah blah blah
> */
Fixed
>
> > +
> > + bleft = nbytes % 4;
> > + wsend = (nbytes / 4);
> > +
> > + if (wsend) {
> > + sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
> > + if (img_hash_xmit_dma(hdev, &tsg)) {
> > + dev_err(hdev->dev, "DMA failed, falling back to CPU");
> > + ctx->flags |= DRIVER_FLAGS_CPU;
> > + hdev->err = 0;
> > + img_hash_xmit_cpu(hdev, addr + ctx->offset,
> > + wsend * 4, 0);
> > + ctx->sent += wsend * 4;
> > + wsend = 0;
> > + } else {
> > + ctx->sent += wsend * 4;
> > + }
> > + }
> > +
> > + if (bleft) {
> > + ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > + ctx->buffer, bleft, ctx->sent);
> > + tbc = 0;
> > + ctx->sg = sg_next(ctx->sg);
> > + while (ctx->sg && (ctx->bufcnt < 4)) {
> > + len = ctx->sg->length;
> > + if (likely(len > (4 - ctx->bufcnt)))
> > + len = 4 - ctx->bufcnt;
> > + tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > + ctx->buffer + ctx->bufcnt, len,
> > + ctx->sent + ctx->bufcnt);
> > + ctx->bufcnt += tbc;
> > + if (tbc >= ctx->sg->length) {
> > + ctx->sg = sg_next(ctx->sg);
> > + tbc = 0;
> > + }
> > + }
> > +
> > + ctx->sent += ctx->bufcnt;
> > + ctx->offset = tbc;
> > +
> > + if (!wsend)
> > + img_hash_dma_callback(hdev);
> > + } else {
> > + ctx->offset = 0;
> > + ctx->sg = sg_next(ctx->sg);
> > + }
> > +}
> > +
> > +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev) {
> > + struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > + if (ctx->flags & DRIVER_FLAGS_SG)
> > + dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct,
> > + DMA_TO_DEVICE);
> > +
> > + return 0;
> > +}
> > +
> > +static int img_hash_process_data(struct img_hash_dev *hdev) {
> > + struct ahash_request *req = hdev->req;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + int err = 0;
> > +
> > + ctx->bufcnt = 0;
> > +
> > + if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
> > + dev_dbg(hdev->dev, "process data request(%d bytes) using
> DMA\n",
> > + req->nbytes);
> > + err = img_hash_write_via_dma(hdev);
> > + } else {
> > + dev_dbg(hdev->dev, "process data request(%d bytes) using
> CPU\n",
> > + req->nbytes);
> > + err = img_hash_write_via_cpu(hdev);
> > + }
> > + return err;
> > +}
> > +
> > +static int img_hash_hw_init(struct img_hash_dev *hdev) {
> > + unsigned long long nbits;
> > + u32 u, l;
> > + int ret;
> > +
> > + ret = clk_prepare_enable(hdev->iclk);
>
> It looks like there's no corresponding clk_disable_unprepare() to this, so the
> prepare/enable counts will just keep increasing. Perhaps it would be better
> to manage enabling/disabling the clocks with runtime PM?
Yes it probably would be - I'll look at doing that.
>
> > + if (ret)
> > + return ret;
> > +
> > + img_hash_write(hdev, CR_RESET, CR_RESET_SET);
> > + img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
> > + img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
> > +
> > + nbits = (hdev->req->nbytes << 3);
> > + u = nbits >> 32;
> > + l = nbits;
> > + img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
> > + img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
> > +
> > + if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
> > + hdev->flags |= DRIVER_FLAGS_INIT;
> > + hdev->err = 0;
> > + }
> > + dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
> > + return 0;
> > +}
> > +
> > +static int img_hash_init(struct ahash_request *req) {
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +
> > + return crypto_ahash_init(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_handle_queue(struct img_hash_dev *hdev,
> > + struct ahash_request *req) {
> > + struct crypto_async_request *async_req, *backlog;
> > + struct img_hash_request_ctx *ctx;
> > + unsigned long flags;
> > + int err = 0, res = 0;
> > +
> > + spin_lock_irqsave(&hdev->lock, flags);
> > +
> > + if (req)
> > + res = ahash_enqueue_request(&hdev->queue, req);
> > +
> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > + spin_unlock_irqrestore(&hdev->lock, flags);
> > + return res;
> > + }
> > +
> > + backlog = crypto_get_backlog(&hdev->queue);
> > + async_req = crypto_dequeue_request(&hdev->queue);
> > + if (async_req)
> > + hdev->flags |= DRIVER_FLAGS_BUSY;
> > +
> > + spin_unlock_irqrestore(&hdev->lock, flags);
> > +
> > + if (!async_req)
> > + return res;
> > +
> > + if (backlog)
> > + backlog->complete(backlog, -EINPROGRESS);
> > +
> > + req = ahash_request_cast(async_req);
> > + hdev->req = req;
> > +
> > + ctx = ahash_request_ctx(req);
> > +
> > + dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
> > + ctx->op, req->nbytes);
> > +
> > + err = img_hash_hw_init(hdev);
> > +
> > + if (!err)
> > + err = img_hash_process_data(hdev);
> > +
> > + if (err != -EINPROGRESS) {
> > + /* done_task will not finish so do it here */
> > + img_hash_finish_req(req, err);
> > + }
> > + return res;
> > +}
> > +
> > +static int img_hash_update(struct ahash_request *req) {
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.nbytes = req->nbytes;
> > + rctx->fallback_req.src = req->src;
> > +
> > + return crypto_ahash_update(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_final(struct ahash_request *req) {
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.result = req->result;
> > +
> > + return crypto_ahash_final(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_finup(struct ahash_request *req) {
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.nbytes = req->nbytes;
> > + rctx->fallback_req.src = req->src;
> > + rctx->fallback_req.result = req->result;
> > +
> > + return crypto_ahash_finup(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_digest(struct ahash_request *req) {
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + struct img_hash_dev *hdev = NULL;
> > + struct img_hash_dev *tmp;
> > + int err;
> > +
> > + spin_lock_bh(&img_hash.lock);
>
> Why spin_{lock,unlock}_bh() here?
Changed to spin_{lock, unlock}(), I don't see any reason why it should be _bh.
>
> > + if (!tctx->hdev) {
> > + list_for_each_entry(tmp, &img_hash.dev_list, list) {
> > + hdev = tmp;
> > + break;
> > + }
> > + tctx->hdev = hdev;
> > +
> > + } else {
> > + hdev = tctx->hdev;
> > + }
> > +
> > + spin_unlock_bh(&img_hash.lock);
> > + ctx->hdev = hdev;
> > + ctx->flags = 0;
> > + ctx->digsize = crypto_ahash_digestsize(tfm);
> > +
> > + switch (ctx->digsize) {
> > + case SHA1_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA1;
> > + break;
> > + case SHA256_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA256;
> > + break;
> > + case SHA224_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA224;
> > + break;
> > + case MD5_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_MD5;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> > +
> > + ctx->bufcnt = 0;
> > + ctx->offset = 0;
> > + ctx->sent = 0;
> > + ctx->total = req->nbytes;
> > + ctx->sg = req->src;
> > + ctx->sgfirst = req->src;
> > + ctx->nents = sg_nents(ctx->sg);
> > +
> > + err = img_hash_handle_queue(tctx->hdev, req);
> > +
> > + return err;
> > +}
> > +
> > +static int img_hash_cra_init(struct crypto_tfm *tfm) {
> > + struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> > + const char *alg_name = crypto_tfm_alg_name(tfm);
> > + int err = -ENOMEM;
> > +
> > + ctx->fallback = crypto_alloc_ahash(alg_name, 0,
> > + CRYPTO_ALG_NEED_FALLBACK);
> > + if (IS_ERR(ctx->fallback)) {
> > + pr_err("img_hash: Could not load fallback driver.\n");
> > + err = PTR_ERR(ctx->fallback);
> > + goto err;
> > + }
> > + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > + sizeof(struct img_hash_request_ctx) +
> > + IMG_HASH_DMA_THRESHOLD);
> > +
> > + return 0;
> > +
> > +err:
> > + return err;
> > +}
> > +
> > +static void img_hash_cra_exit(struct crypto_tfm *tfm) {
> > + struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
> > +
> > + crypto_free_ahash(tctx->fallback);
> > +}
> > +
> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
> > + struct img_hash_dev *hdev = dev_id;
> > + u32 reg;
> > +
> > + reg = img_hash_read(hdev, CR_INTSTAT);
> > + img_hash_write(hdev, CR_INTCLEAR, reg);
> > +
> > + if (reg & CR_INT_NEW_RESULTS_SET) {
> > + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> > + if (!(DRIVER_FLAGS_CPU & hdev->flags))
> > + hdev->flags |= DRIVER_FLAGS_DMA_READY;
> > + tasklet_schedule(&hdev->done_task);
>
> Since this done_task only gets scheduled from here, why not make this a
> threaded IRQ handler and just do the work here instead of separating it
> between a hard IRQ handler and a tasklet?
What is the advantage of doing that? i.e is this simple case worth setting up an extra thread?
>
> > + } else {
> > + dev_warn(hdev->dev,
> > + "HASH interrupt when no active requests.\n");
> > + }
> > + } else if (reg & CR_INT_RESULTS_AVAILABLE) {
> > + dev_warn(hdev->dev,
> > + "IRQ triggered before the hash had completed\n");
> > + } else if (reg & CR_INT_RESULT_READ_ERR) {
> > + dev_warn(hdev->dev,
> > + "Attempt to read from an empty result queue\n");
> > + } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
> > + dev_warn(hdev->dev,
> > + "Data written before the hardware was configured\n");
> > + }
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static struct ahash_alg img_algs[] = {
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = MD5_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "md5",
> > + .cra_driver_name = "img-md5",
> > + .cra_priority = 301,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA1_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha1",
> > + .cra_driver_name = "img-sha1",
> > + .cra_priority = 3000,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA1_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA224_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha224",
> > + .cra_driver_name = "img-sha224",
> > + .cra_priority = 3000,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA224_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA256_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha256",
> > + .cra_driver_name = "img-sha256",
> > + .cra_priority = 301,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA256_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + }
> > +};
> > +
> > +static int img_register_algs(struct img_hash_dev *hdev) {
> > + int i, err;
> > +
> > + for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
> > + err = crypto_register_ahash(&img_algs[i]);
> > + if (err)
> > + goto err_reg;
> > + }
> > + return 0;
> > +
> > +err_reg:
> > + for (; i--; )
> > + crypto_unregister_ahash(&img_algs[i]);
> > +
> > + return err;
> > +}
> > +
> > +static int img_unregister_algs(struct img_hash_dev *hdev) {
> > + int i;
> > +
> > + for (i = 0; i < ARRAY_SIZE(img_algs); i++)
> > + crypto_unregister_ahash(&img_algs[i]);
> > + return 0;
> > +}
> > +
> > +static void img_hash_done_task(unsigned long data) {
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > + int err = 0;
> > +
> > + if (hdev->err == -EINVAL) {
> > + err = hdev->err;
> > + goto finish;
> > + }
> > +
> > + if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
> > + img_hash_handle_queue(hdev, NULL);
> > + return;
> > + }
> > +
> > + if (DRIVER_FLAGS_CPU & hdev->flags) {
> > + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > + hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
> > + goto finish;
> > + }
> > + } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
> > + if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
> > + hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
> > + img_hash_write_via_dma_stop(hdev);
> > + if (hdev->err) {
> > + err = hdev->err;
> > + goto finish;
> > + }
> > + }
> > + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> > + DRIVER_FLAGS_OUTPUT_READY);
> > + goto finish;
> > + }
> > + }
> > + return;
> > +
> > +finish:
> > + img_hash_finish_req(hdev->req, err); }
> > +
> > +static const struct of_device_id img_hash_match[] = {
> > + { .compatible = "img,hash-accelerator" },
> > + {}
> > +};
> > +MODULE_DEVICE_TABLE(of, img_hash_match)
> > +
> > +static int img_hash_probe(struct platform_device *pdev) {
> > + struct img_hash_dev *hdev;
> > + struct device *dev = &pdev->dev;
> > + struct resource *hash_res;
> > + int err;
> > +
> > + hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
> > + if (hdev == NULL) {
> > + err = -ENOMEM;
> > + goto sha_dev_err;
>
> Why not just "return -ENOMEM" here? There should be no cleanup
> necessary at this point?
Done
>
> > + }
> > + spin_lock_init(&hdev->lock);
> > +
> > + hdev->dev = dev;
> > +
> > + platform_set_drvdata(pdev, hdev);
> > +
> > + INIT_LIST_HEAD(&hdev->list);
> > +
> > + tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned
> long)hdev);
> > + tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned
> > + long)hdev);
> > +
> > + crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
> > +
> > + /* Register bank */
> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +
> > + hdev->io_base = devm_ioremap_resource(dev, hash_res);
> > + if (IS_ERR(hdev->io_base)) {
> > + dev_err(dev, "can't ioremap\n");
>
> nit: When printing error messages, it's helpful to print the error code as well.
Done
>
> > + err = PTR_ERR(hdev->io_base);
> > + goto hash_io_err;
> > + }
> > +
> > + /* Write port (DMA or CPU) */
> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > + if (!hash_res) {
> > + dev_err(dev, "no write port resource info\n");
> > + err = -ENODEV;
> > + goto res_err;
> > + }
> > + hdev->bus_addr = hash_res->start;
>
> Maybe set this after devm_ioremap_resource() succeeds and avoid the extra
> error check?
Not quite following you here - which check are you saying can be removed?
>
> > + hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
> > + if (IS_ERR(hdev->cpu_addr)) {
> > + dev_err(dev, "can't ioremap write port\n");
> > + err = PTR_ERR(hdev->cpu_addr);
> > + goto res_err;
> > + }
> > +
> > + hdev->irq = platform_get_irq(pdev, 0);
> > + if (hdev->irq < 0) {
> > + dev_err(dev, "no IRQ resource info\n");
> > + err = hdev->irq;
> > + goto res_err;
> > + }
> > +
> > + err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
> > + dev_name(dev), hdev);
> > + if (err) {
> > + dev_err(dev, "unable to request %s irq\n",
> > + dev_name(dev));
>
> dev_* already prints dev_name().
Yes, removed it
>
> > + goto res_err;
> > + }
> > + dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
> > +
> > + hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
> > + if (IS_ERR(hdev->iclk)) {
> > + dev_err(dev, "clock initialization failed.\n");
> > + err = PTR_ERR(hdev->iclk);
> > + goto res_err;
> > + }
> > +
> > + hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
>
> I don't see this clock getting enabled anywhere.
Good catch - now added to img_hash_hw_init.
>
> > + if (IS_ERR(hdev->fclk)) {
> > + dev_err(dev, "clock initialization failed.\n");
> > + err = PTR_ERR(hdev->fclk);
> > + goto res_err;
> > + }
> > +
> > + err = img_hash_dma_init(hdev);
> > + if (err)
> > + goto err_dma;
> > +
> > + dev_dbg(dev, "using %s for DMA transfers\n",
> > + dma_chan_name(hdev->dma_lch));
> > +
> > + spin_lock(&img_hash.lock);
> > + list_add_tail(&hdev->list, &img_hash.dev_list);
> > + spin_unlock(&img_hash.lock);
> > +
> > + err = img_register_algs(hdev);
> > + if (err)
> > + goto err_algs;
> > + dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr,
> > + 0x4);
>
> dev_dbg()? (or maybe not at all)
Removed it
>
> > + dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator
> > + initialized\n");
> > +
> > + return 0;
> > +
> > +err_algs:
> > + spin_lock(&img_hash.lock);
> > + list_del(&hdev->list);
> > + spin_unlock(&img_hash.lock);
> > + dma_release_channel(hdev->dma_lch);
> > +err_dma:
> > +hash_io_err:
> > + devm_clk_put(dev, hdev->iclk);
> > + devm_clk_put(dev, hdev->fclk);
>
> Since the clocks were acquired via the devm_* API, there's no need to
> explicitly put() them.
Done
>
> > +res_err:
> > + tasklet_kill(&hdev->done_task);
> > + tasklet_kill(&hdev->dma_task);
> > +sha_dev_err:
> > + dev_err(dev, "initialization failed.\n");
>
> The driver core will print an error message if a device probe fails (for
> reasons other than -ENODEV, at least), so there's no need to print here.
Removed
>
> > + return err;
> > +}
> > +
> > +static int img_hash_remove(struct platform_device *pdev) {
> > + static struct img_hash_dev *hdev;
> > +
> > + hdev = platform_get_drvdata(pdev);
> > + spin_lock(&img_hash.lock);
> > + list_del(&hdev->list);
> > + spin_unlock(&img_hash.lock);
> > +
> > + img_unregister_algs(hdev);
> > +
> > + tasklet_kill(&hdev->done_task);
> > + tasklet_kill(&hdev->dma_task);
> > +
> > + dma_release_channel(hdev->dma_lch);
> > +
> > + clk_disable_unprepare(hdev->iclk);
> > + clk_disable_unprepare(hdev->fclk);
>
> Unbalanced prepare_enable()/disable_unprepare().
Now enabled in img_hash_hw_init.
>
> > + return 0;
> > +}
> > +
> > +static struct platform_driver img_hash_driver = {
> > + .probe = img_hash_probe,
> > + .remove = img_hash_remove,
> > + .driver = {
> > + .name = "img-hash-accelerator",
> > + .of_match_table = of_match_ptr(img_hash_match),
> > + }
> > +};
> > +module_platform_driver(img_hash_driver);
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator
> > +driver"); MODULE_AUTHOR("Will Thomas."); MODULE_AUTHOR("James
> > +Hartley.");
>
> It's helpful to include email addresses in the MODULE_AUTHOR tags.
I can do that for my email address, but not for Will's
Thanks for the review Andrew!
James.
Hi Andrew
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of
> Andrew Bresticker
> Sent: 06 March 2015 21:50
> To: James Hartley
> Cc: [email protected]
> Subject: Re: [PATCH V3 2/2] Documentation: crypto: Add DT binding info for
> the img hw hash accelerator
>
> Hi James,
>
> On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <[email protected]>
> wrote:
> > This adds the binding documentation for the Imagination Technologies
> > hash accelerator that provides hardware acceleration for
> > SHA1/SHA224/SHA256/MD5 hashes. This hardware will be present in the
> > upcoming pistachio SoC.
> >
> > Signed-off-by: James Hartley <[email protected]>
>
> > diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt
> > b/Documentation/devicetree/bindings/crypto/img-hash.txt
> > new file mode 100644
> > index 0000000..7adc519
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
> > @@ -0,0 +1,27 @@
> > +Imagination Technologies hardware hash accelerator
> > +
> > +The hash accelerator provides hardware hashing acceleration for SHA1,
> > +SHA224, SHA256 and MD5 hashes
> > +
> > +Required properties:
> > +
> > +- compatible : "img,hash-accelerator"
> > +- reg : Offset and length of the register set for the module, and the
> > +DMA port
> > +- interrupts : The designated IRQ line for the hashing module.
> > +- dmas : DMA specifier as per
> > +Documentation/devicetree/bindings/dma/dma.txt
> > +- dma-names : Should be "tx"
> > +- clocks : Clock specifiers
> > +- clock-names : "hash_clk" Used to clock data through the accelerator
> > + "hash_reg_clk" Used to clock the hash block registers
>
> For the other IMG drivers that have been submitted, we've been using "sys"
> as the name for the register gate clock. Maybe we should do that here too to
> be consistent?
Yes, good idea
>
> > +Example:
> > +
> > + hash: hash@18149600 {
> > + compatible = "img,hash-accelerator";
> > + reg = <0x18149600 0x100, 0x18101100 0x4>;
> > + interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
> > + dmas = <&dma 8 0xffffffff>;
> > + dma-names = "tx";
> > + clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph
> PERIPH_CLK_ROM>;
> > + clock-names = "hash_clk, hash_reg_clk";
>
> I think these are flipped (and you're missing some quotation marks).
Fixed.
>
> Otherwise this looks good to me.
>
> -Andrew
Thanks for the review,
James
Hi James,
>> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
>> > + struct img_hash_dev *hdev = dev_id;
>> > + u32 reg;
>> > +
>> > + reg = img_hash_read(hdev, CR_INTSTAT);
>> > + img_hash_write(hdev, CR_INTCLEAR, reg);
>> > +
>> > + if (reg & CR_INT_NEW_RESULTS_SET) {
>> > + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
>> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
>> > + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
>> > + if (!(DRIVER_FLAGS_CPU & hdev->flags))
>> > + hdev->flags |= DRIVER_FLAGS_DMA_READY;
>> > + tasklet_schedule(&hdev->done_task);
>>
>> Since this done_task only gets scheduled from here, why not make this a
>> threaded IRQ handler and just do the work here instead of separating it
>> between a hard IRQ handler and a tasklet?
>
> What is the advantage of doing that? i.e is this simple case worth setting up an extra thread?
I believe threaded IRQ handlers are now preferred to using a hard IRQ
handler + tasklet when possible, partly because people tend to screw
up tasklet usage.
>> > + err = PTR_ERR(hdev->io_base);
>> > + goto hash_io_err;
>> > + }
>> > +
>> > + /* Write port (DMA or CPU) */
>> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
>> > + if (!hash_res) {
>> > + dev_err(dev, "no write port resource info\n");
>> > + err = -ENODEV;
>> > + goto res_err;
>> > + }
>> > + hdev->bus_addr = hash_res->start;
>>
>> Maybe set this after devm_ioremap_resource() succeeds and avoid the extra
>> error check?
>
> Not quite following you here - which check are you saying can be removed?
You can remove the if (hash_res) check if you set hdev->bus_addr after
devm_ioremap_resource().
Thanks,
Andrew
Hi Andrew,
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of
> Andrew Bresticker
> Sent: 10 March 2015 18:02
> To: James Hartley
> Cc: [email protected]
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
> Hi James,
>
> >> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
> >> > + struct img_hash_dev *hdev = dev_id;
> >> > + u32 reg;
> >> > +
> >> > + reg = img_hash_read(hdev, CR_INTSTAT);
> >> > + img_hash_write(hdev, CR_INTCLEAR, reg);
> >> > +
> >> > + if (reg & CR_INT_NEW_RESULTS_SET) {
> >> > + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> >> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> >> > + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> >> > + if (!(DRIVER_FLAGS_CPU & hdev->flags))
> >> > + hdev->flags |= DRIVER_FLAGS_DMA_READY;
> >> > + tasklet_schedule(&hdev->done_task);
> >>
> >> Since this done_task only gets scheduled from here, why not make this
> >> a threaded IRQ handler and just do the work here instead of
> >> separating it between a hard IRQ handler and a tasklet?
> >
> > What is the advantage of doing that? i.e is this simple case worth setting up
> an extra thread?
>
> I believe threaded IRQ handlers are now preferred to using a hard IRQ
> handler + tasklet when possible, partly because people tend to screw up
> tasklet usage.
Fair enough, but I'd like to leave this as an enhancement for when I have some
extra bandwidth if it's not essential (same for the runtime PM).
>
> >> > + err = PTR_ERR(hdev->io_base);
> >> > + goto hash_io_err;
> >> > + }
> >> > +
> >> > + /* Write port (DMA or CPU) */
> >> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> >> > + if (!hash_res) {
> >> > + dev_err(dev, "no write port resource info\n");
> >> > + err = -ENODEV;
> >> > + goto res_err;
> >> > + }
> >> > + hdev->bus_addr = hash_res->start;
> >>
> >> Maybe set this after devm_ioremap_resource() succeeds and avoid the
> >> extra error check?
> >
> > Not quite following you here - which check are you saying can be removed?
>
> You can remove the if (hash_res) check if you set hdev->bus_addr after
> devm_ioremap_resource().
I see what you mean - done.
>
> Thanks,
> Andrew
Thanks,
James.