From: Qiang Liu Subject: [PATCH v3 1/4] Talitos: Support for async_tx XOR offload Date: Mon, 16 Jul 2012 12:11:26 +0800 Message-ID: <1342411886-30014-1-git-send-email-qiang.liu@freescale.com> Mime-Version: 1.0 Content-Type: text/plain Cc: , , , , Qiang Liu , "David S. Miller" To: , Return-path: Received: from co1ehsobe002.messaging.microsoft.com ([216.32.180.185]:43914 "EHLO co1outboundpool.messaging.microsoft.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750897Ab2GPEdF (ORCPT ); Mon, 16 Jul 2012 00:33:05 -0400 Sender: linux-crypto-owner@vger.kernel.org List-ID: Expose Talitos's XOR functionality to be used for RAID parity calculation via the Async_tx layer. Cc: Herbert Xu Cc: David S. Miller Signed-off-by: Dipen Dudhat Signed-off-by: Maneesh Gupta Signed-off-by: Kim Phillips Signed-off-by: Vishnu Suresh Signed-off-by: Qiang Liu --- drivers/crypto/Kconfig | 9 + drivers/crypto/talitos.c | 410 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/talitos.h | 53 ++++++ 3 files changed, 472 insertions(+), 0 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index be6b2ba..f0a7c29 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -222,6 +222,15 @@ config CRYPTO_DEV_TALITOS To compile this driver as a module, choose M here: the module will be called talitos. +config CRYPTO_DEV_TALITOS_RAIDXOR + bool "Talitos RAID5 XOR Calculation Offload" + default y + select DMA_ENGINE + depends on CRYPTO_DEV_TALITOS + help + Say 'Y' here to use the Freescale Security Engine (SEC) to + offload RAID XOR parity Calculation + config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index efff788..1e0c9e6 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -619,6 +619,396 @@ static void talitos_unregister_rng(struct device *dev) hwrng_unregister(&priv->rng); } +#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc, + void *context, int error); + +static enum dma_status talitos_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct talitos_xor_chan *xor_chan; + dma_cookie_t last_used; + dma_cookie_t last_complete; + + xor_chan = container_of(chan, struct talitos_xor_chan, common); + + last_used = chan->cookie; + last_complete = xor_chan->completed_cookie; + + if (state->last) + state->last = last_complete; + + if (state->used) + state->used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static void talitos_process_pending(struct talitos_xor_chan *xor_chan) +{ + struct talitos_xor_desc *desc, *_desc; + unsigned long flags; + int status; + struct talitos_private *priv; + int ch; + + priv = dev_get_drvdata(xor_chan->dev); + ch = atomic_inc_return(&priv->last_chan) & + (priv->num_channels - 1); + spin_lock_irqsave(&xor_chan->desc_lock, flags); + + list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) { + status = talitos_submit(xor_chan->dev, ch, &desc->hwdesc, + talitos_release_xor, desc); + if (status != -EINPROGRESS) + break; + + list_del(&desc->node); + list_add_tail(&desc->node, &xor_chan->in_progress_q); + } + + spin_unlock_irqrestore(&xor_chan->desc_lock, flags); +} + +static void talitos_xor_run_tx_complete_actions(struct talitos_xor_desc *desc, + struct talitos_xor_chan *xor_chan) +{ + struct device *dev = xor_chan->dev; + dma_addr_t dest, addr; + unsigned int src_cnt = desc->unmap_src_cnt; + unsigned int len = desc->unmap_len; + enum dma_ctrl_flags flags = desc->async_tx.flags; + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + /* unmap dma addresses */ + dest = desc->hwdesc.ptr[6].ptr; + if (likely(!(flags & DMA_COMPL_SKIP_DEST_UNMAP))) + dma_unmap_page(dev, dest, len, DMA_BIDIRECTIONAL); + + desc->idx = 6 - src_cnt; + while(desc->idx < 6) { + addr = desc->hwdesc.ptr[desc->idx++].ptr; + if (likely(!(flags & DMA_COMPL_SKIP_SRC_UNMAP))) + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + + /* run dependent operations */ + dma_run_dependencies(tx); +} + +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc, + void *context, int error) +{ + struct talitos_xor_desc *desc = context; + struct talitos_xor_chan *xor_chan; + dma_async_tx_callback callback; + void *callback_param; + + if (unlikely(error)) + dev_err(dev, "xor operation: talitos error %d\n", error); + + xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan, + common); + spin_lock_bh(&xor_chan->desc_lock); + if (xor_chan->completed_cookie < desc->async_tx.cookie) + xor_chan->completed_cookie = desc->async_tx.cookie; + + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + + if (callback) { + spin_unlock_bh(&xor_chan->desc_lock); + callback(callback_param); + spin_lock_bh(&xor_chan->desc_lock); + } + + talitos_xor_run_tx_complete_actions(desc, xor_chan); + + list_del(&desc->node); + list_add_tail(&desc->node, &xor_chan->free_desc); + spin_unlock_bh(&xor_chan->desc_lock); + if (!list_empty(&xor_chan->pending_q)) + talitos_process_pending(xor_chan); +} + +/** + * talitos_issue_pending - move the descriptors in submit + * queue to pending queue and submit them for processing + * @chan: DMA channel + */ +static void talitos_issue_pending(struct dma_chan *chan) +{ + struct talitos_xor_chan *xor_chan; + + xor_chan = container_of(chan, struct talitos_xor_chan, common); + spin_lock_bh(&xor_chan->desc_lock); + list_splice_tail_init(&xor_chan->submit_q, + &xor_chan->pending_q); + spin_unlock_bh(&xor_chan->desc_lock); + talitos_process_pending(xor_chan); +} + +static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct talitos_xor_desc *desc; + struct talitos_xor_chan *xor_chan; + dma_cookie_t cookie; + + desc = container_of(tx, struct talitos_xor_desc, async_tx); + xor_chan = container_of(tx->chan, struct talitos_xor_chan, common); + + spin_lock_bh(&xor_chan->desc_lock); + + cookie = xor_chan->common.cookie + 1; + if (cookie < 0) + cookie = 1; + + desc->async_tx.cookie = cookie; + xor_chan->common.cookie = desc->async_tx.cookie; + + list_splice_tail_init(&desc->tx_list, + &xor_chan->submit_q); + + spin_unlock_bh(&xor_chan->desc_lock); + + return cookie; +} + +static struct talitos_xor_desc *talitos_xor_alloc_descriptor( + struct talitos_xor_chan *xor_chan, gfp_t flags) +{ + struct talitos_xor_desc *desc; + + desc = kmalloc(sizeof(*desc), flags); + if (desc) { + xor_chan->total_desc++; + desc->async_tx.tx_submit = talitos_async_tx_submit; + } + + return desc; +} + +static void talitos_free_chan_resources(struct dma_chan *chan) +{ + struct talitos_xor_chan *xor_chan; + struct talitos_xor_desc *desc, *_desc; + + xor_chan = container_of(chan, struct talitos_xor_chan, common); + + spin_lock_bh(&xor_chan->desc_lock); + + list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) { + list_del(&desc->node); + xor_chan->total_desc--; + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) { + list_del(&desc->node); + xor_chan->total_desc--; + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) { + list_del(&desc->node); + xor_chan->total_desc--; + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) { + list_del(&desc->node); + xor_chan->total_desc--; + kfree(desc); + } + + /* Some descriptor not freed? */ + if (unlikely(xor_chan->total_desc)) + dev_warn(chan->device->dev, "Failed to free xor channel resource\n"); + + spin_unlock_bh(&xor_chan->desc_lock); +} + +static int talitos_alloc_chan_resources(struct dma_chan *chan) +{ + struct talitos_xor_chan *xor_chan; + struct talitos_xor_desc *desc; + LIST_HEAD(tmp_list); + int i; + + xor_chan = container_of(chan, struct talitos_xor_chan, common); + + if (!list_empty(&xor_chan->free_desc)) + return xor_chan->total_desc; + + for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) { + desc = talitos_xor_alloc_descriptor(xor_chan, + GFP_KERNEL | GFP_DMA); + if (!desc) { + dev_err(xor_chan->common.device->dev, + "Only %d initial descriptors\n", i); + break; + } + list_add_tail(&desc->node, &tmp_list); + } + + if (!i) + return -ENOMEM; + + /* At least one desc is allocated */ + spin_lock_bh(&xor_chan->desc_lock); + list_splice_init(&tmp_list, &xor_chan->free_desc); + spin_unlock_bh(&xor_chan->desc_lock); + + return xor_chan->total_desc; +} + +static struct dma_async_tx_descriptor *talitos_prep_dma_xor( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct talitos_xor_chan *xor_chan; + struct talitos_xor_desc *new; + struct talitos_desc *desc; + int i, j; + + BUG_ON(len > TALITOS_MAX_DATA_LEN); + + xor_chan = container_of(chan, struct talitos_xor_chan, common); + + spin_lock_bh(&xor_chan->desc_lock); + if (!list_empty(&xor_chan->free_desc)) { + new = container_of(xor_chan->free_desc.next, + struct talitos_xor_desc, node); + list_del(&new->node); + } else { + new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL | GFP_DMA); + } + spin_unlock_bh(&xor_chan->desc_lock); + + if (!new) { + dev_err(xor_chan->common.device->dev, + "No free memory for XOR DMA descriptor\n"); + return NULL; + } + dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common); + + INIT_LIST_HEAD(&new->node); + INIT_LIST_HEAD(&new->tx_list); + + desc = &new->hwdesc; + /* Set destination: Last pointer pair */ + to_talitos_ptr(&desc->ptr[6], dest); + desc->ptr[6].len = cpu_to_be16(len); + desc->ptr[6].j_extent = 0; + new->unmap_src_cnt = src_cnt; + new->unmap_len = len; + + /* Set Sources: End loading from second-last pointer pair */ + for (i = 5, j = 0; j < src_cnt && i >= 0; i--, j++) { + to_talitos_ptr(&desc->ptr[i], src[j]); + desc->ptr[i].len = cpu_to_be16(len); + desc->ptr[i].j_extent = 0; + } + + /* + * documentation states first 0 ptr/len combo marks end of sources + * yet device produces scatter boundary error unless all subsequent + * sources are zeroed out + */ + for (; i >= 0; i--) { + to_talitos_ptr(&desc->ptr[i], 0); + desc->ptr[i].len = 0; + desc->ptr[i].j_extent = 0; + } + + desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR | + DESC_HDR_TYPE_RAID_XOR; + + new->async_tx.parent = NULL; + new->async_tx.next = NULL; + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + list_add_tail(&new->node, &new->tx_list); + + new->async_tx.flags = flags; + new->async_tx.cookie = -EBUSY; + + return &new->async_tx; +} + +static void talitos_unregister_async_xor(struct device *dev) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + struct talitos_xor_chan *xor_chan; + struct dma_chan *chan, *_chan; + + if (priv->dma_dev_common.chancnt) + dma_async_device_unregister(&priv->dma_dev_common); + + list_for_each_entry_safe(chan, _chan, &priv->dma_dev_common.channels, + device_node) { + xor_chan = container_of(chan, struct talitos_xor_chan, + common); + list_del(&chan->device_node); + priv->dma_dev_common.chancnt--; + kfree(xor_chan); + } +} + +/** + * talitos_register_dma_async - Initialize the Freescale XOR ADMA device + * It is registered as a DMA device with the capability to perform + * XOR operation with the Async_tx layer. + * The various queues and channel resources are also allocated. + */ +static int talitos_register_async_tx(struct device *dev, int max_xor_srcs) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + struct dma_device *dma_dev = &priv->dma_dev_common; + struct talitos_xor_chan *xor_chan; + int err; + + xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL); + if (!xor_chan) { + dev_err(dev, "unable to allocate xor channel\n"); + return -ENOMEM; + } + + dma_dev->dev = dev; + dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources; + dma_dev->device_free_chan_resources = talitos_free_chan_resources; + dma_dev->device_prep_dma_xor = talitos_prep_dma_xor; + dma_dev->max_xor = max_xor_srcs; + dma_dev->device_tx_status = talitos_is_tx_complete; + dma_dev->device_issue_pending = talitos_issue_pending; + INIT_LIST_HEAD(&dma_dev->channels); + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + + xor_chan->dev = dev; + xor_chan->common.device = dma_dev; + xor_chan->total_desc = 0; + INIT_LIST_HEAD(&xor_chan->submit_q); + INIT_LIST_HEAD(&xor_chan->pending_q); + INIT_LIST_HEAD(&xor_chan->in_progress_q); + INIT_LIST_HEAD(&xor_chan->free_desc); + spin_lock_init(&xor_chan->desc_lock); + + list_add_tail(&xor_chan->common.device_node, &dma_dev->channels); + dma_dev->chancnt++; + + err = dma_async_device_register(dma_dev); + if (err) { + dev_err(dev, "Unable to register XOR with Async_tx\n"); + goto err_out; + } + + return err; + +err_out: + talitos_unregister_async_xor(dev); + return err; +} +#endif + /* * crypto alg */ @@ -2891,6 +3281,26 @@ static int talitos_probe(struct platform_device *ofdev) dev_info(dev, "hwrng\n"); } +#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR + /* + * register with async_tx xor, if capable + * SEC 2.x support up to 3 RAID sources, + * SEC 3.x support up to 6 + */ + if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) { + int max_xor_srcs = 3; + if (of_device_is_compatible(np, "fsl,sec3.0")) + max_xor_srcs = 6; + err = talitos_register_async_tx(dev, max_xor_srcs); + if (err) { + dev_err(dev, "failed to register async_tx xor: %d\n", + err); + goto err_out; + } + dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs); + } +#endif + /* register crypto algorithms the device supports */ for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { if (hw_supports(dev, driver_algs[i].desc_hdr_template)) { diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 61a1405..fc9d125 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -30,6 +30,7 @@ #define TALITOS_TIMEOUT 100000 #define TALITOS_MAX_DATA_LEN 65535 +#define TALITOS_MAX_DESCRIPTOR_NR 256 #define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f) #define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf) @@ -131,7 +132,57 @@ struct talitos_private { /* hwrng device */ struct hwrng rng; + +#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR + /* XOR Device */ + struct dma_device dma_dev_common; +#endif +}; + +#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR +/** + * talitos_xor_chan - context management for the async_tx channel + * @completed_cookie: the last completed cookie + * @desc_lock: lock for tx queue + * @total_desc: number of descriptors allocated + * @submit_q: queue of submitted descriptors + * @pending_q: queue of pending descriptors + * @in_progress_q: queue of descriptors in progress + * @free_desc: queue of unused descriptors + * @dev: talitos device implementing this channel + * @common: the corresponding xor channel in async_tx + */ +struct talitos_xor_chan { + dma_cookie_t completed_cookie; + spinlock_t desc_lock; + unsigned int total_desc; + struct list_head submit_q; + struct list_head pending_q; + struct list_head in_progress_q; + struct list_head free_desc; + struct device *dev; + struct dma_chan common; +}; + +/** + * talitos_xor_desc - software xor descriptor + * @async_tx: the referring async_tx descriptor + * @node: + * @hwdesc: h/w descriptor + * @unmap_src_cnt: number of xor sources + * @unmap_len: transaction byte count + * @idx: index of xor sources + */ +struct talitos_xor_desc { + struct dma_async_tx_descriptor async_tx; + struct list_head tx_list; + struct list_head node; + struct talitos_desc hwdesc; + unsigned int unmap_src_cnt; + unsigned int unmap_len; + unsigned int idx; }; +#endif extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, void (*callback)(struct device *dev, @@ -284,6 +335,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, /* primary execution unit mode (MODE0) and derivatives */ #define DESC_HDR_MODE0_ENCRYPT cpu_to_be32(0x00100000) #define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000) +#define DESC_HDR_MODE0_AESU_XOR cpu_to_be32(0x0c600000) #define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000) #define DESC_HDR_MODE0_DEU_3DES cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_MDEU_CONT cpu_to_be32(0x08000000) @@ -344,6 +396,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define DESC_HDR_TYPE_IPSEC_ESP cpu_to_be32(1 << 3) #define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU cpu_to_be32(2 << 3) #define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU cpu_to_be32(4 << 3) +#define DESC_HDR_TYPE_RAID_XOR cpu_to_be32(21 << 3) /* link table extent field bits */ #define DESC_PTR_LNKTBL_JUMP 0x80 -- 1.7.5.1