2012-08-09 08:47:39

by Qiang Liu

[permalink] [raw]
Subject: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload

From: Qiang Liu <[email protected]>

Expose Talitos's XOR functionality to be used for RAID parity
calculation via the Async_tx layer.

Cc: Herbert Xu <[email protected]>
Cc: David S. Miller <[email protected]>
Signed-off-by: Dipen Dudhat <[email protected]>
Signed-off-by: Maneesh Gupta <[email protected]>
Signed-off-by: Kim Phillips <[email protected]>
Signed-off-by: Vishnu Suresh <[email protected]>
Signed-off-by: Qiang Liu <[email protected]>
---
drivers/crypto/Kconfig | 9 +
drivers/crypto/talitos.c | 413 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/crypto/talitos.h | 53 ++++++
3 files changed, 475 insertions(+), 0 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index be6b2ba..f0a7c29 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,6 +222,15 @@ config CRYPTO_DEV_TALITOS
To compile this driver as a module, choose M here: the module
will be called talitos.

+config CRYPTO_DEV_TALITOS_RAIDXOR
+ bool "Talitos RAID5 XOR Calculation Offload"
+ default y
+ select DMA_ENGINE
+ depends on CRYPTO_DEV_TALITOS
+ help
+ Say 'Y' here to use the Freescale Security Engine (SEC) to
+ offload RAID XOR parity Calculation
+
config CRYPTO_DEV_IXP4XX
tristate "Driver for IXP4xx crypto hardware acceleration"
depends on ARCH_IXP4XX
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index efff788..b34264e 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -619,6 +619,399 @@ static void talitos_unregister_rng(struct device *dev)
hwrng_unregister(&priv->rng);
}

+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+ void *context, int error);
+
+static enum dma_status talitos_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct talitos_xor_chan *xor_chan;
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+ last_used = chan->cookie;
+ last_complete = xor_chan->completed_cookie;
+
+ if (state->last)
+ state->last = last_complete;
+
+ if (state->used)
+ state->used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
+{
+ struct talitos_xor_desc *desc, *_desc;
+ unsigned long flags;
+ int status;
+ struct talitos_private *priv;
+ int ch;
+
+ priv = dev_get_drvdata(xor_chan->dev);
+ ch = atomic_inc_return(&priv->last_chan) &
+ (priv->num_channels - 1);
+ spin_lock_irqsave(&xor_chan->desc_lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+ status = talitos_submit(xor_chan->dev, ch, &desc->hwdesc,
+ talitos_release_xor, desc);
+ if (status != -EINPROGRESS)
+ break;
+
+ list_del(&desc->node);
+ list_add_tail(&desc->node, &xor_chan->in_progress_q);
+ }
+
+ spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
+}
+
+static void talitos_xor_run_tx_complete_actions(struct talitos_xor_desc *desc,
+ struct talitos_xor_chan *xor_chan)
+{
+ struct device *dev = xor_chan->dev;
+ dma_addr_t dest, addr;
+ unsigned int src_cnt = desc->unmap_src_cnt;
+ unsigned int len = desc->unmap_len;
+ enum dma_ctrl_flags flags = desc->async_tx.flags;
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ /* unmap dma addresses */
+ dest = desc->hwdesc.ptr[6].ptr;
+ if (likely(!(flags & DMA_COMPL_SKIP_DEST_UNMAP)))
+ dma_unmap_page(dev, dest, len, DMA_BIDIRECTIONAL);
+
+ desc->idx = 6 - src_cnt;
+ if (likely(!(flags & DMA_COMPL_SKIP_SRC_UNMAP))) {
+ while(desc->idx < 6) {
+ addr = desc->hwdesc.ptr[desc->idx++].ptr;
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(tx);
+}
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+ void *context, int error)
+{
+ struct talitos_xor_desc *desc = context;
+ struct talitos_xor_chan *xor_chan;
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ if (unlikely(error))
+ dev_err(dev, "xor operation: talitos error %d\n", error);
+
+ xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
+ common);
+ spin_lock_bh(&xor_chan->desc_lock);
+ if (xor_chan->completed_cookie < desc->async_tx.cookie)
+ xor_chan->completed_cookie = desc->async_tx.cookie;
+
+ callback = desc->async_tx.callback;
+ callback_param = desc->async_tx.callback_param;
+
+ if (callback) {
+ spin_unlock_bh(&xor_chan->desc_lock);
+ callback(callback_param);
+ spin_lock_bh(&xor_chan->desc_lock);
+ }
+
+ talitos_xor_run_tx_complete_actions(desc, xor_chan);
+
+ list_del(&desc->node);
+ list_add_tail(&desc->node, &xor_chan->free_desc);
+ spin_unlock_bh(&xor_chan->desc_lock);
+ if (!list_empty(&xor_chan->pending_q))
+ talitos_process_pending(xor_chan);
+}
+
+/**
+ * talitos_issue_pending - move the descriptors in submit
+ * queue to pending queue and submit them for processing
+ * @chan: DMA channel
+ */
+static void talitos_issue_pending(struct dma_chan *chan)
+{
+ struct talitos_xor_chan *xor_chan;
+
+ xor_chan = container_of(chan, struct talitos_xor_chan, common);
+ spin_lock_bh(&xor_chan->desc_lock);
+ list_splice_tail_init(&xor_chan->submit_q,
+ &xor_chan->pending_q);
+ spin_unlock_bh(&xor_chan->desc_lock);
+ talitos_process_pending(xor_chan);
+}
+
+static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct talitos_xor_desc *desc;
+ struct talitos_xor_chan *xor_chan;
+ dma_cookie_t cookie;
+
+ desc = container_of(tx, struct talitos_xor_desc, async_tx);
+ xor_chan = container_of(tx->chan, struct talitos_xor_chan, common);
+
+ spin_lock_bh(&xor_chan->desc_lock);
+
+ cookie = xor_chan->common.cookie + 1;
+ if (cookie < 0)
+ cookie = 1;
+
+ desc->async_tx.cookie = cookie;
+ xor_chan->common.cookie = desc->async_tx.cookie;
+
+ list_splice_tail_init(&desc->tx_list,
+ &xor_chan->submit_q);
+
+ spin_unlock_bh(&xor_chan->desc_lock);
+
+ return cookie;
+}
+
+static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
+ struct talitos_xor_chan *xor_chan, gfp_t flags)
+{
+ struct talitos_xor_desc *desc;
+
+ desc = kmalloc(sizeof(*desc), flags);
+ if (desc) {
+ xor_chan->total_desc++;
+ desc->async_tx.tx_submit = talitos_async_tx_submit;
+ }
+
+ return desc;
+}
+
+static void talitos_free_chan_resources(struct dma_chan *chan)
+{
+ struct talitos_xor_chan *xor_chan;
+ struct talitos_xor_desc *desc, *_desc;
+
+ xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+ spin_lock_bh(&xor_chan->desc_lock);
+
+ list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) {
+ list_del(&desc->node);
+ xor_chan->total_desc--;
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+ list_del(&desc->node);
+ xor_chan->total_desc--;
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) {
+ list_del(&desc->node);
+ xor_chan->total_desc--;
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) {
+ list_del(&desc->node);
+ xor_chan->total_desc--;
+ kfree(desc);
+ }
+
+ /* Some descriptor not freed? */
+ if (unlikely(xor_chan->total_desc))
+ dev_warn(chan->device->dev, "Failed to free xor channel resource\n");
+
+ spin_unlock_bh(&xor_chan->desc_lock);
+}
+
+static int talitos_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct talitos_xor_chan *xor_chan;
+ struct talitos_xor_desc *desc;
+ LIST_HEAD(tmp_list);
+ int i;
+
+ xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+ if (!list_empty(&xor_chan->free_desc))
+ return xor_chan->total_desc;
+
+ for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
+ desc = talitos_xor_alloc_descriptor(xor_chan,
+ GFP_KERNEL | GFP_DMA);
+ if (!desc) {
+ dev_err(xor_chan->common.device->dev,
+ "Only %d initial descriptors\n", i);
+ break;
+ }
+ list_add_tail(&desc->node, &tmp_list);
+ }
+
+ if (!i)
+ return -ENOMEM;
+
+ /* At least one desc is allocated */
+ spin_lock_bh(&xor_chan->desc_lock);
+ list_splice_init(&tmp_list, &xor_chan->free_desc);
+ spin_unlock_bh(&xor_chan->desc_lock);
+
+ return xor_chan->total_desc;
+}
+
+static struct dma_async_tx_descriptor *talitos_prep_dma_xor(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct talitos_xor_chan *xor_chan;
+ struct talitos_xor_desc *new;
+ struct talitos_desc *desc;
+ int i, j;
+
+ BUG_ON(len > TALITOS_MAX_DATA_LEN);
+
+ xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+ spin_lock_bh(&xor_chan->desc_lock);
+ if (!list_empty(&xor_chan->free_desc)) {
+ new = container_of(xor_chan->free_desc.next,
+ struct talitos_xor_desc, node);
+ list_del(&new->node);
+ } else {
+ new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL | GFP_DMA);
+ }
+ spin_unlock_bh(&xor_chan->desc_lock);
+
+ if (!new) {
+ dev_err(xor_chan->common.device->dev,
+ "No free memory for XOR DMA descriptor\n");
+ return NULL;
+ }
+ dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
+
+ INIT_LIST_HEAD(&new->node);
+ INIT_LIST_HEAD(&new->tx_list);
+
+ desc = &new->hwdesc;
+ /* Set destination: Last pointer pair */
+ to_talitos_ptr(&desc->ptr[6], dest);
+ desc->ptr[6].len = cpu_to_be16(len);
+ desc->ptr[6].j_extent = 0;
+ new->unmap_src_cnt = src_cnt;
+ new->unmap_len = len;
+
+ /* Set Sources: End loading from second-last pointer pair */
+ for (i = 5, j = 0; j < src_cnt && i >= 0; i--, j++) {
+ to_talitos_ptr(&desc->ptr[i], src[j]);
+ desc->ptr[i].len = cpu_to_be16(len);
+ desc->ptr[i].j_extent = 0;
+ }
+
+ /*
+ * documentation states first 0 ptr/len combo marks end of sources
+ * yet device produces scatter boundary error unless all subsequent
+ * sources are zeroed out
+ */
+ for (; i >= 0; i--) {
+ to_talitos_ptr(&desc->ptr[i], 0);
+ desc->ptr[i].len = 0;
+ desc->ptr[i].j_extent = 0;
+ }
+
+ desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR |
+ DESC_HDR_TYPE_RAID_XOR;
+
+ new->async_tx.parent = NULL;
+ new->async_tx.next = NULL;
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ list_add_tail(&new->node, &new->tx_list);
+
+ new->async_tx.flags = flags;
+ new->async_tx.cookie = -EBUSY;
+
+ return &new->async_tx;
+}
+
+static void talitos_unregister_async_xor(struct device *dev)
+{
+ struct talitos_private *priv = dev_get_drvdata(dev);
+ struct talitos_xor_chan *xor_chan;
+ struct dma_chan *chan, *_chan;
+
+ if (priv->dma_dev_common.chancnt)
+ dma_async_device_unregister(&priv->dma_dev_common);
+
+ list_for_each_entry_safe(chan, _chan, &priv->dma_dev_common.channels,
+ device_node) {
+ xor_chan = container_of(chan, struct talitos_xor_chan,
+ common);
+ list_del(&chan->device_node);
+ priv->dma_dev_common.chancnt--;
+ kfree(xor_chan);
+ }
+}
+
+/**
+ * talitos_register_dma_async - Initialize the Freescale XOR ADMA device
+ * It is registered as a DMA device with the capability to perform
+ * XOR operation with the Async_tx layer.
+ * The various queues and channel resources are also allocated.
+ */
+static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
+{
+ struct talitos_private *priv = dev_get_drvdata(dev);
+ struct dma_device *dma_dev = &priv->dma_dev_common;
+ struct talitos_xor_chan *xor_chan;
+ int err;
+
+ xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
+ if (!xor_chan) {
+ dev_err(dev, "unable to allocate xor channel\n");
+ return -ENOMEM;
+ }
+
+ dma_dev->dev = dev;
+ dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = talitos_free_chan_resources;
+ dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
+ dma_dev->max_xor = max_xor_srcs;
+ dma_dev->device_tx_status = talitos_is_tx_complete;
+ dma_dev->device_issue_pending = talitos_issue_pending;
+ INIT_LIST_HEAD(&dma_dev->channels);
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+ xor_chan->dev = dev;
+ xor_chan->common.device = dma_dev;
+ xor_chan->total_desc = 0;
+ INIT_LIST_HEAD(&xor_chan->submit_q);
+ INIT_LIST_HEAD(&xor_chan->pending_q);
+ INIT_LIST_HEAD(&xor_chan->in_progress_q);
+ INIT_LIST_HEAD(&xor_chan->free_desc);
+ spin_lock_init(&xor_chan->desc_lock);
+
+ list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
+ dma_dev->chancnt++;
+
+ err = dma_async_device_register(dma_dev);
+ if (err) {
+ dev_err(dev, "Unable to register XOR with Async_tx\n");
+ goto err_out;
+ }
+
+ return err;
+
+err_out:
+ talitos_unregister_async_xor(dev);
+ return err;
+}
+#endif
+
/*
* crypto alg
*/
@@ -2891,6 +3284,26 @@ static int talitos_probe(struct platform_device *ofdev)
dev_info(dev, "hwrng\n");
}

+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+ /*
+ * register with async_tx xor, if capable
+ * SEC 2.x support up to 3 RAID sources,
+ * SEC 3.x support up to 6
+ */
+ if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) {
+ int max_xor_srcs = 3;
+ if (of_device_is_compatible(np, "fsl,sec3.0"))
+ max_xor_srcs = 6;
+ err = talitos_register_async_tx(dev, max_xor_srcs);
+ if (err) {
+ dev_err(dev, "failed to register async_tx xor: %d\n",
+ err);
+ goto err_out;
+ }
+ dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs);
+ }
+#endif
+
/* register crypto algorithms the device supports */
for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 61a1405..fc9d125 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -30,6 +30,7 @@

#define TALITOS_TIMEOUT 100000
#define TALITOS_MAX_DATA_LEN 65535
+#define TALITOS_MAX_DESCRIPTOR_NR 256

#define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
#define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
@@ -131,7 +132,57 @@ struct talitos_private {

/* hwrng device */
struct hwrng rng;
+
+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+ /* XOR Device */
+ struct dma_device dma_dev_common;
+#endif
+};
+
+#ifdef CONFIG_CRYPTO_DEV_TALITOS_RAIDXOR
+/**
+ * talitos_xor_chan - context management for the async_tx channel
+ * @completed_cookie: the last completed cookie
+ * @desc_lock: lock for tx queue
+ * @total_desc: number of descriptors allocated
+ * @submit_q: queue of submitted descriptors
+ * @pending_q: queue of pending descriptors
+ * @in_progress_q: queue of descriptors in progress
+ * @free_desc: queue of unused descriptors
+ * @dev: talitos device implementing this channel
+ * @common: the corresponding xor channel in async_tx
+ */
+struct talitos_xor_chan {
+ dma_cookie_t completed_cookie;
+ spinlock_t desc_lock;
+ unsigned int total_desc;
+ struct list_head submit_q;
+ struct list_head pending_q;
+ struct list_head in_progress_q;
+ struct list_head free_desc;
+ struct device *dev;
+ struct dma_chan common;
+};
+
+/**
+ * talitos_xor_desc - software xor descriptor
+ * @async_tx: the referring async_tx descriptor
+ * @node:
+ * @hwdesc: h/w descriptor
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction byte count
+ * @idx: index of xor sources
+ */
+struct talitos_xor_desc {
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head tx_list;
+ struct list_head node;
+ struct talitos_desc hwdesc;
+ unsigned int unmap_src_cnt;
+ unsigned int unmap_len;
+ unsigned int idx;
};
+#endif

extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
void (*callback)(struct device *dev,
@@ -284,6 +335,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
/* primary execution unit mode (MODE0) and derivatives */
#define DESC_HDR_MODE0_ENCRYPT cpu_to_be32(0x00100000)
#define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000)
+#define DESC_HDR_MODE0_AESU_XOR cpu_to_be32(0x0c600000)
#define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000)
#define DESC_HDR_MODE0_DEU_3DES cpu_to_be32(0x00200000)
#define DESC_HDR_MODE0_MDEU_CONT cpu_to_be32(0x08000000)
@@ -344,6 +396,7 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
#define DESC_HDR_TYPE_IPSEC_ESP cpu_to_be32(1 << 3)
#define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU cpu_to_be32(2 << 3)
#define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU cpu_to_be32(4 << 3)
+#define DESC_HDR_TYPE_RAID_XOR cpu_to_be32(21 << 3)

/* link table extent field bits */
#define DESC_PTR_LNKTBL_JUMP 0x80
--
1.7.5.1


Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload

On Thu, 9 Aug 2012 11:20:48 +0300, [email protected] wrote:
> From: Qiang Liu <[email protected]>
>
> Expose Talitos's XOR functionality to be used for RAID parity
> calculation via the Async_tx layer.
>
> Cc: Herbert Xu <[email protected]>
> Cc: David S. Miller <[email protected]>
> Signed-off-by: Dipen Dudhat <[email protected]>
> Signed-off-by: Maneesh Gupta <[email protected]>
> Signed-off-by: Kim Phillips <[email protected]>
> Signed-off-by: Vishnu Suresh <[email protected]>
> Signed-off-by: Qiang Liu <[email protected]>
> ---
> drivers/crypto/Kconfig | 9 +
> drivers/crypto/talitos.c | 413 ++++++++++++++++++++++++++++++++++++++++++++++
> drivers/crypto/talitos.h | 53 ++++++
> 3 files changed, 475 insertions(+), 0 deletions(-)


> +static void talitos_xor_run_tx_complete_actions(struct talitos_xor_desc *desc,
> + struct talitos_xor_chan *xor_chan)
> +{
> + struct device *dev = xor_chan->dev;
> + dma_addr_t dest, addr;
> + unsigned int src_cnt = desc->unmap_src_cnt;
> + unsigned int len = desc->unmap_len;
> + enum dma_ctrl_flags flags = desc->async_tx.flags;
> + struct dma_async_tx_descriptor *tx = &desc->async_tx;
> +
> + /* unmap dma addresses */
> + dest = desc->hwdesc.ptr[6].ptr;
> + if (likely(!(flags & DMA_COMPL_SKIP_DEST_UNMAP)))
> + dma_unmap_page(dev, dest, len, DMA_BIDIRECTIONAL);
> +
> + desc->idx = 6 - src_cnt;
> + if (likely(!(flags & DMA_COMPL_SKIP_SRC_UNMAP))) {
> + while(desc->idx < 6) {
> + addr = desc->hwdesc.ptr[desc->idx++].ptr;
> + if (addr == dest)
> + continue;
> + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
> + }
> + }

No need for braces around the while block.

> + /* run dependent operations */
> + dma_run_dependencies(tx);
> +}


> +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
> + void *context, int error)
> +{
> + struct talitos_xor_desc *desc = context;
> + struct talitos_xor_chan *xor_chan;
> + dma_async_tx_callback callback;
> + void *callback_param;
> +
> + if (unlikely(error))
> + dev_err(dev, "xor operation: talitos error %d\n", error);
> +
> + xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
> + common);
> + spin_lock_bh(&xor_chan->desc_lock);
> + if (xor_chan->completed_cookie < desc->async_tx.cookie)
> + xor_chan->completed_cookie = desc->async_tx.cookie;
> +
> + callback = desc->async_tx.callback;
> + callback_param = desc->async_tx.callback_param;
> +
> + if (callback) {
> + spin_unlock_bh(&xor_chan->desc_lock);
> + callback(callback_param);
> + spin_lock_bh(&xor_chan->desc_lock);
> + }

Since callback_param is used only here, maybe:

if (callback) {
void *callback_param = desc->async_tx.callback_param;

spin_unlock_bh(&xor_chan->desc_lock);
callback(callback_param);
spin_lock_bh(&xor_chan->desc_lock);
}

> +
> + talitos_xor_run_tx_complete_actions(desc, xor_chan);
> +
> + list_del(&desc->node);
> + list_add_tail(&desc->node, &xor_chan->free_desc);
> + spin_unlock_bh(&xor_chan->desc_lock);
> + if (!list_empty(&xor_chan->pending_q))
> + talitos_process_pending(xor_chan);
> +}


> +static int talitos_alloc_chan_resources(struct dma_chan *chan)
> +{
> + struct talitos_xor_chan *xor_chan;
> + struct talitos_xor_desc *desc;
> + LIST_HEAD(tmp_list);
> + int i;
> +
> + xor_chan = container_of(chan, struct talitos_xor_chan, common);
> +
> + if (!list_empty(&xor_chan->free_desc))
> + return xor_chan->total_desc;
> +
> + for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
> + desc = talitos_xor_alloc_descriptor(xor_chan,
> + GFP_KERNEL | GFP_DMA);

talitos_xor_alloc_descriptor() is called here without holding
the xor_chan->desc_lock and it increments xor_chan->total_desc.
Isn't this an issue ?

> + if (!desc) {
> + dev_err(xor_chan->common.device->dev,
> + "Only %d initial descriptors\n", i);
> + break;
> + }
> + list_add_tail(&desc->node, &tmp_list);
> + }
> +
> + if (!i)
> + return -ENOMEM;
> +
> + /* At least one desc is allocated */
> + spin_lock_bh(&xor_chan->desc_lock);
> + list_splice_init(&tmp_list, &xor_chan->free_desc);
> + spin_unlock_bh(&xor_chan->desc_lock);
> +
> + return xor_chan->total_desc;
> +}


> +/**
> + * talitos_register_dma_async - Initialize the Freescale XOR ADMA device
> + * It is registered as a DMA device with the capability to perform
> + * XOR operation with the Async_tx layer.
> + * The various queues and channel resources are also allocated.
> + */
> +static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
> +{
> + struct talitos_private *priv = dev_get_drvdata(dev);
> + struct dma_device *dma_dev = &priv->dma_dev_common;
> + struct talitos_xor_chan *xor_chan;
> + int err;
> +
> + xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
> + if (!xor_chan) {
> + dev_err(dev, "unable to allocate xor channel\n");
> + return -ENOMEM;
> + }
> +
> + dma_dev->dev = dev;
> + dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
> + dma_dev->device_free_chan_resources = talitos_free_chan_resources;
> + dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
> + dma_dev->max_xor = max_xor_srcs;
> + dma_dev->device_tx_status = talitos_is_tx_complete;
> + dma_dev->device_issue_pending = talitos_issue_pending;
> + INIT_LIST_HEAD(&dma_dev->channels);
> + dma_cap_set(DMA_XOR, dma_dev->cap_mask);
> +
> + xor_chan->dev = dev;
> + xor_chan->common.device = dma_dev;
> + xor_chan->total_desc = 0;
> + INIT_LIST_HEAD(&xor_chan->submit_q);
> + INIT_LIST_HEAD(&xor_chan->pending_q);
> + INIT_LIST_HEAD(&xor_chan->in_progress_q);
> + INIT_LIST_HEAD(&xor_chan->free_desc);
> + spin_lock_init(&xor_chan->desc_lock);
> +
> + list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
> + dma_dev->chancnt++;
> +
> + err = dma_async_device_register(dma_dev);
> + if (err) {
> + dev_err(dev, "Unable to register XOR with Async_tx\n");
> + goto err_out;

Replace the jump with talitos_unregister_async_xor(dev) and
remove code under err_out label.

> + }
> +
> + return err;
> +
> +err_out:
> + talitos_unregister_async_xor(dev);
> + return err;
> +}
> +#endif


> diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
> index 61a1405..fc9d125 100644
> --- a/drivers/crypto/talitos.h
> +++ b/drivers/crypto/talitos.h
> @@ -30,6 +30,7 @@
>
> #define TALITOS_TIMEOUT 100000
> #define TALITOS_MAX_DATA_LEN 65535
> +#define TALITOS_MAX_DESCRIPTOR_NR 256

This refers only to xor descriptors, so renaming it similar to
TALITOS_MAX_XOR_DESCRIPTOR_NR would make sense.

Besides these:
1. As Dan Williams mentioned, you should explain why you are using
both spin_lock_bh and spin_lock_irqsave on the same lock.
2. I don't see anything added to talitos_remove(). Shouldn't
talitos_unregister_async_xor() be called? Anything else?
Have you tested with talitos built as a module?

Horia

2012-08-31 03:08:15

by Liu Qiang-B32616

[permalink] [raw]
Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload

> -----Original Message-----
> From: Geanta Neag Horia Ioan-B05471
> Sent: Thursday, August 30, 2012 10:23 PM
> To: Liu Qiang-B32616; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; linuxppc-
> [email protected]
> Cc: Li Yang-R58472; Phillips Kim-R1AAHA; [email protected];
> [email protected]; [email protected]; [email protected]; Liu
> Qiang-B32616
> Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload
>
> On Thu, 9 Aug 2012 11:20:48 +0300, [email protected] wrote:
> > From: Qiang Liu <[email protected]>
> >
> > Expose Talitos's XOR functionality to be used for RAID parity
> > calculation via the Async_tx layer.
> >
> > Cc: Herbert Xu <[email protected]>
> > Cc: David S. Miller <[email protected]>
> > Signed-off-by: Dipen Dudhat <[email protected]>
> > Signed-off-by: Maneesh Gupta <[email protected]>
> > Signed-off-by: Kim Phillips <[email protected]>
> > Signed-off-by: Vishnu Suresh <[email protected]>
> > Signed-off-by: Qiang Liu <[email protected]>
> > ---
> > drivers/crypto/Kconfig | 9 +
> > drivers/crypto/talitos.c | 413
> ++++++++++++++++++++++++++++++++++++++++++++++
> > drivers/crypto/talitos.h | 53 ++++++
> > 3 files changed, 475 insertions(+), 0 deletions(-)
>
>
> > +static void talitos_xor_run_tx_complete_actions(struct
> talitos_xor_desc *desc,
> > + struct talitos_xor_chan *xor_chan)
> > +{
> > + struct device *dev = xor_chan->dev;
> > + dma_addr_t dest, addr;
> > + unsigned int src_cnt = desc->unmap_src_cnt;
> > + unsigned int len = desc->unmap_len;
> > + enum dma_ctrl_flags flags = desc->async_tx.flags;
> > + struct dma_async_tx_descriptor *tx = &desc->async_tx;
> > +
> > + /* unmap dma addresses */
> > + dest = desc->hwdesc.ptr[6].ptr;
> > + if (likely(!(flags & DMA_COMPL_SKIP_DEST_UNMAP)))
> > + dma_unmap_page(dev, dest, len, DMA_BIDIRECTIONAL);
> > +
> > + desc->idx = 6 - src_cnt;
> > + if (likely(!(flags & DMA_COMPL_SKIP_SRC_UNMAP))) {
> > + while(desc->idx < 6) {
> > + addr = desc->hwdesc.ptr[desc->idx++].ptr;
> > + if (addr == dest)
> > + continue;
> > + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
> > + }
> > + }
>
> No need for braces around the while block.
I will remove it.

>
> > + /* run dependent operations */
> > + dma_run_dependencies(tx);
> > +}
>
>
> > +static void talitos_release_xor(struct device *dev, struct
> talitos_desc *hwdesc,
> > + void *context, int error)
> > +{
> > + struct talitos_xor_desc *desc = context;
> > + struct talitos_xor_chan *xor_chan;
> > + dma_async_tx_callback callback;
> > + void *callback_param;
> > +
> > + if (unlikely(error))
> > + dev_err(dev, "xor operation: talitos error %d\n", error);
> > +
> > + xor_chan = container_of(desc->async_tx.chan, struct
> talitos_xor_chan,
> > + common);
> > + spin_lock_bh(&xor_chan->desc_lock);
> > + if (xor_chan->completed_cookie < desc->async_tx.cookie)
> > + xor_chan->completed_cookie = desc->async_tx.cookie;
> > +
> > + callback = desc->async_tx.callback;
> > + callback_param = desc->async_tx.callback_param;
> > +
> > + if (callback) {
> > + spin_unlock_bh(&xor_chan->desc_lock);
> > + callback(callback_param);
> > + spin_lock_bh(&xor_chan->desc_lock);
> > + }
>
> Since callback_param is used only here, maybe:
>
> if (callback) {
> void *callback_param = desc->async_tx.callback_param;
>
> spin_unlock_bh(&xor_chan->desc_lock);
> callback(callback_param);
> spin_lock_bh(&xor_chan->desc_lock);
> }
Fine. I will modify it in next.

>
> > +
> > + talitos_xor_run_tx_complete_actions(desc, xor_chan);
> > +
> > + list_del(&desc->node);
> > + list_add_tail(&desc->node, &xor_chan->free_desc);
> > + spin_unlock_bh(&xor_chan->desc_lock);
> > + if (!list_empty(&xor_chan->pending_q))
> > + talitos_process_pending(xor_chan);
> > +}
>
>
> > +static int talitos_alloc_chan_resources(struct dma_chan *chan)
> > +{
> > + struct talitos_xor_chan *xor_chan;
> > + struct talitos_xor_desc *desc;
> > + LIST_HEAD(tmp_list);
> > + int i;
> > +
> > + xor_chan = container_of(chan, struct talitos_xor_chan, common);
> > +
> > + if (!list_empty(&xor_chan->free_desc))
> > + return xor_chan->total_desc;
> > +
> > + for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
> > + desc = talitos_xor_alloc_descriptor(xor_chan,
> > + GFP_KERNEL | GFP_DMA);
>
> talitos_xor_alloc_descriptor() is called here without holding
> the xor_chan->desc_lock and it increments xor_chan->total_desc.
> Isn't this an issue ?

No, please refer to the code as below,
+ list_add_tail(&desc->node, &tmp_list);

The list is temporary list, it will be merged to xor_chan->free_desc in next step, here is protected by lock,
+ spin_lock_bh(&xor_chan->desc_lock);
+ list_splice_init(&tmp_list, &xor_chan->free_desc);
+ spin_unlock_bh(&xor_chan->desc_lock);

>
> > + if (!desc) {
> > + dev_err(xor_chan->common.device->dev,
> > + "Only %d initial descriptors\n", i);
> > + break;
> > + }
> > + list_add_tail(&desc->node, &tmp_list);
> > + }
> > +
> > + if (!i)
> > + return -ENOMEM;
> > +
> > + /* At least one desc is allocated */
> > + spin_lock_bh(&xor_chan->desc_lock);
> > + list_splice_init(&tmp_list, &xor_chan->free_desc);
> > + spin_unlock_bh(&xor_chan->desc_lock);
> > +
> > + return xor_chan->total_desc;
> > +}
>
>
> > +/**
> > + * talitos_register_dma_async - Initialize the Freescale XOR ADMA
> device
> > + * It is registered as a DMA device with the capability to perform
> > + * XOR operation with the Async_tx layer.
> > + * The various queues and channel resources are also allocated.
> > + */
> > +static int talitos_register_async_tx(struct device *dev, int
> max_xor_srcs)
> > +{
> > + struct talitos_private *priv = dev_get_drvdata(dev);
> > + struct dma_device *dma_dev = &priv->dma_dev_common;
> > + struct talitos_xor_chan *xor_chan;
> > + int err;
> > +
> > + xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
> > + if (!xor_chan) {
> > + dev_err(dev, "unable to allocate xor channel\n");
> > + return -ENOMEM;
> > + }
> > +
> > + dma_dev->dev = dev;
> > + dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
> > + dma_dev->device_free_chan_resources = talitos_free_chan_resources;
> > + dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
> > + dma_dev->max_xor = max_xor_srcs;
> > + dma_dev->device_tx_status = talitos_is_tx_complete;
> > + dma_dev->device_issue_pending = talitos_issue_pending;
> > + INIT_LIST_HEAD(&dma_dev->channels);
> > + dma_cap_set(DMA_XOR, dma_dev->cap_mask);
> > +
> > + xor_chan->dev = dev;
> > + xor_chan->common.device = dma_dev;
> > + xor_chan->total_desc = 0;
> > + INIT_LIST_HEAD(&xor_chan->submit_q);
> > + INIT_LIST_HEAD(&xor_chan->pending_q);
> > + INIT_LIST_HEAD(&xor_chan->in_progress_q);
> > + INIT_LIST_HEAD(&xor_chan->free_desc);
> > + spin_lock_init(&xor_chan->desc_lock);
> > +
> > + list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
> > + dma_dev->chancnt++;
> > +
> > + err = dma_async_device_register(dma_dev);
> > + if (err) {
> > + dev_err(dev, "Unable to register XOR with Async_tx\n");
> > + goto err_out;
>
> Replace the jump with talitos_unregister_async_xor(dev) and
> remove code under err_out label.
No, here should be reserved, it should free xor_chan and remove "dma_dev->chancnt++;".
Actually, I find most of code doesn't care this return value.
I will correct it in next.

>
> > + }
> > +
> > + return err;
> > +
> > +err_out:
> > + talitos_unregister_async_xor(dev);
> > + return err;
> > +}
> > +#endif
>
>
> > diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
> > index 61a1405..fc9d125 100644
> > --- a/drivers/crypto/talitos.h
> > +++ b/drivers/crypto/talitos.h
> > @@ -30,6 +30,7 @@
> >
> > #define TALITOS_TIMEOUT 100000
> > #define TALITOS_MAX_DATA_LEN 65535
> > +#define TALITOS_MAX_DESCRIPTOR_NR 256
>
> This refers only to xor descriptors, so renaming it similar to
> TALITOS_MAX_XOR_DESCRIPTOR_NR would make sense.
I remember it is applied to other descriptors, I will check it again, if not, I will rename it as you suggested.

>
> Besides these:
> 1. As Dan Williams mentioned, you should explain why you are using
> both spin_lock_bh and spin_lock_irqsave on the same lock.
I'm waiting for Dan's feedback about this patch, I will add description or correct it with other comments together.

> 2. I don't see anything added to talitos_remove(). Shouldn't
> talitos_unregister_async_xor() be called? Anything else?
> Have you tested with talitos built as a module?
My fault, it should be added in talitos_remove(); I will correct it in next.
Thanks for your review.

>
> Horia

Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload

On Fri, 31 Aug 2012 06:08:05 +0300, Liu Qiang-B32616 <[email protected]>
wrote:
> > -----Original Message-----
> > From: Geanta Neag Horia Ioan-B05471
> > Sent: Thursday, August 30, 2012 10:23 PM
> > To: Liu Qiang-B32616; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]; linuxppc-
> > [email protected]
> > Cc: Li Yang-R58472; Phillips Kim-R1AAHA; [email protected];
> > [email protected]; [email protected]; [email protected]; Liu
> > Qiang-B32616
> > Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload
> >
> > On Thu, 9 Aug 2012 11:20:48 +0300, [email protected] wrote:
> > > From: Qiang Liu <[email protected]>
> > >
> > > Expose Talitos's XOR functionality to be used for RAID parity
> > > calculation via the Async_tx layer.
> > >
> > > Cc: Herbert Xu <[email protected]>
> > > Cc: David S. Miller <[email protected]>
> > > Signed-off-by: Dipen Dudhat <[email protected]>
> > > Signed-off-by: Maneesh Gupta <[email protected]>
> > > Signed-off-by: Kim Phillips <[email protected]>
> > > Signed-off-by: Vishnu Suresh <[email protected]>
> > > Signed-off-by: Qiang Liu <[email protected]>
> > > ---
> > > drivers/crypto/Kconfig | 9 +
> > > drivers/crypto/talitos.c | 413
> > ++++++++++++++++++++++++++++++++++++++++++++++
> > > drivers/crypto/talitos.h | 53 ++++++
> > > 3 files changed, 475 insertions(+), 0 deletions(-)
> >

> > > +static int talitos_alloc_chan_resources(struct dma_chan *chan)
> > > +{
> > > + struct talitos_xor_chan *xor_chan;
> > > + struct talitos_xor_desc *desc;
> > > + LIST_HEAD(tmp_list);
> > > + int i;
> > > +
> > > + xor_chan = container_of(chan, struct talitos_xor_chan, common);
> > > +
> > > + if (!list_empty(&xor_chan->free_desc))
> > > + return xor_chan->total_desc;
> > > +
> > > + for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
> > > + desc = talitos_xor_alloc_descriptor(xor_chan,
> > > + GFP_KERNEL | GFP_DMA);
> >
> > talitos_xor_alloc_descriptor() is called here without holding
> > the xor_chan->desc_lock and it increments xor_chan->total_desc.
> > Isn't this an issue ?
>
> No, please refer to the code as below,
> + list_add_tail(&desc->node, &tmp_list);
>
> The list is temporary list, it will be merged to xor_chan->free_desc in next step, here is protected
> by lock,
> + spin_lock_bh(&xor_chan->desc_lock);
> + list_splice_init(&tmp_list, &xor_chan->free_desc);
> + spin_unlock_bh(&xor_chan->desc_lock);

I was not referring to the list, but to xor_chan->total_desc variable.
The following access:
talitos_alloc_chan_resources()->talitos_xor_alloc_descriptor()->total_desc++
is not protected by the xor_chan->desc_lock.

2012-08-31 10:41:17

by Liu Qiang-B32616

[permalink] [raw]
Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload

> -----Original Message-----
> From: Geanta Neag Horia Ioan-B05471
> Sent: Friday, August 31, 2012 6:39 PM
> To: Liu Qiang-B32616; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; linuxppc-
> [email protected]
> Cc: Li Yang-R58472; Phillips Kim-R1AAHA; [email protected]; Dan
> Williams; [email protected]; [email protected]
> Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload
>
> On Fri, 31 Aug 2012 06:08:05 +0300, Liu Qiang-B32616
> <[email protected]>
> wrote:
> > > -----Original Message-----
> > > From: Geanta Neag Horia Ioan-B05471
> > > Sent: Thursday, August 30, 2012 10:23 PM
> > > To: Liu Qiang-B32616; [email protected];
> > > [email protected]; [email protected];
> > > [email protected]; [email protected]; linuxppc-
> > > [email protected]
> > > Cc: Li Yang-R58472; Phillips Kim-R1AAHA; [email protected];
> > > [email protected]; [email protected]; [email protected];
> Liu
> > > Qiang-B32616
> > > Subject: RE: [PATCH v7 1/8] Talitos: Support for async_tx XOR offload
> > >
> > > On Thu, 9 Aug 2012 11:20:48 +0300, [email protected] wrote:
> > > > From: Qiang Liu <[email protected]>
> > > >
> > > > Expose Talitos's XOR functionality to be used for RAID parity
> > > > calculation via the Async_tx layer.
> > > >
> > > > Cc: Herbert Xu <[email protected]>
> > > > Cc: David S. Miller <[email protected]>
> > > > Signed-off-by: Dipen Dudhat <[email protected]>
> > > > Signed-off-by: Maneesh Gupta <[email protected]>
> > > > Signed-off-by: Kim Phillips <[email protected]>
> > > > Signed-off-by: Vishnu Suresh <[email protected]>
> > > > Signed-off-by: Qiang Liu <[email protected]>
> > > > ---
> > > > drivers/crypto/Kconfig | 9 +
> > > > drivers/crypto/talitos.c | 413
> > > ++++++++++++++++++++++++++++++++++++++++++++++
> > > > drivers/crypto/talitos.h | 53 ++++++
> > > > 3 files changed, 475 insertions(+), 0 deletions(-)
> > >
>
> > > > +static int talitos_alloc_chan_resources(struct dma_chan *chan)
> > > > +{
> > > > + struct talitos_xor_chan *xor_chan;
> > > > + struct talitos_xor_desc *desc;
> > > > + LIST_HEAD(tmp_list);
> > > > + int i;
> > > > +
> > > > + xor_chan = container_of(chan, struct talitos_xor_chan,
> common);
> > > > +
> > > > + if (!list_empty(&xor_chan->free_desc))
> > > > + return xor_chan->total_desc;
> > > > +
> > > > + for (i = 0; i < TALITOS_MAX_DESCRIPTOR_NR; i++) {
> > > > + desc = talitos_xor_alloc_descriptor(xor_chan,
> > > > + GFP_KERNEL | GFP_DMA);
> > >
> > > talitos_xor_alloc_descriptor() is called here without holding
> > > the xor_chan->desc_lock and it increments xor_chan->total_desc.
> > > Isn't this an issue ?
> >
> > No, please refer to the code as below,
> > + list_add_tail(&desc->node, &tmp_list);
> >
> > The list is temporary list, it will be merged to xor_chan->free_desc in
> next step, here is protected
> > by lock,
> > + spin_lock_bh(&xor_chan->desc_lock);
> > + list_splice_init(&tmp_list, &xor_chan->free_desc);
> > + spin_unlock_bh(&xor_chan->desc_lock);
>
> I was not referring to the list, but to xor_chan->total_desc variable.
> The following access:
> talitos_alloc_chan_resources()->talitos_xor_alloc_descriptor()-
> >total_desc++
> is not protected by the xor_chan->desc_lock.
Ok, I will correct it in next series. Thanks.