2008-06-23 16:46:20

by Nicolas Pitre

[permalink] [raw]
Subject: [PATCH/RFC] DMA engine driver for Marvell XOR engine

Here is a driver for the DMA/XOR engine found on many ARM SoCs from
Marvell, such as Orion and newly supported Kirkwood. Could interested
people (notably Maciej Sosnowski and Dan Williams who are listed as
maintainers for the DMA generic offload subsystem) review this patch,
and ultimately provide their ACK, so we could push this driver along
with additional ARM patches that depend on this one through the ARM git
tree.

Thanks

-----
From: Saeed Bishara <[email protected]>

The Marvell XOR engine found in Marvell's SoCs and system controllers
provides xor and DMA operation, iSCSI CRC32C calculation, memory initialization,
and memory ECC errors cleanup operation support.

This driver implements the DMA engine API and support the following capabilities:
memcpy
xor
memset

The XOR engine can be used by DMA engine clients implemented in the
kernel, one of those clients is the RAID module. in that case, I
observed 20% improvement in the raid5 writes throughput, and 40%
decrease in the CPU utilization when doing array construction, those
results performed on 5182 running at 500Mhz.

when enabling the NET DMA client, the performance decreased, so
meanwhile it is recommended to keep this client off.

Signed-off-by: Saeed Bishara <[email protected]>
---
drivers/dma/Kconfig | 11 +-
drivers/dma/Makefile | 1 +
drivers/dma/mv_xor.c | 1446 +++++++++++++++++++++++++++++++++++
drivers/dma/mv_xor.h | 187 +++++
include/asm-arm/plat-orion/mv_xor.h | 24 +
5 files changed, 1668 insertions(+), 1 deletions(-)
create mode 100644 drivers/dma/mv_xor.c
create mode 100644 drivers/dma/mv_xor.h
create mode 100644 include/asm-arm/plat-orion/mv_xor.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6239c3d..8665e39 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -4,7 +4,7 @@

menuconfig DMADEVICES
bool "DMA Engine support"
- depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC
+ depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC || PLAT_ORION
depends on !HIGHMEM64G
help
DMA engines can do asynchronous data transfers without
@@ -46,6 +46,15 @@ config FSL_DMA
MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
The MPC8349, MPC8360 is also supported.

+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select ASYNC_CORE
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine. This engine is present
+ in some of Marvell's SoC such as the MV88F5182.
+
config DMA_ENGINE
bool

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8036d9..ee272fd 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..bc708b7
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1446 @@
+/*
+ * offload engine driver for the XOR engines of the Marvell Feroceon processors
+ * Copyright ?? 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <asm/plat-orion/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan);
+
+#define to_mv_xor_chan(chan) container_of(chan, struct mv_xor_chan, common)
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+#define tx_to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static inline void
+mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ if (flags & DMA_PREP_INTERRUPT)
+ hw_desc->desc_command = (1 << 31);
+ else
+ hw_desc->desc_command = 0;
+
+ hw_desc->desc_command = (1 << 31);
+}
+
+static inline u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static inline u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static inline void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static inline void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+static inline u32 mv_desc_get_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_next_desc;
+}
+
+static inline void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static inline void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc,
+ u32 val)
+{
+ desc->value = val;
+}
+
+static inline void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static inline int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static inline void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static inline u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static inline void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+static inline void mv_chan_set_dest_pointer(struct mv_xor_chan *chan,
+ u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static inline void mv_chan_set_block_size(struct mv_xor_chan *chan,
+ u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static inline void mv_chan_set_value(struct mv_xor_chan *chan,
+ u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static inline void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+static inline u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static inline int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause &
+ ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static inline void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static inline void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static inline int mv_can_chain(struct mv_xor_chan *chan,
+ struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+ return 1;
+}
+
+static inline void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static inline void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static inline void mv_chan_disable(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= (1 << 1);
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static inline void mv_chan_pause(struct mv_xor_chan *chan)
+{
+ u32 activation;
+ __raw_writel(1 << 2, XOR_ACTIVATION(chan));
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+}
+
+static inline void mv_chan_restart(struct mv_xor_chan *chan)
+{
+ __raw_writel(1 << 3, XOR_ACTIVATION(chan));
+}
+
+static inline int mv_desc_get_zero_result(struct mv_xor_desc_slot *desc)
+{
+ BUG();
+ return 0;
+}
+
+static inline void mv_chan_idle(int busy, struct mv_xor_chan *chan)
+{
+ do { } while (0);
+}
+
+static inline char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1)? 1 : 0;
+}
+
+static inline int
+mv_chan_get_desc_align(struct mv_xor_chan *chan, int num_slots)
+{
+ return 1;
+}
+
+static inline int
+mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_check_threshold(mv_chan);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t addr = mv_desc_get_dest_addr(unmap, mv_chan);
+
+ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap, mv_chan,
+ src_cnt);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan)
+{
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = tx_to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ else{
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(mv_chan, grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ do {
+ idx = mv_chan->slots_allocated;
+ if (idx == num_descs_in_pool)
+ break;
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ } while (mv_chan->slots_allocated < num_descs_in_pool);
+
+ if (idx && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8; ;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ INIT_RCU_HEAD(&mv_chan->common.rcu);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = mv_xor_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base_high = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base_high)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+module_init(mv_xor_init);
+
+MODULE_AUTHOR("Saeed Bishara <[email protected]>");
+MODULE_DESCRIPTION("DMA Engine driver for Marvell's XOR");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..7c6d624
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright ?? 2007, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef MV_XOR_H
+#define MV_XOR_H
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR0_ID 0
+#define XOR1_ID 1
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_base_high;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* Successful descriptor execution indication */
+ u32 crc32_result; /* Result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out on the
+ data */
+ u32 phy_next_desc; /* Next descriptor address pointer */
+ u32 byte_count; /* Size of source and destination blocks in
+ bytes */
+ u32 phy_dest_addr; /* Destination Block address pointer */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *) (((unsigned long)hw_desc) + ((idx) << 5)))
+
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+#endif
diff --git a/include/asm-arm/plat-orion/mv_xor.h b/include/asm-arm/plat-orion/mv_xor.h
new file mode 100644
index 0000000..c92bf08
--- /dev/null
+++ b/include/asm-arm/plat-orion/mv_xor.h
@@ -0,0 +1,24 @@
+/*
+ * Marvell XOR platform device data definition file.
+ */
+#ifndef __LINUX_MV_XOR_H
+#define __LINUX_MV_XOR_H
+#include <linux/dmaengine.h>
+#include <linux/mbus.h>
+
+#define MV_XOR_SHARED_NAME "mv_xor_shared"
+#define MV_XOR_NAME "mv_xor"
+
+struct mbus_dram_target_info;
+
+struct mv_xor_platform_shared_data {
+ struct mbus_dram_target_info *dram;
+};
+
+struct mv_xor_platform_data {
+ int hw_id;
+ dma_cap_mask_t cap_mask;
+ size_t pool_size;
+ struct platform_device *shared;
+};
+#endif


2008-06-23 21:55:22

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine


On Mon, 2008-06-23 at 09:46 -0700, Nicolas Pitre wrote:
> Here is a driver for the DMA/XOR engine found on many ARM SoCs from
> Marvell, such as Orion and newly supported Kirkwood. Could interested
> people (notably Maciej Sosnowski and Dan Williams who are listed as
> maintainers for the DMA generic offload subsystem) review this patch,
> and ultimately provide their ACK, so we could push this driver along
> with additional ARM patches that depend on this one through the ARM git
> tree.
>
> Thanks

Sticking closely to the construction of iop-adma made this that much
easier to read, thanks. See below for an 'inline' cleanup suggestion, a
few questions, and a checkpatch escape, but other than that:

Acked-by: Dan Williams <[email protected]>
>
> -----
> From: Saeed Bishara <[email protected]>
>
> The Marvell XOR engine found in Marvell's SoCs and system controllers
> provides xor and DMA operation, iSCSI CRC32C calculation, memory initialization,
> and memory ECC errors cleanup operation support.
>
> This driver implements the DMA engine API and support the following capabilities:
> memcpy
> xor
> memset
>
> The XOR engine can be used by DMA engine clients implemented in the
> kernel, one of those clients is the RAID module. in that case, I
> observed 20% improvement in the raid5 writes throughput, and 40%
> decrease in the CPU utilization when doing array construction, those
> results performed on 5182 running at 500Mhz.
>
> when enabling the NET DMA client, the performance decreased, so
> meanwhile it is recommended to keep this client off.

Yes, I see this as well on iop platforms. The problem is that the cache
flushes in get_user_pages are too expensive, leading to out of tree
hacks like:

http://git.kernel.org/?p=linux/kernel/git/djbw/xscaleiop.git;a=commitdiff;h=729ea28e581ca25197f3e92b263b0659d4bdc341

...this helps iop3xx but not iop13xx to a noticeable degree.

> Signed-off-by: Saeed Bishara <[email protected]>
> ---
> drivers/dma/Kconfig | 11 +-
> drivers/dma/Makefile | 1 +
> drivers/dma/mv_xor.c | 1446 +++++++++++++++++++++++++++++++++++
> drivers/dma/mv_xor.h | 187 +++++
> include/asm-arm/plat-orion/mv_xor.h | 24 +
> 5 files changed, 1668 insertions(+), 1 deletions(-)
> create mode 100644 drivers/dma/mv_xor.c
> create mode 100644 drivers/dma/mv_xor.h
> create mode 100644 include/asm-arm/plat-orion/mv_xor.h
>
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index 6239c3d..8665e39 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
[..]


> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index c8036d9..ee272fd 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
[..]

> diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
> new file mode 100644
> index 0000000..bc708b7
> --- /dev/null
> +++ b/drivers/dma/mv_xor.c
> @@ -0,0 +1,1446 @@
> +/*
> + * offload engine driver for the XOR engines of the Marvell Feroceon processors
> + * Copyright ?? 2008, Marvell International Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + *
> + */
> +
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/async_tx.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/spinlock.h>
> +#include <linux/interrupt.h>
> +#include <linux/platform_device.h>
> +#include <linux/memory.h>
> +#include <asm/plat-orion/mv_xor.h>
> +#include "mv_xor.h"
> +
> +static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan);
> +
> +#define to_mv_xor_chan(chan) container_of(chan, struct mv_xor_chan, common)
> +#define to_mv_xor_device(dev) \
> + container_of(dev, struct mv_xor_device, common)
> +#define tx_to_mv_xor_slot(tx) \
> + container_of(tx, struct mv_xor_desc_slot, async_tx)
> +
> +static inline void

I can already hear Adrian writing up a 'remove inlines from .c files'
patch. Is gcc's automatic inlining insufficient for all these routines?
How about limiting it to just the routines are called from
device_prep_dma.{xor,memcpy,memset}?

> +mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> +
> + hw_desc->status = (1 << 31);
> + hw_desc->phy_next_desc = 0;
> + if (flags & DMA_PREP_INTERRUPT)
> + hw_desc->desc_command = (1 << 31);
> + else
> + hw_desc->desc_command = 0;
> +
> + hw_desc->desc_command = (1 << 31);
> +}
> +
> +static inline u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *chan)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_dest_addr;
> +}
> +
> +static inline u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *chan,
> + int src_idx)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_src_addr[src_idx];
> +}
> +
> +
> +static inline void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *chan,
> + u32 byte_count)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->byte_count = byte_count;
> +}
> +
> +static inline void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
> + u32 next_desc_addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + BUG_ON(hw_desc->phy_next_desc);
> + hw_desc->phy_next_desc = next_desc_addr;
> +}
> +static inline u32 mv_desc_get_next_desc(struct mv_xor_desc_slot *desc)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_next_desc;
> +}
> +
> +static inline void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_next_desc = 0;
> +}
> +
> +static inline void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc,
> + u32 val)
> +{
> + desc->value = val;
> +}
> +
> +static inline void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *chan,
> + dma_addr_t addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_dest_addr = addr;
> +}
> +
> +static inline int mv_chan_memset_slot_count(size_t len)
> +{
> + return 1;
> +}
> +
> +#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
> +
> +static inline void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
> + int index,
> + dma_addr_t addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_src_addr[index] = addr;
> + if (desc->type == DMA_XOR)
> + hw_desc->desc_command |= (1 << index);
> +}
> +
> +static inline u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
> +{
> + return __raw_readl(XOR_CURR_DESC(chan));
> +}
> +
> +static inline void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
> + u32 next_desc_addr)
> +{
> + __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
> +}
> +static inline void mv_chan_set_dest_pointer(struct mv_xor_chan *chan,
> + u32 desc_addr)
> +{
> + __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
> +}
> +
> +static inline void mv_chan_set_block_size(struct mv_xor_chan *chan,
> + u32 block_size)
> +{
> + __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
> +}
> +
> +static inline void mv_chan_set_value(struct mv_xor_chan *chan,
> + u32 value)
> +{
> + __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
> + __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
> +}
> +
> +static inline void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
> +{
> + u32 val = __raw_readl(XOR_INTR_MASK(chan));
> + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
> + __raw_writel(val, XOR_INTR_MASK(chan));
> +}
> +static inline u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
> +{
> + u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
> + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
> + return intr_cause;
> +}
> +
> +static inline int mv_is_err_intr(u32 intr_cause)
> +{
> + if (intr_cause &
> + ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
> + return 1;
> +
> + return 0;
> +}
> +
> +static inline void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
> +{
> + u32 val = (1 << (1 + (chan->idx * 16)));
> + dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
> + __raw_writel(val, XOR_INTR_CAUSE(chan));
> +}
> +
> +static inline void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
> +{
> + u32 val = 0xFFFF0000 >> (chan->idx * 16);
> + __raw_writel(val, XOR_INTR_CAUSE(chan));
> +}
> +
> +static inline int mv_can_chain(struct mv_xor_chan *chan,
> + struct mv_xor_desc_slot *desc)
> +{
> + struct mv_xor_desc_slot *chain_old_tail = list_entry(
> + desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
> +
> + if (chain_old_tail->type != desc->type)
> + return 0;
> + if (desc->type == DMA_MEMSET)
> + return 0;
> + return 1;
> +}
> +
> +static inline void mv_set_mode(struct mv_xor_chan *chan,
> + enum dma_transaction_type type)
> +{
> + u32 op_mode;
> + u32 config = __raw_readl(XOR_CONFIG(chan));
> +
> + switch (type) {
> + case DMA_XOR:
> + op_mode = XOR_OPERATION_MODE_XOR;
> + break;
> + case DMA_MEMCPY:
> + op_mode = XOR_OPERATION_MODE_MEMCPY;
> + break;
> + case DMA_MEMSET:
> + op_mode = XOR_OPERATION_MODE_MEMSET;
> + break;
> + default:
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error: unsupported operation %d.\n",
> + type);
> + BUG();
> + return;
> + }
> +
> + config &= ~0x7;
> + config |= op_mode;
> + __raw_writel(config, XOR_CONFIG(chan));
> + chan->current_type = type;
> +}
> +
> +static inline void mv_chan_activate(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> +
> + dev_dbg(chan->device->common.dev, " activate chan.\n");
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> + activation |= 0x1;
> + __raw_writel(activation, XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_disable(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> +
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> + activation |= (1 << 1);
> + __raw_writel(activation, XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_pause(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> + __raw_writel(1 << 2, XOR_ACTIVATION(chan));
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_restart(struct mv_xor_chan *chan)
> +{
> + __raw_writel(1 << 3, XOR_ACTIVATION(chan));
> +}
> +
> +static inline int mv_desc_get_zero_result(struct mv_xor_desc_slot *desc)
> +{
> + BUG();
> + return 0;
> +}
> +
> +static inline void mv_chan_idle(int busy, struct mv_xor_chan *chan)
> +{
> + do { } while (0);
> +}
> +
> +static inline char mv_chan_is_busy(struct mv_xor_chan *chan)
> +{
> + u32 state = __raw_readl(XOR_ACTIVATION(chan));
> +
> + state = (state >> 4) & 0x3;
> +
> + return (state == 1)? 1 : 0;
> +}
> +
> +static inline int
> +mv_chan_get_desc_align(struct mv_xor_chan *chan, int num_slots)
> +{
> + return 1;
> +}
> +
> +static inline int
> +mv_chan_xor_slot_count(size_t len, int src_cnt)
> +{
> + return 1;
> +}
> +
> +/**
> + * mv_xor_free_slots - flags descriptor slots for reuse
> + * @slot: Slot to free
> + * Caller must hold &mv_chan->lock while calling this function
> + */
> +static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *slot)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
> + __func__, __LINE__, slot);
> +
> + slot->slots_per_op = 0;
> +
> +}
> +
> +/*
> + * mv_xor_start_new_chain - program the engine to operate on new chain headed by
> + * sw_desc
> + * Caller must hold &mv_chan->lock while calling this function
> + */
> +static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *sw_desc)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
> + __func__, __LINE__, sw_desc);
> + if (sw_desc->type != mv_chan->current_type)
> + mv_set_mode(mv_chan, sw_desc->type);
> +
> + if (sw_desc->type == DMA_MEMSET) {
> + /* for memset requests we need to program the engine, no
> + * descriptors used.
> + */
> + struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
> + mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
> + mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
> + mv_chan_set_value(mv_chan, sw_desc->value);
> + } else {
> + /* set the hardware chain */
> + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
> + }
> + mv_chan->pending += sw_desc->slot_cnt;
> + mv_xor_check_threshold(mv_chan);
> +}
> +
> +static dma_cookie_t
> +mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
> +{
> + BUG_ON(desc->async_tx.cookie < 0);
> +
> + if (desc->async_tx.cookie > 0) {
> + cookie = desc->async_tx.cookie;
> +
> + /* call the callback (must not sleep or submit new
> + * operations to this channel)
> + */
> + if (desc->async_tx.callback)
> + desc->async_tx.callback(
> + desc->async_tx.callback_param);
> +
> + /* unmap dma addresses
> + * (unmap_single vs unmap_page?)
> + */
> + if (desc->group_head && desc->unmap_len) {
> + struct mv_xor_desc_slot *unmap = desc->group_head;
> + struct device *dev =
> + &mv_chan->device->pdev->dev;
> + u32 len = unmap->unmap_len;
> + u32 src_cnt = unmap->unmap_src_cnt;
> + dma_addr_t addr = mv_desc_get_dest_addr(unmap, mv_chan);
> +
> + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
> + while (src_cnt--) {
> + addr = mv_desc_get_src_addr(unmap, mv_chan,
> + src_cnt);
> + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
> + }
> + desc->group_head = NULL;
> + }
> + }
> +
> + /* run dependent operations */
> + async_tx_run_dependencies(&desc->async_tx);
> +
> + return cookie;
> +}
> +
> +static int
> +mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
> +{
> + struct mv_xor_desc_slot *iter, *_iter;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
> + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
> + completed_node) {
> +
> + if (async_tx_test_ack(&iter->async_tx)) {
> + list_del(&iter->completed_node);
> + mv_xor_free_slots(mv_chan, iter);
> + }
> + }
> + return 0;
> +}
> +
> +static int
> +mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *mv_chan)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
> + __func__, __LINE__, desc, desc->async_tx.flags);
> + list_del(&desc->chain_node);
> + /* the client is allowed to attach dependent operations
> + * until 'ack' is set
> + */
> + if (!async_tx_test_ack(&desc->async_tx)) {
> + /* move this slot to the completed_slots */
> + list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
> + return 0;
> + }
> +
> + mv_xor_free_slots(mv_chan, desc);
> + return 0;
> +}
> +
> +static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
> +{
> + struct mv_xor_desc_slot *iter, *_iter;
> + dma_cookie_t cookie = 0;
> + int busy = mv_chan_is_busy(mv_chan);
> + u32 current_desc = mv_chan_get_current_desc(mv_chan);
> + int seen_current = 0;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
> + dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
> + mv_xor_clean_completed_slots(mv_chan);
> +
> + /* free completed slots from the chain starting with
> + * the oldest descriptor
> + */
> +
> + list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
> + chain_node) {
> + prefetch(_iter);
> + prefetch(&_iter->async_tx);
> +
> + /* do not advance past the current descriptor loaded into the
> + * hardware channel, subsequent descriptors are either in
> + * process or have not been submitted
> + */
> + if (seen_current)
> + break;
> +
> + /* stop the search if we reach the current descriptor and the
> + * channel is busy
> + */
> + if (iter->async_tx.phys == current_desc) {
> + seen_current = 1;
> + if (busy)
> + break;
> + }
> +
> + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
> +
> + if (mv_xor_clean_slot(iter, mv_chan))
> + break;
> + }
> +
> + if ((busy == 0) && !list_empty(&mv_chan->chain)) {
> + struct mv_xor_desc_slot *chain_head;
> + chain_head = list_entry(mv_chan->chain.next,
> + struct mv_xor_desc_slot,
> + chain_node);
> +
> + mv_xor_start_new_chain(mv_chan, chain_head);
> + }

This, and the fact that memset is done without a descriptor, are the
only significant differences with the iop-adma driver.

Just to check my understanding this driver waits for a 'idle hardware +
pending descriptors' condition as an indication of when to start a new
chain? Out of curiosity might there be a case, like an error condition,
where the channel is idle but should not start a new chain? Any ill
effects from not detecting that case?

> +
> + if (cookie > 0)
> + mv_chan->completed_cookie = cookie;
> +}
> +
> +static void
> +mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
> +{
> + spin_lock_bh(&mv_chan->lock);
> + __mv_xor_slot_cleanup(mv_chan);
> + spin_unlock_bh(&mv_chan->lock);
> +}
> +
> +static void mv_xor_tasklet(unsigned long data)
> +{
> + struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
> + __mv_xor_slot_cleanup(chan);
> +}
> +
> +static struct mv_xor_desc_slot *
> +mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
> + int slots_per_op)
> +{
> + struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
> + LIST_HEAD(chain);
> + int slots_found, retry = 0;
> +
> + /* start search from the last allocated descrtiptor
> + * if a contiguous allocation can not be found start searching
> + * from the beginning of the list
> + */
> +retry:
> + slots_found = 0;
> + if (retry == 0)
> + iter = mv_chan->last_used;
> + else
> + iter = list_entry(&mv_chan->all_slots,
> + struct mv_xor_desc_slot,
> + slot_node);
> +
> + list_for_each_entry_safe_continue(
> + iter, _iter, &mv_chan->all_slots, slot_node) {
> + prefetch(_iter);
> + prefetch(&_iter->async_tx);
> + if (iter->slots_per_op) {
> + /* give up after finding the first busy slot
> + * on the second pass through the list
> + */
> + if (retry)
> + break;
> +
> + slots_found = 0;
> + continue;
> + }
> +
> + /* start the allocation if the slot is correctly aligned */
> + if (!slots_found++)
> + alloc_start = iter;
> +
> + if (slots_found == num_slots) {
> + struct mv_xor_desc_slot *alloc_tail = NULL;
> + struct mv_xor_desc_slot *last_used = NULL;
> + iter = alloc_start;
> + while (num_slots) {
> + int i;
> +
> + /* pre-ack all but the last descriptor */
> + async_tx_ack(&iter->async_tx);
> +
> + list_add_tail(&iter->chain_node, &chain);
> + alloc_tail = iter;
> + iter->async_tx.cookie = 0;
> + iter->slot_cnt = num_slots;
> + iter->xor_check_result = NULL;
> + for (i = 0; i < slots_per_op; i++) {
> + iter->slots_per_op = slots_per_op - i;
> + last_used = iter;
> + iter = list_entry(iter->slot_node.next,
> + struct mv_xor_desc_slot,
> + slot_node);
> + }
> + num_slots -= slots_per_op;
> + }
> + alloc_tail->group_head = alloc_start;
> + alloc_tail->async_tx.cookie = -EBUSY;
> + list_splice(&chain, &alloc_tail->async_tx.tx_list);
> + mv_chan->last_used = last_used;
> + mv_desc_clear_next_desc(alloc_start);
> + mv_desc_clear_next_desc(alloc_tail);
> + return alloc_tail;
> + }
> + }
> + if (!retry++)
> + goto retry;
> +
> + /* try to free some slots if the allocation fails */
> + tasklet_schedule(&mv_chan->irq_tasklet);
> +
> + return NULL;
> +}
> +
> +static dma_cookie_t
> +mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *desc)
> +{
> + dma_cookie_t cookie = mv_chan->common.cookie;
> +
> + if (++cookie < 0)
> + cookie = 1;
> + mv_chan->common.cookie = desc->async_tx.cookie = cookie;
> + return cookie;
> +}
> +
> +static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan)
> +{
> + if (mv_chan->pending >= MV_XOR_THRESHOLD) {
> + mv_chan->pending = 0;
> + mv_chan_activate(mv_chan);
> + }
> +}
> +
> +/************************ DMA engine API functions ****************************/
> +static dma_cookie_t
> +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> + struct mv_xor_desc_slot *sw_desc = tx_to_mv_xor_slot(tx);
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
> + struct mv_xor_desc_slot *grp_start, *old_chain_tail;
> + dma_cookie_t cookie;
> + int new_hw_chain = 1;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p: async_tx %p\n",
> + __func__, sw_desc, &sw_desc->async_tx);
> +
> + grp_start = sw_desc->group_head;
> +
> + spin_lock_bh(&mv_chan->lock);
> + cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
> +
> + if (list_empty(&mv_chan->chain))
> + list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
> + else{

Checkpatch missed the needed space after that 'else'.

> + new_hw_chain = 0;
> +
> + old_chain_tail = list_entry(mv_chan->chain.prev,
> + struct mv_xor_desc_slot,
> + chain_node);
> + list_splice_init(&grp_start->async_tx.tx_list,
> + &old_chain_tail->chain_node);
> +
> + if (!mv_can_chain(mv_chan, grp_start))
> + goto submit_done;
> +
> + dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
> + old_chain_tail->async_tx.phys);
> +
> + /* fix up the hardware chain */
> + mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
> +
> + /* if the channel is not busy */
> + if (!mv_chan_is_busy(mv_chan)) {
> + u32 current_desc = mv_chan_get_current_desc(mv_chan);
> + /*
> + * and the curren desc is the end of the chain before
> + * the append, then we need to start the channel
> + */
> + if (current_desc == old_chain_tail->async_tx.phys)
> + new_hw_chain = 1;
> + }
> + }
> + if (new_hw_chain)
> + mv_xor_start_new_chain(mv_chan, grp_start);
> +
> +submit_done:
> + spin_unlock_bh(&mv_chan->lock);
> +
> + return cookie;
> +}
> +
> +/* returns the number of allocated descriptors */
> +static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
> +{
> + char *hw_desc;
> + int idx;
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *slot = NULL;
> + struct mv_xor_platform_data *plat_data =
> + mv_chan->device->pdev->dev.platform_data;
> + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
> +
> + /* Allocate descriptor slots */
> + do {
> + idx = mv_chan->slots_allocated;
> + if (idx == num_descs_in_pool)
> + break;
> +
> + slot = kzalloc(sizeof(*slot), GFP_KERNEL);
> + if (!slot) {
> + printk(KERN_INFO "MV XOR Channel only initialized"
> + " %d descriptor slots", idx);
> + break;
> + }
> + hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
> + slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
> +
> + dma_async_tx_descriptor_init(&slot->async_tx, chan);
> + slot->async_tx.tx_submit = mv_xor_tx_submit;
> + INIT_LIST_HEAD(&slot->chain_node);
> + INIT_LIST_HEAD(&slot->slot_node);
> + INIT_LIST_HEAD(&slot->async_tx.tx_list);
> + hw_desc = (char *) mv_chan->device->dma_desc_pool;
> + slot->async_tx.phys =
> + (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
> + slot->idx = idx;
> +
> + spin_lock_bh(&mv_chan->lock);
> + mv_chan->slots_allocated++;
> + list_add_tail(&slot->slot_node, &mv_chan->all_slots);
> + spin_unlock_bh(&mv_chan->lock);
> + } while (mv_chan->slots_allocated < num_descs_in_pool);
> +
> + if (idx && !mv_chan->last_used)
> + mv_chan->last_used = list_entry(mv_chan->all_slots.next,
> + struct mv_xor_desc_slot,
> + slot_node);
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "allocated %d descriptor slots last_used: %p\n",
> + mv_chan->slots_allocated, mv_chan->last_used);
> +
> + return (idx > 0) ? idx : -ENOMEM;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
> + size_t len, unsigned long flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s dest: %x src %x len: %u flags: %ld\n",
> + __func__, dest, src, len, flags);
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_memcpy_slot_count(len);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_MEMCPY;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
> + mv_desc_set_src_addr(grp_start, 0, src);
> + sw_desc->unmap_src_cnt = 1;
> + sw_desc->unmap_len = len;
> + }
> + spin_unlock_bh(&mv_chan->lock);
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p\n",
> + __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
> +
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
> + size_t len, unsigned long flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s dest: %x len: %u flags: %ld\n",
> + __func__, dest, len, flags);
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_memset_slot_count(len);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_MEMSET;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
> + mv_desc_set_block_fill_val(grp_start, value);
> + sw_desc->unmap_src_cnt = 1;
> + sw_desc->unmap_len = len;
> + }
> + spin_unlock_bh(&mv_chan->lock);
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p \n",
> + __func__, sw_desc, &sw_desc->async_tx);
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> + unsigned int src_cnt, size_t len, unsigned long flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s src_cnt: %d len: dest %x %u flags: %ld\n",
> + __func__, src_cnt, len, dest, flags);
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_XOR;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + /* the byte count field is the same as in memcpy desc*/
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
> + sw_desc->unmap_src_cnt = src_cnt;
> + sw_desc->unmap_len = len;
> + while (src_cnt--)
> + mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
> + }
> + spin_unlock_bh(&mv_chan->lock);
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p \n",
> + __func__, sw_desc, &sw_desc->async_tx);
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static void mv_xor_free_chan_resources(struct dma_chan *chan)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *iter, *_iter;
> + int in_use_descs = 0;
> +
> + mv_xor_slot_cleanup(mv_chan);
> +
> + spin_lock_bh(&mv_chan->lock);
> + list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
> + chain_node) {
> + in_use_descs++;
> + list_del(&iter->chain_node);
> + }
> + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
> + completed_node) {
> + in_use_descs++;
> + list_del(&iter->completed_node);
> + }
> + list_for_each_entry_safe_reverse(
> + iter, _iter, &mv_chan->all_slots, slot_node) {
> + list_del(&iter->slot_node);
> + kfree(iter);
> + mv_chan->slots_allocated--;
> + }
> + mv_chan->last_used = NULL;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
> + __func__, mv_chan->slots_allocated);
> + spin_unlock_bh(&mv_chan->lock);
> +
> + if (in_use_descs)
> + dev_err(mv_chan->device->common.dev,
> + "freeing %d in use descriptors!\n", in_use_descs);
> +}
> +
> +/**
> + * mv_xor_is_complete - poll the status of an XOR transaction
> + * @chan: XOR channel handle
> + * @cookie: XOR transaction identifier
> + */
> +static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
> + dma_cookie_t cookie,
> + dma_cookie_t *done,
> + dma_cookie_t *used)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + dma_cookie_t last_used;
> + dma_cookie_t last_complete;
> + enum dma_status ret;
> +
> + last_used = chan->cookie;
> + last_complete = mv_chan->completed_cookie;
> + mv_chan->is_complete_cookie = cookie;
> + if (done)
> + *done = last_complete;
> + if (used)
> + *used = last_used;
> +
> + ret = dma_async_is_complete(cookie, last_complete, last_used);
> + if (ret == DMA_SUCCESS) {
> + mv_xor_clean_completed_slots(mv_chan);
> + return ret;
> + }
> + mv_xor_slot_cleanup(mv_chan);
> +
> + last_used = chan->cookie;
> + last_complete = mv_chan->completed_cookie;
> +
> + if (done)
> + *done = last_complete;
> + if (used)
> + *used = last_used;
> +
> + return dma_async_is_complete(cookie, last_complete, last_used);
> +}
> +
> +static void mv_dump_xor_regs(struct mv_xor_chan *chan)
> +{
> + u32 val;
> +
> + val = __raw_readl(XOR_CONFIG(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "config 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ACTIVATION(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "activation 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_INTR_CAUSE(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "intr cause 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_INTR_MASK(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "intr mask 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ERROR_CAUSE(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error cause 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ERROR_ADDR(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error addr 0x%08x.\n", val);
> +}
> +
> +static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
> + u32 intr_cause)
> +{
> + if (intr_cause & (1 << 4)) {
> + dev_dbg(chan->device->common.dev,
> + "ignore this error\n");
> + return;
> + }
> +
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error on chan %d. intr cause 0x%08x.\n",
> + chan->idx, intr_cause);
> +
> + mv_dump_xor_regs(chan);
> + BUG();
> +}
> +
> +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
> +{
> + struct mv_xor_chan *chan = data;
> + u32 intr_cause = mv_chan_get_intr_cause(chan);
> +
> + dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
> +
> + if (mv_is_err_intr(intr_cause))
> + mv_xor_err_interrupt_handler(chan, intr_cause);
> +
> + tasklet_schedule(&chan->irq_tasklet);
> +
> + mv_xor_device_clear_eoc_cause(chan);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void mv_xor_issue_pending(struct dma_chan *chan)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> +
> + if (mv_chan->pending) {
> + mv_chan->pending = 0;
> + mv_chan_activate(mv_chan);
> + }
> +}
> +
> +/*
> + * Perform a transaction to verify the HW works.
> + */
> +#define MV_XOR_TEST_SIZE 2000
> +
> +static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
> +{
> + int i;
> + void *src, *dest;
> + dma_addr_t src_dma, dest_dma;
> + struct dma_chan *dma_chan;
> + dma_cookie_t cookie;
> + struct dma_async_tx_descriptor *tx;
> + int err = 0;
> + struct mv_xor_chan *mv_chan;
> +
> + src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
> + if (!src)
> + return -ENOMEM;
> +
> + dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
> + if (!dest) {
> + kfree(src);
> + return -ENOMEM;
> + }
> +
> + /* Fill in src buffer */
> + for (i = 0; i < MV_XOR_TEST_SIZE; i++)
> + ((u8 *) src)[i] = (u8)i;
> +
> + /* Start copy, using first DMA channel */
> + dma_chan = container_of(device->common.channels.next,
> + struct dma_chan,
> + device_node);
> + if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
> + err = -ENODEV;
> + goto out;
> + }
> +
> + dest_dma = dma_map_single(dma_chan->device->dev, dest,
> + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
> +
> + src_dma = dma_map_single(dma_chan->device->dev, src,
> + MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
> +
> + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
> + MV_XOR_TEST_SIZE, 0);
> + cookie = mv_xor_tx_submit(tx);
> + mv_xor_issue_pending(dma_chan);
> + async_tx_ack(tx);
> + msleep(1);
> +
> + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
> + DMA_SUCCESS) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test copy timed out, disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> + mv_chan = to_mv_xor_chan(dma_chan);
> + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
> + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
> + if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test copy failed compare, disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> +free_resources:
> + mv_xor_free_chan_resources(dma_chan);
> +out:
> + kfree(src);
> + kfree(dest);
> + return err;
> +}
> +
> +#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
> +static int __devinit
> +mv_xor_xor_self_test(struct mv_xor_device *device)
> +{
> + int i, src_idx;
> + struct page *dest;
> + struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
> + dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
> + dma_addr_t dest_dma;
> + struct dma_async_tx_descriptor *tx;
> + struct dma_chan *dma_chan;
> + dma_cookie_t cookie;
> + u8 cmp_byte = 0;
> + u32 cmp_word;
> + int err = 0;
> + struct mv_xor_chan *mv_chan;
> +
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
> + xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
> + if (!xor_srcs[src_idx])
> + while (src_idx--) {
> + __free_page(xor_srcs[src_idx]);
> + return -ENOMEM;
> + }
> + }
> +
> + dest = alloc_page(GFP_KERNEL);
> + if (!dest)
> + while (src_idx--) {
> + __free_page(xor_srcs[src_idx]);
> + return -ENOMEM;
> + }
> +
> + /* Fill in src buffers */
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
> + u8 *ptr = page_address(xor_srcs[src_idx]);
> + for (i = 0; i < PAGE_SIZE; i++)
> + ptr[i] = (1 << src_idx);
> + }
> +
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
> + cmp_byte ^= (u8) (1 << src_idx);
> +
> + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
> + (cmp_byte << 8) | cmp_byte;
> +
> + memset(page_address(dest), 0, PAGE_SIZE);
> +
> + dma_chan = container_of(device->common.channels.next,
> + struct dma_chan,
> + device_node);
> + if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
> + err = -ENODEV;
> + goto out;
> + }
> +
> + /* test xor */
> + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
> + DMA_FROM_DEVICE);
> +
> + for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
> + dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
> + 0, PAGE_SIZE, DMA_TO_DEVICE);
> +
> + tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
> + MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
> +
> + cookie = mv_xor_tx_submit(tx);
> + mv_xor_issue_pending(dma_chan);
> + async_tx_ack(tx);
> + msleep(8);
> +
> + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
> + DMA_SUCCESS) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test xor timed out, disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> + mv_chan = to_mv_xor_chan(dma_chan);
> + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
> + PAGE_SIZE, DMA_FROM_DEVICE);
> + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
> + u32 *ptr = page_address(dest);
> + if (ptr[i] != cmp_word) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test xor failed compare, disabling."
> + " index %d, data %x, expected %x\n", i,
> + ptr[i], cmp_word);
> + err = -ENODEV;
> + goto free_resources;
> + }
> + }
> +
> +free_resources:
> + mv_xor_free_chan_resources(dma_chan);
> +out:
> + src_idx = MV_XOR_NUM_SRC_TEST;
> + while (src_idx--)
> + __free_page(xor_srcs[src_idx]);
> + __free_page(dest);
> + return err;
> +}
> +
> +static int __devexit mv_xor_remove(struct platform_device *dev)
> +{
> + struct mv_xor_device *device = platform_get_drvdata(dev);
> + struct dma_chan *chan, *_chan;
> + struct mv_xor_chan *mv_chan;
> + struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
> +
> + dma_async_device_unregister(&device->common);
> +
> + dma_free_coherent(&dev->dev, plat_data->pool_size,
> + device->dma_desc_pool_virt, device->dma_desc_pool);
> +
> + list_for_each_entry_safe(chan, _chan, &device->common.channels,
> + device_node) {
> + mv_chan = to_mv_xor_chan(chan);
> + list_del(&chan->device_node);
> + }
> +
> + return 0;
> +}
> +
> +static int __devinit mv_xor_probe(struct platform_device *pdev)
> +{
> + int ret = 0;
> + int irq;
> + struct mv_xor_device *adev;
> + struct mv_xor_chan *mv_chan;
> + struct dma_device *dma_dev;
> + struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
> +
> +
> + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
> + if (!adev)
> + return -ENOMEM;
> +
> + dma_dev = &adev->common;
> +
> + /* allocate coherent memory for hardware descriptors
> + * note: writecombine gives slightly better performance, but
> + * requires that we explicitly flush the writes
> + */
> + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
> + plat_data->pool_size,
> + &adev->dma_desc_pool,
> + GFP_KERNEL);
> + if (!adev->dma_desc_pool_virt)
> + return -ENOMEM;
> +
> + adev->id = plat_data->hw_id;
> +
> + /* discover transaction capabilites from the platform data */
> + dma_dev->cap_mask = plat_data->cap_mask;
> + adev->pdev = pdev;
> + platform_set_drvdata(pdev, adev);
> +
> + adev->shared = platform_get_drvdata(plat_data->shared);
> +
> + INIT_LIST_HEAD(&dma_dev->channels);
> +
> + /* set base routines */
> + dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
> + dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
> + dma_dev->device_is_tx_complete = mv_xor_is_complete;
> + dma_dev->device_issue_pending = mv_xor_issue_pending;
> + dma_dev->dev = &pdev->dev;
> +
> + /* set prep routines based on capability */
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
> + dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
> + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
> + dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
> + dma_dev->max_xor = 8; ;
> + dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
> + }
> +
> + mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
> + if (!mv_chan) {
> + ret = -ENOMEM;
> + goto err_free_dma;
> + }
> + mv_chan->device = adev;
> + mv_chan->idx = plat_data->hw_id;
> + mv_chan->mmr_base = adev->shared->xor_base;
> +
> + if (!mv_chan->mmr_base) {
> + ret = -ENOMEM;
> + goto err_free_dma;
> + }
> + tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
> + mv_chan);
> +
> + /* clear errors before enabling interrupts */
> + mv_xor_device_clear_err_status(mv_chan);
> +
> + irq = platform_get_irq(pdev, 0);
> + if (irq < 0) {
> + ret = irq;
> + goto err_free_dma;
> + }
> + ret = devm_request_irq(&pdev->dev, irq,
> + mv_xor_interrupt_handler,
> + 0, dev_name(&pdev->dev), mv_chan);
> + if (ret)
> + goto err_free_dma;
> +
> + mv_chan_unmask_interrupts(mv_chan);
> +
> + mv_set_mode(mv_chan, DMA_MEMCPY);
> +
> + spin_lock_init(&mv_chan->lock);
> + INIT_LIST_HEAD(&mv_chan->chain);
> + INIT_LIST_HEAD(&mv_chan->completed_slots);
> + INIT_LIST_HEAD(&mv_chan->all_slots);
> + INIT_RCU_HEAD(&mv_chan->common.rcu);
> + mv_chan->common.device = dma_dev;
> +
> + list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
> +
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> + ret = mv_xor_memcpy_self_test(adev);
> + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }
> +
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> + ret = mv_xor_xor_self_test(adev);
> + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> + ret = mv_xor_memcpy_self_test(adev);
> + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }
> +
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> + ret = mv_xor_xor_self_test(adev);
> + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }
> +
> + dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
> + "( %s%s%s%s)\n",
> + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
> + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
> + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
> +
> + dma_async_device_register(dma_dev);
> + goto out;
> +
> + err_free_dma:
> + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
> + adev->dma_desc_pool_virt, adev->dma_desc_pool);
> + out:
> + return ret;
> +}
> +
> +static void
> +mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
> + struct mbus_dram_target_info *dram)
> +{
> + void __iomem *base = msp->xor_base;
> + u32 win_enable = 0;
> + int i;
> +
> + for (i = 0; i < 8; i++) {
> + writel(0, base + WINDOW_BASE(i));
> + writel(0, base + WINDOW_SIZE(i));
> + if (i < 4)
> + writel(0, base + WINDOW_REMAP_HIGH(i));
> + }
> +
> + for (i = 0; i < dram->num_cs; i++) {
> + struct mbus_dram_window *cs = dram->cs + i;
> +
> + writel((cs->base & 0xffff0000) |
> + (cs->mbus_attr << 8) |
> + dram->mbus_dram_target_id, base + WINDOW_BASE(i));
> + writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
> +
> + win_enable |= (1 << i);
> + win_enable |= 3 << (16 + (2 * i));
> + }
> +
> + writel(win_enable, base + WINDOW_BAR_ENABLE(0));
> + writel(win_enable, base + WINDOW_BAR_ENABLE(1));
> +}
> +
> +static struct platform_driver mv_xor_driver = {
> + .probe = mv_xor_probe,
> + .remove = mv_xor_remove,
> + .driver = {
> + .owner = THIS_MODULE,
> + .name = MV_XOR_NAME,
> + },
> +};
> +
> +static int mv_xor_shared_probe(struct platform_device *pdev)
> +{
> + struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
> + struct mv_xor_shared_private *msp;
> + struct resource *res;
> +
> + dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
> +
> + msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
> + if (!msp)
> + return -ENOMEM;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + if (!res)
> + return -ENODEV;
> +
> + msp->xor_base = devm_ioremap(&pdev->dev, res->start,
> + res->end - res->start + 1);
> + if (!msp->xor_base)
> + return -EBUSY;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> + if (!res)
> + return -ENODEV;
> +
> + msp->xor_base_high = devm_ioremap(&pdev->dev, res->start,
> + res->end - res->start + 1);
> + if (!msp->xor_base_high)
> + return -EBUSY;
> +
> + platform_set_drvdata(pdev, msp);
> +
> + /*
> + * (Re-)program MBUS remapping windows if we are asked to.
> + */
> + if (msd != NULL && msd->dram != NULL)
> + mv_xor_conf_mbus_windows(msp, msd->dram);
> +
> + return 0;
> +}
> +
> +static int mv_xor_shared_remove(struct platform_device *pdev)
> +{
> + return 0;
> +}
> +
> +static struct platform_driver mv_xor_shared_driver = {
> + .probe = mv_xor_shared_probe,
> + .remove = mv_xor_shared_remove,
> + .driver = {
> + .owner = THIS_MODULE,
> + .name = MV_XOR_SHARED_NAME,
> + },
> +};
> +
> +
> +static int __init mv_xor_init(void)
> +{
> + int rc;
> +
> + rc = platform_driver_register(&mv_xor_shared_driver);
> + if (!rc) {
> + rc = platform_driver_register(&mv_xor_driver);
> + if (rc)
> + platform_driver_unregister(&mv_xor_shared_driver);
> + }
> + return rc;
> +}

Curious, can you elaborate on why the code performs two registrations?

[..]

Thanks,
Dan

2008-06-26 22:33:29

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine

On Mon, Jun 23, 2008 at 9:46 AM, Nicolas Pitre <[email protected]> wrote:
> Here is a driver for the DMA/XOR engine found on many ARM SoCs from
> Marvell, such as Orion and newly supported Kirkwood. Could interested
> people (notably Maciej Sosnowski and Dan Williams who are listed as
> maintainers for the DMA generic offload subsystem) review this patch,
> and ultimately provide their ACK, so we could push this driver along
> with additional ARM patches that depend on this one through the ARM git
> tree.
>

Hi Nicolas,

With Haavard's recently posted changes there is more activity in the
dmaengine/async_tx space with implications to mv_xor. What
difficulties arise if I carry mv_xor through async_tx.git instead of
letting it go though arm.git, i.e. what patches do you have that
depend on mv_xor?

Thanks,
Dan

2008-06-27 00:21:20

by Nicolas Pitre

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine

On Thu, 26 Jun 2008, Dan Williams wrote:

> On Mon, Jun 23, 2008 at 9:46 AM, Nicolas Pitre <[email protected]> wrote:
> > Here is a driver for the DMA/XOR engine found on many ARM SoCs from
> > Marvell, such as Orion and newly supported Kirkwood. Could interested
> > people (notably Maciej Sosnowski and Dan Williams who are listed as
> > maintainers for the DMA generic offload subsystem) review this patch,
> > and ultimately provide their ACK, so we could push this driver along
> > with additional ARM patches that depend on this one through the ARM git
> > tree.
> >
>
> Hi Nicolas,
>
> With Haavard's recently posted changes there is more activity in the
> dmaengine/async_tx space with implications to mv_xor. What
> difficulties arise if I carry mv_xor through async_tx.git instead of
> letting it go though arm.git, i.e. what patches do you have that
> depend on mv_xor?

The extra patches are instantiations of the driver for various platform
which support will hit mainline at some point in the next merge window.
But since those are rather trivial, I guess no one will object if we
submit them for inclusion only after your stuff is pulled into mainline.
So please add it to your repo.

Here's the updated patch with further cleanups.

---------- >8

From: Saeed Bishara <[email protected]>
Date: Mon, 23 Jun 2008 04:26:05 -1100
Subject: DMA engine driver for Marvell XOR engine

The XOR engine found in Marvell's SoCs and system controllers
provides XOR and DMA operation, iSCSI CRC32C calculation, memory
initialization, and memory ECC error cleanup operation support.

This driver implements the DMA engine API and supports the following
capabilities:
- memcpy
- xor
- memset

The XOR engine can be used by DMA engine clients implemented in the
kernel, one of those clients is the RAID module. In that case, I
observed 20% improvement in the raid5 write throughput, and 40%
decrease in the CPU utilization when doing array construction, those
results obtained on an 5182 running at 500Mhz.

When enabling the NET DMA client, the performance decreased, so
meanwhile it is recommended to keep this client off.

Signed-off-by: Saeed Bishara <[email protected]>
Acked-by: Dan Williams <[email protected]>
Signed-off-by: Lennert Buytenhek <[email protected]>
Signed-off-by: Nicolas Pitre <[email protected]>
---
Index: linux-2.6.26-rc8/drivers/dma/Kconfig
===================================================================
--- linux-2.6.26-rc8.orig/drivers/dma/Kconfig
+++ linux-2.6.26-rc8/drivers/dma/Kconfig
@@ -4,7 +4,7 @@

menuconfig DMADEVICES
bool "DMA Engine support"
- depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC
+ depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC || PLAT_ORION
depends on !HIGHMEM64G
help
DMA engines can do asynchronous data transfers without
@@ -46,6 +46,14 @@ config FSL_DMA
MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
The MPC8349, MPC8360 is also supported.

+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select ASYNC_CORE
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine.
+
config DMA_ENGINE
bool

Index: linux-2.6.26-rc8/drivers/dma/Makefile
===================================================================
--- linux-2.6.26-rc8.orig/drivers/dma/Makefile
+++ linux-2.6.26-rc8/drivers/dma/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
Index: linux-2.6.26-rc8/drivers/dma/mv_xor.c
===================================================================
--- /dev/null
+++ linux-2.6.26-rc8/drivers/dma/mv_xor.c
@@ -0,0 +1,1400 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <asm/plat-orion/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ if (flags & DMA_PREP_INTERRUPT)
+ hw_desc->desc_command = (1 << 31);
+ else
+ hw_desc->desc_command = 0;
+
+ hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+ desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *chan,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_chan *chan, struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+
+ return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_check_threshold(mv_chan);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t addr = mv_desc_get_dest_addr(unmap, mv_chan);
+
+ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap, mv_chan,
+ src_cnt);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan)
+{
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(mv_chan, grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ do {
+ idx = mv_chan->slots_allocated;
+ if (idx == num_descs_in_pool)
+ break;
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ } while (mv_chan->slots_allocated < num_descs_in_pool);
+
+ if (idx && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, mv_chan, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, mv_chan, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8; ;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ INIT_RCU_HEAD(&mv_chan->common.rcu);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = mv_xor_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <[email protected]>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
Index: linux-2.6.26-rc8/drivers/dma/mv_xor.h
===================================================================
--- /dev/null
+++ linux-2.6.26-rc8/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+
+#endif
Index: linux-2.6.26-rc8/include/asm-arm/plat-orion/mv_xor.h
===================================================================
--- /dev/null
+++ linux-2.6.26-rc8/include/asm-arm/plat-orion/mv_xor.h
@@ -0,0 +1,28 @@
+/*
+ * Marvell XOR platform device data definition file.
+ */
+
+#ifndef __ASM_PLAT_ORION_MV_XOR_H
+#define __ASM_PLAT_ORION_MV_XOR_H
+
+#include <linux/dmaengine.h>
+#include <linux/mbus.h>
+
+#define MV_XOR_SHARED_NAME "mv_xor_shared"
+#define MV_XOR_NAME "mv_xor"
+
+struct mbus_dram_target_info;
+
+struct mv_xor_platform_shared_data {
+ struct mbus_dram_target_info *dram;
+};
+
+struct mv_xor_platform_data {
+ struct platform_device *shared;
+ int hw_id;
+ dma_cap_mask_t cap_mask;
+ size_t pool_size;
+};
+
+
+#endif

2008-06-30 14:10:19

by Sosnowski, Maciej

[permalink] [raw]
Subject: RE: [PATCH/RFC] DMA engine driver for Marvell XOR engine

Nicolas Pitre wrote:
> Here is a driver for the DMA/XOR engine found on many ARM SoCs from
> Marvell, such as Orion and newly supported Kirkwood. Could interested
> people (notably Maciej Sosnowski and Dan Williams who are listed as
> maintainers for the DMA generic offload subsystem) review this patch,
> and ultimately provide their ACK, so we could push this driver along
> with additional ARM patches that depend on this one through the ARM
git
> tree.
>
> Thanks

Sorry I could not do the review last week.
Below are some minor comments/questions from my side.
Apart from that the code looks ok.

Acked-by: Maciej Sosnowski <[email protected]>

>
> -----
> From: Saeed Bishara <[email protected]>
>
> The Marvell XOR engine found in Marvell's SoCs and system controllers
> provides xor and DMA operation, iSCSI CRC32C calculation, memory
> initialization, and memory ECC errors cleanup operation support.
>
> This driver implements the DMA engine API and support the following
> capabilities:
> memcpy
> xor
> memset
>
> The XOR engine can be used by DMA engine clients implemented in the
> kernel, one of those clients is the RAID module. in that case, I
> observed 20% improvement in the raid5 writes throughput, and 40%
> decrease in the CPU utilization when doing array construction, those
> results performed on 5182 running at 500Mhz.
>
> when enabling the NET DMA client, the performance decreased, so
> meanwhile it is recommended to keep this client off.
>
> Signed-off-by: Saeed Bishara <[email protected]>
> ---
> drivers/dma/Kconfig | 11 +-
> drivers/dma/Makefile | 1 +
> drivers/dma/mv_xor.c | 1446
> +++++++++++++++++++++++++++++++++++ drivers/dma/mv_xor.h
|
> 187 +++++ include/asm-arm/plat-orion/mv_xor.h | 24 +
> 5 files changed, 1668 insertions(+), 1 deletions(-)
> create mode 100644 drivers/dma/mv_xor.c
> create mode 100644 drivers/dma/mv_xor.h
> create mode 100644 include/asm-arm/plat-orion/mv_xor.h
>
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index 6239c3d..8665e39 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -4,7 +4,7 @@
>
> menuconfig DMADEVICES
> bool "DMA Engine support"
> - depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X ||
ARCH_IOP13XX || PPC
> + depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X ||
ARCH_IOP13XX ||
> PPC || PLAT_ORION depends on !HIGHMEM64G
> help
> DMA engines can do asynchronous data transfers without
> @@ -46,6 +46,15 @@ config FSL_DMA
> MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
> The MPC8349, MPC8360 is also supported.
>
> +config MV_XOR
> + bool "Marvell XOR engine support"
> + depends on PLAT_ORION
> + select ASYNC_CORE
> + select DMA_ENGINE
> + ---help---
> + Enable support for the Marvell XOR engine. This engine is
present
> + in some of Marvell's SoC such as the MV88F5182.
> +
> config DMA_ENGINE
> bool
>
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index c8036d9..ee272fd 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -4,3 +4,4 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
> ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
> obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
> obj-$(CONFIG_FSL_DMA) += fsldma.o
> +obj-$(CONFIG_MV_XOR) += mv_xor.o
> diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
> new file mode 100644
> index 0000000..bc708b7
> --- /dev/null
> +++ b/drivers/dma/mv_xor.c
> @@ -0,0 +1,1446 @@
> +/*
> + * offload engine driver for the XOR engines of the Marvell Feroceon
> processors + * Copyright ?? 2008, Marvell International Ltd.
> + *
> + * This program is free software; you can redistribute it and/or
modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
along
> with + * this program; if not, write to the Free Software Foundation,
Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + *
> + */
> +
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/async_tx.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/spinlock.h>
> +#include <linux/interrupt.h>
> +#include <linux/platform_device.h>
> +#include <linux/memory.h>
> +#include <asm/plat-orion/mv_xor.h>
> +#include "mv_xor.h"
> +
> +static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan);
> +
> +#define to_mv_xor_chan(chan) container_of(chan, struct mv_xor_chan,
common)
> +#define to_mv_xor_device(dev) \
> + container_of(dev, struct mv_xor_device, common)
> +#define tx_to_mv_xor_slot(tx) \
> + container_of(tx, struct mv_xor_desc_slot, async_tx)
> +
> +static inline void
> +mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> +
> + hw_desc->status = (1 << 31);
> + hw_desc->phy_next_desc = 0;
> + if (flags & DMA_PREP_INTERRUPT)
> + hw_desc->desc_command = (1 << 31);
> + else
> + hw_desc->desc_command = 0;
> +
> + hw_desc->desc_command = (1 << 31);
> +}

It looks like in mv_desc_init() either the last line or if-else section
needs to be removed.

> +
> +static inline u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot
*desc,
> + struct mv_xor_chan *chan)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_dest_addr;
> +}
> +
> +static inline u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *chan,
> + int src_idx)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_src_addr[src_idx];
> +}
> +
> +
> +static inline void mv_desc_set_byte_count(struct mv_xor_desc_slot
*desc,
> + struct mv_xor_chan *chan,
> + u32 byte_count)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->byte_count = byte_count;
> +}

Parameter 'chan' is not used in mv_desc_get_dest_addr(),
mv_desc_get_src_addr()
and mv_desc_set_byte_count()

> +
> +static inline void mv_desc_set_next_desc(struct mv_xor_desc_slot
*desc,
> + u32 next_desc_addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + BUG_ON(hw_desc->phy_next_desc);
> + hw_desc->phy_next_desc = next_desc_addr;
> +}
> +static inline u32 mv_desc_get_next_desc(struct mv_xor_desc_slot
*desc)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + return hw_desc->phy_next_desc;
> +}
> +
> +static inline void mv_desc_clear_next_desc(struct mv_xor_desc_slot
*desc)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_next_desc = 0;
> +}
> +
> +static inline void mv_desc_set_block_fill_val(struct mv_xor_desc_slot
*desc,
> + u32 val)
> +{
> + desc->value = val;
> +}
> +
> +static inline void mv_desc_set_dest_addr(struct mv_xor_desc_slot
*desc,
> + struct mv_xor_chan *chan,
> + dma_addr_t addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_dest_addr = addr;
> +}

Again, 'chan' is not used here.

> +
> +static inline int mv_chan_memset_slot_count(size_t len)
> +{
> + return 1;
> +}
> +
> +#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
> +
> +static inline void mv_desc_set_src_addr(struct mv_xor_desc_slot
*desc,
> + int index,
> + dma_addr_t addr)
> +{
> + struct mv_xor_desc *hw_desc = desc->hw_desc;
> + hw_desc->phy_src_addr[index] = addr;
> + if (desc->type == DMA_XOR)
> + hw_desc->desc_command |= (1 << index);
> +}
> +
> +static inline u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
> +{
> + return __raw_readl(XOR_CURR_DESC(chan));
> +}
> +
> +static inline void mv_chan_set_next_descriptor(struct mv_xor_chan
*chan,
> + u32 next_desc_addr)
> +{
> + __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
> +}
> +static inline void mv_chan_set_dest_pointer(struct mv_xor_chan *chan,
> + u32 desc_addr)
> +{
> + __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
> +}
> +
> +static inline void mv_chan_set_block_size(struct mv_xor_chan *chan,
> + u32 block_size)
> +{
> + __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
> +}
> +
> +static inline void mv_chan_set_value(struct mv_xor_chan *chan,
> + u32 value)
> +{
> + __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
> + __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
> +}
> +
> +static inline void mv_chan_unmask_interrupts(struct mv_xor_chan
*chan)
> +{
> + u32 val = __raw_readl(XOR_INTR_MASK(chan));
> + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
> + __raw_writel(val, XOR_INTR_MASK(chan));
> +}
> +static inline u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
> +{
> + u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
> + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
> + return intr_cause;
> +}
> +
> +static inline int mv_is_err_intr(u32 intr_cause)
> +{
> + if (intr_cause &
> + ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
> + return 1;
> +
> + return 0;
> +}
> +
> +static inline void mv_xor_device_clear_eoc_cause(struct mv_xor_chan
*chan)
> +{
> + u32 val = (1 << (1 + (chan->idx * 16)));
> + dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__,
val);
> + __raw_writel(val, XOR_INTR_CAUSE(chan));
> +}
> +
> +static inline void mv_xor_device_clear_err_status(struct mv_xor_chan
*chan)
> +{
> + u32 val = 0xFFFF0000 >> (chan->idx * 16);
> + __raw_writel(val, XOR_INTR_CAUSE(chan));
> +}
> +
> +static inline int mv_can_chain(struct mv_xor_chan *chan,
> + struct mv_xor_desc_slot *desc)
> +{
> + struct mv_xor_desc_slot *chain_old_tail = list_entry(
> + desc->chain_node.prev, struct mv_xor_desc_slot,
chain_node);
> +
> + if (chain_old_tail->type != desc->type)
> + return 0;
> + if (desc->type == DMA_MEMSET)
> + return 0;
> + return 1;
> +}

And again 'chan' parameter is not needed.

> +
> +static inline void mv_set_mode(struct mv_xor_chan *chan,
> + enum dma_transaction_type type)
> +{
> + u32 op_mode;
> + u32 config = __raw_readl(XOR_CONFIG(chan));
> +
> + switch (type) {
> + case DMA_XOR:
> + op_mode = XOR_OPERATION_MODE_XOR;
> + break;
> + case DMA_MEMCPY:
> + op_mode = XOR_OPERATION_MODE_MEMCPY;
> + break;
> + case DMA_MEMSET:
> + op_mode = XOR_OPERATION_MODE_MEMSET;
> + break;
> + default:
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error: unsupported operation %d.\n",
> + type);
> + BUG();
> + return;
> + }
> +
> + config &= ~0x7;
> + config |= op_mode;
> + __raw_writel(config, XOR_CONFIG(chan));
> + chan->current_type = type;
> +}
> +
> +static inline void mv_chan_activate(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> +
> + dev_dbg(chan->device->common.dev, " activate chan.\n");
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> + activation |= 0x1;
> + __raw_writel(activation, XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_disable(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> +
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> + activation |= (1 << 1);
> + __raw_writel(activation, XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_pause(struct mv_xor_chan *chan)
> +{
> + u32 activation;
> + __raw_writel(1 << 2, XOR_ACTIVATION(chan));
> + activation = __raw_readl(XOR_ACTIVATION(chan));
> +}
> +
> +static inline void mv_chan_restart(struct mv_xor_chan *chan)
> +{
> + __raw_writel(1 << 3, XOR_ACTIVATION(chan));
> +}
> +
> +static inline int mv_desc_get_zero_result(struct mv_xor_desc_slot
*desc)
> +{
> + BUG();
> + return 0;
> +}
> +
> +static inline void mv_chan_idle(int busy, struct mv_xor_chan *chan)
> +{
> + do { } while (0);
> +}
> +
> +static inline char mv_chan_is_busy(struct mv_xor_chan *chan)
> +{
> + u32 state = __raw_readl(XOR_ACTIVATION(chan));
> +
> + state = (state >> 4) & 0x3;
> +
> + return (state == 1)? 1 : 0;
> +}
> +
> +static inline int
> +mv_chan_get_desc_align(struct mv_xor_chan *chan, int num_slots)
> +{
> + return 1;
> +}
> +
> +static inline int
> +mv_chan_xor_slot_count(size_t len, int src_cnt)
> +{
> + return 1;
> +}
> +
> +/**
> + * mv_xor_free_slots - flags descriptor slots for reuse
> + * @slot: Slot to free
> + * Caller must hold &mv_chan->lock while calling this function
> + */
> +static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *slot)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
> + __func__, __LINE__, slot);
> +
> + slot->slots_per_op = 0;
> +
> +}
> +
> +/*
> + * mv_xor_start_new_chain - program the engine to operate on new
chain
> headed by + * sw_desc
> + * Caller must hold &mv_chan->lock while calling this function
> + */
> +static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *sw_desc)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
> + __func__, __LINE__, sw_desc);
> + if (sw_desc->type != mv_chan->current_type)
> + mv_set_mode(mv_chan, sw_desc->type);
> +
> + if (sw_desc->type == DMA_MEMSET) {
> + /* for memset requests we need to program the engine, no
> + * descriptors used.
> + */
> + struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
> + mv_chan_set_dest_pointer(mv_chan,
hw_desc->phy_dest_addr);
> + mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
> + mv_chan_set_value(mv_chan, sw_desc->value);
> + } else {
> + /* set the hardware chain */
> + mv_chan_set_next_descriptor(mv_chan,
sw_desc->async_tx.phys);
> + }
> + mv_chan->pending += sw_desc->slot_cnt;
> + mv_xor_check_threshold(mv_chan);
> +}
> +
> +static dma_cookie_t
> +mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
> +{
> + BUG_ON(desc->async_tx.cookie < 0);
> +
> + if (desc->async_tx.cookie > 0) {
> + cookie = desc->async_tx.cookie;
> +
> + /* call the callback (must not sleep or submit new
> + * operations to this channel)
> + */
> + if (desc->async_tx.callback)
> + desc->async_tx.callback(
> + desc->async_tx.callback_param);
> +
> + /* unmap dma addresses
> + * (unmap_single vs unmap_page?)
> + */
> + if (desc->group_head && desc->unmap_len) {
> + struct mv_xor_desc_slot *unmap =
desc->group_head;
> + struct device *dev =
> + &mv_chan->device->pdev->dev;
> + u32 len = unmap->unmap_len;
> + u32 src_cnt = unmap->unmap_src_cnt;
> + dma_addr_t addr = mv_desc_get_dest_addr(unmap,
mv_chan);
> +
> + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
> + while (src_cnt--) {
> + addr = mv_desc_get_src_addr(unmap,
mv_chan,
> + src_cnt);
> + dma_unmap_page(dev, addr, len,
DMA_TO_DEVICE);
> + }
> + desc->group_head = NULL;
> + }
> + }
> +
> + /* run dependent operations */
> + async_tx_run_dependencies(&desc->async_tx);
> +
> + return cookie;
> +}
> +
> +static int
> +mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
> +{
> + struct mv_xor_desc_slot *iter, *_iter;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__,
__LINE__);
> + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
> + completed_node) {
> +
> + if (async_tx_test_ack(&iter->async_tx)) {
> + list_del(&iter->completed_node);
> + mv_xor_free_slots(mv_chan, iter);
> + }
> + }
> + return 0;
> +}
> +
> +static int
> +mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
> + struct mv_xor_chan *mv_chan)
> +{
> + dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags
%d\n",
> + __func__, __LINE__, desc, desc->async_tx.flags);
> + list_del(&desc->chain_node);
> + /* the client is allowed to attach dependent operations
> + * until 'ack' is set
> + */
> + if (!async_tx_test_ack(&desc->async_tx)) {
> + /* move this slot to the completed_slots */
> + list_add_tail(&desc->completed_node,
&mv_chan->completed_slots);
> + return 0;
> + }
> +
> + mv_xor_free_slots(mv_chan, desc);
> + return 0;
> +}
> +
> +static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
> +{
> + struct mv_xor_desc_slot *iter, *_iter;
> + dma_cookie_t cookie = 0;
> + int busy = mv_chan_is_busy(mv_chan);
> + u32 current_desc = mv_chan_get_current_desc(mv_chan);
> + int seen_current = 0;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__,
__LINE__);
> + dev_dbg(mv_chan->device->common.dev, "current_desc %x\n",
current_desc);
> + mv_xor_clean_completed_slots(mv_chan);
> +
> + /* free completed slots from the chain starting with
> + * the oldest descriptor
> + */
> +
> + list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
> + chain_node) {
> + prefetch(_iter);
> + prefetch(&_iter->async_tx);
> +
> + /* do not advance past the current descriptor loaded
into the
> + * hardware channel, subsequent descriptors are either
in
> + * process or have not been submitted
> + */
> + if (seen_current)
> + break;
> +
> + /* stop the search if we reach the current descriptor
and the
> + * channel is busy
> + */
> + if (iter->async_tx.phys == current_desc) {
> + seen_current = 1;
> + if (busy)
> + break;
> + }
> +
> + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan,
cookie);
> +
> + if (mv_xor_clean_slot(iter, mv_chan))
> + break;
> + }
> +
> + if ((busy == 0) && !list_empty(&mv_chan->chain)) {
> + struct mv_xor_desc_slot *chain_head;
> + chain_head = list_entry(mv_chan->chain.next,
> + struct mv_xor_desc_slot,
> + chain_node);
> +
> + mv_xor_start_new_chain(mv_chan, chain_head);
> + }
> +
> + if (cookie > 0)
> + mv_chan->completed_cookie = cookie;
> +}
> +
> +static void
> +mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
> +{
> + spin_lock_bh(&mv_chan->lock);
> + __mv_xor_slot_cleanup(mv_chan);
> + spin_unlock_bh(&mv_chan->lock);
> +}
> +
> +static void mv_xor_tasklet(unsigned long data)
> +{
> + struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
> + __mv_xor_slot_cleanup(chan);
> +}
> +
> +static struct mv_xor_desc_slot *
> +mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
> + int slots_per_op)
> +{
> + struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
> + LIST_HEAD(chain);
> + int slots_found, retry = 0;
> +
> + /* start search from the last allocated descrtiptor
> + * if a contiguous allocation can not be found start searching
> + * from the beginning of the list
> + */
> +retry:
> + slots_found = 0;
> + if (retry == 0)
> + iter = mv_chan->last_used;
> + else
> + iter = list_entry(&mv_chan->all_slots,
> + struct mv_xor_desc_slot,
> + slot_node);
> +
> + list_for_each_entry_safe_continue(
> + iter, _iter, &mv_chan->all_slots, slot_node) {
> + prefetch(_iter);
> + prefetch(&_iter->async_tx);
> + if (iter->slots_per_op) {
> + /* give up after finding the first busy slot
> + * on the second pass through the list
> + */
> + if (retry)
> + break;
> +
> + slots_found = 0;
> + continue;
> + }
> +
> + /* start the allocation if the slot is correctly aligned
*/
> + if (!slots_found++)
> + alloc_start = iter;
> +
> + if (slots_found == num_slots) {
> + struct mv_xor_desc_slot *alloc_tail = NULL;
> + struct mv_xor_desc_slot *last_used = NULL;
> + iter = alloc_start;
> + while (num_slots) {
> + int i;
> +
> + /* pre-ack all but the last descriptor
*/
> + async_tx_ack(&iter->async_tx);
> +
> + list_add_tail(&iter->chain_node,
&chain);
> + alloc_tail = iter;
> + iter->async_tx.cookie = 0;
> + iter->slot_cnt = num_slots;
> + iter->xor_check_result = NULL;
> + for (i = 0; i < slots_per_op; i++) {
> + iter->slots_per_op =
slots_per_op - i;
> + last_used = iter;
> + iter =
list_entry(iter->slot_node.next,
> + struct mv_xor_desc_slot,
> + slot_node);
> + }
> + num_slots -= slots_per_op;
> + }
> + alloc_tail->group_head = alloc_start;
> + alloc_tail->async_tx.cookie = -EBUSY;
> + list_splice(&chain,
&alloc_tail->async_tx.tx_list);
> + mv_chan->last_used = last_used;
> + mv_desc_clear_next_desc(alloc_start);
> + mv_desc_clear_next_desc(alloc_tail);
> + return alloc_tail;
> + }
> + }
> + if (!retry++)
> + goto retry;
> +
> + /* try to free some slots if the allocation fails */
> + tasklet_schedule(&mv_chan->irq_tasklet);
> +
> + return NULL;
> +}
> +
> +static dma_cookie_t
> +mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
> + struct mv_xor_desc_slot *desc)
> +{
> + dma_cookie_t cookie = mv_chan->common.cookie;
> +
> + if (++cookie < 0)
> + cookie = 1;
> + mv_chan->common.cookie = desc->async_tx.cookie = cookie;
> + return cookie;
> +}
> +
> +static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan)
> +{
> + if (mv_chan->pending >= MV_XOR_THRESHOLD) {
> + mv_chan->pending = 0;
> + mv_chan_activate(mv_chan);
> + }
> +}

Is it needed to use both mv_xor_check_threshold() and
mv_xor_issue_pending()
in the driver?
What about replacing mv_xor_check_threshold() with
mv_xor_issue_pending()
and using the threshold in mv_xor_issue_pending()
(especially that MV_XOR_THRESHOLD is set to 1)?

> +
> +/************************ DMA engine API functions
> ****************************/ +static dma_cookie_t
> +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> + struct mv_xor_desc_slot *sw_desc = tx_to_mv_xor_slot(tx);
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
> + struct mv_xor_desc_slot *grp_start, *old_chain_tail;
> + dma_cookie_t cookie;
> + int new_hw_chain = 1;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p: async_tx %p\n",
> + __func__, sw_desc, &sw_desc->async_tx);
> +
> + grp_start = sw_desc->group_head;
> +
> + spin_lock_bh(&mv_chan->lock);
> + cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
> +
> + if (list_empty(&mv_chan->chain))
> + list_splice_init(&sw_desc->async_tx.tx_list,
&mv_chan->chain);
> + else{
> + new_hw_chain = 0;
> +
> + old_chain_tail = list_entry(mv_chan->chain.prev,
> + struct mv_xor_desc_slot,
> + chain_node);
> + list_splice_init(&grp_start->async_tx.tx_list,
> + &old_chain_tail->chain_node);
> +
> + if (!mv_can_chain(mv_chan, grp_start))
> + goto submit_done;
> +
> + dev_dbg(mv_chan->device->common.dev, "Append to last
desc %x\n",
> + old_chain_tail->async_tx.phys);
> +
> + /* fix up the hardware chain */
> + mv_desc_set_next_desc(old_chain_tail,
grp_start->async_tx.phys);
> +
> + /* if the channel is not busy */
> + if (!mv_chan_is_busy(mv_chan)) {
> + u32 current_desc =
mv_chan_get_current_desc(mv_chan);
> + /*
> + * and the curren desc is the end of the chain
before
> + * the append, then we need to start the channel
> + */
> + if (current_desc ==
old_chain_tail->async_tx.phys)
> + new_hw_chain = 1;
> + }
> + }
> + if (new_hw_chain)
> + mv_xor_start_new_chain(mv_chan, grp_start);
> +
> +submit_done:
> + spin_unlock_bh(&mv_chan->lock);
> +
> + return cookie;
> +}
> +
> +/* returns the number of allocated descriptors */
> +static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
> +{
> + char *hw_desc;
> + int idx;
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *slot = NULL;
> + struct mv_xor_platform_data *plat_data =
> + mv_chan->device->pdev->dev.platform_data;
> + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
> +
> + /* Allocate descriptor slots */
> + do {
> + idx = mv_chan->slots_allocated;
> + if (idx == num_descs_in_pool)
> + break;

This break condition is actually redundant to the do-while loop
condition.
What about replacing do-while with simpler while loop?

> +
> + slot = kzalloc(sizeof(*slot), GFP_KERNEL);
> + if (!slot) {
> + printk(KERN_INFO "MV XOR Channel only
initialized"
> + " %d descriptor slots", idx);
> + break;
> + }
> + hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
> + slot->hw_desc = (void *) &hw_desc[idx *
MV_XOR_SLOT_SIZE];
> +
> + dma_async_tx_descriptor_init(&slot->async_tx, chan);
> + slot->async_tx.tx_submit = mv_xor_tx_submit;
> + INIT_LIST_HEAD(&slot->chain_node);
> + INIT_LIST_HEAD(&slot->slot_node);
> + INIT_LIST_HEAD(&slot->async_tx.tx_list);
> + hw_desc = (char *) mv_chan->device->dma_desc_pool;
> + slot->async_tx.phys =
> + (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
> + slot->idx = idx;
> +
> + spin_lock_bh(&mv_chan->lock);
> + mv_chan->slots_allocated++;
> + list_add_tail(&slot->slot_node, &mv_chan->all_slots);
> + spin_unlock_bh(&mv_chan->lock);
> + } while (mv_chan->slots_allocated < num_descs_in_pool);
> +
> + if (idx && !mv_chan->last_used)
> + mv_chan->last_used = list_entry(mv_chan->all_slots.next,
> + struct mv_xor_desc_slot,
> + slot_node);
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "allocated %d descriptor slots last_used: %p\n",
> + mv_chan->slots_allocated, mv_chan->last_used);
> +
> + return (idx > 0) ? idx : -ENOMEM;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
dma_addr_t
> src, + size_t len, unsigned long flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s dest: %x src %x len: %u flags: %ld\n",
> + __func__, dest, src, len, flags);
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_memcpy_slot_count(len);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_MEMCPY;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan,
dest);
> + mv_desc_set_src_addr(grp_start, 0, src);
> + sw_desc->unmap_src_cnt = 1;
> + sw_desc->unmap_len = len;
> + }
> + spin_unlock_bh(&mv_chan->lock);
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p\n",
> + __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
> +
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int
value,
> + size_t len, unsigned long flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s dest: %x len: %u flags: %ld\n",
> + __func__, dest, len, flags);
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_memset_slot_count(len);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_MEMSET;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan,
dest);
> + mv_desc_set_block_fill_val(grp_start, value);
> + sw_desc->unmap_src_cnt = 1;
> + sw_desc->unmap_len = len;
> + }
> + spin_unlock_bh(&mv_chan->lock);
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p \n",
> + __func__, sw_desc, &sw_desc->async_tx);
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static struct dma_async_tx_descriptor *
> +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest,
dma_addr_t *src,
> + unsigned int src_cnt, size_t len, unsigned long
flags)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *sw_desc, *grp_start;
> + int slot_cnt;
> +
> + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
> + return NULL;
> +
> + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
> +
> + dev_dbg(mv_chan->device->common.dev,
> + "%s src_cnt: %d len: dest %x %u flags: %ld\n",
> + __func__, src_cnt, len, dest, flags);
> +
> + spin_lock_bh(&mv_chan->lock);
> + slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
> + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
> + if (sw_desc) {
> + sw_desc->type = DMA_XOR;
> + sw_desc->async_tx.flags = flags;
> + grp_start = sw_desc->group_head;
> + mv_desc_init(grp_start, flags);
> + /* the byte count field is the same as in memcpy desc*/
> + mv_desc_set_byte_count(grp_start, mv_chan, len);
> + mv_desc_set_dest_addr(sw_desc->group_head, mv_chan,
dest);
> + sw_desc->unmap_src_cnt = src_cnt;
> + sw_desc->unmap_len = len;
> + while (src_cnt--)
> + mv_desc_set_src_addr(grp_start, src_cnt,
src[src_cnt]);
> + }
> + spin_unlock_bh(&mv_chan->lock);
> + dev_dbg(mv_chan->device->common.dev,
> + "%s sw_desc %p async_tx %p \n",
> + __func__, sw_desc, &sw_desc->async_tx);
> + return sw_desc ? &sw_desc->async_tx : NULL;
> +}
> +
> +static void mv_xor_free_chan_resources(struct dma_chan *chan)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + struct mv_xor_desc_slot *iter, *_iter;
> + int in_use_descs = 0;
> +
> + mv_xor_slot_cleanup(mv_chan);
> +
> + spin_lock_bh(&mv_chan->lock);
> + list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
> + chain_node) {
> + in_use_descs++;
> + list_del(&iter->chain_node);
> + }
> + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
> + completed_node) {
> + in_use_descs++;
> + list_del(&iter->completed_node);
> + }
> + list_for_each_entry_safe_reverse(
> + iter, _iter, &mv_chan->all_slots, slot_node) {
> + list_del(&iter->slot_node);
> + kfree(iter);
> + mv_chan->slots_allocated--;
> + }
> + mv_chan->last_used = NULL;
> +
> + dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
> + __func__, mv_chan->slots_allocated);
> + spin_unlock_bh(&mv_chan->lock);
> +
> + if (in_use_descs)
> + dev_err(mv_chan->device->common.dev,
> + "freeing %d in use descriptors!\n",
in_use_descs);
> +}
> +
> +/**
> + * mv_xor_is_complete - poll the status of an XOR transaction
> + * @chan: XOR channel handle
> + * @cookie: XOR transaction identifier
> + */
> +static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
> + dma_cookie_t cookie,
> + dma_cookie_t *done,
> + dma_cookie_t *used)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> + dma_cookie_t last_used;
> + dma_cookie_t last_complete;
> + enum dma_status ret;
> +
> + last_used = chan->cookie;
> + last_complete = mv_chan->completed_cookie;
> + mv_chan->is_complete_cookie = cookie;
> + if (done)
> + *done = last_complete;
> + if (used)
> + *used = last_used;
> +
> + ret = dma_async_is_complete(cookie, last_complete, last_used);
> + if (ret == DMA_SUCCESS) {
> + mv_xor_clean_completed_slots(mv_chan);
> + return ret;
> + }
> + mv_xor_slot_cleanup(mv_chan);
> +
> + last_used = chan->cookie;
> + last_complete = mv_chan->completed_cookie;
> +
> + if (done)
> + *done = last_complete;
> + if (used)
> + *used = last_used;
> +
> + return dma_async_is_complete(cookie, last_complete, last_used);
> +}
> +
> +static void mv_dump_xor_regs(struct mv_xor_chan *chan)
> +{
> + u32 val;
> +
> + val = __raw_readl(XOR_CONFIG(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "config 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ACTIVATION(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "activation 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_INTR_CAUSE(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "intr cause 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_INTR_MASK(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "intr mask 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ERROR_CAUSE(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error cause 0x%08x.\n", val);
> +
> + val = __raw_readl(XOR_ERROR_ADDR(chan));
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error addr 0x%08x.\n", val);
> +}
> +
> +static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
> + u32 intr_cause)
> +{
> + if (intr_cause & (1 << 4)) {
> + dev_dbg(chan->device->common.dev,
> + "ignore this error\n");
> + return;
> + }
> +
> + dev_printk(KERN_ERR, chan->device->common.dev,
> + "error on chan %d. intr cause 0x%08x.\n",
> + chan->idx, intr_cause);
> +
> + mv_dump_xor_regs(chan);
> + BUG();
> +}
> +
> +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
> +{
> + struct mv_xor_chan *chan = data;
> + u32 intr_cause = mv_chan_get_intr_cause(chan);
> +
> + dev_dbg(chan->device->common.dev, "intr cause %x\n",
intr_cause);
> +
> + if (mv_is_err_intr(intr_cause))
> + mv_xor_err_interrupt_handler(chan, intr_cause);
> +
> + tasklet_schedule(&chan->irq_tasklet);
> +
> + mv_xor_device_clear_eoc_cause(chan);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void mv_xor_issue_pending(struct dma_chan *chan)
> +{
> + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
> +
> + if (mv_chan->pending) {
> + mv_chan->pending = 0;
> + mv_chan_activate(mv_chan);
> + }
> +}
> +
> +/*
> + * Perform a transaction to verify the HW works.
> + */
> +#define MV_XOR_TEST_SIZE 2000
> +
> +static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device
*device)
> +{
> + int i;
> + void *src, *dest;
> + dma_addr_t src_dma, dest_dma;
> + struct dma_chan *dma_chan;
> + dma_cookie_t cookie;
> + struct dma_async_tx_descriptor *tx;
> + int err = 0;
> + struct mv_xor_chan *mv_chan;
> +
> + src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
> + if (!src)
> + return -ENOMEM;
> +
> + dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
> + if (!dest) {
> + kfree(src);
> + return -ENOMEM;
> + }
> +
> + /* Fill in src buffer */
> + for (i = 0; i < MV_XOR_TEST_SIZE; i++)
> + ((u8 *) src)[i] = (u8)i;
> +
> + /* Start copy, using first DMA channel */
> + dma_chan = container_of(device->common.channels.next,
> + struct dma_chan,
> + device_node);
> + if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
> + err = -ENODEV;
> + goto out;
> + }
> +
> + dest_dma = dma_map_single(dma_chan->device->dev, dest,
> + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
> +
> + src_dma = dma_map_single(dma_chan->device->dev, src,
> + MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
> +
> + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
> + MV_XOR_TEST_SIZE, 0);
> + cookie = mv_xor_tx_submit(tx);

It would be more generic solution in both _self_test() functions
to use dma_device API and async_tx API rather than
direct calls like mv_xor_alloc_chan_resources(),
mv_xor_prep_dma_memcpy(),
mv_xor_tx_submit(), mv_xor_issue_pending()
(i.e. replace mv_xor_alloc_chan_resources
with device->common.device_alloc_chan_resources, etc.)

> + mv_xor_issue_pending(dma_chan);
> + async_tx_ack(tx);
> + msleep(1);
> +
> + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
> + DMA_SUCCESS) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test copy timed out, disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> + mv_chan = to_mv_xor_chan(dma_chan);
> + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
> + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
> + if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test copy failed compare,
disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> +free_resources:
> + mv_xor_free_chan_resources(dma_chan);
> +out:
> + kfree(src);
> + kfree(dest);
> + return err;
> +}
> +
> +#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
> +static int __devinit
> +mv_xor_xor_self_test(struct mv_xor_device *device)
> +{
> + int i, src_idx;
> + struct page *dest;
> + struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
> + dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
> + dma_addr_t dest_dma;
> + struct dma_async_tx_descriptor *tx;
> + struct dma_chan *dma_chan;
> + dma_cookie_t cookie;
> + u8 cmp_byte = 0;
> + u32 cmp_word;
> + int err = 0;
> + struct mv_xor_chan *mv_chan;
> +
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
> + xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
> + if (!xor_srcs[src_idx])
> + while (src_idx--) {
> + __free_page(xor_srcs[src_idx]);
> + return -ENOMEM;
> + }
> + }
> +
> + dest = alloc_page(GFP_KERNEL);
> + if (!dest)
> + while (src_idx--) {
> + __free_page(xor_srcs[src_idx]);
> + return -ENOMEM;
> + }
> +
> + /* Fill in src buffers */
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
> + u8 *ptr = page_address(xor_srcs[src_idx]);
> + for (i = 0; i < PAGE_SIZE; i++)
> + ptr[i] = (1 << src_idx);
> + }
> +
> + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
> + cmp_byte ^= (u8) (1 << src_idx);
> +
> + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
> + (cmp_byte << 8) | cmp_byte;
> +
> + memset(page_address(dest), 0, PAGE_SIZE);
> +
> + dma_chan = container_of(device->common.channels.next,
> + struct dma_chan,
> + device_node);
> + if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
> + err = -ENODEV;
> + goto out;
> + }
> +
> + /* test xor */
> + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
PAGE_SIZE,
> + DMA_FROM_DEVICE);
> +
> + for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
> + dma_srcs[i] = dma_map_page(dma_chan->device->dev,
xor_srcs[i],
> + 0, PAGE_SIZE, DMA_TO_DEVICE);
> +
> + tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
> + MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
> +
> + cookie = mv_xor_tx_submit(tx);
> + mv_xor_issue_pending(dma_chan);
> + async_tx_ack(tx);
> + msleep(8);
> +
> + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
> + DMA_SUCCESS) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test xor timed out, disabling\n");
> + err = -ENODEV;
> + goto free_resources;
> + }
> +
> + mv_chan = to_mv_xor_chan(dma_chan);
> + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
> + PAGE_SIZE, DMA_FROM_DEVICE);
> + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
> + u32 *ptr = page_address(dest);
> + if (ptr[i] != cmp_word) {
> + dev_printk(KERN_ERR, dma_chan->device->dev,
> + "Self-test xor failed compare,
disabling."
> + " index %d, data %x, expected %x\n",
i,
> + ptr[i], cmp_word);
> + err = -ENODEV;
> + goto free_resources;
> + }
> + }
> +
> +free_resources:
> + mv_xor_free_chan_resources(dma_chan);
> +out:
> + src_idx = MV_XOR_NUM_SRC_TEST;
> + while (src_idx--)
> + __free_page(xor_srcs[src_idx]);
> + __free_page(dest);
> + return err;
> +}
> +
> +static int __devexit mv_xor_remove(struct platform_device *dev)
> +{
> + struct mv_xor_device *device = platform_get_drvdata(dev);
> + struct dma_chan *chan, *_chan;
> + struct mv_xor_chan *mv_chan;
> + struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
> +
> + dma_async_device_unregister(&device->common);
> +
> + dma_free_coherent(&dev->dev, plat_data->pool_size,
> + device->dma_desc_pool_virt,
device->dma_desc_pool);
> +
> + list_for_each_entry_safe(chan, _chan, &device->common.channels,
> + device_node) {
> + mv_chan = to_mv_xor_chan(chan);
> + list_del(&chan->device_node);
> + }
> +
> + return 0;
> +}
> +
> +static int __devinit mv_xor_probe(struct platform_device *pdev)
> +{
> + int ret = 0;
> + int irq;
> + struct mv_xor_device *adev;
> + struct mv_xor_chan *mv_chan;
> + struct dma_device *dma_dev;
> + struct mv_xor_platform_data *plat_data =
pdev->dev.platform_data;
> +
> +
> + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
> + if (!adev)
> + return -ENOMEM;
> +
> + dma_dev = &adev->common;
> +
> + /* allocate coherent memory for hardware descriptors
> + * note: writecombine gives slightly better performance, but
> + * requires that we explicitly flush the writes
> + */
> + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
> +
plat_data->pool_size,
> +
&adev->dma_desc_pool,
> + GFP_KERNEL);
> + if (!adev->dma_desc_pool_virt)
> + return -ENOMEM;
> +
> + adev->id = plat_data->hw_id;
> +
> + /* discover transaction capabilites from the platform data */
> + dma_dev->cap_mask = plat_data->cap_mask;
> + adev->pdev = pdev;
> + platform_set_drvdata(pdev, adev);
> +
> + adev->shared = platform_get_drvdata(plat_data->shared);
> +
> + INIT_LIST_HEAD(&dma_dev->channels);
> +
> + /* set base routines */
> + dma_dev->device_alloc_chan_resources =
mv_xor_alloc_chan_resources;
> + dma_dev->device_free_chan_resources =
mv_xor_free_chan_resources;
> + dma_dev->device_is_tx_complete = mv_xor_is_complete;
> + dma_dev->device_issue_pending = mv_xor_issue_pending;
> + dma_dev->dev = &pdev->dev;
> +
> + /* set prep routines based on capability */
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
> + dma_dev->device_prep_dma_memcpy =
mv_xor_prep_dma_memcpy;
> + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
> + dma_dev->device_prep_dma_memset =
mv_xor_prep_dma_memset;
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
> + dma_dev->max_xor = 8; ;
> + dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
> + }
> +
> + mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan),
GFP_KERNEL);
> + if (!mv_chan) {
> + ret = -ENOMEM;
> + goto err_free_dma;
> + }
> + mv_chan->device = adev;
> + mv_chan->idx = plat_data->hw_id;
> + mv_chan->mmr_base = adev->shared->xor_base;
> +
> + if (!mv_chan->mmr_base) {
> + ret = -ENOMEM;
> + goto err_free_dma;
> + }
> + tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned
long)
> + mv_chan);
> +
> + /* clear errors before enabling interrupts */
> + mv_xor_device_clear_err_status(mv_chan);
> +
> + irq = platform_get_irq(pdev, 0);
> + if (irq < 0) {
> + ret = irq;
> + goto err_free_dma;
> + }
> + ret = devm_request_irq(&pdev->dev, irq,
> + mv_xor_interrupt_handler,
> + 0, dev_name(&pdev->dev), mv_chan);
> + if (ret)
> + goto err_free_dma;
> +
> + mv_chan_unmask_interrupts(mv_chan);
> +
> + mv_set_mode(mv_chan, DMA_MEMCPY);
> +
> + spin_lock_init(&mv_chan->lock);
> + INIT_LIST_HEAD(&mv_chan->chain);
> + INIT_LIST_HEAD(&mv_chan->completed_slots);
> + INIT_LIST_HEAD(&mv_chan->all_slots);
> + INIT_RCU_HEAD(&mv_chan->common.rcu);
> + mv_chan->common.device = dma_dev;
> +
> + list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
> +
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> + ret = mv_xor_memcpy_self_test(adev);
> + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
ret);
> + if (ret)
> + goto err_free_dma;
> + }
> +
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> + ret = mv_xor_xor_self_test(adev);
> + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }
> + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> + ret = mv_xor_memcpy_self_test(adev);
> + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
ret);
> + if (ret)
> + goto err_free_dma;
> + }
> +
> + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> + ret = mv_xor_xor_self_test(adev);
> + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> + if (ret)
> + goto err_free_dma;
> + }

What is the reason for running exact the same memcpy/xor self_test
procedure two times?
It would be helpful if there was a comment on that in this place.

> +
> + dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
> + "( %s%s%s%s)\n",
> + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
> + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
> + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
> + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
> +
> + dma_async_device_register(dma_dev);
> + goto out;
> +
> + err_free_dma:
> + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
> + adev->dma_desc_pool_virt, adev->dma_desc_pool);
> + out:
> + return ret;
> +}
> +
> +static void
> +mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
> + struct mbus_dram_target_info *dram)
> +{
> + void __iomem *base = msp->xor_base;
> + u32 win_enable = 0;
> + int i;
> +
> + for (i = 0; i < 8; i++) {
> + writel(0, base + WINDOW_BASE(i));
> + writel(0, base + WINDOW_SIZE(i));
> + if (i < 4)
> + writel(0, base + WINDOW_REMAP_HIGH(i));
> + }
> +
> + for (i = 0; i < dram->num_cs; i++) {
> + struct mbus_dram_window *cs = dram->cs + i;
> +
> + writel((cs->base & 0xffff0000) |
> + (cs->mbus_attr << 8) |
> + dram->mbus_dram_target_id, base +
WINDOW_BASE(i));
> + writel((cs->size - 1) & 0xffff0000, base +
WINDOW_SIZE(i));
> +
> + win_enable |= (1 << i);
> + win_enable |= 3 << (16 + (2 * i));
> + }
> +
> + writel(win_enable, base + WINDOW_BAR_ENABLE(0));
> + writel(win_enable, base + WINDOW_BAR_ENABLE(1));
> +}
> +
> +static struct platform_driver mv_xor_driver = {
> + .probe = mv_xor_probe,
> + .remove = mv_xor_remove,
> + .driver = {
> + .owner = THIS_MODULE,
> + .name = MV_XOR_NAME,
> + },
> +};
> +
> +static int mv_xor_shared_probe(struct platform_device *pdev)
> +{
> + struct mv_xor_platform_shared_data *msd =
pdev->dev.platform_data;
> + struct mv_xor_shared_private *msp;
> + struct resource *res;
> +
> + dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR
driver\n");
> +
> + msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
> + if (!msp)
> + return -ENOMEM;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + if (!res)
> + return -ENODEV;
> +
> + msp->xor_base = devm_ioremap(&pdev->dev, res->start,
> + res->end - res->start + 1);
> + if (!msp->xor_base)
> + return -EBUSY;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> + if (!res)
> + return -ENODEV;
> +
> + msp->xor_base_high = devm_ioremap(&pdev->dev, res->start,
> + res->end - res->start + 1);
> + if (!msp->xor_base_high)
> + return -EBUSY;
> +
> + platform_set_drvdata(pdev, msp);
> +
> + /*
> + * (Re-)program MBUS remapping windows if we are asked to.
> + */
> + if (msd != NULL && msd->dram != NULL)
> + mv_xor_conf_mbus_windows(msp, msd->dram);
> +
> + return 0;
> +}
> +
> +static int mv_xor_shared_remove(struct platform_device *pdev)
> +{
> + return 0;
> +}
> +
> +static struct platform_driver mv_xor_shared_driver = {
> + .probe = mv_xor_shared_probe,
> + .remove = mv_xor_shared_remove,
> + .driver = {
> + .owner = THIS_MODULE,
> + .name = MV_XOR_SHARED_NAME,
> + },
> +};
> +
> +
> +static int __init mv_xor_init(void)
> +{
> + int rc;
> +
> + rc = platform_driver_register(&mv_xor_shared_driver);
> + if (!rc) {
> + rc = platform_driver_register(&mv_xor_driver);
> + if (rc)
> +
platform_driver_unregister(&mv_xor_shared_driver);
> + }
> + return rc;
> +}
> +
> +/* it's currently unsafe to unload this module */
> +#if 0
> +static void __exit mv_xor_exit(void)
> +{
> + platform_driver_unregister(&mv_xor_driver);
> + platform_driver_unregister(&mv_xor_shared_driver);
> + return;
> +}
> +
> +module_exit(mv_xor_exit);
> +#endif
> +module_init(mv_xor_init);
> +
> +MODULE_AUTHOR("Saeed Bishara <[email protected]>");
> +MODULE_DESCRIPTION("DMA Engine driver for Marvell's XOR");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
> new file mode 100644
> index 0000000..7c6d624
> --- /dev/null
> +++ b/drivers/dma/mv_xor.h
> @@ -0,0 +1,187 @@
> +/*
> + * Copyright ?? 2007, Marvell International Ltd.
> + *
> + * This program is free software; you can redistribute it and/or
modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but
WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
along
> with + * this program; if not, write to the Free Software Foundation,
Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + *
> + */
> +#ifndef MV_XOR_H
> +#define MV_XOR_H
> +#include <linux/types.h>
> +#include <linux/io.h>
> +#include <linux/dmaengine.h>
> +#include <linux/interrupt.h>
> +
> +
> +#define USE_TIMER
> +#define MV_XOR_SLOT_SIZE 64
> +#define MV_XOR_THRESHOLD 1
> +
> +#define XOR0_ID 0
> +#define XOR1_ID 1
> +#define XOR_OPERATION_MODE_XOR 0
> +#define XOR_OPERATION_MODE_MEMCPY 2
> +#define XOR_OPERATION_MODE_MEMSET 4
> +
> +#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx *
4))
> +#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx *
4))
> +#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx *
4))
> +#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx
* 4))
> +#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx *
4))
> +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
> +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
> +
> +#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx *
4))
> +#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx *
4))
> +#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
> +#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
> +#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
> +#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
> +#define XOR_INTR_MASK_VALUE 0x3F5
> +
> +#define WINDOW_BASE(w) (0x250 + ((w) << 2))
> +#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
> +#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
> +#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
> +
> +struct mv_xor_shared_private {
> + void __iomem *xor_base;
> + void __iomem *xor_base_high;
> +};
> +
> +
> +/**
> + * struct mv_xor_device - internal representation of a XOR device
> + * @pdev: Platform device
> + * @id: HW XOR Device selector
> + * @dma_desc_pool: base of DMA descriptor region (DMA address)
> + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
> + * @common: embedded struct dma_device
> + */
> +struct mv_xor_device {
> + struct platform_device *pdev;
> + int id;
> + dma_addr_t dma_desc_pool;
> + void *dma_desc_pool_virt;
> + struct dma_device common;
> + struct mv_xor_shared_private *shared;
> +};
> +
> +/**
> + * struct mv_xor_chan - internal representation of a XOR channel
> + * @pending: allows batching of hardware operations
> + * @completed_cookie: identifier for the most recently completed
operation
> + * @lock: serializes enqueue/dequeue operations to the descriptors
pool
> + * @mmr_base: memory mapped register base
> + * @idx: the index of the xor channel
> + * @chain: device chain view of the descriptors
> + * @completed_slots: slots completed by HW but still need to be acked
> + * @device: parent device
> + * @common: common dmaengine channel object members
> + * @last_used: place holder for allocation to continue from where it
left off
> + * @all_slots: complete domain of slots usable by the channel
> + * @slots_allocated: records the actual size of the descriptor slot
pool
> + * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
> + */
> +struct mv_xor_chan {
> + int pending;
> + dma_cookie_t completed_cookie;
> + spinlock_t lock; /* protects the descriptor slot pool */
> + void __iomem *mmr_base;
> + unsigned int idx;
> + enum dma_transaction_type current_type;
> + struct list_head chain;
> + struct list_head completed_slots;
> + struct mv_xor_device *device;
> + struct dma_chan common;
> + struct mv_xor_desc_slot *last_used;
> + struct list_head all_slots;
> + int slots_allocated;
> + struct tasklet_struct irq_tasklet;
> +#ifdef USE_TIMER
> + unsigned long cleanup_time;
> + u32 current_on_last_cleanup;
> + dma_cookie_t is_complete_cookie;
> +#endif
> +};
> +
> +/**
> + * struct mv_xor_desc_slot - software descriptor
> + * @slot_node: node on the mv_xor_chan.all_slots list
> + * @chain_node: node on the mv_xor_chan.chain list
> + * @completed_node: node on the mv_xor_chan.completed_slots list
> + * @hw_desc: virtual address of the hardware descriptor chain
> + * @phys: hardware address of the hardware descriptor chain
> + * @group_head: first operation in a transaction
> + * @slot_cnt: total slots used in an transaction (group of
operations)
> + * @slots_per_op: number of slots per operation
> + * @idx: pool index
> + * @unmap_src_cnt: number of xor sources
> + * @unmap_len: transaction bytecount
> + * @async_tx: support for the async_tx api
> + * @group_list: list of slots that make up a multi-descriptor
transaction
> + * for example transfer lengths larger than the supported hw max
> + * @xor_check_result: result of zero sum
> + * @crc32_result: result crc calculation
> + */
> +struct mv_xor_desc_slot {
> + struct list_head slot_node;
> + struct list_head chain_node;
> + struct list_head completed_node;
> + enum dma_transaction_type type;
> + void *hw_desc;
> + struct mv_xor_desc_slot *group_head;
> + u16 slot_cnt;
> + u16 slots_per_op;
> + u16 idx;
> + u16 unmap_src_cnt;
> + u32 value;
> + size_t unmap_len;
> + struct dma_async_tx_descriptor async_tx;
> + union {
> + u32 *xor_check_result;
> + u32 *crc32_result;
> + };
> +#ifdef USE_TIMER
> + unsigned long arrival_time;
> + struct timer_list timeout;
> +#endif
> +};
> +
> +
> +/* This structure describes XOR descriptor size 64bytes

> */ +struct mv_xor_desc {
> + u32 status; /* Successful descriptor execution indication
*/
> + u32 crc32_result; /* Result of CRC-32 calculation */
> + u32 desc_command; /* type of operation to be carried out on
the
> + data */
> + u32 phy_next_desc; /* Next descriptor address pointer */
> + u32 byte_count; /* Size of source and destination blocks
in
> + bytes */
> + u32 phy_dest_addr; /* Destination Block address pointer */
> + u32 phy_src_addr[8]; /* source block addresses */
> + u32 reserved0;
> + u32 reserved1;
> +};
> +
> +#define to_mv_sw_desc(addr_hw_desc) \
> + container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
> +#define mv_hw_desc_slot_idx(hw_desc, idx) \
> + ((void *) (((unsigned long)hw_desc) + ((idx) << 5)))
> +
> +
> +#define MV_XOR_MIN_BYTE_COUNT (128)
> +#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
> +#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
> +
> +#endif
> diff --git a/include/asm-arm/plat-orion/mv_xor.h
> b/include/asm-arm/plat-orion/mv_xor.h
> new file mode 100644
> index 0000000..c92bf08
> --- /dev/null
> +++ b/include/asm-arm/plat-orion/mv_xor.h
> @@ -0,0 +1,24 @@
> +/*
> + * Marvell XOR platform device data definition file.
> + */
> +#ifndef __LINUX_MV_XOR_H
> +#define __LINUX_MV_XOR_H
> +#include <linux/dmaengine.h>
> +#include <linux/mbus.h>
> +
> +#define MV_XOR_SHARED_NAME "mv_xor_shared"
> +#define MV_XOR_NAME "mv_xor"
> +
> +struct mbus_dram_target_info;
> +
> +struct mv_xor_platform_shared_data {
> + struct mbus_dram_target_info *dram;
> +};
> +
> +struct mv_xor_platform_data {
> + int hw_id;
> + dma_cap_mask_t cap_mask;
> + size_t pool_size;
> + struct platform_device *shared;
> +};
> +#endif

Regards,
Maciej

2008-07-01 16:16:46

by Saeed Bishara

[permalink] [raw]
Subject: RE: [PATCH/RFC] DMA engine driver for Marvell XOR engine


> > + if (flags & DMA_PREP_INTERRUPT)
> > + hw_desc->desc_command = (1 << 31);
> > + else
> > + hw_desc->desc_command = 0;
> > +
> > + hw_desc->desc_command = (1 << 31);
> > +}
>
> It looks like in mv_desc_init() either the last line or if-else section
> needs to be removed.
right, the if-else will be removed
>
> > +
> > +static inline u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot
> *desc,
> > + struct mv_xor_chan *chan)
> > +{
> > + struct mv_xor_desc *hw_desc = desc->hw_desc;
> > + return hw_desc->phy_dest_addr;
> > +}
> > +
> > +static inline u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
> > + struct mv_xor_chan *chan,
> > + int src_idx)
> > +{
> > + struct mv_xor_desc *hw_desc = desc->hw_desc;
> > + return hw_desc->phy_src_addr[src_idx];
> > +}
> > +
> > +
> > +static inline void mv_desc_set_byte_count(struct mv_xor_desc_slot
> *desc,
> > + struct mv_xor_chan *chan,
> > + u32 byte_count)
> > +{
> > + struct mv_xor_desc *hw_desc = desc->hw_desc;
> > + hw_desc->byte_count = byte_count;
> > +}
>
> Parameter 'chan' is not used in mv_desc_get_dest_addr(),
> mv_desc_get_src_addr()
> and mv_desc_set_byte_count()
ok, it will be removed
>
> > +static void mv_xor_check_threshold(struct mv_xor_chan *mv_chan)
> > +{
> > + if (mv_chan->pending >= MV_XOR_THRESHOLD) {
> > + mv_chan->pending = 0;
> > + mv_chan_activate(mv_chan);
> > + }
> > +}
>
> Is it needed to use both mv_xor_check_threshold() and
> mv_xor_issue_pending()
> in the driver?
> What about replacing mv_xor_check_threshold() with
> mv_xor_issue_pending()
> and using the threshold in mv_xor_issue_pending()
> (especially that MV_XOR_THRESHOLD is set to 1)?
you're right, as the threshold is 1, those functions doing exactly the
same job.
>
> > + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
> > +
> > + /* Allocate descriptor slots */
> > + do {
> > + idx = mv_chan->slots_allocated;
> > + if (idx == num_descs_in_pool)
> > + break;
>
> This break condition is actually redundant to the do-while loop
> condition.
> What about replacing do-while with simpler while loop?
I did that, but know I found some problem with this code which was
copied from the iop-adma. what bothers me that if we exit the loop from
the break, then we end with idx=mv_chan->slots_allocated=num_descs_in_pool,
but, if we exit from the while condition, then we end with
idx=mv_chan->slots_allocated - 1 = num_descs_in_pool - 1
Dan, can you comment?
> MV_XOR_SLOT_SIZE];
> > +
> > + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
> > + MV_XOR_TEST_SIZE, 0);
> > + cookie = mv_xor_tx_submit(tx);
>
> It would be more generic solution in both _self_test() functions
> to use dma_device API and async_tx API rather than
> direct calls like mv_xor_alloc_chan_resources(),
> mv_xor_prep_dma_memcpy(),
> mv_xor_tx_submit(), mv_xor_issue_pending()
> (i.e. replace mv_xor_alloc_chan_resources
> with device->common.device_alloc_chan_resources, etc.)
again, this is copy&paste from iop-adma, I suggest to keep it this way,
and to do what you suggest in seperate patch set. and I think that the
test code better be removed from the low level drivers to the DMA Engine
layer.
agree?

>
> > + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> > + ret = mv_xor_memcpy_self_test(adev);
> > + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
> ret);
> > + if (ret)
> > + goto err_free_dma;
> > + }
> > +
> > + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> > + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> > + ret = mv_xor_xor_self_test(adev);
> > + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> > + if (ret)
> > + goto err_free_dma;
> > + }
> > + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
> > + ret = mv_xor_memcpy_self_test(adev);
> > + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
> ret);
> > + if (ret)
> > + goto err_free_dma;
> > + }
> > +
> > + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
> > + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
> > + ret = mv_xor_xor_self_test(adev);
> > + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
> > + if (ret)
> > + goto err_free_dma;
> > + }
>
> What is the reason for running exact the same memcpy/xor self_test
> procedure two times?
> It would be helpful if there was a comment on that in this place.
no reason, I did that for debug and forgot to remove, I also removed the
|| dma_has_cap (MEMSET) from the xor self test; another stupid copy&paste
>
> > +
Here is the updated patch:
---------------- >8
>From 773fd2e0fa260b6a5041ced8c72487e594c011fa Mon Sep 17 00:00:00 2001
From: Saeed Bishara <[email protected]>
Date: Mon, 23 Jun 2008 04:26:05 -1100
Subject: [PATCH] DMA engine driver for Marvell XOR engine

The XOR engine found in Marvell's SoCs and system controllers
provides XOR and DMA operation, iSCSI CRC32C calculation, memory
initialization, and memory ECC error cleanup operation support.

This driver implements the DMA engine API and supports the following
capabilities:
- memcpy
- xor
- memset

The XOR engine can be used by DMA engine clients implemented in the
kernel, one of those clients is the RAID module. In that case, I
observed 20% improvement in the raid5 write throughput, and 40%
decrease in the CPU utilization when doing array construction, those
results obtained on an 5182 running at 500Mhz.

When enabling the NET DMA client, the performance decreased, so
meanwhile it is recommended to keep this client off.

Signed-off-by: Saeed Bishara <[email protected]>
Acked-by: Dan Williams <[email protected]>
Signed-off-by: Lennert Buytenhek <[email protected]>
Signed-off-by: Nicolas Pitre <[email protected]>
Acked-by: Maciej Sosnowski <[email protected]>
---
drivers/dma/Kconfig | 10 +-
drivers/dma/Makefile | 1 +
drivers/dma/mv_xor.c | 1364 +++++++++++++++++++++++++++++++++++
drivers/dma/mv_xor.h | 183 +++++
include/asm-arm/plat-orion/mv_xor.h | 28 +
5 files changed, 1585 insertions(+), 1 deletions(-)
create mode 100644 drivers/dma/mv_xor.c
create mode 100644 drivers/dma/mv_xor.h
create mode 100644 include/asm-arm/plat-orion/mv_xor.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6239c3d..b834820 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -4,7 +4,7 @@

menuconfig DMADEVICES
bool "DMA Engine support"
- depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC
+ depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC || PLAT_ORION
depends on !HIGHMEM64G
help
DMA engines can do asynchronous data transfers without
@@ -46,6 +46,14 @@ config FSL_DMA
MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
The MPC8349, MPC8360 is also supported.

+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select ASYNC_CORE
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine.
+
config DMA_ENGINE
bool

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8036d9..ee272fd 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..1965300
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1364 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <asm/plat-orion/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+ desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+
+ return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t addr = mv_desc_get_dest_addr(unmap);
+
+ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap, src_cnt);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ idx = mv_chan->slots_allocated;
+ while (idx < num_descs_in_pool) {
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx++;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated = idx;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ }
+
+ if (idx && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8; ;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ INIT_RCU_HEAD(&mv_chan->common.rcu);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = mv_xor_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <[email protected]>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..06cafe1
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+
+#endif
diff --git a/include/asm-arm/plat-orion/mv_xor.h b/include/asm-arm/plat-orion/mv_xor.h
new file mode 100644
index 0000000..c349e8f
--- /dev/null
+++ b/include/asm-arm/plat-orion/mv_xor.h
@@ -0,0 +1,28 @@
+/*
+ * Marvell XOR platform device data definition file.
+ */
+
+#ifndef __ASM_PLAT_ORION_MV_XOR_H
+#define __ASM_PLAT_ORION_MV_XOR_H
+
+#include <linux/dmaengine.h>
+#include <linux/mbus.h>
+
+#define MV_XOR_SHARED_NAME "mv_xor_shared"
+#define MV_XOR_NAME "mv_xor"
+
+struct mbus_dram_target_info;
+
+struct mv_xor_platform_shared_data {
+ struct mbus_dram_target_info *dram;
+};
+
+struct mv_xor_platform_data {
+ struct platform_device *shared;
+ int hw_id;
+ dma_cap_mask_t cap_mask;
+ size_t pool_size;
+};
+
+
+#endif
--
1.5.2.5

2008-07-01 23:41:54

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine

On Tue, Jul 1, 2008 at 9:07 AM, saeed <[email protected]> wrote:
>> > + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
>> > +
>> > + /* Allocate descriptor slots */
>> > + do {
>> > + idx = mv_chan->slots_allocated;
>> > + if (idx == num_descs_in_pool)
>> > + break;
>>
>> This break condition is actually redundant to the do-while loop
>> condition.
>> What about replacing do-while with simpler while loop?
> I did that, but know I found some problem with this code which was
> copied from the iop-adma. what bothers me that if we exit the loop from
> the break, then we end with idx=mv_chan->slots_allocated=num_descs_in_pool,
> but, if we exit from the while condition, then we end with
> idx=mv_chan->slots_allocated - 1 = num_descs_in_pool - 1
> Dan, can you comment?

The admittedly ugly do { } while () loop in iop-adma.c assumed that
num_descs_in_pool is always > 1, and guarantees that idx is equal to
the count of allocated descriptors. Since you changed it to a simple
while() loop then you should also replace idx with ->slots_allocated
in the rest of the routine i.e.:

return mv_chan->slots_allocated ? : -ENOMEM;

>> MV_XOR_SLOT_SIZE];
>> > +
>> > + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
>> > + MV_XOR_TEST_SIZE, 0);
>> > + cookie = mv_xor_tx_submit(tx);
>>
>> It would be more generic solution in both _self_test() functions
>> to use dma_device API and async_tx API rather than
>> direct calls like mv_xor_alloc_chan_resources(),
>> mv_xor_prep_dma_memcpy(),
>> mv_xor_tx_submit(), mv_xor_issue_pending()
>> (i.e. replace mv_xor_alloc_chan_resources
>> with device->common.device_alloc_chan_resources, etc.)
> again, this is copy&paste from iop-adma, I suggest to keep it this way,
> and to do what you suggest in seperate patch set. and I think that the
> test code better be removed from the low level drivers to the DMA Engine
> layer.
> agree?
>

I agree, keep it this way for now and then we can look to unify all
the drivers' self test routines into a common dmaengine routine for
2.6.28.

>>
>> > + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
>> > + ret = mv_xor_memcpy_self_test(adev);
>> > + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
>> ret);
>> > + if (ret)
>> > + goto err_free_dma;
>> > + }
>> > +
>> > + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
>> > + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
>> > + ret = mv_xor_xor_self_test(adev);
>> > + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
>> > + if (ret)
>> > + goto err_free_dma;
>> > + }
>> > + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
>> > + ret = mv_xor_memcpy_self_test(adev);
>> > + dev_dbg(&pdev->dev, "memcpy self test returned %d\n",
>> ret);
>> > + if (ret)
>> > + goto err_free_dma;
>> > + }
>> > +
>> > + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
>> > + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
>> > + ret = mv_xor_xor_self_test(adev);
>> > + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
>> > + if (ret)
>> > + goto err_free_dma;
>> > + }
>>
>> What is the reason for running exact the same memcpy/xor self_test
>> procedure two times?
>> It would be helpful if there was a comment on that in this place.
> no reason, I did that for debug and forgot to remove, I also removed the
> || dma_has_cap (MEMSET) from the xor self test; another stupid copy&paste

Yes, this is an architecture specific aspect of the iop-adma driver.
On an iop the channel may be a memcpy only channel, or an xor / memset
channel. Hence the need for two separate tests.

--
Dan

2008-07-06 11:16:56

by Saeed Bishara

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine


> The admittedly ugly do { } while () loop in iop-adma.c assumed that
> num_descs_in_pool is always > 1, and guarantees that idx is equal to
> the count of allocated descriptors. Since you changed it to a simple
> while() loop then you should also replace idx with ->slots_allocated
> in the rest of the routine i.e.:
>
> return mv_chan->slots_allocated ? : -ENOMEM;
done.
Here is the updated patch:
--------------------->8
>From df28148f5e02f2e744082271911faab04334731c Mon Sep 17 00:00:00 2001
From: Saeed Bishara <[email protected]>
Date: Mon, 23 Jun 2008 04:26:05 -1100
Subject: [PATCH] DMA engine driver for Marvell XOR engine

The XOR engine found in Marvell's SoCs and system controllers
provides XOR and DMA operation, iSCSI CRC32C calculation, memory
initialization, and memory ECC error cleanup operation support.

This driver implements the DMA engine API and supports the following
capabilities:
- memcpy
- xor
- memset

The XOR engine can be used by DMA engine clients implemented in the
kernel, one of those clients is the RAID module. In that case, I
observed 20% improvement in the raid5 write throughput, and 40%
decrease in the CPU utilization when doing array construction, those
results obtained on an 5182 running at 500Mhz.

When enabling the NET DMA client, the performance decreased, so
meanwhile it is recommended to keep this client off.

Signed-off-by: Saeed Bishara <[email protected]>
Acked-by: Dan Williams <[email protected]>
Signed-off-by: Lennert Buytenhek <[email protected]>
Signed-off-by: Nicolas Pitre <[email protected]>
Acked-by: Maciej Sosnowski <[email protected]>
---
drivers/dma/Kconfig | 10 +-
drivers/dma/Makefile | 1 +
drivers/dma/mv_xor.c | 1364 +++++++++++++++++++++++++++++++++++
drivers/dma/mv_xor.h | 183 +++++
include/asm-arm/plat-orion/mv_xor.h | 28 +
5 files changed, 1585 insertions(+), 1 deletions(-)
create mode 100644 drivers/dma/mv_xor.c
create mode 100644 drivers/dma/mv_xor.h
create mode 100644 include/asm-arm/plat-orion/mv_xor.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6239c3d..b834820 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -4,7 +4,7 @@

menuconfig DMADEVICES
bool "DMA Engine support"
- depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC
+ depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC || PLAT_ORION
depends on !HIGHMEM64G
help
DMA engines can do asynchronous data transfers without
@@ -46,6 +46,14 @@ config FSL_DMA
MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
The MPC8349, MPC8360 is also supported.

+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select ASYNC_CORE
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine.
+
config DMA_ENGINE
bool

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8036d9..ee272fd 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..82f5631
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1364 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <asm/plat-orion/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+ desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+
+ return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ u32 src_cnt = unmap->unmap_src_cnt;
+ dma_addr_t addr = mv_desc_get_dest_addr(unmap);
+
+ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap, src_cnt);
+ dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ idx = mv_chan->slots_allocated;
+ while (idx < num_descs_in_pool) {
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx++;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated = idx;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ }
+
+ if (mv_chan->slots_allocated && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return (mv_chan->slots_allocated > 0) ? mv_chan->slots_allocated : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8; ;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ INIT_RCU_HEAD(&mv_chan->common.rcu);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = mv_xor_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <[email protected]>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..06cafe1
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+
+#endif
diff --git a/include/asm-arm/plat-orion/mv_xor.h b/include/asm-arm/plat-orion/mv_xor.h
new file mode 100644
index 0000000..c349e8f
--- /dev/null
+++ b/include/asm-arm/plat-orion/mv_xor.h
@@ -0,0 +1,28 @@
+/*
+ * Marvell XOR platform device data definition file.
+ */
+
+#ifndef __ASM_PLAT_ORION_MV_XOR_H
+#define __ASM_PLAT_ORION_MV_XOR_H
+
+#include <linux/dmaengine.h>
+#include <linux/mbus.h>
+
+#define MV_XOR_SHARED_NAME "mv_xor_shared"
+#define MV_XOR_NAME "mv_xor"
+
+struct mbus_dram_target_info;
+
+struct mv_xor_platform_shared_data {
+ struct mbus_dram_target_info *dram;
+};
+
+struct mv_xor_platform_data {
+ struct platform_device *shared;
+ int hw_id;
+ dma_cap_mask_t cap_mask;
+ size_t pool_size;
+};
+
+
+#endif
--
1.5.2.5

2008-07-07 22:50:24

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH/RFC] DMA engine driver for Marvell XOR engine


On Sun, 2008-07-06 at 04:23 -0700, saeed wrote:
>
> > The admittedly ugly do { } while () loop in iop-adma.c assumed that
> > num_descs_in_pool is always > 1, and guarantees that idx is equal to
> > the count of allocated descriptors. Since you changed it to a simple
> > while() loop then you should also replace idx with ->slots_allocated
> > in the rest of the routine i.e.:
> >
> > return mv_chan->slots_allocated ? : -ENOMEM;
> done.
> Here is the updated patch:

checkpatch said:
ERROR: code indent should use tabs where possible
#636: FILE: drivers/dma/mv_xor.c:602:
+ ^Iidx = mv_chan->slots_allocated;$

WARNING: line over 80 characters
#672: FILE: drivers/dma/mv_xor.c:638:
+ return (mv_chan->slots_allocated > 0) ? mv_chan->slots_allocated : -ENOMEM;


...so I made the following quick edits before applying:

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 82f5631..f0c123c 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -599,7 +599,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;

/* Allocate descriptor slots */
- idx = mv_chan->slots_allocated;
+ idx = mv_chan->slots_allocated;
while (idx < num_descs_in_pool) {
slot = kzalloc(sizeof(*slot), GFP_KERNEL);
if (!slot) {
@@ -635,7 +635,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
"allocated %d descriptor slots last_used: %p\n",
mv_chan->slots_allocated, mv_chan->last_used);

- return (mv_chan->slots_allocated > 0) ? mv_chan->slots_allocated : -ENOMEM;
+ return mv_chan->slots_allocated ? : -ENOMEM;
}

static struct dma_async_tx_descriptor *

--
Dan