2010-09-23 22:12:09

by Tirumala Marri

Subject: [PATCH v1 3/4] PPC4xx: New file with SoC specific functions

From: Tirumala Marri <[email protected]>

This patch creates a new file with SoC-dependent functions.

Signed-off-by: Tirumala R Marri <[email protected]>
---
V1:
* Remove all 440SPe specific references.
* Move some of the code from header file to c file.
---
drivers/dma/ppc4xx/ppc4xx-adma.c | 1658 ++++++++++++++++++++++++++++++++++++++
1 files changed, 1658 insertions(+), 0 deletions(-)
create mode 100644 drivers/dma/ppc4xx/ppc4xx-adma.c

diff --git a/drivers/dma/ppc4xx/ppc4xx-adma.c b/drivers/dma/ppc4xx/ppc4xx-adma.c
new file mode 100644
index 0000000..5a5da23
--- /dev/null
+++ b/drivers/dma/ppc4xx/ppc4xx-adma.c
@@ -0,0 +1,1658 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <[email protected]>
+ *
+ * Further porting to arch/powerpc by
+ * Anatolij Gustschin <[email protected]>
+ * Tirumala R Marri <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchronous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <linux/async_tx.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include "adma.h"
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+#include "ppc440spe-dma.h"
+#endif
+#include "ppc4xx-adma.h"
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc4xx_qword[16];
+static atomic_t ppc4xx_adma_err_irq_ref;
+static unsigned int ppc4xx_mq_dcr_len;
+
+/* These are used in enable & check routines */
+static u32 ppc4xx_r6_enabled;
+static struct completion ppc4xx_r6_test_comp;
+
+static struct page *ppc4xx_rxor_srcs[32];
+
+static dcr_host_t ppc4xx_mq_dcr_host;
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc4xx_dma_fifo_buf;
+
+static char *ppc_adma_errors[] = {
+ [PPC_ADMA_INIT_OK] = "ok",
+ [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+ [PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+ [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+ "structure",
+ [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+ "hardware descriptors",
+ [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+ [PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+ [PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+ [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static void ppc4xx_adma_dma2rxor_set_mult(struct ppc4xx_adma_desc_slot *desc,
+ int index, u8 mult);
+static void print_cb_list(struct ppc4xx_adma_chan *chan,
+ struct ppc4xx_adma_desc_slot *iter);
+/**
+ * ppc4xx_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc4xx_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+ int i, order = 0, state = 0;
+ int idx = 0;
+
+ if (unlikely(!(src_cnt > 1)))
+ return 0;
+
+ BUG_ON(src_cnt > ARRAY_SIZE(ppc4xx_rxor_srcs));
+
+ /* Skip holes in the source list before checking */
+ for (i = 0; i < src_cnt; i++) {
+ if (!srcs[i])
+ continue;
+ ppc4xx_rxor_srcs[idx++] = srcs[i];
+ }
+ src_cnt = idx;
+
+ for (i = 1; i < src_cnt; i++) {
+ char *cur_addr = page_address(ppc4xx_rxor_srcs[i]);
+ char *old_addr = page_address(ppc4xx_rxor_srcs[i - 1]);
+
+ switch (state) {
+ case 0:
+ if (cur_addr == old_addr + len) {
+ /* direct RXOR */
+ order = 1;
+ state = 1;
+ } else if (old_addr == cur_addr + len) {
+ /* reverse RXOR */
+ order = -1;
+ state = 1;
+ } else
+ goto out;
+ break;
+ case 1:
+ if ((i == src_cnt - 2) ||
+ (order == -1 && cur_addr != old_addr - len)) {
+ order = 0;
+ state = 0;
+ } else if ((cur_addr == old_addr + len * order) ||
+ (cur_addr == old_addr + 2 * len) ||
+ (cur_addr == old_addr + 3 * len)) {
+ state = 2;
+ } else {
+ order = 0;
+ state = 0;
+ }
+ break;
+ case 2:
+ order = 0;
+ state = 0;
+ break;
+ }
+ }
+
+ out:
+ if (state == 1 || state == 2)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * ppc4xx_init_rxor_cursor - initialize the RXOR cursor state
+ */
+static void ppc4xx_init_rxor_cursor(struct ppc4xx_rxor *cursor)
+{
+ memset(cursor, 0, sizeof(struct ppc4xx_rxor));
+ cursor->state = 2;
+}
+
+/**
+ * ppc4xx_adma_init_dma2rxor_slot - initialize the CDB sources of a DMA2 RXOR slot
+ */
+static void ppc4xx_adma_init_dma2rxor_slot(struct ppc4xx_adma_desc_slot *desc,
+ dma_addr_t *src, int src_cnt)
+{
+ int i;
+
+ /* initialize CDB */
+ for (i = 0; i < src_cnt; i++) {
+ ppc4xx_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+ desc->src_cnt, (u32) src[i]);
+ }
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc4xx_desc_set_rxor_block_size - set RXOR block size
+ */
+static void ppc4xx_desc_set_rxor_block_size(u32 byte_count)
+{
+ /* assume that byte_count is aligned on a 512-byte boundary;
+ * thus write it directly to the register (bits 23:31 are
+ * reserved there).
+ */
+ dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+
+static int ppc4xx_adma_estimate(struct dma_chan *chan,
+ enum dma_transaction_type cap,
+ struct page **dst_lst, int dst_cnt,
+ struct page **src_lst, int src_cnt,
+ size_t src_sz)
+{
+ int ef = 1;
+
+ if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+ /* If RAID-6 capabilities were not activated don't try
+ * to use them
+ */
+ if (unlikely(!ppc4xx_r6_enabled))
+ return -1;
+ }
+ /* In the current implementation of the ppc4xx ADMA driver it
+ * only makes sense to pick out the PQ case, because it may be
+ * processed:
+ * (1) either using the Biskup method on DMA2;
+ * (2) or on DMA0/1.
+ * Thus we give preference to (1) if the sources are suitable;
+ * otherwise let it be processed on one of the DMA0/1 engines.
+ * In the sum_product case, where the destination is also a
+ * source, process it on DMA0/1 only.
+ */
+ if (cap == DMA_PQ && chan->chan_id == PPC4XX_XOR_ID) {
+
+ if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+ ef = 0; /* sum_product case, process on DMA0/1 */
+ else if (ppc4xx_can_rxor(src_lst, src_cnt, src_sz))
+ ef = 3; /* override (DMA0/1 + idle) */
+ else
+ ef = 0; /* can't process on DMA2 if !rxor */
+ }
+
+ /* channel idleness increases the priority */
+ if (likely(ef) && !ppc4xx_chan_is_busy(to_ppc4xx_adma_chan(chan)))
+ ef++;
+
+ return ef;
+}
+
+struct dma_chan *ppc4xx_async_tx_find_best_channel(enum dma_transaction_type cap,
+ struct page **dst_lst,
+ int dst_cnt,
+ struct page **src_lst,
+ int src_cnt,
+ size_t src_sz)
+{
+ struct dma_chan *best_chan = NULL;
+ struct ppc_dma_chan_ref *ref;
+ int best_rank = -1;
+
+ if (unlikely(!src_sz))
+ return NULL;
+ if (src_sz > PAGE_SIZE) {
+ /*
+ * should a user of the api ever pass > PAGE_SIZE requests
+ * we sort out cases where temporary page-sized buffers
+ * are used.
+ */
+ switch (cap) {
+ case DMA_PQ:
+ if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+ return NULL;
+ if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+ return NULL;
+ break;
+ case DMA_PQ_VAL:
+ case DMA_XOR_VAL:
+ return NULL;
+ default:
+ break;
+ }
+ }
+
+ list_for_each_entry(ref, &ppc4xx_adma_chan_list, node) {
+ if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+ int rank;
+
+ rank = ppc4xx_adma_estimate(ref->chan, cap, dst_lst,
+ dst_cnt, src_lst,
+ src_cnt, src_sz);
+ if (rank > best_rank) {
+ best_rank = rank;
+ best_chan = ref->chan;
+ }
+ }
+ }
+
+ return best_chan;
+}
+
+EXPORT_SYMBOL_GPL(ppc4xx_async_tx_find_best_channel);
+
+/**
+ * ppc4xx_dma01_prep_sum_product - prepare a Dx = A*(P+Pxy) + B*(Q+Qxy)
+ * operation where the destination is also the source.
+ */
+static struct ppc4xx_adma_desc_slot
+*ppc4xx_dma01_prep_sum_product(struct ppc4xx_adma_chan *ppc4xx_chan,
+ dma_addr_t *dst, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct ppc4xx_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC4XX_DESC_WXOR, &op);
+ slot_cnt = 3;
+
+ spin_lock_bh(&ppc4xx_chan->lock);
+
+ /* WXOR, each descriptor occupies one slot */
+ sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc4xx_adma_chan *chan;
+ struct ppc4xx_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc4xx_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = 1;
+ /* 1st descriptor, src[1] data to q page and zero destination */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC4XX_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc4xx_desc_set_dest_addr(iter, chan, 0, ppc4xx_chan->qdest, 1);
+ ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[1]);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+
+ /* 2nd descriptor, multiply src[1] data and store the
+ * result in destination */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC4XX_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC4XX_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ ppc4xx_chan->qdest);
+ ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[1]);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * 3rd descriptor, multiply src[0] data and xor it
+ * with destination
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC4XX_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC4XX_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc4xx_chan->lock);
+
+ return sw_desc;
+}
+
+static
+struct ppc4xx_adma_desc_slot *ppc4xx_dma01_prep_pq(struct ppc4xx_adma_chan *ppc4xx_chan,
+ dma_addr_t *dst,
+ int dst_cnt,
+ dma_addr_t *src,
+ int src_cnt,
+ const unsigned char *scf,
+ size_t len,
+ unsigned long flags)
+{
+ int slot_cnt;
+ struct ppc4xx_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len);
+ /* select operations WXOR/RXOR depending on the
+ * source addresses of operators and the number
+ * of destinations (RXOR supports only Q-parity calculations)
+ */
+ set_bit(PPC4XX_DESC_WXOR, &op);
+ if (!test_and_set_bit(PPC4XX_RXOR_RUN, &ppc4xx_rxor_state)) {
+ /* no active RXOR;
+ * do RXOR if:
+ * - there is more than one source,
+ * - len is aligned on 512-byte boundary,
+ * - source addresses fit to one of 4 possible regions.
+ */
+ if (src_cnt > 1 &&
+ !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+ (src[0] + len) == src[1]) {
+ /* may do RXOR R1 R2 */
+ set_bit(PPC4XX_DESC_RXOR, &op);
+ if (src_cnt != 2) {
+ /* may try to enhance region of RXOR */
+ if ((src[1] + len) == src[2]) {
+ /* do RXOR R1 R2 R3 */
+ set_bit(PPC4XX_DESC_RXOR123, &op);
+ } else if ((src[1] + len * 2) == src[2]) {
+ /* do RXOR R1 R2 R4 */
+ set_bit(PPC4XX_DESC_RXOR124, &op);
+ } else if ((src[1] + len * 3) == src[2]) {
+ /* do RXOR R1 R2 R5 */
+ set_bit(PPC4XX_DESC_RXOR125, &op);
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC4XX_DESC_RXOR12, &op);
+ }
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC4XX_DESC_RXOR12, &op);
+ }
+ }
+
+ if (!test_bit(PPC4XX_DESC_RXOR, &op)) {
+ /* cannot do this operation with RXOR */
+ clear_bit(PPC4XX_RXOR_RUN, &ppc4xx_rxor_state);
+ } else {
+ /* can do; set block size right now */
+ ppc4xx_desc_set_rxor_block_size(len);
+ }
+ }
+
+ /* Number of necessary slots depends on operation type selected */
+ if (!test_bit(PPC4XX_DESC_RXOR, &op)) {
+ /* This is a WXOR only chain. Need descriptors for each
+ * source to GF-XOR them with WXOR, and need descriptors
+ * for each destination to zero them with WXOR
+ */
+ slot_cnt = src_cnt;
+
+ if (flags & DMA_PREP_ZERO_P) {
+ slot_cnt++;
+ set_bit(PPC4XX_ZERO_P, &op);
+ }
+ if (flags & DMA_PREP_ZERO_Q) {
+ slot_cnt++;
+ set_bit(PPC4XX_ZERO_Q, &op);
+ }
+ } else {
+ /* Need 1 or 2 descriptors for the RXOR operation, and
+ * (src_cnt - (2 or 3)) for WXOR of the remaining
+ * sources (if any)
+ */
+ slot_cnt = dst_cnt;
+
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC4XX_ZERO_P, &op);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC4XX_ZERO_Q, &op);
+
+ if (test_bit(PPC4XX_DESC_RXOR12, &op))
+ slot_cnt += src_cnt - 2;
+ else
+ slot_cnt += src_cnt - 3;
+
+ /* Thus we have either RXOR only chain or
+ * mixed RXOR/WXOR
+ */
+ if (slot_cnt == dst_cnt)
+ /* RXOR only chain */
+ clear_bit(PPC4XX_DESC_WXOR, &op);
+ }
+
+ spin_lock_bh(&ppc4xx_chan->lock);
+ /* for both RXOR/WXOR each descriptor occupies one slot */
+ sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1);
+ if (sw_desc) {
+ ppc4xx_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, flags, op);
+
+ /* setup dst/src/mult */
+ pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+ __func__, dst[0], dst[1]);
+ ppc4xx_adma_pq_set_dest(sw_desc, dst, flags);
+ while (src_cnt--) {
+ ppc4xx_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt);
+
+ /* NOTE: "Multi = 0 is equivalent to = 1" as it
+ * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
+ * doesn't work for RXOR with DMA0/1! Instead, multi=0
+ * leads to zeroing source data after RXOR.
+ * So, for P case set-up mult=1 explicitly.
+ */
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc4xx_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+
+ /* Setup byte count for each slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+ }
+ }
+ spin_unlock_bh(&ppc4xx_chan->lock);
+
+ return sw_desc;
+}
+
+/**
+ * ppc4xx_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+struct dma_async_tx_descriptor
+*ppc4xx_adma_prep_dma_pqzero_sum(struct dma_chan *chan,
+ dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt,
+ const unsigned char *scf,
+ size_t len,
+ enum sum_check_flags *pqres,
+ unsigned long flags)
+{
+ struct ppc4xx_adma_chan *ppc4xx_chan;
+ struct ppc4xx_adma_desc_slot *sw_desc, *iter;
+ dma_addr_t pdest, qdest;
+ int slot_cnt, slots_per_op, idst, dst_cnt;
+
+ ppc4xx_chan = to_ppc4xx_adma_chan(chan);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ pdest = 0;
+ else
+ pdest = pq[0];
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ qdest = 0;
+ else
+ qdest = pq[1];
+
+ ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc4xx_chan->device->id,
+ src, src_cnt, scf));
+
+ /* Always use WXOR for P/Q calculations (two destinations).
+ * Need 1 or 2 extra slots to verify results are zero.
+ */
+ idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+ /* One additional slot per destination to clone P/Q
+ * before calculation (we have to preserve destinations).
+ */
+ slot_cnt = src_cnt + dst_cnt * 2;
+ slots_per_op = 1;
+
+ spin_lock_bh(&ppc4xx_chan->lock);
+ sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ ppc4xx_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+ /* Setup byte count for each slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+ }
+
+ if (pdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc4xx_adma_chan *chan;
+
+ iter = sw_desc->group_head;
+ chan = to_ppc4xx_adma_chan(iter->async_tx.chan);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc4xx_desc_set_dest_addr(iter, chan, 0,
+ ppc4xx_chan->pdest, 0);
+ ppc4xx_desc_set_src_addr(iter, chan, 0, 0, pdest);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = 0;
+ /* override pdest to preserve original P */
+ pdest = ppc4xx_chan->pdest;
+ }
+ if (qdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc4xx_adma_chan *chan;
+
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ chan = to_ppc4xx_adma_chan(iter->async_tx.chan);
+
+ if (pdest) {
+ iter = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ }
+
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc4xx_desc_set_dest_addr(iter, chan, 0,
+ ppc4xx_chan->qdest, 0);
+ ppc4xx_desc_set_src_addr(iter, chan, 0, 0, qdest);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = 0;
+ /* override qdest to preserve original Q */
+ qdest = ppc4xx_chan->qdest;
+ }
+
+ /* Setup destinations for P/Q ops */
+ ppc4xx_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+ /* Setup zero QWORDs into DCHECK CDBs */
+ idst = dst_cnt;
+ list_for_each_entry_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ /*
+ * The last CDB corresponds to the Q-parity check,
+ * the one before it to the P-parity check
+ */
+ if (idst == DMA_DEST_MAX_NUM) {
+ if (idst == dst_cnt) {
+ set_bit(PPC4XX_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC4XX_DESC_PCHECK,
+ &iter->flags);
+ }
+ } else {
+ if (qdest) {
+ set_bit(PPC4XX_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC4XX_DESC_PCHECK,
+ &iter->flags);
+ }
+ }
+ iter->xor_check_result = pqres;
+
+ /*
+ * set it to zero; if the check fails, the result
+ * will be updated
+ */
+ *iter->xor_check_result = 0;
+ ppc4xx_desc_set_dcheck(iter, ppc4xx_chan, ppc4xx_qword);
+
+ if (!(--dst_cnt))
+ break;
+ }
+
+ /* Setup sources and mults for P/Q ops */
+ list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ struct ppc4xx_adma_chan *chan;
+ u32 mult_dst;
+
+ chan = to_ppc4xx_adma_chan(iter->async_tx.chan);
+ ppc4xx_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB,
+ src[src_cnt - 1]);
+ if (qdest) {
+ mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+ DMA_CDB_SG_DST1;
+ ppc4xx_desc_set_src_mult(iter, chan,
+ DMA_CUED_MULT1_OFF,
+ mult_dst,
+ scf[src_cnt - 1]);
+ }
+ if (!(--src_cnt))
+ break;
+ }
+ }
+ spin_unlock_bh(&ppc4xx_chan->lock);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc4xx_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+struct dma_async_tx_descriptor
+*ppc4xx_adma_prep_dma_xor_zero_sum(struct dma_chan *chan,
+ dma_addr_t *src,
+ unsigned int src_cnt,
+ size_t len,
+ enum sum_check_flags *result,
+ unsigned long flags)
+{
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t pq[2];
+
+ /* validate P, disable Q */
+ pq[0] = src[0];
+ pq[1] = 0;
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+
+ tx = ppc4xx_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+ src_cnt - 1, NULL, len,
+ result, flags);
+ return tx;
+}
+
+void ppc4xx_adma_set_capabilities(struct ppc4xx_adma_device *adev)
+{
+ switch (adev->id) {
+ case PPC4XX_DMA0_ID:
+ case PPC4XX_DMA1_ID:
+ dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ dma_cap_set(DMA_MEMSET, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+ dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+ break;
+ case PPC4XX_XOR_ID:
+ dma_cap_set(DMA_XOR, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ adev->common.cap_mask = adev->common.cap_mask;
+ break;
+ }
+
+ if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC4XX_DMA0_ID:
+ dma_set_maxpq(&adev->common,
+ DMA0_FIFO_SIZE / sizeof(struct dma_cdb),
+ 0);
+ break;
+ case PPC4XX_DMA1_ID:
+ dma_set_maxpq(&adev->common,
+ DMA1_FIFO_SIZE / sizeof(struct dma_cdb),
+ 0);
+ break;
+ case PPC4XX_XOR_ID:
+ adev->common.max_pq = XOR_MAX_OPS * 3;
+ break;
+ }
+ }
+ if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC4XX_DMA0_ID:
+ adev->common.max_pq = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC4XX_DMA1_ID:
+ adev->common.max_pq = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ }
+ if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC4XX_DMA0_ID:
+ adev->common.max_xor = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC4XX_DMA1_ID:
+ adev->common.max_xor = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ }
+ pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+ "( %s%s%s%s%s%s%s)\n",
+ dev_name(adev->dev),
+ dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+ dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_XOR_VAL,
+ adev->common.cap_mask) ? "xor_val " : "",
+ dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+ dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "",
+ dma_has_cap(DMA_INTERRUPT,
+ adev->common.cap_mask) ? "intr " : "");
+}
+static struct ppc4xx_adma_desc_slot
+*ppc4xx_dma2_prep_pq(struct ppc4xx_adma_chan *ppc4xx_chan,
+ dma_addr_t *dst, int dst_cnt,
+ dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ int slot_cnt, descs_per_op;
+ struct ppc4xx_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ BUG_ON(!dst_cnt);
+ /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len); */
+
+ spin_lock_bh(&ppc4xx_chan->lock);
+ descs_per_op = ppc4xx_dma2_pq_slot_count(src, src_cnt, len);
+ if (descs_per_op < 0) {
+ spin_unlock_bh(&ppc4xx_chan->lock);
+ return NULL;
+ }
+
+ /* depending on number of sources we have 1 or 2 RXOR chains */
+ slot_cnt = descs_per_op * dst_cnt;
+
+ sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1);
+ if (sw_desc) {
+ op = slot_cnt;
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc4xx_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+ --op ? 0 : flags);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+
+ ppc4xx_init_rxor_cursor(&(iter->rxor_cursor));
+ iter->rxor_cursor.len = len;
+ iter->descs_per_op = descs_per_op;
+ }
+ op = 0;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ op++;
+ if (op % descs_per_op == 0)
+ ppc4xx_adma_init_dma2rxor_slot(iter, src,
+ src_cnt);
+ if (likely(!list_is_last(&iter->chain_node,
+ &sw_desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next =
+ list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ ppc4xx_xor_set_link(iter, iter->hw_next);
+ } else {
+ /* this is the last descriptor. */
+ iter->hw_next = NULL;
+ }
+ }
+
+ /* fixup head descriptor */
+ sw_desc->dst_cnt = dst_cnt;
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC4XX_ZERO_P, &sw_desc->flags);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC4XX_ZERO_Q, &sw_desc->flags);
+
+ /* setup dst/src/mult */
+ ppc4xx_adma_pq_set_dest(sw_desc, dst, flags);
+
+ while (src_cnt--) {
+ /* handle descriptors (if dst_cnt == 2) inside
+ * the ppc4xx_adma_pq_set_srcxxx() functions
+ */
+ ppc4xx_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc4xx_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+ }
+ spin_unlock_bh(&ppc4xx_chan->lock);
+ ppc4xx_desc_set_rxor_block_size(len);
+ return sw_desc;
+}
+
+/**
+ * ppc4xx_dma01_prep_mult - prepare CDBs for a Q operation where the
+ * destination is also the source
+ */
+static struct ppc4xx_adma_desc_slot
+*ppc4xx_dma01_prep_mult(struct ppc4xx_adma_chan *ppc4xx_chan,
+ dma_addr_t *dst, int dst_cnt,
+ dma_addr_t *src, int src_cnt,
+ const unsigned char *scf,
+ size_t len,
+ unsigned long flags)
+{
+ struct ppc4xx_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC4XX_DESC_WXOR, &op);
+ slot_cnt = 2;
+
+ spin_lock_bh(&ppc4xx_chan->lock);
+
+ /* use WXOR, each descriptor occupies one slot */
+ sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc4xx_adma_chan *chan;
+ struct ppc4xx_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc4xx_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = dst_cnt;
+ /* First descriptor, zero data in the destination and copy it
+ * to q page using MULTICAST transfer.
+ */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC4XX_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc4xx_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+ ppc4xx_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+ ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * Second descriptor, multiply data from the q page
+ * and store the result in real destination.
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc4xx_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC4XX_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC4XX_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc4xx_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB, dst[1]);
+ ppc4xx_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+
+ ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc4xx_chan->lock);
+
+ return sw_desc;
+}
+
+/**
+ * ppc4xx_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+struct dma_async_tx_descriptor
+*ppc4xx_adma_prep_dma_pq(struct dma_chan *chan,
+ dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt,
+ const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ppc4xx_adma_chan *ppc4xx_chan;
+ struct ppc4xx_adma_desc_slot *sw_desc = NULL;
+ int dst_cnt = 0;
+
+ ppc4xx_chan = to_ppc4xx_adma_chan(chan);
+
+ BUG_ON(!len);
+ BUG_ON(unlikely(len > PPC4XX_ADMA_XOR_MAX_BYTE_COUNT));
+ BUG_ON(!src_cnt);
+
+ if (src_cnt == 1 && dst[1] == src[0]) {
+ dma_addr_t dest[2];
+
+ /* dst[1] is real destination (Q) */
+ dest[0] = dst[1];
+ /* this is the page to multicast source data to */
+ dest[1] = ppc4xx_chan->qdest;
+ sw_desc = ppc4xx_dma01_prep_mult(ppc4xx_chan,
+ dest, 2, src, src_cnt, scf,
+ len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (src_cnt == 2 && dst[1] == src[1]) {
+ sw_desc = ppc4xx_dma01_prep_sum_product(ppc4xx_chan,
+ &dst[1], src, 2, scf,
+ len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+ BUG_ON(!dst[0]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_P;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+ BUG_ON(!dst[1]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_Q;
+ }
+
+ BUG_ON(!dst_cnt);
+
+ dev_dbg(ppc4xx_chan->device->common.dev,
+ "ppc4xx adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ ppc4xx_chan->device->id, __func__, src_cnt, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ switch (ppc4xx_chan->device->id) {
+ case PPC4XX_DMA0_ID:
+ case PPC4XX_DMA1_ID:
+ sw_desc = ppc4xx_dma01_prep_pq(ppc4xx_chan,
+ dst, dst_cnt, src, src_cnt,
+ scf, len, flags);
+ break;
+
+ case PPC4XX_XOR_ID:
+ sw_desc = ppc4xx_dma2_prep_pq(ppc4xx_chan,
+ dst, dst_cnt, src, src_cnt,
+ scf, len, flags);
+ break;
+ }
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+int ppc4xx_adma_setup_irqs(struct ppc4xx_adma_device *adev,
+ struct ppc4xx_adma_chan *chan, int *initcode)
+{
+ struct platform_device *ofdev;
+ struct device_node *np;
+ int ret;
+
+ ofdev = container_of(adev->dev, struct platform_device, dev);
+ np = ofdev->dev.of_node;
+ if (adev->id != PPC4XX_XOR_ID) {
+ adev->err_irq = irq_of_parse_and_map(np, 1);
+ if (adev->err_irq == NO_IRQ) {
+ dev_warn(adev->dev, "no err irq resource?\n");
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ adev->err_irq = -ENXIO;
+ } else
+ atomic_inc(&ppc4xx_adma_err_irq_ref);
+ } else {
+ adev->err_irq = -ENXIO;
+ }
+
+ adev->irq = irq_of_parse_and_map(np, 0);
+ if (adev->irq == NO_IRQ) {
+ dev_err(adev->dev, "no irq resource\n");
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -ENXIO;
+ goto err_irq_map;
+ }
+ dev_dbg(adev->dev, "irq %d, err irq %d\n", adev->irq, adev->err_irq);
+
+ ret = request_irq(adev->irq, ppc4xx_adma_eot_handler,
+ 0, dev_driver_string(adev->dev), chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n", adev->irq);
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -EIO;
+ goto err_req1;
+ }
+
+ /* Only the DMA engines have a separate error IRQ,
+ * so it's OK if err_irq < 0 in the XOR engine case.
+ */
+ if (adev->err_irq > 0) {
+ /* both DMA engines share common error IRQ */
+ ret = request_irq(adev->err_irq,
+ ppc4xx_adma_err_handler,
+ IRQF_SHARED,
+ dev_driver_string(adev->dev), chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n",
+ adev->err_irq);
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ ret = -EIO;
+ goto err_req2;
+ }
+ }
+
+ if (adev->id == PPC4XX_XOR_ID) {
+ /* enable XOR engine interrupts */
+ iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+ &adev->xor_reg->ier);
+ } else {
+ u32 mask, enable;
+
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+#endif
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n",
+ __func__);
+ ret = -ENODEV;
+ goto err_req2;
+ }
+ adev->i2o_reg = of_iomap(np, 0);
+ if (!adev->i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ ret = -EINVAL;
+ goto err_req2;
+ }
+ of_node_put(np);
+ /* Unmask 'CS FIFO Attention' interrupts and
+ * enable generating interrupts on errors
+ */
+ enable = (adev->id == PPC4XX_DMA0_ID) ?
+ ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) & enable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ return 0;
+
+ err_req2:
+ free_irq(adev->irq, chan);
+ err_req1:
+ irq_dispose_mapping(adev->irq);
+ err_irq_map:
+ if (adev->err_irq > 0) {
+ if (atomic_dec_and_test(&ppc4xx_adma_err_irq_ref))
+ irq_dispose_mapping(adev->err_irq);
+ }
+ return ret;
+}
+
+void ppc4xx_adma_release_irqs(struct ppc4xx_adma_device *adev,
+ struct ppc4xx_adma_chan *chan)
+{
+ u32 mask, disable;
+
+ if (adev->id == PPC4XX_XOR_ID) {
+ /* disable XOR engine interrupts */
+ mask = ioread32be(&adev->xor_reg->ier);
+ mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+ iowrite32be(mask, &adev->xor_reg->ier);
+ } else {
+ /* disable DMAx engine interrupts */
+ disable = (adev->id == PPC4XX_DMA0_ID) ?
+ (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) | disable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ free_irq(adev->irq, chan);
+ irq_dispose_mapping(adev->irq);
+ if (adev->err_irq > 0) {
+ free_irq(adev->err_irq, chan);
+ if (atomic_dec_and_test(&ppc4xx_adma_err_irq_ref)) {
+ irq_dispose_mapping(adev->err_irq);
+ iounmap(adev->i2o_reg);
+ }
+ }
+}
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc4xx_configure_raid_devices(void)
+{
+ struct device_node *np;
+ struct resource i2o_res;
+ struct i2o_regs __iomem *i2o_reg;
+ dcr_host_t i2o_dcr_host;
+ unsigned int dcr_base, dcr_len;
+ int i, ret;
+
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+#endif
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n", __func__);
+ return -ENODEV;
+ }
+
+ if (of_address_to_resource(np, 0, &i2o_res)) {
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ i2o_reg = of_iomap(np, 0);
+ if (!i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ /* Get I2O DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%s: can't get DCR registers base/len!\n",
+ np->full_name);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+
+ i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(i2o_dcr_host)) {
+ pr_err("%s: failed to map DCRs!\n", np->full_name);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+ of_node_put(np);
+
+ /* Provide memory regions for the DMA FIFOs: I2O, DMA0 and DMA1 share
+ * the base address of the FIFO memory space.
+ * We actually need twice as much physical memory as programmed in the
+ * <fsiz> register (because there are two FIFOs for each DMA: CP and CS).
+ */
+ ppc4xx_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+ GFP_KERNEL);
+ if (!ppc4xx_dma_fifo_buf) {
+ pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+ iounmap(i2o_reg);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+ return -ENOMEM;
+ }
+
+ /*
+ * Configure h/w
+ */
+ /* Reset I2O/DMA */
+ mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+ mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+ /* Setup the base address of mmaped registers */
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32) (i2o_res.start >> 32));
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32) (i2o_res.start) |
+ I2O_REG_ENABLE);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+
+ /* Setup FIFO memory space base address */
+ iowrite32(0, &i2o_reg->ifbah);
+ iowrite32(((u32) __pa(ppc4xx_dma_fifo_buf)), &i2o_reg->ifbal);
+
+ /* Set a zero FIFO size for I2O, so the whole
+ * ppc4xx_dma_fifo_buf is used by the DMAs.
+ * The DMAx FIFOs will be configured at probe time.
+ */
+ iowrite32(0, &i2o_reg->ifsiz);
+ iounmap(i2o_reg);
+
+ /* To prepare WXOR/RXOR functionality we need access to the
+ * Memory Queue Module DCRs (it will eventually be enabled
+ * via the /sys interface of the ppc4xx ADMA driver).
+ */
+#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
+ np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+#endif
+ if (!np) {
+ pr_err("%s: can't find MQ device tree node\n", __func__);
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ /* Get MQ DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%s: can't get DCR registers base/len!\n",
+ np->full_name);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+
+ ppc4xx_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(ppc4xx_mq_dcr_host)) {
+ pr_err("%s: failed to map DCRs!\n", np->full_name);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+ of_node_put(np);
+ ppc4xx_mq_dcr_len = dcr_len;
+
+ /* Set HB alias */
+ dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+ /* Set:
+ * - LL transaction passing limit to 1;
+ * - Memory controller cycle limit to 1;
+ * - Galois Polynomial to 0x14d (default)
+ */
+ dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL,
+ (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+ (PPC4XX_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+ atomic_set(&ppc4xx_adma_err_irq_ref, 0);
+ for (i = 0; i < PPC4XX_ADMA_ENGINES_NUM; i++)
+ ppc4xx_adma_devices[i] = -1;
+
+ return 0;
+
+ out_mq:
+ of_node_put(np);
+ out_free:
+ kfree(ppc4xx_dma_fifo_buf);
+ return ret;
+}
+
+/**
+ * ppc4xx_test_callback - called when the test operation has completed
+ */
+static void ppc4xx_test_callback(void *unused)
+{
+ complete(&ppc4xx_r6_test_comp);
+}
+
+/**
+ * ppc4xx_test_raid6 - test whether RAID-6 capabilities were enabled successfully.
+ * For this we just perform one WXOR operation with the same source
+ * and destination addresses and a GF-multiplier of 1; so if RAID-6
+ * capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc4xx_test_raid6(struct ppc4xx_adma_chan *chan)
+{
+ struct ppc4xx_adma_desc_slot *sw_desc, *iter;
+ struct page *pg;
+ char *a;
+ dma_addr_t dma_addr, addrs[2];
+ unsigned long op = 0;
+ int rval = 0;
+
+ set_bit(PPC4XX_DESC_WXOR, &op);
+
+ pg = alloc_page(GFP_KERNEL);
+ if (!pg)
+ return -ENOMEM;
+
+ spin_lock_bh(&chan->lock);
+ sw_desc = ppc4xx_adma_alloc_slots(chan, 1, 1);
+ if (sw_desc) {
+ /* 1 src, 1 dst, int_ena, WXOR */
+ ppc4xx_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc4xx_desc_set_byte_count(iter, chan, PAGE_SIZE);
+ iter->unmap_len = PAGE_SIZE;
+ }
+ } else {
+ rval = -EFAULT;
+ spin_unlock_bh(&chan->lock);
+ goto exit;
+ }
+ spin_unlock_bh(&chan->lock);
+
+ /* Fill the test page with ones */
+ memset(page_address(pg), 0xFF, PAGE_SIZE);
+ dma_addr = dma_map_page(chan->device->dev, pg, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Setup addresses */
+ ppc4xx_adma_pq_set_src(sw_desc, dma_addr, 0);
+ ppc4xx_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+ addrs[0] = dma_addr;
+ addrs[1] = 0;
+ ppc4xx_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+ async_tx_ack(&sw_desc->async_tx);
+ sw_desc->async_tx.callback = ppc4xx_test_callback;
+ sw_desc->async_tx.callback_param = NULL;
+
+ init_completion(&ppc4xx_r6_test_comp);
+
+ ppc4xx_adma_tx_submit(&sw_desc->async_tx);
+ ppc4xx_adma_issue_pending(&chan->common);
+
+ wait_for_completion(&ppc4xx_r6_test_comp);
+
+ /* Now check if the test page is zeroed */
+ a = page_address(pg);
+ if ((*(u32 *) a) == 0 && memcmp(a, a + 4, PAGE_SIZE - 4) == 0) {
+ /* page is zero - RAID-6 enabled */
+ rval = 0;
+ } else {
+ /* RAID-6 was not enabled */
+ rval = -EINVAL;
+ }
+ exit:
+ __free_page(pg);
+ return rval;
+}
+
+/**
+ * ppc4xx_adma_remove - remove the async device
+ */
+int __devexit ppc4xx_adma_remove(struct platform_device *ofdev)
+{
+ struct ppc4xx_adma_device *adev = dev_get_drvdata(&ofdev->dev);
+ struct device_node *np = ofdev->dev.of_node;
+ struct resource res;
+ struct dma_chan *chan, *_chan;
+ struct ppc_dma_chan_ref *ref, *_ref;
+ struct ppc4xx_adma_chan *ppc4xx_chan;
+
+ dev_set_drvdata(&ofdev->dev, NULL);
+ if (adev->id < PPC4XX_ADMA_ENGINES_NUM)
+ ppc4xx_adma_devices[adev->id] = -1;
+
+ dma_async_device_unregister(&adev->common);
+
+ list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+ device_node) {
+ ppc4xx_chan = to_ppc4xx_adma_chan(chan);
+ ppc4xx_adma_release_irqs(adev, ppc4xx_chan);
+ tasklet_kill(&ppc4xx_chan->irq_tasklet);
+ if (adev->id != PPC4XX_XOR_ID) {
+ dma_unmap_page(&ofdev->dev, ppc4xx_chan->pdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&ofdev->dev, ppc4xx_chan->qdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(ppc4xx_chan->pdest_page);
+ __free_page(ppc4xx_chan->qdest_page);
+ }
+ list_for_each_entry_safe(ref, _ref, &ppc4xx_adma_chan_list,
+ node) {
+ if (ppc4xx_chan == to_ppc4xx_adma_chan(ref->chan)) {
+ list_del(&ref->node);
+ kfree(ref);
+ }
+ }
+ list_del(&chan->device_node);
+ kfree(ppc4xx_chan);
+ }
+
+ dma_free_coherent(adev->dev, adev->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ if (adev->id == PPC4XX_XOR_ID)
+ iounmap(adev->xor_reg);
+ else
+ iounmap(adev->dma_reg);
+ of_address_to_resource(np, 0, &res);
+ release_mem_region(res.start, resource_size(&res));
+ kfree(adev);
+ return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
+ * directory are "devices", "enable" and "poly".
+ * "devices" shows available engines.
+ * "enable" is used to enable RAID-6 capabilities or to check
+ * whether they have been activated.
+ * "poly" allows setting/checking the polynomial in use (for PPC4xx only).
+ */
+
+static ssize_t show_ppc4xx_devices(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ int i;
+
+ for (i = 0; i < PPC4XX_ADMA_ENGINES_NUM; i++) {
+ if (ppc4xx_adma_devices[i] == -1)
+ continue;
+ size += snprintf(buf + size, PAGE_SIZE - size,
+ "PPC4XX-ADMA.%d: %s\n", i,
+ ppc_adma_errors[ppc4xx_adma_devices[i]]);
+ }
+ return size;
+}
+static ssize_t show_ppc4xx_r6enable(struct device_driver *dev, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE,
+ "PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+ ppc4xx_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t store_ppc4xx_r6enable(struct device_driver *dev,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+
+ if (!count || count > 11)
+ return -EINVAL;
+
+ if (!ppc4xx_r6_tchan)
+ return -EFAULT;
+
+ /* Write a key */
+ sscanf(buf, "%lx", &val);
+ dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_XORBA, val);
+ isync();
+
+ /* Verify whether it really works now */
+ if (ppc4xx_test_raid6(ppc4xx_r6_tchan) == 0) {
+ pr_info("PPC440SP(e) RAID-6 has been activated "
+ "successfully\n");
+ ppc4xx_r6_enabled = 1;
+ } else {
+ pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+ " Error key ?\n");
+ ppc4xx_r6_enabled = 0;
+ }
+ return count;
+}
+
+static ssize_t show_ppc4xx_r6poly(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ u32 reg;
+
+#ifdef CONFIG_440SP
+ /* 440SP has fixed polynomial */
+ reg = 0x4d;
+#else
+ reg = dcr_read(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg >>= MQ0_CFBHL_POLY;
+ reg &= 0xFF;
+#endif
+
+ size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+ "uses 0x1%02x polynomial.\n", reg);
+ return size;
+}
+
+static ssize_t store_ppc4xx_r6poly(struct device_driver *dev,
+ const char *buf, size_t count)
+{
+ unsigned long reg, val;
+
+#ifdef CONFIG_440SP
+ /* 440SP uses default 0x14D polynomial only */
+ return -EINVAL;
+#endif
+
+ if (!count || count > 6)
+ return -EINVAL;
+
+ /* e.g., 0x14D or 0x11D */
+ sscanf(buf, "%lx", &val);
+
+ if (val & ~0x1FF)
+ return -EINVAL;
+
+ val &= 0xFF;
+ reg = dcr_read(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg &= ~(0xFF << MQ0_CFBHL_POLY);
+ reg |= val << MQ0_CFBHL_POLY;
+ dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+ return count;
+}
+
+static DRIVER_ATTR(devices, S_IRUGO, show_ppc4xx_devices, NULL);
+static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc4xx_r6enable,
+ store_ppc4xx_r6enable);
+static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc4xx_r6poly,
+ store_ppc4xx_r6poly);
+int ppc4xx_adma_hw_init(void)
+{
+ int ret;
+
+ ret = ppc4xx_configure_raid_devices();
+ if (ret)
+ return ret;
+
+ ret = of_register_platform_driver(&ppc4xx_adma_driver);
+ if (ret) {
+ pr_err("%s: failed to register platform driver\n", __func__);
+ goto out_reg;
+ }
+
+ /* Initialization status */
+ ret = driver_create_file(&ppc4xx_adma_driver.driver,
+ &driver_attr_devices);
+ if (ret)
+ goto out_dev;
+
+ /* RAID-6 h/w enable entry */
+ ret = driver_create_file(&ppc4xx_adma_driver.driver,
+ &driver_attr_enable);
+ if (ret)
+ goto out_en;
+
+ /* GF polynomial to use */
+ ret = driver_create_file(&ppc4xx_adma_driver.driver, &driver_attr_poly);
+ if (!ret)
+ return ret;
+
+ driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_enable);
+ out_en:
+ driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_devices);
+ out_dev:
+ /* User will not be able to enable h/w RAID-6 */
+ pr_err("%s: failed to create RAID-6 driver interface\n", __func__);
+ out_reg:
+ dcr_unmap(ppc4xx_mq_dcr_host, ppc4xx_mq_dcr_len);
+ kfree(ppc4xx_dma_fifo_buf);
+ return ret;
+}
+
+static void __exit ppc4xx_adma_exit(void)
+{
+ driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_poly);
+ driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_enable);
+ driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_devices);
+ of_unregister_platform_driver(&ppc4xx_adma_driver);
+ dcr_unmap(ppc4xx_mq_dcr_host, ppc4xx_mq_dcr_len);
+ kfree(ppc4xx_dma_fifo_buf);
+}
--
1.6.1.rc3


2010-09-23 22:48:45

by Dan Williams

Subject: Re: [PATCH v1 3/4] PPC4xx: New file with SoC specific functions

On 9/23/2010 3:11 PM, [email protected] wrote:
> From: Tirumala Marri <[email protected]>
>
> This patch creates a new file with SoC-dependent functions.
>
> Signed-off-by: Tirumala R Marri <[email protected]>
> ---
> V1:
> * Remove all 440SPe specific references.

Maybe it renames ppc440spe to ppc4xx but it adds things like...

> +#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
> + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
> +#endif

...in the code, which is 1) not generic and 2) I suspect causes a
compile warning for using an uninitialized variable.
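
For reference, a minimal sketch of how that warning could be avoided while
keeping the current structure (everything below is lifted from the patch's
ppc4xx_configure_raid_devices() path, except the NULL initializer, which is
my addition):

        struct device_node *np = NULL;  /* stays NULL when no matching CONFIG_* is set */

#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
        np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
#endif
        if (!np) {
                pr_err("%s: can't find I2O device tree node\n", __func__);
                return -ENODEV;
        }

That silences the warning, but it still leaves the SoC knowledge scattered
through the common code.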

> + if (!np) {
> + pr_err("%s: can't find I2O device tree node\n",
> + __func__);
> + ret = -ENODEV;
> + goto err_req2;
> + }

It looks to me like the common code will need to have a few build
dependent helper routines as it appears one instance of the driver
cannot simultaneously support 440sp, 440spe, and 460ex.
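
Something like a build-dependent helper, with one definition compiled per
platform, might keep those compatible strings out of the common paths. A
rough sketch only; the 460EX compatible string below is a placeholder I made
up, not something taken from the patch:

static struct device_node *ppc4xx_adma_find_i2o_node(void)
{
#if defined(CONFIG_440SPe) || defined(CONFIG_440SP)
        return of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
#elif defined(CONFIG_460EX)
        /* placeholder compatible string, adjust for the real 460ex binding */
        return of_find_compatible_node(NULL, NULL, "ibm,i2o-460ex");
#else
        return NULL;
#endif
}

The common code would then call the helper once and keep its existing
"if (!np)" error handling unchanged.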

--
Dan