From: tmarri@apm.com
Subject: [PATCH v1 3/4] PPC4xx: New file with SoC specific functions
Date: Thu, 23 Sep 2010 15:11:46 -0700
Message-ID: <1285279906-16819-1-git-send-email-tmarri@apm.com>
Cc: linuxppc-dev@lists.ozlabs.org, linux-crypto@vger.kernel.org,
    dan.j.williams@intel.com, yur@emcraft.com, herbert@gondor.hengli.com.au,
    tmarri@apm.com
To: linux-raid@vger.kernel.org

From: Tirumala Marri

This patch creates a new file with SoC-dependent functions.

Signed-off-by: Tirumala R Marri
---
V1:
  * Remove all 440SPe specific references.
  * Move some of the code from the header file to the .c file.
---
 drivers/dma/ppc4xx/ppc4xx-adma.c | 1658 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 1658 insertions(+), 0 deletions(-)
 create mode 100644 drivers/dma/ppc4xx/ppc4xx-adma.c

diff --git a/drivers/dma/ppc4xx/ppc4xx-adma.c b/drivers/dma/ppc4xx/ppc4xx-adma.c
new file mode 100644
index 0000000..5a5da23
--- /dev/null
+++ b/drivers/dma/ppc4xx/ppc4xx-adma.c
@@ -0,0 +1,1658 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov
+ *
+ * Further porting to arch/powerpc by
+ * 	Anatolij Gustschin
+ * 	Tirumala R Marri
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchronous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "adma.h" +#if defined(CONFIG_440SPe) || defined(CONFIG_440SP) +#include "ppc440spe-dma.h" +#endif +#include "ppc4xx-adma.h" + +/* This array is used in data-check operations for storing a pattern */ +static char ppc4xx_qword[16]; +static atomic_t ppc4xx_adma_err_irq_ref; +static unsigned int ppc4xx_mq_dcr_len; + +/* These are used in enable & check routines + */ +static u32 ppc4xx_r6_enabled; +static struct completion ppc4xx_r6_test_comp; + +static struct page *ppc4xx_rxor_srcs[32]; + +static dcr_host_t ppc4xx_mq_dcr_host; +/* Pointer to DMA0, DMA1 CP/CS FIFO */ +static void *ppc4xx_dma_fifo_buf; + +static char *ppc_adma_errors[] = { + [PPC_ADMA_INIT_OK] = "ok", + [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource", + [PPC_ADMA_INIT_MEMREG] = "failed to request memory region", + [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev " + "structure", + [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for " + "hardware descriptors", + [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel", + [PPC_ADMA_INIT_IRQ1] = "failed to request first irq", + [PPC_ADMA_INIT_IRQ2] = "failed to request second irq", + [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device", +}; + +static void ppc4xx_adma_dma2rxor_set_mult(struct ppc4xx_adma_desc_slot *desc, + int index, u8 mult); +static void print_cb_list(struct ppc4xx_adma_chan *chan, + struct ppc4xx_adma_desc_slot *iter); +/** + * ppc4xx_can_rxor - check if the operands may be processed with RXOR + */ +static int ppc4xx_can_rxor(struct page **srcs, int src_cnt, size_t len) +{ + int i, order = 0, state = 0; + int idx = 0; + + if (unlikely(!(src_cnt > 1))) + return 0; + + BUG_ON(src_cnt > ARRAY_SIZE(ppc4xx_rxor_srcs)); + + /* Skip holes in the source list before checking */ + for (i = 0; i < src_cnt; i++) { + if (!srcs[i]) + continue; + ppc4xx_rxor_srcs[idx++] = srcs[i]; + } + src_cnt = idx; + + for (i = 1; i < src_cnt; i++) { + char *cur_addr = page_address(ppc4xx_rxor_srcs[i]); + char *old_addr = page_address(ppc4xx_rxor_srcs[i - 1]); + + switch (state) { + case 0: + if (cur_addr == old_addr + len) { + /* direct RXOR */ + order = 1; + state = 1; + } else if (old_addr == cur_addr + len) { + /* reverse RXOR */ + order = -1; + state = 1; + } else + goto out; + break; + case 1: + if ((i == src_cnt - 2) || + (order == -1 && cur_addr != old_addr - len)) { + order = 0; + state = 0; + } else if ((cur_addr == old_addr + len * order) || + (cur_addr == old_addr + 2 * len) || + (cur_addr == old_addr + 3 * len)) { + state = 2; + } else { + order = 0; + state = 0; + } + break; + case 2: + order = 0; + state = 0; + break; + } + } + + out: + if (state == 1 || state == 2) + return 1; + + return 0; +} + +/** + * ppc4xx_init_rxor_cursor - + */ +static void ppc4xx_init_rxor_cursor(struct ppc4xx_rxor *cursor) +{ + memset(cursor, 0, sizeof(struct ppc4xx_rxor)); + cursor->state = 2; +} + +/** + * ppc4xx_adma_init_dma2rxor_slot - + */ +static void ppc4xx_adma_init_dma2rxor_slot(struct ppc4xx_adma_desc_slot + *desc, dma_addr_t * src, + int src_cnt) +{ + int i; + + /* initialize CDB */ + for (i = 0; i < src_cnt; i++) { + ppc4xx_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i, + desc->src_cnt, (u32) src[i]); + } +} + +/****************************************************************************** + * Command (Descriptor) Blocks low-level routines + ******************************************************************************/ +/** + * 
ppc4xx_desc_set_rxor_block_size - set RXOR block size + */ +static void ppc4xx_desc_set_rxor_block_size(u32 byte_count) +{ + /* assume that byte_count is aligned on the 512-boundary; + * thus write it directly to the register (bits 23:31 are + * reserved there). + */ + dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CF2H, byte_count); +} + + +static int ppc4xx_adma_estimate(struct dma_chan *chan, + enum dma_transaction_type cap, + struct page **dst_lst, int dst_cnt, + struct page **src_lst, int src_cnt, + size_t src_sz) +{ + int ef = 1; + + if (cap == DMA_PQ || cap == DMA_PQ_VAL) { + /* If RAID-6 capabilities were not activated don't try + * to use them + */ + if (unlikely(!ppc4xx_r6_enabled)) + return -1; + } + /* In the current implementation of ppc4xx ADMA driver it + * makes sense to pick out only pq case, because it may be + * processed: + * (1) either using Biskup method on DMA2; + * (2) or on DMA0/1. + * Thus we give a favour to (1) if the sources are suitable; + * else let it be processed on one of the DMA0/1 engines. + * In the sum_product case where destination is also the + * source process it on DMA0/1 only. + */ + if (cap == DMA_PQ && chan->chan_id == PPC4XX_XOR_ID) { + + if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1]) + ef = 0; /* sum_product case, process on DMA0/1 */ + else if (ppc4xx_can_rxor(src_lst, src_cnt, src_sz)) + ef = 3; /* override (DMA0/1 + idle) */ + else + ef = 0; /* can't process on DMA2 if !rxor */ + } + + /* channel idleness increases the priority */ + if (likely(ef) && !ppc4xx_chan_is_busy(to_ppc4xx_adma_chan(chan))) + ef++; + + return ef; +} + +struct dma_chan *ppc4xx_async_tx_find_best_channel(enum dma_transaction_type cap, + struct page **dst_lst, + int dst_cnt, + struct page **src_lst, + int src_cnt, + size_t src_sz) +{ + struct dma_chan *best_chan = NULL; + struct ppc_dma_chan_ref *ref; + int best_rank = -1; + + if (unlikely(!src_sz)) + return NULL; + if (src_sz > PAGE_SIZE) { + /* + * should a user of the api ever pass > PAGE_SIZE requests + * we sort out cases where temporary page-sized buffers + * are used. + */ + switch (cap) { + case DMA_PQ: + if (src_cnt == 1 && dst_lst[1] == src_lst[0]) + return NULL; + if (src_cnt == 2 && dst_lst[1] == src_lst[1]) + return NULL; + break; + case DMA_PQ_VAL: + case DMA_XOR_VAL: + return NULL; + default: + break; + } + } + + list_for_each_entry(ref, &ppc4xx_adma_chan_list, node) { + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { + int rank; + + rank = ppc4xx_adma_estimate(ref->chan, cap, dst_lst, + dst_cnt, src_lst, + src_cnt, src_sz); + if (rank > best_rank) { + best_rank = rank; + best_chan = ref->chan; + } + } + } + + return best_chan; +} + +EXPORT_SYMBOL_GPL(ppc4xx_async_tx_find_best_channel); + +/** + * ppc4xx_dma01_prep_sum_product - + * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also + * the source. 
+ */ +static struct ppc4xx_adma_desc_slot +*ppc4xx_dma01_prep_sum_product(struct ppc4xx_adma_chan*ppc4xx_chan, + dma_addr_t * dst, + dma_addr_t * src, + int src_cnt, + const unsigned char *scf, + size_t len, + unsigned long flags) +{ + struct ppc4xx_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC4XX_DESC_WXOR, &op); + slot_cnt = 3; + + spin_lock_bh(&ppc4xx_chan->lock); + + /* WXOR, each descriptor occupies one slot */ + sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc4xx_adma_chan *chan; + struct ppc4xx_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc4xx_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = 1; + /* 1st descriptor, src[1] data to q page and zero destination */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc4xx_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + clear_bit(PPC4XX_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc4xx_desc_set_dest_addr(iter, chan, 0, ppc4xx_chan->qdest, 1); + ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[1]); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + + /* 2nd descriptor, multiply src[1] data and store the + * result in destination */ + iter = list_first_entry(&iter->chain_node, + struct ppc4xx_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC4XX_DESC_INT, &iter->flags); + else + clear_bit(PPC4XX_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + ppc4xx_chan->qdest); + ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[1]); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + + /* + * 3rd descriptor, multiply src[0] data and xor it + * with destination + */ + iter = list_first_entry(&iter->chain_node, + struct ppc4xx_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC4XX_DESC_INT, &iter->flags); + else + clear_bit(PPC4XX_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc4xx_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, + *dst, 0); + ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc4xx_chan->lock); + + return sw_desc; +} + +static +struct ppc4xx_adma_desc_slot *ppc4xx_dma01_prep_pq(struct ppc4xx_adma_chan *ppc4xx_chan, + dma_addr_t *dst, + int dst_cnt, + dma_addr_t *src, + int src_cnt, + const unsigned char *scf, + size_t len, + unsigned long flags) +{ + int slot_cnt; + struct ppc4xx_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op 
= 0; + unsigned char mult = 1; + + pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len); + /* select operations WXOR/RXOR depending on the + * source addresses of operators and the number + * of destinations (RXOR support only Q-parity calculations) + */ + set_bit(PPC4XX_DESC_WXOR, &op); + if (!test_and_set_bit(PPC4XX_RXOR_RUN, &ppc4xx_rxor_state)) { + /* no active RXOR; + * do RXOR if: + * - there are more than 1 source, + * - len is aligned on 512-byte boundary, + * - source addresses fit to one of 4 possible regions. + */ + if (src_cnt > 1 && + !(len & MQ0_CF2H_RXOR_BS_MASK) && + (src[0] + len) == src[1]) { + /* may do RXOR R1 R2 */ + set_bit(PPC4XX_DESC_RXOR, &op); + if (src_cnt != 2) { + /* may try to enhance region of RXOR */ + if ((src[1] + len) == src[2]) { + /* do RXOR R1 R2 R3 */ + set_bit(PPC4XX_DESC_RXOR123, &op); + } else if ((src[1] + len * 2) == src[2]) { + /* do RXOR R1 R2 R4 */ + set_bit(PPC4XX_DESC_RXOR124, &op); + } else if ((src[1] + len * 3) == src[2]) { + /* do RXOR R1 R2 R5 */ + set_bit(PPC4XX_DESC_RXOR125, &op); + } else { + /* do RXOR R1 R2 */ + set_bit(PPC4XX_DESC_RXOR12, &op); + } + } else { + /* do RXOR R1 R2 */ + set_bit(PPC4XX_DESC_RXOR12, &op); + } + } + + if (!test_bit(PPC4XX_DESC_RXOR, &op)) { + /* can not do this operation with RXOR */ + clear_bit(PPC4XX_RXOR_RUN, &ppc4xx_rxor_state); + } else { + /* can do; set block size right now */ + ppc4xx_desc_set_rxor_block_size(len); + } + } + + /* Number of necessary slots depends on operation type selected */ + if (!test_bit(PPC4XX_DESC_RXOR, &op)) { + /* This is a WXOR only chain. Need descriptors for each + * source to GF-XOR them with WXOR, and need descriptors + * for each destination to zero them with WXOR + */ + slot_cnt = src_cnt; + + if (flags & DMA_PREP_ZERO_P) { + slot_cnt++; + set_bit(PPC4XX_ZERO_P, &op); + } + if (flags & DMA_PREP_ZERO_Q) { + slot_cnt++; + set_bit(PPC4XX_ZERO_Q, &op); + } + } else { + /* Need 1/2 descriptor for RXOR operation, and + * need (src_cnt - (2 or 3)) for WXOR of sources + * remained (if any) + */ + slot_cnt = dst_cnt; + + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC4XX_ZERO_P, &op); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC4XX_ZERO_Q, &op); + + if (test_bit(PPC4XX_DESC_RXOR12, &op)) + slot_cnt += src_cnt - 2; + else + slot_cnt += src_cnt - 3; + + /* Thus we have either RXOR only chain or + * mixed RXOR/WXOR + */ + if (slot_cnt == dst_cnt) + /* RXOR only chain */ + clear_bit(PPC4XX_DESC_WXOR, &op); + } + + spin_lock_bh(&ppc4xx_chan->lock); + /* for both RXOR/WXOR each descriptor occupies one slot */ + sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1); + if (sw_desc) { + ppc4xx_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt, flags, op); + + /* setup dst/src/mult */ + pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n", + __func__, dst[0], dst[1]); + ppc4xx_adma_pq_set_dest(sw_desc, dst, flags); + while (src_cnt--) { + ppc4xx_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt); + + /* NOTE: "Multi = 0 is equivalent to = 1" as it + * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf + * doesn't work for RXOR with DMA0/1! Instead, multi=0 + * leads to zeroing source data after RXOR. + * So, for P case set-up mult=1 explicitly. 
+ */ + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc4xx_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + + /* Setup byte count foreach slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + } + } + spin_unlock_bh(&ppc4xx_chan->lock); + + return sw_desc; +} + +/** + * ppc4xx_adma_prep_dma_pqzero_sum - prepare CDB group for + * a PQ_ZERO_SUM operation + */ +struct dma_async_tx_descriptor +*ppc4xx_adma_prep_dma_pqzero_sum(struct dma_chan *chan, + dma_addr_t * pq, + dma_addr_t * src, + unsigned int src_cnt, + const unsigned char *scf, + size_t len, + enum sum_check_flags *pqres, + unsigned long flags) +{ + struct ppc4xx_adma_chan *ppc4xx_chan; + struct ppc4xx_adma_desc_slot *sw_desc, *iter; + dma_addr_t pdest, qdest; + int slot_cnt, slots_per_op, idst, dst_cnt; + + ppc4xx_chan = to_ppc4xx_adma_chan(chan); + + if (flags & DMA_PREP_PQ_DISABLE_P) + pdest = 0; + else + pdest = pq[0]; + + if (flags & DMA_PREP_PQ_DISABLE_Q) + qdest = 0; + else + qdest = pq[1]; + + ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc4xx_chan->device->id, + src, src_cnt, scf)); + + /* Always use WXOR for P/Q calculations (two destinations). + * Need 1 or 2 extra slots to verify results are zero. + */ + idst = dst_cnt = (pdest && qdest) ? 2 : 1; + + /* One additional slot per destination to clone P/Q + * before calculation (we have to preserve destinations). + */ + slot_cnt = src_cnt + dst_cnt * 2; + slots_per_op = 1; + + spin_lock_bh(&ppc4xx_chan->lock); + sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, slots_per_op); + if (sw_desc) { + ppc4xx_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt); + + /* Setup byte count for each slot just allocated */ + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + } + + if (pdest) { + struct dma_cdb *hw_desc; + struct ppc4xx_adma_chan *chan; + + iter = sw_desc->group_head; + chan = to_ppc4xx_adma_chan(iter->async_tx.chan); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc4xx_desc_set_dest_addr(iter, chan, 0, + ppc4xx_chan->pdest, 0); + ppc4xx_desc_set_src_addr(iter, chan, 0, 0, pdest); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = 0; + /* override pdest to preserve original P */ + pdest = ppc4xx_chan->pdest; + } + if (qdest) { + struct dma_cdb *hw_desc; + struct ppc4xx_adma_chan *chan; + + iter = list_first_entry(&sw_desc->group_list, + struct ppc4xx_adma_desc_slot, + chain_node); + chan = to_ppc4xx_adma_chan(iter->async_tx.chan); + + if (pdest) { + iter = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + } + + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + iter->src_cnt = 0; + iter->dst_cnt = 0; + ppc4xx_desc_set_dest_addr(iter, chan, 0, + ppc4xx_chan->qdest, 0); + ppc4xx_desc_set_src_addr(iter, chan, 0, 0, qdest); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = 0; + /* override qdest to preserve original Q 
*/ + qdest = ppc4xx_chan->qdest; + } + + /* Setup destinations for P/Q ops */ + ppc4xx_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest); + + /* Setup zero QWORDs into DCHECK CDBs */ + idst = dst_cnt; + list_for_each_entry_reverse(iter, &sw_desc->group_list, + chain_node) { + /* + * The last CDB corresponds to Q-parity check, + * the one before last CDB corresponds + * P-parity check + */ + if (idst == DMA_DEST_MAX_NUM) { + if (idst == dst_cnt) { + set_bit(PPC4XX_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC4XX_DESC_PCHECK, + &iter->flags); + } + } else { + if (qdest) { + set_bit(PPC4XX_DESC_QCHECK, + &iter->flags); + } else { + set_bit(PPC4XX_DESC_PCHECK, + &iter->flags); + } + } + iter->xor_check_result = pqres; + + /* + * set it to zero, if check fail then result will + * be updated + */ + *iter->xor_check_result = 0; + ppc4xx_desc_set_dcheck(iter, ppc4xx_chan, ppc4xx_qword); + + if (!(--dst_cnt)) + break; + } + + /* Setup sources and mults for P/Q ops */ + list_for_each_entry_continue_reverse(iter, &sw_desc->group_list, + chain_node) { + struct ppc4xx_adma_chan *chan; + u32 mult_dst; + + chan = to_ppc4xx_adma_chan(iter->async_tx.chan); + ppc4xx_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, + src[src_cnt - 1]); + if (qdest) { + mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 : + DMA_CDB_SG_DST1; + ppc4xx_desc_set_src_mult(iter, chan, + DMA_CUED_MULT1_OFF, + mult_dst, + scf[src_cnt - 1]); + } + if (!(--src_cnt)) + break; + } + } + spin_unlock_bh(&ppc4xx_chan->lock); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +/** + * ppc4xx_adma_prep_dma_xor_zero_sum - prepare CDB group for + * XOR ZERO_SUM operation + */ +struct dma_async_tx_descriptor +*ppc4xx_adma_prep_dma_xor_zero_sum(struct dma_chan *chan, + dma_addr_t * src, + unsigned int src_cnt, + size_t len, + enum sum_check_flags *result, + unsigned long flags) +{ + struct dma_async_tx_descriptor *tx; + dma_addr_t pq[2]; + + /* validate P, disable Q */ + pq[0] = src[0]; + pq[1] = 0; + flags |= DMA_PREP_PQ_DISABLE_Q; + + tx = ppc4xx_adma_prep_dma_pqzero_sum(chan, pq, &src[1], + src_cnt - 1, 0, len, + result, flags); + return tx; +} + +void ppc4xx_adma_set_capabilities(struct ppc4xx_adma_device *adev) +{ + switch (adev->id) { + case PPC4XX_DMA0_ID: + case PPC4XX_DMA1_ID: + dma_cap_set(DMA_MEMCPY, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + dma_cap_set(DMA_MEMSET, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask); + dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask); + break; + case PPC4XX_XOR_ID: + dma_cap_set(DMA_XOR, adev->common.cap_mask); + dma_cap_set(DMA_PQ, adev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask); + adev->common.cap_mask = adev->common.cap_mask; + break; + } + + if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) { + switch (adev->id) { + case PPC4XX_DMA0_ID: + dma_set_maxpq(&adev->common, + DMA0_FIFO_SIZE / sizeof(struct dma_cdb), + 0); + break; + case PPC4XX_DMA1_ID: + dma_set_maxpq(&adev->common, + DMA1_FIFO_SIZE / sizeof(struct dma_cdb), + 0); + break; + case PPC4XX_XOR_ID: + adev->common.max_pq = XOR_MAX_OPS * 3; + break; + } + } + if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) { + switch (adev->id) { + case PPC4XX_DMA0_ID: + adev->common.max_pq = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC4XX_DMA1_ID: + adev->common.max_pq = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + } + if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) { + switch (adev->id) { + 
case PPC4XX_DMA0_ID: + adev->common.max_xor = DMA0_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + case PPC4XX_DMA1_ID: + adev->common.max_xor = DMA1_FIFO_SIZE / + sizeof(struct dma_cdb); + break; + } + } + pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: " + "( %s%s%s%s%s%s%s)\n", + dev_name(adev->dev), + dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "", + dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "", + dma_has_cap(DMA_XOR_VAL, + adev->common.cap_mask) ? "xor_val " : "", + dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_MEMSET, adev->common.cap_mask) ? "memset " : "", + dma_has_cap(DMA_INTERRUPT, + adev->common.cap_mask) ? "intr " : ""); +} +static struct ppc4xx_adma_desc_slot +*ppc4xx_dma2_prep_pq(struct ppc4xx_adma_chan *ppc4xx_chan, + dma_addr_t * dst, + int dst_cnt, + dma_addr_t * src, + int src_cnt, + const unsigned char *scf, + size_t len, + unsigned long flags) +{ + int slot_cnt, descs_per_op; + struct ppc4xx_adma_desc_slot *sw_desc = NULL, *iter; + unsigned long op = 0; + unsigned char mult = 1; + + BUG_ON(!dst_cnt); + /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n", + __func__, dst_cnt, src_cnt, len); */ + + spin_lock_bh(&ppc4xx_chan->lock); + descs_per_op = ppc4xx_dma2_pq_slot_count(src, src_cnt, len); + if (descs_per_op < 0) { + spin_unlock_bh(&ppc4xx_chan->lock); + return NULL; + } + + /* depending on number of sources we have 1 or 2 RXOR chains */ + slot_cnt = descs_per_op * dst_cnt; + + sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1); + if (sw_desc) { + op = slot_cnt; + sw_desc->async_tx.flags = flags; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc4xx_desc_init_dma2pq(iter, dst_cnt, src_cnt, + --op ? 0 : flags); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + + ppc4xx_init_rxor_cursor(&(iter->rxor_cursor)); + iter->rxor_cursor.len = len; + iter->descs_per_op = descs_per_op; + } + op = 0; + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + op++; + if (op % descs_per_op == 0) + ppc4xx_adma_init_dma2rxor_slot(iter, src, + src_cnt); + if (likely(!list_is_last(&iter->chain_node, + &sw_desc->group_list))) { + /* set 'next' pointer */ + iter->hw_next = + list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + ppc4xx_xor_set_link(iter, iter->hw_next); + } else { + /* this is the last descriptor. 
*/ + iter->hw_next = NULL; + } + } + + /* fixup head descriptor */ + sw_desc->dst_cnt = dst_cnt; + if (flags & DMA_PREP_ZERO_P) + set_bit(PPC4XX_ZERO_P, &sw_desc->flags); + if (flags & DMA_PREP_ZERO_Q) + set_bit(PPC4XX_ZERO_Q, &sw_desc->flags); + + /* setup dst/src/mult */ + ppc4xx_adma_pq_set_dest(sw_desc, dst, flags); + + while (src_cnt--) { + /* handle descriptors (if dst_cnt == 2) inside + * the ppc4xx_adma_pq_set_srcxxx() functions + */ + ppc4xx_adma_pq_set_src(sw_desc, src[src_cnt], src_cnt); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + mult = scf[src_cnt]; + ppc4xx_adma_pq_set_src_mult(sw_desc, + mult, src_cnt, dst_cnt - 1); + } + } + spin_unlock_bh(&ppc4xx_chan->lock); + ppc4xx_desc_set_rxor_block_size(len); + return sw_desc; +} + +/** + * ppc4xx_dma01_prep_mult - + * for Q operation where destination is also the source + */ +static struct ppc4xx_adma_desc_slot +*ppc4xx_dma01_prep_mult(struct ppc4xx_adma_chan *ppc4xx_chan, + dma_addr_t * dst, + int dst_cnt, + dma_addr_t * src, + int src_cnt, + const unsigned char *scf, + size_t len, + unsigned long flags) +{ + struct ppc4xx_adma_desc_slot *sw_desc = NULL; + unsigned long op = 0; + int slot_cnt; + + set_bit(PPC4XX_DESC_WXOR, &op); + slot_cnt = 2; + + spin_lock_bh(&ppc4xx_chan->lock); + + /* use WXOR, each descriptor occupies one slot */ + sw_desc = ppc4xx_adma_alloc_slots(ppc4xx_chan, slot_cnt, 1); + if (sw_desc) { + struct ppc4xx_adma_chan *chan; + struct ppc4xx_adma_desc_slot *iter; + struct dma_cdb *hw_desc; + + chan = to_ppc4xx_adma_chan(sw_desc->async_tx.chan); + set_bits(op, &sw_desc->flags); + sw_desc->src_cnt = src_cnt; + sw_desc->dst_cnt = dst_cnt; + /* First descriptor, zero data in the destination and copy it + * to q page using MULTICAST transfer. + */ + iter = list_first_entry(&sw_desc->group_list, + struct ppc4xx_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + /* set 'next' pointer */ + iter->hw_next = list_entry(iter->chain_node.next, + struct ppc4xx_adma_desc_slot, + chain_node); + clear_bit(PPC4XX_DESC_INT, &iter->flags); + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MULTICAST; + + ppc4xx_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + ppc4xx_desc_set_dest_addr(iter, chan, 0, dst[1], 1); + ppc4xx_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, + src[0]); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + + /* + * Second descriptor, multiply data from the q page + * and store the result in real destination. 
+ */ + iter = list_first_entry(&iter->chain_node, + struct ppc4xx_adma_desc_slot, + chain_node); + memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); + iter->hw_next = NULL; + if (flags & DMA_PREP_INTERRUPT) + set_bit(PPC4XX_DESC_INT, &iter->flags); + else + clear_bit(PPC4XX_DESC_INT, &iter->flags); + + hw_desc = iter->hw_desc; + hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2; + ppc4xx_desc_set_src_addr(iter, chan, 0, + DMA_CUED_XOR_HB, dst[1]); + ppc4xx_desc_set_dest_addr(iter, chan, + DMA_CUED_XOR_BASE, dst[0], 0); + + ppc4xx_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF, + DMA_CDB_SG_DST1, scf[0]); + ppc4xx_desc_set_byte_count(iter, ppc4xx_chan, len); + iter->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + + spin_unlock_bh(&ppc4xx_chan->lock); + + return sw_desc; +} + +/** + * ppc4xx_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation + */ +struct dma_async_tx_descriptor *ppc4xx_adma_prep_dma_pq(struct dma_chan + *chan, + dma_addr_t * dst, + dma_addr_t * src, + unsigned int + src_cnt, const unsigned + char *scf, + size_t len, + unsigned long flags) +{ + struct ppc4xx_adma_chan *ppc4xx_chan; + struct ppc4xx_adma_desc_slot *sw_desc = NULL; + int dst_cnt = 0; + + ppc4xx_chan = to_ppc4xx_adma_chan(chan); + + BUG_ON(!len); + BUG_ON(unlikely(len > PPC4XX_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(!src_cnt); + + if (src_cnt == 1 && dst[1] == src[0]) { + dma_addr_t dest[2]; + + /* dst[1] is real destination (Q) */ + dest[0] = dst[1]; + /* this is the page to multicast source data to */ + dest[1] = ppc4xx_chan->qdest; + sw_desc = ppc4xx_dma01_prep_mult(ppc4xx_chan, + dest, 2, src, src_cnt, scf, + len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (src_cnt == 2 && dst[1] == src[1]) { + sw_desc = ppc4xx_dma01_prep_sum_product(ppc4xx_chan, + &dst[1], src, 2, scf, + len, flags); + return sw_desc ? &sw_desc->async_tx : NULL; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_P)) { + BUG_ON(!dst[0]); + dst_cnt++; + flags |= DMA_PREP_ZERO_P; + } + + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) { + BUG_ON(!dst[1]); + dst_cnt++; + flags |= DMA_PREP_ZERO_Q; + } + + BUG_ON(!dst_cnt); + + dev_dbg(ppc4xx_chan->device->common.dev, + "ppc4xx adma%d: %s src_cnt: %d len: %u int_en: %d\n", + ppc4xx_chan->device->id, __func__, src_cnt, len, + flags & DMA_PREP_INTERRUPT ? 1 : 0); + + switch (ppc4xx_chan->device->id) { + case PPC4XX_DMA0_ID: + case PPC4XX_DMA1_ID: + sw_desc = ppc4xx_dma01_prep_pq(ppc4xx_chan, + dst, dst_cnt, src, src_cnt, + scf, len, flags); + break; + + case PPC4XX_XOR_ID: + sw_desc = ppc4xx_dma2_prep_pq(ppc4xx_chan, + dst, dst_cnt, src, src_cnt, + scf, len, flags); + break; + } + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} +int ppc4xx_adma_setup_irqs(struct ppc4xx_adma_device *adev, + struct ppc4xx_adma_chan *chan, int *initcode) +{ + struct platform_device *ofdev; + struct device_node *np; + int ret; + + ofdev = container_of(adev->dev, struct platform_device, dev); + np = ofdev->dev.of_node; + if (adev->id != PPC4XX_XOR_ID) { + adev->err_irq = irq_of_parse_and_map(np, 1); + if (adev->err_irq == NO_IRQ) { + dev_warn(adev->dev, "no err irq resource?\n"); + *initcode = PPC_ADMA_INIT_IRQ2; + adev->err_irq = -ENXIO; + } else + atomic_inc(&ppc4xx_adma_err_irq_ref); + } else { + adev->err_irq = -ENXIO; + } + + adev->irq = irq_of_parse_and_map(np, 0); + if (adev->irq == NO_IRQ) { + dev_err(adev->dev, "no irq resource\n"); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -ENXIO; + goto err_irq_map; + } + dev_dbg(adev->dev, "irq %d, err irq %d\n", adev->irq, adev->err_irq); + + ret = request_irq(adev->irq, ppc4xx_adma_eot_handler, + 0, dev_driver_string(adev->dev), chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", adev->irq); + *initcode = PPC_ADMA_INIT_IRQ1; + ret = -EIO; + goto err_req1; + } + + /* only DMA engines have a separate error IRQ + * so it's Ok if err_irq < 0 in XOR engine case. + */ + if (adev->err_irq > 0) { + /* both DMA engines share common error IRQ */ + ret = request_irq(adev->err_irq, + ppc4xx_adma_err_handler, + IRQF_SHARED, + dev_driver_string(adev->dev), chan); + if (ret) { + dev_err(adev->dev, "can't request irq %d\n", + adev->err_irq); + *initcode = PPC_ADMA_INIT_IRQ2; + ret = -EIO; + goto err_req2; + } + } + + if (adev->id == PPC4XX_XOR_ID) { + /* enable XOR engine interrupts */ + iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT, + &adev->xor_reg->ier); + } else { + u32 mask, enable; + +#if defined(CONFIG_440SPe) || defined(CONFIG_440SP) + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); +#endif + if (!np) { + pr_err("%s: can't find I2O device tree node\n", + __func__); + ret = -ENODEV; + goto err_req2; + } + adev->i2o_reg = of_iomap(np, 0); + if (!adev->i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + ret = -EINVAL; + goto err_req2; + } + of_node_put(np); + /* Unmask 'CS FIFO Attention' interrupts and + * enable generating interrupts on errors + */ + enable = (adev->id == PPC4XX_DMA0_ID) ? + ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) & enable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + return 0; + + err_req2: + free_irq(adev->irq, chan); + err_req1: + irq_dispose_mapping(adev->irq); + err_irq_map: + if (adev->err_irq > 0) { + if (atomic_dec_and_test(&ppc4xx_adma_err_irq_ref)) + irq_dispose_mapping(adev->err_irq); + } + return ret; +} + +void ppc4xx_adma_release_irqs(struct ppc4xx_adma_device *adev, + struct ppc4xx_adma_chan *chan) +{ + u32 mask, disable; + + if (adev->id == PPC4XX_XOR_ID) { + /* disable XOR engine interrupts */ + mask = ioread32be(&adev->xor_reg->ier); + mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT | + XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT); + iowrite32be(mask, &adev->xor_reg->ier); + } else { + /* disable DMAx engine interrupts */ + disable = (adev->id == PPC4XX_DMA0_ID) ? 
+ (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) : + (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM); + mask = ioread32(&adev->i2o_reg->iopim) | disable; + iowrite32(mask, &adev->i2o_reg->iopim); + } + free_irq(adev->irq, chan); + irq_dispose_mapping(adev->irq); + if (adev->err_irq > 0) { + free_irq(adev->err_irq, chan); + if (atomic_dec_and_test(&ppc4xx_adma_err_irq_ref)) { + irq_dispose_mapping(adev->err_irq); + iounmap(adev->i2o_reg); + } + } +} + +/* + * Common initialisation for RAID engines; allocate memory for + * DMAx FIFOs, perform configuration common for all DMA engines. + * Further DMA engine specific configuration is done at probe time. + */ +static int ppc4xx_configure_raid_devices(void) +{ + struct device_node *np; + struct resource i2o_res; + struct i2o_regs __iomem *i2o_reg; + dcr_host_t i2o_dcr_host; + unsigned int dcr_base, dcr_len; + int i, ret; + +#if defined(CONFIG_440SPe) || defined(CONFIG_440SP) + np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); +#endif + if (!np) { + pr_err("%s: can't find I2O device tree node\n", __func__); + return -ENODEV; + } + + if (of_address_to_resource(np, 0, &i2o_res)) { + of_node_put(np); + return -EINVAL; + } + + i2o_reg = of_iomap(np, 0); + if (!i2o_reg) { + pr_err("%s: failed to map I2O registers\n", __func__); + of_node_put(np); + return -EINVAL; + } + + /* Get I2O DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + + i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(i2o_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + of_node_put(np); + iounmap(i2o_reg); + return -ENODEV; + } + of_node_put(np); + + /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share + * the base address of FIFO memory space. + * Actually we need twice more physical memory than programmed in the + * register (because there are two FIFOs for each DMA: CP and CS) + */ + ppc4xx_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1, + GFP_KERNEL); + if (!ppc4xx_dma_fifo_buf) { + pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__); + iounmap(i2o_reg); + dcr_unmap(i2o_dcr_host, dcr_len); + return -ENOMEM; + } + + /* + * Configure h/w + */ + /* Reset I2O/DMA */ + mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA); + mtdcri(SDR0, DCRN_SDR0_SRST, 0); + + /* Setup the base address of mmaped registers */ + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32) (i2o_res.start >> 32)); + dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32) (i2o_res.start) | + I2O_REG_ENABLE); + dcr_unmap(i2o_dcr_host, dcr_len); + + /* Setup FIFO memory space base address */ + iowrite32(0, &i2o_reg->ifbah); + iowrite32(((u32) __pa(ppc4xx_dma_fifo_buf)), &i2o_reg->ifbal); + + /* set zero FIFO size for I2O, so the whole + * ppc4xx_dma_fifo_buf is used by DMAs. + * DMAx_FIFOs will be configured while probe. + */ + iowrite32(0, &i2o_reg->ifsiz); + iounmap(i2o_reg); + + /* To prepare WXOR/RXOR functionality we need access to + * Memory Queue Module DCRs (finally it will be enabled + * via /sys interface of the ppc4xx ADMA driver). 
+ */ +#if defined(CONFIG_440SPe) || defined(CONFIG_440SP) + np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe"); +#endif + if (!np) { + pr_err("%s: can't find MQ device tree node\n", __func__); + ret = -ENODEV; + goto out_free; + } + + /* Get MQ DCRs base */ + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + if (!dcr_base && !dcr_len) { + pr_err("%s: can't get DCR registers base/len!\n", + np->full_name); + ret = -ENODEV; + goto out_mq; + } + + ppc4xx_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); + if (!DCR_MAP_OK(ppc4xx_mq_dcr_host)) { + pr_err("%s: failed to map DCRs!\n", np->full_name); + ret = -ENODEV; + goto out_mq; + } + of_node_put(np); + ppc4xx_mq_dcr_len = dcr_len; + + /* Set HB alias */ + dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB); + + /* Set: + * - LL transaction passing limit to 1; + * - Memory controller cycle limit to 1; + * - Galois Polynomial to 0x14d (default) + */ + dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL, + (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) | + (PPC4XX_DEFAULT_POLY << MQ0_CFBHL_POLY)); + + atomic_set(&ppc4xx_adma_err_irq_ref, 0); + for (i = 0; i < PPC4XX_ADMA_ENGINES_NUM; i++) + ppc4xx_adma_devices[i] = -1; + + return 0; + + out_mq: + of_node_put(np); + out_free: + kfree(ppc4xx_dma_fifo_buf); + return ret; +} + +/** + * ppc4xx_test_callback - called when test operation has been done + */ +static void ppc4xx_test_callback(void *unused) +{ + complete(&ppc4xx_r6_test_comp); +} + +/** + * ppc4xx_test_raid6 - test are RAID-6 capabilities enabled successfully. + * For this we just perform one WXOR operation with the same source + * and destination addresses, the GF-multiplier is 1; so if RAID-6 + * capabilities are enabled then we'll get src/dst filled with zero. + */ +static int ppc4xx_test_raid6(struct ppc4xx_adma_chan *chan) +{ + struct ppc4xx_adma_desc_slot *sw_desc, *iter; + struct page *pg; + char *a; + dma_addr_t dma_addr, addrs[2]; + unsigned long op = 0; + int rval = 0; + + set_bit(PPC4XX_DESC_WXOR, &op); + + pg = alloc_page(GFP_KERNEL); + if (!pg) + return -ENOMEM; + + spin_lock_bh(&chan->lock); + sw_desc = ppc4xx_adma_alloc_slots(chan, 1, 1); + if (sw_desc) { + /* 1 src, 1 dsr, int_ena, WXOR */ + ppc4xx_desc_init_dma01pq(sw_desc, 1, 1, 1, op); + list_for_each_entry(iter, &sw_desc->group_list, chain_node) { + ppc4xx_desc_set_byte_count(iter, chan, PAGE_SIZE); + iter->unmap_len = PAGE_SIZE; + } + } else { + rval = -EFAULT; + spin_unlock_bh(&chan->lock); + goto exit; + } + spin_unlock_bh(&chan->lock); + + /* Fill the test page with ones */ + memset(page_address(pg), 0xFF, PAGE_SIZE); + dma_addr = dma_map_page(chan->device->dev, pg, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Setup addresses */ + ppc4xx_adma_pq_set_src(sw_desc, dma_addr, 0); + ppc4xx_adma_pq_set_src_mult(sw_desc, 1, 0, 0); + addrs[0] = dma_addr; + addrs[1] = 0; + ppc4xx_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q); + + async_tx_ack(&sw_desc->async_tx); + sw_desc->async_tx.callback = ppc4xx_test_callback; + sw_desc->async_tx.callback_param = NULL; + + init_completion(&ppc4xx_r6_test_comp); + + ppc4xx_adma_tx_submit(&sw_desc->async_tx); + ppc4xx_adma_issue_pending(&chan->common); + + wait_for_completion(&ppc4xx_r6_test_comp); + + /* Now check if the test page is zeroed */ + a = page_address(pg); + if ((*(u32 *) a) == 0 && memcmp(a, a + 4, PAGE_SIZE - 4) == 0) { + /* page is zero - RAID-6 enabled */ + rval = 0; + } else { + /* RAID-6 was not enabled */ + rval = -EINVAL; + } + exit: + __free_page(pg); + return rval; +} + +/** + 
* ppc4xx_adma_remove - remove the asynch device + */ +int __devexit ppc4xx_adma_remove(struct platform_device *ofdev) +{ + struct ppc4xx_adma_device *adev = dev_get_drvdata(&ofdev->dev); + struct device_node *np = ofdev->dev.of_node; + struct resource res; + struct dma_chan *chan, *_chan; + struct ppc_dma_chan_ref *ref, *_ref; + struct ppc4xx_adma_chan *ppc4xx_chan; + + dev_set_drvdata(&ofdev->dev, NULL); + if (adev->id < PPC4XX_ADMA_ENGINES_NUM) + ppc4xx_adma_devices[adev->id] = -1; + + dma_async_device_unregister(&adev->common); + + list_for_each_entry_safe(chan, _chan, &adev->common.channels, + device_node) { + ppc4xx_chan = to_ppc4xx_adma_chan(chan); + ppc4xx_adma_release_irqs(adev, ppc4xx_chan); + tasklet_kill(&ppc4xx_chan->irq_tasklet); + if (adev->id != PPC4XX_XOR_ID) { + dma_unmap_page(&ofdev->dev, ppc4xx_chan->pdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(&ofdev->dev, ppc4xx_chan->qdest, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(ppc4xx_chan->pdest_page); + __free_page(ppc4xx_chan->qdest_page); + } + list_for_each_entry_safe(ref, _ref, &ppc4xx_adma_chan_list, + node) { + if (ppc4xx_chan == to_ppc4xx_adma_chan(ref->chan)) { + list_del(&ref->node); + kfree(ref); + } + } + list_del(&chan->device_node); + kfree(ppc4xx_chan); + } + + dma_free_coherent(adev->dev, adev->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + if (adev->id == PPC4XX_XOR_ID) + iounmap(adev->xor_reg); + else + iounmap(adev->dma_reg); + of_address_to_resource(np, 0, &res); + release_mem_region(res.start, resource_size(&res)); + kfree(adev); + return 0; +} + +/* + * /sys driver interface to enable h/w RAID-6 capabilities + * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/ + * directory are "devices", "enable" and "poly". + * "devices" shows available engines. + * "enable" is used to enable RAID-6 capabilities or to check + * whether these has been activated. + * "poly" allows setting/checking used polynomial (for PPC4xx only). + */ + +static ssize_t show_ppc4xx_devices(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + int i; + + for (i = 0; i < PPC4XX_ADMA_ENGINES_NUM; i++) { + if (ppc4xx_adma_devices[i] == -1) + continue; + size += snprintf(buf + size, PAGE_SIZE - size, + "PPC4XX-ADMA.%d: %s\n", i, + ppc_adma_errors[ppc4xx_adma_devices[i]]); + } + return size; +} +static ssize_t show_ppc4xx_r6enable(struct device_driver *dev, char *buf) +{ + return snprintf(buf, PAGE_SIZE, + "PPC440SP(e) RAID-6 capabilities are %sABLED.\n", + ppc4xx_r6_enabled ? "EN" : "DIS"); +} + +static ssize_t store_ppc4xx_r6enable(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long val; + + if (!count || count > 11) + return -EINVAL; + + if (!ppc4xx_r6_tchan) + return -EFAULT; + + /* Write a key */ + sscanf(buf, "%lx", &val); + dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_XORBA, val); + isync(); + + /* Verify whether it really works now */ + if (ppc4xx_test_raid6(ppc4xx_r6_tchan) == 0) { + pr_info("PPC440SP(e) RAID-6 has been activated " + "successfully\n"); + ppc4xx_r6_enabled = 1; + } else { + pr_info("PPC440SP(e) RAID-6 hasn't been activated!" 
+ " Error key ?\n"); + ppc4xx_r6_enabled = 0; + } + return count; +} + +static ssize_t show_ppc4xx_r6poly(struct device_driver *dev, char *buf) +{ + ssize_t size = 0; + u32 reg; + +#ifdef CONFIG_440SP + /* 440SP has fixed polynomial */ + reg = 0x4d; +#else + reg = dcr_read(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL); + reg >>= MQ0_CFBHL_POLY; + reg &= 0xFF; +#endif + + size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver " + "uses 0x1%02x polynomial.\n", reg); + return size; +} + +static ssize_t store_ppc4xx_r6poly(struct device_driver *dev, + const char *buf, size_t count) +{ + unsigned long reg, val; + +#ifdef CONFIG_440SP + /* 440SP uses default 0x14D polynomial only */ + return -EINVAL; +#endif + + if (!count || count > 6) + return -EINVAL; + + /* e.g., 0x14D or 0x11D */ + sscanf(buf, "%lx", &val); + + if (val & ~0x1FF) + return -EINVAL; + + val &= 0xFF; + reg = dcr_read(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL); + reg &= ~(0xFF << MQ0_CFBHL_POLY); + reg |= val << MQ0_CFBHL_POLY; + dcr_write(ppc4xx_mq_dcr_host, DCRN_MQ0_CFBHL, reg); + + return count; +} + +static DRIVER_ATTR(devices, S_IRUGO, show_ppc4xx_devices, NULL); +static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc4xx_r6enable, + store_ppc4xx_r6enable); +static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc4xx_r6poly, + store_ppc4xx_r6poly); +int ppc4xx_adma_hw_init(void) +{ + int ret; + + ret = ppc4xx_configure_raid_devices(); + if (ret) + return ret; + + ret = of_register_platform_driver(&ppc4xx_adma_driver); + if (ret) { + pr_err("%s: failed to register platform driver\n", __func__); + goto out_reg; + } + + /* Initialization status */ + ret = driver_create_file(&ppc4xx_adma_driver.driver, + &driver_attr_devices); + if (ret) + goto out_dev; + + /* RAID-6 h/w enable entry */ + ret = driver_create_file(&ppc4xx_adma_driver.driver, + &driver_attr_enable); + if (ret) + goto out_en; + + /* GF polynomial to use */ + ret = driver_create_file(&ppc4xx_adma_driver.driver, &driver_attr_poly); + if (!ret) + return ret; + + driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_enable); + out_en: + driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_devices); + out_dev: + /* User will not be able to enable h/w RAID-6 */ + pr_err("%s: failed to create RAID-6 driver interface\n", __func__); + out_reg: + dcr_unmap(ppc4xx_mq_dcr_host, ppc4xx_mq_dcr_len); + kfree(ppc4xx_dma_fifo_buf); + return ret; +} + +static void __exit ppc4xx_adma_exit(void) +{ + driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_poly); + driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_enable); + driver_remove_file(&ppc4xx_adma_driver.driver, &driver_attr_devices); + of_unregister_platform_driver(&ppc4xx_adma_driver); + dcr_unmap(ppc4xx_mq_dcr_host, ppc4xx_mq_dcr_len); + kfree(ppc4xx_dma_fifo_buf); +} -- 1.6.1.rc3