2015-11-07 17:11:23

by Rameshwar Sahu

[permalink] [raw]
Subject: [PATCH v2 0/3] dmaengine: Add support for APM X-Gene SoC CRC32C accelerator driver

This patch implements support for APM X-Gene SoC CRC32C h/w accelerator driver
and adds CRC32C computations support in dmaengine framework. APM X-Gene SoC has
DMA engine capable of performing CRC32C computations.

v2 changes:
1. Added helper function in dmaengine framework
2. Documented CRC32C support in Documentation/dmaengine/provider.txt
3. Fixed algo name
4. Fixed coding style issues

Signed-off-by: Rameshwar Prasad Sahu <[email protected]>
---

Rameshwar Prasad Sahu (3):
dmaengine: Add support for new feature CRC32C computations
dmaengine: xgene-dma: Add support for CRC32C computations via DMA
engine
Crypto: Add support for APM X-Gene SoC CRC32C h/w accelerator driver

Documentation/dmaengine/provider.txt | 3 +
drivers/crypto/Kconfig | 8 +
drivers/crypto/Makefile | 1 +
drivers/crypto/xgene-crc32c.c | 234 +++++++++++++++++++++++++
drivers/dma/dmaengine.c | 2 +
drivers/dma/xgene-dma.c | 314 ++++++++++++++++++++++++++++++++--
include/linux/dmaengine.h | 13 ++
7 files changed, 560 insertions(+), 15 deletions(-)
create mode 100755 drivers/crypto/xgene-crc32c.c


2015-11-07 17:11:49

by Rameshwar Sahu

[permalink] [raw]
Subject: [PATCH v2 1/3] dmaengine: Add support for new feature CRC32C computations

This patch adds support for new feature CRC32C computations in
dmaengine framework.

Signed-off-by: Rameshwar Prasad Sahu <[email protected]>
---
Documentation/dmaengine/provider.txt | 3 +++
drivers/dma/dmaengine.c | 2 ++
include/linux/dmaengine.h | 13 +++++++++++++
3 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index 67d4ce4..2399d6f 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -224,6 +224,9 @@ Currently, the types available are:
want to transfer a portion of uncompressed data directly to the
display to print it

+ * DMA_CRC32C
+ - The device is able to perform CRC32C computations
+
These various types will also affect how the source and destination
addresses change over time.

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 09479d4..8cd0365 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -865,6 +865,8 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_cyclic);
BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
!device->device_prep_interleaved_dma);
+ BUG_ON(dma_has_cap(DMA_CRC32C, device->cap_mask) &&
+ !device->device_prep_dma_crc32c);

BUG_ON(!device->device_tx_status);
BUG_ON(!device->device_issue_pending);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 7ea9184..7108d7c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -74,6 +74,7 @@ enum dma_transaction_type {
DMA_SLAVE,
DMA_CYCLIC,
DMA_INTERLEAVE,
+ DMA_CRC32C,
/* last transaction type for creation of the capabilities mask */
DMA_TX_TYPE_END,
};
@@ -645,6 +646,7 @@ enum dmaengine_alignment {
* The function takes a buffer of size buf_len. The callback function will
* be called after period_len bytes have been transferred.
* @device_prep_interleaved_dma: Transfer expression in a generic way.
+ * @device_prep_dma_crc32c: prepares a crc32c operation
* @device_config: Pushes a new configuration to a channel, return 0 or an error
* code
* @device_pause: Pauses any transfer happening on a channel. Returns
@@ -727,6 +729,9 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
struct dma_chan *chan, struct dma_interleaved_template *xt,
unsigned long flags);
+ struct dma_async_tx_descriptor *(*device_prep_dma_crc32c)(
+ struct dma_chan *chan, struct scatterlist *src_sg, size_t len,
+ unsigned int seed, u8 *result, unsigned long flags);

int (*device_config)(struct dma_chan *chan,
struct dma_slave_config *config);
@@ -824,6 +829,14 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
src_sg, src_nents, flags);
}

+/*
+ * Prepare a CRC32C computation on @chan by forwarding to the provider's
+ * device_prep_dma_crc32c operation.
+ *
+ * @chan:    channel to prepare the operation on
+ * @src_sg:  source scatterlist, passed through to the provider
+ * @len:     number of bytes to process, passed through to the provider
+ * @seed:    initial CRC value, passed through to the provider
+ * @result:  result pointer, passed through to the provider
+ * @flags:   descriptor flags, passed through to the provider
+ *
+ * Returns the descriptor produced by the provider, or NULL when @chan is
+ * NULL or its device does not implement device_prep_dma_crc32c.
+ */
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_crc3c(
+		struct dma_chan *chan, struct scatterlist *src_sg,
+		size_t len, unsigned int seed, u8 *result, unsigned long flags)
+{
+	/* Not every channel offers CRC32C; avoid calling a NULL operation */
+	if (!chan || !chan->device || !chan->device->device_prep_dma_crc32c)
+		return NULL;
+
+	return chan->device->device_prep_dma_crc32c(chan, src_sg, len,
+						    seed, result, flags);
+}
+
static inline int dmaengine_terminate_all(struct dma_chan *chan)
{
if (chan->device->device_terminate_all)
--
1.7.1

2015-11-07 17:12:46

by Rameshwar Sahu

[permalink] [raw]
Subject: [PATCH v2 2/3] dmaengine: xgene-dma: Add support for CRC32C computations via DMA engine

This patch implements CRC32C support to APM X-Gene SoC DMA engine driver.
Basically we have DMA engine in SoC capable of doing CRC32C computations.

Signed-off-by: Rameshwar Prasad Sahu <[email protected]>
---
drivers/dma/xgene-dma.c | 314 ++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 299 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index 9dfa2b0..d95dc72 100644
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -22,6 +22,7 @@
*/

#include <linux/acpi.h>
+#include <linux/bitrev.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
@@ -39,6 +40,7 @@
#define XGENE_DMA_RING_ENABLE BIT(31)
#define XGENE_DMA_RING_ID 0x08
#define XGENE_DMA_RING_ID_SETUP(v) ((v) | BIT(31))
+#define XGENE_DMA_RING_IS_BUFPOOL BIT(20)
#define XGENE_DMA_RING_ID_BUF 0x0C
#define XGENE_DMA_RING_ID_BUF_SETUP(v) (((v) << 9) | BIT(21))
#define XGENE_DMA_RING_THRESLD0_SET1 0x30
@@ -69,6 +71,8 @@
(((u32 *)(m))[2] |= (((v) >> 8) << 5))
#define XGENE_DMA_RING_ADDRH_SET(m, v) \
(((u32 *)(m))[3] |= ((v) >> 35))
+#define XGENE_DMA_RING_BUFMODE_SET(m) \
+ (((u32 *)(m))[3] |= ((0x3) << 20))
#define XGENE_DMA_RING_ACCEPTLERR_SET(m) \
(((u32 *)(m))[3] |= BIT(19))
#define XGENE_DMA_RING_SIZE_SET(m, v) \
@@ -106,6 +110,7 @@
#define XGENE_DMA_RING_INT2_MASK 0x90B0
#define XGENE_DMA_RING_INT3_MASK 0x90B8
#define XGENE_DMA_RING_INT4_MASK 0x90C0
+#define XGENE_DMA_CFG_RING_FQ_ASSOC 0x90DC
#define XGENE_DMA_CFG_RING_WQ_ASSOC 0x90E0
#define XGENE_DMA_ASSOC_RING_MNGR1 0xFFFFFFFF
#define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070
@@ -127,6 +132,10 @@
#define XGENE_DMA_DESC_LERR_POS 60
#define XGENE_DMA_DESC_BUFLEN_POS 48
#define XGENE_DMA_DESC_HOENQ_NUM_POS 48
+#define XGENE_DMA_DESC_BD_BIT BIT(0)
+#define XGENE_DMA_DESC_SD_BIT BIT(1)
+#define XGENE_DMA_DESC_CRCSEED_POS 8
+#define XGENE_DMA_DESC_FPQ_NUM_POS 32
#define XGENE_DMA_DESC_ELERR_RD(m) \
(((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
#define XGENE_DMA_DESC_LERR_RD(m) \
@@ -140,20 +149,25 @@
/* X-Gene DMA configurable parameters defines */
#define XGENE_DMA_RING_NUM 512
#define XGENE_DMA_BUFNUM 0x0
+#define XGENE_DMA_BUFPOOL_BUFNUM 0x20
#define XGENE_DMA_CPU_BUFNUM 0x18
#define XGENE_DMA_RING_OWNER_DMA 0x03
#define XGENE_DMA_RING_OWNER_CPU 0x0F
#define XGENE_DMA_RING_TYPE_REGULAR 0x01
+#define XGENE_DMA_RING_TYPE_BUFPOOL 0x02
#define XGENE_DMA_RING_WQ_DESC_SIZE 32 /* 32 Bytes */
+#define XGENE_DMA_BUFPOOL_DESC_SIZE 16 /* 16 Bytes */
#define XGENE_DMA_RING_NUM_CONFIG 5
#define XGENE_DMA_MAX_CHANNEL 4
#define XGENE_DMA_XOR_CHANNEL 0
#define XGENE_DMA_PQ_CHANNEL 1
+#define XGENE_DMA_FLYBY_CHANNEL 2
#define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */
#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */
#define XGENE_DMA_MAX_XOR_SRC 5
#define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0
#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL
+#define XGENE_DMA_MAX_FLYBY_BYTE_CNT 0x7FFF /* (32 KB - 1) */

/* X-Gene DMA descriptor error codes */
#define ERR_DESC_AXI 0x01
@@ -187,9 +201,14 @@
#define FLYBY_3SRC_XOR 0x90
#define FLYBY_4SRC_XOR 0xA0
#define FLYBY_5SRC_XOR 0xB0
+#define FLYBY_CRC16 0x10
+#define FLYBY_CRC32C 0x20
+#define FLYBY_CRC32 0x30
+#define FLYBY_CHECKSUM 0x40

/* X-Gene DMA SW descriptor flags */
#define XGENE_DMA_FLAG_64B_DESC BIT(0)
+#define XGENE_DMA_FLAG_FLYBY_ACTIVE BIT(1)

/* Define to dump X-Gene DMA descriptor */
#define XGENE_DMA_DESC_DUMP(desc, m) \
@@ -206,6 +225,11 @@
#define chan_err(chan, fmt, arg...) \
dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)

+/*
+ * 16-byte descriptor made of two little-endian 64-bit words; the buffer
+ * pool ring is indexed in units of this structure.
+ */
+struct xgene_dma_desc16 {
+ __le64 m0;
+ __le64 m1;
+};
+
struct xgene_dma_desc_hw {
__le64 m0;
__le64 m1;
@@ -232,6 +256,7 @@ struct xgene_dma_ring {
u16 slots;
u16 dst_ring_num;
u32 size;
+ bool is_bufpool;
void __iomem *cmd;
void __iomem *cmd_base;
dma_addr_t desc_paddr;
@@ -239,6 +264,7 @@ struct xgene_dma_ring {
enum xgene_dma_ring_cfgsize cfgsize;
union {
void *desc_vaddr;
+ struct xgene_dma_desc16 *desc16;
struct xgene_dma_desc_hw *desc_hw;
};
};
@@ -247,6 +273,7 @@ struct xgene_dma_desc_sw {
struct xgene_dma_desc_hw desc1;
struct xgene_dma_desc_hw desc2;
u32 flags;
+ u8 *flyby_result;
struct list_head node;
struct list_head tx_list;
struct dma_async_tx_descriptor tx;
@@ -276,6 +303,8 @@ struct xgene_dma_desc_sw {
* descriptors for further executions
* @rx_ring: receive ring descriptor that we use to get completed DMA
* descriptors during cleanup time
+ * @bufpool: Queue which maintains list of allocated memory for flyby operations
+ * needed by DMA engine
*/
struct xgene_dma_chan {
struct dma_chan dma_chan;
@@ -294,6 +323,7 @@ struct xgene_dma_chan {
struct tasklet_struct tasklet;
struct xgene_dma_ring tx_ring;
struct xgene_dma_ring rx_ring;
+ struct xgene_dma_ring bufpool;
};

/**
@@ -509,6 +539,102 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
}

+/*
+ * Program one source buffer slot of a flyby descriptor from a scatterlist
+ * entry.
+ *
+ * @ext8:   descriptor word receiving the buffer address and encoded length
+ * @sg:     scatterlist entry currently being consumed
+ * @paddr:  in/out cursor inside @sg; 0 means "start from the beginning of
+ *          @sg". It is reset to 0 when this call finishes the entry and is
+ *          advanced otherwise, so the next call continues in the same entry
+ * @nbytes: in/out count of request bytes that still have to be described
+ * @offset: bytes of @sg already consumed by earlier calls
+ *
+ * At most XGENE_DMA_MAX_BYTE_CNT bytes are accounted to one slot.
+ *
+ * Returns the number of bytes accounted to this slot.
+ */
+static u32 xgene_dma_set_flyby_src(__le64 *ext8, struct scatterlist *sg,
+				   dma_addr_t *paddr, u32 *nbytes, u32 offset)
+{
+	u32 len;
+
+	/* Fetch the DMA address from sg when starting a new entry */
+	if (*paddr == 0)
+		*paddr = sg_dma_address(sg);
+
+	len = sg_dma_len(sg) - offset;
+
+	/*
+	 * Never describe more than the request still needs: an sg entry may
+	 * be longer than the remaining byte count, and subtracting the full
+	 * entry length below would wrap the unsigned *nbytes.
+	 */
+	if (len > *nbytes)
+		len = *nbytes;
+
+	*ext8 |= cpu_to_le64(*paddr);
+	*ext8 |= cpu_to_le64(xgene_dma_encode_len(len));
+
+	if (len <= XGENE_DMA_MAX_BYTE_CNT) {
+		/* Slot covers the rest of this entry (or of the request) */
+		*nbytes -= len;
+		*paddr = 0;
+		return len;
+	}
+
+	/* Entry is larger than one slot: consume a full slot and move on */
+	*nbytes -= XGENE_DMA_MAX_BYTE_CNT;
+	*paddr += XGENE_DMA_MAX_BYTE_CNT;
+
+	return XGENE_DMA_MAX_BYTE_CNT;
+}
+
+/*
+ * Build the hardware descriptor(s) in @desc_sw for a flyby operation over
+ * @nbytes bytes of the scatterlist starting at @sg.
+ *
+ * The first source buffer goes into desc1. If request bytes remain, up to
+ * four more source buffers go into desc2 and desc1 is flagged with
+ * XGENE_DMA_DESC_NV_BIT. @opcode and the bit-reversed @seed are written to
+ * desc1->m2 and the total request length to desc1->m3.
+ *
+ * Returns 0 on success, or -EINVAL when request bytes are still left after
+ * the additional source slots were filled or the scatterlist ran out.
+ */
+static int xgene_dma_prep_flyby_desc(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc_sw,
+ struct scatterlist *sg, u32 nbytes,
+ u32 seed, u8 opcode)
+{
+ struct xgene_dma_desc_hw *desc1, *desc2;
+ dma_addr_t paddr = 0;
+ /* Keep the total length; nbytes itself is consumed while filling slots */
+ u32 len = nbytes;
+ u32 offset = 0;
+ int i;
+
+ /* Get 1st descriptor */
+ desc1 = &desc_sw->desc1;
+ xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+ /* Set 1st source address */
+ offset += xgene_dma_set_flyby_src(&desc1->m1, sg, &paddr,
+ &nbytes, offset);
+
+ /* Whole request fits in the first source slot: desc2 is not needed */
+ if (!nbytes) {
+ desc2 = NULL;
+ goto skip_additional_src;
+ }
+
+ /*
+ * Request bytes still remain, so a 64B descriptor
+ * (desc1 followed by desc2) has to be prepared
+ */
+ desc2 = &desc_sw->desc2;
+ desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT);
+
+ /* Set 2nd to 5th source address */
+ for (i = 0; i < 4 && nbytes; i++) {
+ /* paddr == 0: previous sg entry is finished, fetch the next one */
+ if (!paddr) {
+ sg = sg_next(sg);
+ if (!sg)
+ break;
+ offset = 0;
+ }
+ offset += xgene_dma_set_flyby_src(
+ xgene_dma_lookup_ext8(desc2, i),
+ sg, &paddr, &nbytes, offset);
+ }
+
+ /* Invalidate the source address fields that were left unused */
+ for (; i < 4; i++)
+ xgene_dma_invalidate_buffer(xgene_dma_lookup_ext8(desc2, i));
+
+ /* Check whether the whole requested length has been described */
+ if (nbytes) {
+ chan_err(chan, "Src count crossed maximum limit\n");
+ return -EINVAL;
+ }
+
+ /* Update flag that we have prepared 64B descriptor */
+ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+
+skip_additional_src:
+ /* Set descriptor parameters for flyby operation */
+ desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_BD_BIT);
+ desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_SD_BIT);
+ desc1->m2 |= cpu_to_le64(opcode);
+ desc1->m2 |= cpu_to_le64((u64)bitrev32(seed) <<
+ XGENE_DMA_DESC_CRCSEED_POS);
+ desc1->m3 |= cpu_to_le64(len);
+
+ return 0;
+}
+
static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct xgene_dma_desc_sw *desc;
@@ -745,8 +871,9 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
{
struct xgene_dma_ring *ring = &chan->rx_ring;
struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
- struct xgene_dma_desc_hw *desc_hw;
+ struct xgene_dma_desc_hw *desc_hw1, *desc_hw2;
struct list_head ld_completed;
+ u32 command;
u8 status;

INIT_LIST_HEAD(&ld_completed);
@@ -759,22 +886,35 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
/* Move all completed descriptors to ld completed queue, in order */
list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) {
/* Get subsequent hw descriptor from DMA rx ring */
- desc_hw = &ring->desc_hw[ring->head];
+ desc_hw1 = &ring->desc_hw[ring->head];

/* Check if this descriptor has been completed */
- if (unlikely(le64_to_cpu(desc_hw->m0) ==
+ if (unlikely(le64_to_cpu(desc_hw1->m0) ==
XGENE_DMA_DESC_EMPTY_SIGNATURE))
break;

if (++ring->head == ring->slots)
ring->head = 0;

+ if (le64_to_cpu(desc_hw1->m0) & XGENE_DMA_DESC_NV_BIT) {
+ /* 64B Rx descriptor */
+ desc_hw2 = &ring->desc_hw[ring->head];
+
+ if (++ring->head == ring->slots)
+ ring->head = 0;
+
+ command = 2;
+ } else {
+ desc_hw2 = NULL;
+ command = 1;
+ }
+
/* Check if we have any error with DMA transactions */
status = XGENE_DMA_DESC_STATUS(
XGENE_DMA_DESC_ELERR_RD(le64_to_cpu(
- desc_hw->m0)),
+ desc_hw1->m0)),
XGENE_DMA_DESC_LERR_RD(le64_to_cpu(
- desc_hw->m0)));
+ desc_hw1->m0)));
if (status) {
/* Print the DMA error type */
chan_err(chan, "%s\n", xgene_dma_desc_err[status]);
@@ -789,15 +929,23 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
XGENE_DMA_DESC_DUMP(&desc_sw->desc2,
"X-Gene DMA TX DESC2: ");

- XGENE_DMA_DESC_DUMP(desc_hw,
+ XGENE_DMA_DESC_DUMP(desc_hw1,
"X-Gene DMA RX ERR DESC: ");
}

/* Notify the hw about this completed descriptor */
- iowrite32(-1, ring->cmd);
+ iowrite32(-command, ring->cmd);

/* Mark this hw descriptor as processed */
- desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+ desc_hw1->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+ if (desc_hw2)
+ desc_hw2->m0 = cpu_to_le64(
+ XGENE_DMA_DESC_EMPTY_SIGNATURE);
+
+ if (desc_sw->flags & XGENE_DMA_FLAG_FLYBY_ACTIVE) {
+ iowrite32(command, chan->bufpool.cmd);
+ *(__le32 *)desc_sw->flyby_result = (__le32)desc_hw1->m3;
+ }

/*
* Decrement the pending transaction count
@@ -1125,6 +1273,55 @@ fail:
return NULL;
}

+/*
+ * Prepare a flyby transaction for @opcode on @chan.
+ *
+ * @chan:    X-Gene DMA channel to prepare the transaction on
+ * @src_sg:  DMA-mapped source scatterlist
+ * @len:     number of source bytes; must not exceed
+ *           XGENE_DMA_MAX_FLYBY_BYTE_CNT
+ * @seed:    initial value programmed into the descriptor
+ * @result:  location recorded in the descriptor as flyby_result
+ * @flags:   dmaengine descriptor flags stored in the returned tx
+ * @opcode:  flyby opcode, e.g. FLYBY_CRC32C
+ *
+ * File-local helper, hence static.
+ *
+ * Returns the async tx descriptor, or NULL when @len is too large, no
+ * software descriptor could be allocated, or descriptor setup failed.
+ */
+static struct dma_async_tx_descriptor *
+xgene_dma_prep_flyby(struct xgene_dma_chan *chan, struct scatterlist *src_sg,
+		     size_t len, u32 seed, u8 *result, unsigned long flags,
+		     u8 opcode)
+{
+	struct xgene_dma_desc_sw *desc;
+	int ret;
+
+	if (len > XGENE_DMA_MAX_FLYBY_BYTE_CNT) {
+		chan_err(chan, "Source length is too long 0x%zX\n", len);
+		return NULL;
+	}
+
+	/* Allocate the link descriptor from DMA pool */
+	desc = xgene_dma_alloc_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	/* Prepare DMA flyby descriptor */
+	ret = xgene_dma_prep_flyby_desc(chan, desc, src_sg, len, seed, opcode);
+	if (ret) {
+		xgene_dma_clean_descriptor(chan, desc);
+		return NULL;
+	}
+
+	/* Remember where the cleanup path has to store the result */
+	desc->flags |= XGENE_DMA_FLAG_FLYBY_ACTIVE;
+	desc->tx.flags = flags;
+	desc->flyby_result = result;
+
+	list_add_tail(&desc->node, &desc->tx_list);
+
+	return &desc->tx;
+}
+
+/*
+ * device_prep_dma_crc32c implementation: prepare a CRC32C flyby transaction
+ * on @dchan.
+ *
+ * Only reached through the dma_device operation pointer, hence static.
+ *
+ * Returns the async tx descriptor, or NULL when @dchan is NULL or the flyby
+ * descriptor could not be prepared.
+ */
+static struct dma_async_tx_descriptor *
+xgene_dma_prep_crc32c(struct dma_chan *dchan, struct scatterlist *src_sg,
+		      size_t len, u32 seed, u8 *result, unsigned long flags)
+{
+	struct xgene_dma_chan *chan;
+
+	if (unlikely(!dchan))
+		return NULL;
+
+	chan = to_dma_chan(dchan);
+
+	return xgene_dma_prep_flyby(chan, src_sg, len, seed,
+				    result, flags, FLYBY_CRC32C);
+}
+
static void xgene_dma_issue_pending(struct dma_chan *dchan)
{
struct xgene_dma_chan *chan = to_dma_chan(dchan);
@@ -1215,15 +1412,22 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
{
void *ring_cfg = ring->state;
u64 addr = ring->desc_paddr;
- u32 i, val;
+ u32 ring_id_buf, i, val;

- ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
+ ring->slots = ring->size / (ring->is_bufpool ?
+ XGENE_DMA_BUFPOOL_DESC_SIZE :
+ XGENE_DMA_RING_WQ_DESC_SIZE);

/* Clear DMA ring state */
xgene_dma_clr_ring_state(ring);

/* Set DMA ring type */
- XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR);
+ XGENE_DMA_RING_TYPE_SET(ring_cfg, ring->is_bufpool ?
+ XGENE_DMA_RING_TYPE_BUFPOOL :
+ XGENE_DMA_RING_TYPE_REGULAR);
+
+ if (ring->is_bufpool)
+ XGENE_DMA_RING_BUFMODE_SET(ring_cfg);

if (ring->owner == XGENE_DMA_RING_OWNER_DMA) {
/* Set recombination buffer and timeout */
@@ -1248,8 +1452,12 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
ring->pdma->csr_ring + XGENE_DMA_RING_ID);

/* Set DMA ring buffer */
- iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num),
- ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+ ring_id_buf = XGENE_DMA_RING_ID_BUF_SETUP(ring->num);
+
+ if (ring->is_bufpool)
+ ring_id_buf |= XGENE_DMA_RING_IS_BUFPOOL;
+
+ iowrite32(ring_id_buf, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);

if (ring->owner != XGENE_DMA_RING_OWNER_CPU)
return;
@@ -1344,6 +1552,9 @@ static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan)
{
xgene_dma_delete_ring_one(&chan->rx_ring);
xgene_dma_delete_ring_one(&chan->tx_ring);
+
+ if (chan->id == XGENE_DMA_FLYBY_CHANNEL)
+ xgene_dma_delete_ring_one(&chan->bufpool);
}

static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
@@ -1378,6 +1589,60 @@ static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
return 0;
}

+/*
+ * Create the buffer pool ring of @chan and populate every slot with a
+ * freshly allocated, DMA-mapped buffer of XGENE_DMA_MAX_BYTE_CNT bytes.
+ * Once all slots are filled, the slot count is written to the ring's cmd
+ * address.
+ *
+ * Returns 0 on success, the error from xgene_dma_create_ring_one() when
+ * the ring cannot be created, or -ENOMEM when a buffer cannot be allocated
+ * or mapped. On the -ENOMEM paths the ring is deleted again; mappings set
+ * up for earlier slots are not undone here.
+ */
+static int xgene_dma_init_bufpool(struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_ring *bufpool = &chan->bufpool;
+	struct xgene_dma_desc16 *desc16;
+	dma_addr_t buf_addr;
+	void *buf;
+	int ret, i;
+
+	/* Create DMA buffer pool */
+	bufpool->owner = XGENE_DMA_RING_OWNER_DMA;
+	bufpool->is_bufpool = true;
+	bufpool->buf_num = XGENE_DMA_BUFPOOL_BUFNUM;
+
+	ret = xgene_dma_create_ring_one(chan, bufpool,
+					XGENE_DMA_RING_CFG_SIZE_64KB);
+	if (ret)
+		return ret;
+
+	bufpool->dst_ring_num = XGENE_DMA_RING_DST_ID(bufpool->num);
+
+	dev_dbg(chan->dev,
+		"Bufpool ring id 0x%X num %d desc 0x%p\n",
+		bufpool->id, bufpool->num, bufpool->desc_vaddr);
+
+	for (i = 0; i < bufpool->slots; i++) {
+		desc16 = &bufpool->desc16[i];
+		memset(desc16, 0, sizeof(struct xgene_dma_desc16));
+		buf = devm_kzalloc(chan->dev,
+				   XGENE_DMA_MAX_BYTE_CNT, GFP_KERNEL);
+		if (!buf) {
+			xgene_dma_delete_ring_one(bufpool);
+			return -ENOMEM;
+		}
+
+		buf_addr = dma_map_single(chan->dev, buf,
+					  XGENE_DMA_MAX_BYTE_CNT,
+					  DMA_TO_DEVICE);
+		/* A failed mapping must never be handed to the hardware */
+		if (dma_mapping_error(chan->dev, buf_addr)) {
+			xgene_dma_delete_ring_one(bufpool);
+			return -ENOMEM;
+		}
+
+		/* Describe the buffer in this slot's 16B descriptor */
+		desc16->m0 |= cpu_to_le64((u64)bufpool->dst_ring_num <<
+					  XGENE_DMA_DESC_FPQ_NUM_POS);
+		desc16->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT);
+		desc16->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA <<
+					  XGENE_DMA_DESC_RTYPE_POS);
+		desc16->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT);
+		desc16->m1 |= cpu_to_le64(buf_addr);
+		desc16->m1 |= cpu_to_le64(xgene_dma_encode_len(
+					  XGENE_DMA_MAX_BYTE_CNT));
+	}
+
+	/* Report the number of populated slots through the ring cmd address */
+	iowrite32(bufpool->slots, bufpool->cmd);
+
+	return 0;
+}
+
static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
{
struct xgene_dma_ring *rx_ring = &chan->rx_ring;
@@ -1386,6 +1651,7 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)

/* Create DMA Rx ring descriptor */
rx_ring->owner = XGENE_DMA_RING_OWNER_CPU;
+ rx_ring->is_bufpool = false;
rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id;

ret = xgene_dma_create_ring_one(chan, rx_ring,
@@ -1398,6 +1664,7 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)

/* Create DMA Tx ring descriptor */
tx_ring->owner = XGENE_DMA_RING_OWNER_DMA;
+ tx_ring->is_bufpool = false;
tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id;

ret = xgene_dma_create_ring_one(chan, tx_ring,
@@ -1416,6 +1683,14 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
/* Set the max outstanding request possible to this channel */
chan->max_outstanding = tx_ring->slots;

+ if (chan->id == XGENE_DMA_FLYBY_CHANNEL) {
+ ret = xgene_dma_init_bufpool(chan);
+ if (ret) {
+ xgene_dma_delete_ring_one(rx_ring);
+ xgene_dma_delete_ring_one(tx_ring);
+ }
+ }
+
return ret;
}

@@ -1504,6 +1779,8 @@ static void xgene_dma_init_hw(struct xgene_dma *pdma)

/* Associate DMA ring to corresponding ring HW */
iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
+ pdma->csr_dma + XGENE_DMA_CFG_RING_FQ_ASSOC);
+ iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC);

/* Configure RAID6 polynomial control setting */
@@ -1671,6 +1948,9 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
dma_cap_set(DMA_XOR, dma_dev->cap_mask);
}

+ if (chan->id == XGENE_DMA_FLYBY_CHANNEL)
+ dma_cap_set(DMA_CRC32C, dma_dev->cap_mask);
+
/* Set base and prep routines */
dma_dev->dev = chan->dev;
dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources;
@@ -1690,6 +1970,9 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC;
dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES;
}
+
+ if (dma_has_cap(DMA_CRC32C, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_crc32c = xgene_dma_prep_crc32c;
}

static int xgene_dma_async_register(struct xgene_dma *pdma, int id)
@@ -1729,10 +2012,11 @@ static int xgene_dma_async_register(struct xgene_dma *pdma, int id)

/* DMA capability info */
dev_info(pdma->dev,
- "%s: CAPABILITY ( %s%s%s)\n", dma_chan_name(&chan->dma_chan),
+ "%s: CAPABILITY ( %s%s%s%s)\n", dma_chan_name(&chan->dma_chan),
dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "",
dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "",
- dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "");
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "",
+ dma_has_cap(DMA_CRC32C, dma_dev->cap_mask) ? "CRC32C " : "");

return 0;
}
--
1.7.1

2015-11-07 17:12:19

by Rameshwar Sahu

[permalink] [raw]
Subject: [PATCH v2 3/3] Crypto: Add support for APM X-Gene SoC CRC32C h/w accelerator driver

This patch implements support for APM X-Gene SoC CRC32C h/w accelerator.
DMA engine in APM X-Gene SoC is capable of doing CRC32C computations.

Signed-off-by: Rameshwar Prasad Sahu <[email protected]>
---
drivers/crypto/Kconfig | 8 ++
drivers/crypto/Makefile | 1 +
drivers/crypto/xgene-crc32c.c | 234 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 243 insertions(+), 0 deletions(-)
create mode 100755 drivers/crypto/xgene-crc32c.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index d234719..5d90b64 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -497,4 +497,12 @@ config CRYPTO_DEV_SUN4I_SS
To compile this driver as a module, choose M here: the module
will be called sun4i-ss.

+config CRYPTO_DEV_XGENE_CRC32C
+ tristate "Support for APM SoC X-Gene CRC32C HW accelerator"
+ depends on XGENE_DMA
+ select CRYPTO_HASH
+ help
+ This option enables support for CRC32C offload by using
+ APM X-Gene SoC DMA engine.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index c3ced6f..199d4e4 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
+obj-$(CONFIG_CRYPTO_DEV_XGENE_CRC32C) += xgene-crc32c.o
diff --git a/drivers/crypto/xgene-crc32c.c b/drivers/crypto/xgene-crc32c.c
new file mode 100755
index 0000000..142c681
--- /dev/null
+++ b/drivers/crypto/xgene-crc32c.c
@@ -0,0 +1,234 @@
+/*
+ * Applied Micro X-Gene SoC CRC32C HW acceleration by using DMA engine
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Authors: Rameshwar Prasad Sahu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <crypto/internal/hash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#define CRC32C_DIGEST_SIZE 4
+#define CRC32C_BLOCK_SIZE 1
+#define XGENE_DMA_MAX_FLYBY_SRC_CNT 5
+#define XGENE_DMA_MAX_FLYBY_BYTE_CNT 0x7FFF /* (32 KB - 1) */
+
+/*
+ * Per-transform context.
+ * dchan: DMA channel requested in xgene_crc32c_cra_init()
+ * key:   CRC seed; initialised to ~0 and replaceable through setkey
+ */
+struct xgene_crc32c_session_ctx {
+ struct dma_chan *dchan;
+ u32 key;
+};
+
+/*
+ * Per-request context.
+ * dev:   device the source scatterlist was DMA-mapped against
+ * nents: entry count passed to dma_map_sg(), reused for dma_unmap_sg()
+ * seed:  initial CRC value handed to the DMA engine for this request
+ */
+struct xgene_crc32c_reqctx {
+ struct device *dev;
+ u32 nents;
+ u32 seed;
+};
+
+/*
+ * DMA completion callback for a CRC32C request. @ctx is the ahash_request
+ * that was installed as callback_param when the descriptor was submitted.
+ */
+static void xgene_crc32c_callback(void *ctx)
+{
+	struct ahash_request *req = ctx;
+	struct xgene_crc32c_reqctx *reqctx = ahash_request_ctx(req);
+
+	/*
+	 * Release the DMA mapping before signalling completion: once the
+	 * completion handler has run, the request may be freed or reused by
+	 * its owner, so neither req nor reqctx may be touched afterwards.
+	 */
+	dma_unmap_sg(reqctx->dev, req->src, reqctx->nents, DMA_TO_DEVICE);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, 0);
+}
+
+/*
+ * Map the request's source scatterlist and queue a CRC32C operation on
+ * @dchan, seeded with the request context's seed. Completion is reported
+ * through xgene_crc32c_callback().
+ *
+ * Returns -EINPROGRESS once the descriptor has been submitted, -EIO when
+ * the scatterlist cannot be mapped, and -EINVAL for an oversized request,
+ * too many mapped segments, or a prepare/submit failure.
+ */
+static int xgene_crc32c_handle_req(struct ahash_request *req,
+				   struct dma_chan *dchan)
+{
+	struct xgene_crc32c_reqctx *reqctx = ahash_request_ctx(req);
+	struct device *dev = dchan->device->dev;
+	struct dma_async_tx_descriptor *tx;
+	enum dma_ctrl_flags flags;
+	u32 nents, sg_count;
+	dma_cookie_t cookie;
+
+	if (req->nbytes > XGENE_DMA_MAX_FLYBY_BYTE_CNT) {
+		dev_err(dev, "Src len is too long %u\n", req->nbytes);
+		return -EINVAL;
+	}
+
+	nents = sg_nents(req->src);
+	sg_count = dma_map_sg(dev, req->src, nents, DMA_TO_DEVICE);
+	if (!sg_count) {
+		dev_err(dev, "Failed to map src sg\n");
+		return -EIO;
+	}
+
+	if (sg_count > XGENE_DMA_MAX_FLYBY_SRC_CNT) {
+		/* sg_count is unsigned, so print it as such */
+		dev_err(dev, "Unsupported src sg count %u\n", sg_count);
+		goto err;
+	}
+
+	flags = DMA_CTRL_ACK;
+
+	tx = dmaengine_prep_dma_crc3c(dchan, req->src, req->nbytes,
+				      reqctx->seed, req->result, flags);
+	if (!tx)
+		goto err;
+
+	/* Set callback parameters */
+	reqctx->dev = dev;
+	reqctx->nents = nents;
+	tx->callback_param = req;
+	tx->callback = xgene_crc32c_callback;
+
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(dev, "Failed to submit descriptor\n");
+		goto err;
+	}
+
+	dma_async_issue_pending(dchan);
+
+	return -EINPROGRESS;
+
+err:
+	/* Undo the mapping; unmap takes the original entry count */
+	dma_unmap_sg(dev, req->src, nents, DMA_TO_DEVICE);
+	return -EINVAL;
+}
+
+/*
+ * .init handler: store the transform's key, in little-endian form, in the
+ * request's result buffer. Always returns 0.
+ */
+static int xgene_crc32c_init(struct ahash_request *req)
+{
+	struct xgene_crc32c_session_ctx *ctx =
+		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	__le32 *crc = (__le32 *)req->result;
+
+	*crc = cpu_to_le32(ctx->key);
+
+	return 0;
+}
+
+/*
+ * .update handler: take the little-endian value currently held in the
+ * request's result buffer as seed and hand the request to the DMA engine.
+ * Returns whatever xgene_crc32c_handle_req() returns.
+ */
+static int xgene_crc32c_update(struct ahash_request *req)
+{
+	struct xgene_crc32c_session_ctx *ctx =
+		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct xgene_crc32c_reqctx *rctx = ahash_request_ctx(req);
+	__le32 *running = (__le32 *)req->result;
+
+	rctx->seed = le32_to_cpu(*running);
+
+	return xgene_crc32c_handle_req(req, ctx->dchan);
+}
+
+/*
+ * .final handler: performs no work and always reports success.
+ */
+static int xgene_crc32c_final(struct ahash_request *req)
+{
+ return 0;
+}
+
+/*
+ * .finup handler: same processing as the update handler - seed from the
+ * little-endian value in the request's result buffer, then queue the
+ * request on the transform's DMA channel.
+ * Returns whatever xgene_crc32c_handle_req() returns.
+ */
+static int xgene_crc32c_finup(struct ahash_request *req)
+{
+	struct xgene_crc32c_session_ctx *ctx =
+		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct xgene_crc32c_reqctx *rctx = ahash_request_ctx(req);
+	__le32 *running = (__le32 *)req->result;
+
+	rctx->seed = le32_to_cpu(*running);
+
+	return xgene_crc32c_handle_req(req, ctx->dchan);
+}
+
+/*
+ * .digest handler: one-shot computation seeded with the transform's key.
+ * Returns whatever xgene_crc32c_handle_req() returns.
+ */
+static int xgene_crc32c_digest(struct ahash_request *req)
+{
+	struct xgene_crc32c_session_ctx *ctx =
+		crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct xgene_crc32c_reqctx *rctx = ahash_request_ctx(req);
+
+	rctx->seed = ctx->key;
+
+	return xgene_crc32c_handle_req(req, ctx->dchan);
+}
+
+/*
+ * .setkey handler: accept a key of exactly CRC32C_DIGEST_SIZE bytes, read
+ * as a little-endian 32-bit value, and store it in the transform context.
+ *
+ * Returns 0 on success; for any other key length flags the transform with
+ * CRYPTO_TFM_RES_BAD_KEY_LEN and returns -EINVAL.
+ */
+static int xgene_crc32c_setkey(struct crypto_ahash *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct xgene_crc32c_session_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	if (keylen == CRC32C_DIGEST_SIZE) {
+		ctx->key = le32_to_cpu(*(__le32 *)key);
+		return 0;
+	}
+
+	crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+/*
+ * Transform constructor: set the per-request context size, default the
+ * key to ~0 and request a DMA channel that advertises DMA_CRC32C.
+ *
+ * Returns 0 on success or -ENXIO when no such channel is available.
+ */
+static int xgene_crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	struct xgene_crc32c_session_ctx *session = crypto_tfm_ctx(tfm);
+	dma_cap_mask_t mask;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct xgene_crc32c_reqctx));
+	session->key = ~0;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_CRC32C, mask);
+
+	session->dchan = dma_request_channel(mask, NULL, NULL);
+	if (!session->dchan) {
+		pr_err("Failed to request CRC32C DMA channel\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Transform destructor: give back the DMA channel taken in
+ * xgene_crc32c_cra_init() so it is not leaked when the transform goes away.
+ */
+static void xgene_crc32c_cra_exit(struct crypto_tfm *tfm)
+{
+	struct xgene_crc32c_session_ctx *session = crypto_tfm_ctx(tfm);
+
+	dma_release_channel(session->dchan);
+}
+
+/* Asynchronous hash algorithm descriptor registered by this module */
+static struct ahash_alg xgene_crc32c_alg = {
+	.init		= xgene_crc32c_init,
+	.update		= xgene_crc32c_update,
+	.final		= xgene_crc32c_final,
+	.finup		= xgene_crc32c_finup,
+	.digest		= xgene_crc32c_digest,
+	.setkey		= xgene_crc32c_setkey,
+	.halg.digestsize	= CRC32C_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "crc32c",
+		.cra_driver_name	= "crc32c-xgene",
+		.cra_flags		= (CRYPTO_ALG_TYPE_AHASH |
+					   CRYPTO_ALG_ASYNC),
+		.cra_blocksize		= CRC32C_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(
+					   struct xgene_crc32c_session_ctx),
+		.cra_init		= xgene_crc32c_cra_init,
+		.cra_exit		= xgene_crc32c_cra_exit,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+/*
+ * Module entry point: register the "crc32c-xgene" ahash algorithm and
+ * return the registration result.
+ * NOTE(review): late_initcall presumably lets the DMA provider come up
+ * first when built in - confirm.
+ */
+static int __init xgene_crc32c_mod_init(void)
+{
+ return crypto_register_ahash(&xgene_crc32c_alg);
+}
+late_initcall(xgene_crc32c_mod_init);
+
+/* Module exit point: unregister the ahash algorithm registered at init. */
+static void __exit xgene_crc32c_mod_exit(void)
+{
+ crypto_unregister_ahash(&xgene_crc32c_alg);
+}
+module_exit(xgene_crc32c_mod_exit);
+
+MODULE_DESCRIPTION("APM X-Gene SoC CRC32C HW accelerator driver");
+MODULE_AUTHOR("Rameshwar Prasad Sahu <[email protected]>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-xgene");
--
1.7.1