2020-02-08 08:41:54

by Chuanhong Guo

[permalink] [raw]
Subject: [PATCH v2] mtd: mtk-quadspi: add support for DMA reading

PIO reading mode on this controller is pretty inefficient
(one cmd+addr+dummy sequence reads only one byte).
This patch adds support for reading using DMA mode, which increases
reading speed from 1MB/s to 4MB/s.

DMA busy checking is implemented with readl_poll_timeout because
I don't have access to IRQ-related docs. The speed increase comes
from those saved cmd+addr+dummy clocks.

This controller requires that DMA source/destination address and
reading length should be 16-byte aligned. We use a bounce buffer if
one of them is not aligned, read more than what we need, and copy
data from corresponding buffer offset.

Signed-off-by: Chuanhong Guo <[email protected]>
---

Changes since v1:
1. cast pointers to ulong instead of u32 to fix warnings on 64-bit
platforms
2. drop the other patch for reading with custom opcode. That'll
be a separate fix which is unrelated to this one.

drivers/mtd/spi-nor/mtk-quadspi.c | 99 +++++++++++++++++++++++++++++--
1 file changed, 95 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index b1691680d174..85101b84b516 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -7,6 +7,7 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/device.h>
+#include <linux/dma-mapping.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -17,6 +18,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
@@ -70,6 +72,10 @@
#define MTK_NOR_DELSEL2_REG 0xd0
#define MTK_NOR_DELSEL3_REG 0xd4
#define MTK_NOR_DELSEL4_REG 0xd8
+#define MTK_NOR_FDMA_CTL_REG 0x718
+#define MTK_NOR_FDMA_FADR_REG 0x71c
+#define MTK_NOR_FDMA_DADR_REG 0x720
+#define MTK_NOR_FDMA_END_DADR_REG 0x724

/* commands for mtk nor controller */
#define MTK_NOR_READ_CMD 0x0
@@ -88,6 +94,7 @@
#define MTK_NOR_DUAL_READ_EN 0x1
#define MTK_NOR_DUAL_DISABLE 0x0
#define MTK_NOR_FAST_READ 0x1
+#define MTK_NOR_DMA_TRIG 0x1

#define SFLASH_WRBUF_SIZE 128

@@ -97,7 +104,10 @@
#define MTK_NOR_MAX_SHIFT 7
/* nor controller 4-byte address mode enable bit */
#define MTK_NOR_4B_ADDR_EN BIT(4)
-
+/* DMA address has to be 16-byte aligned */
+#define MTK_NOR_DMA_ALIGN 16
+/* Limit bounce buffer size to 32KB */
+#define MTK_NOR_MAX_BBUF_READ (32 * 1024)
/* Helpers for accessing the program data / shift data registers */
#define MTK_NOR_PRG_REG(n) (MTK_NOR_PRGDATA0_REG + 4 * (n))
#define MTK_NOR_SHREG(n) (MTK_NOR_SHREG0_REG + 4 * (n))
@@ -260,13 +270,12 @@ static void mtk_nor_set_addr(struct mtk_nor *mtk_nor, u32 addr)
writeb(addr & 0xff, mtk_nor->base + MTK_NOR_RADR3_REG);
}

-static ssize_t mtk_nor_read(struct spi_nor *nor, loff_t from, size_t length,
- u_char *buffer)
+static ssize_t mtk_nor_read_pio(struct mtk_nor *mtk_nor, loff_t from,
+ size_t length, u_char *buffer)
{
int i, ret;
int addr = (int)from;
u8 *buf = (u8 *)buffer;
- struct mtk_nor *mtk_nor = nor->priv;

/* set mode for fast read mode ,dual mode or quad mode */
mtk_nor_set_read_mode(mtk_nor);
@@ -281,6 +290,88 @@ static ssize_t mtk_nor_read(struct spi_nor *nor, loff_t from, size_t length,
return length;
}

+/*
+ * Start a flash DMA transfer and wait for it to finish.
+ *
+ * Sets MTK_NOR_DMA_TRIG in the DMA control register, then polls that
+ * register every 20us, for at most 10ms, until the bit reads back as zero.
+ * Completion is detected by polling; no interrupt is used here.
+ *
+ * Return: whatever readl_poll_timeout() returns, i.e. 0 once the trigger
+ * bit has cleared or -ETIMEDOUT if it is still set when the timeout expires.
+ */
+static int mtk_nor_dma_exec(struct mtk_nor *mtk_nor)
+{
+	/* readl() yields a u32; keep the register image unsigned. */
+	u32 reg;
+
+	reg = readl(mtk_nor->base + MTK_NOR_FDMA_CTL_REG);
+	writel(reg | MTK_NOR_DMA_TRIG, mtk_nor->base + MTK_NOR_FDMA_CTL_REG);
+
+	return readl_poll_timeout(mtk_nor->base + MTK_NOR_FDMA_CTL_REG, reg,
+				  !(reg & MTK_NOR_DMA_TRIG), 20, 10000);
+}
+
+/*
+ * Read flash contents directly into @buffer with the controller's DMA engine.
+ *
+ * @length is rounded down to a multiple of MTK_NOR_DMA_ALIGN before the
+ * transfer is programmed, so fewer bytes than requested may be read; the
+ * caller learns the actual count from the return value. The alignment of
+ * @from and @buffer is not checked here and is left to the caller.
+ *
+ * Return: number of bytes transferred on success, -EINVAL if @buffer could
+ * not be DMA-mapped, or the error returned by mtk_nor_dma_exec().
+ */
+static ssize_t mtk_nor_read_dma(struct mtk_nor *mtk_nor, loff_t from,
+				size_t length, u_char *buffer)
+{
+	ssize_t ret;
+	ssize_t read_length = length & ~(MTK_NOR_DMA_ALIGN - 1);
+	dma_addr_t dma_addr;
+
+	mtk_nor_set_read_mode(mtk_nor);
+	mtk_nor_set_addr_width(mtk_nor);
+
+	dma_addr = dma_map_single(mtk_nor->dev, buffer, read_length,
+				  DMA_FROM_DEVICE);
+	if (dma_mapping_error(mtk_nor->dev, dma_addr)) {
+		/* Kernel log messages must be newline-terminated. */
+		dev_err(mtk_nor->dev, "failed to map dma buffer.\n");
+		return -EINVAL;
+	}
+
+	/* Program the flash offset, then the DMA start and end addresses. */
+	writel(from, mtk_nor->base + MTK_NOR_FDMA_FADR_REG);
+	writel(dma_addr, mtk_nor->base + MTK_NOR_FDMA_DADR_REG);
+	writel((u32)dma_addr + read_length,
+	       mtk_nor->base + MTK_NOR_FDMA_END_DADR_REG);
+
+	ret = mtk_nor_dma_exec(mtk_nor);
+
+	/* Unmap regardless of whether the transfer succeeded. */
+	dma_unmap_single(mtk_nor->dev, dma_addr, read_length, DMA_FROM_DEVICE);
+	if (ret)
+		return ret;
+
+	return read_length;
+}
+
+/*
+ * Read from flash through a temporary, aligned bounce buffer.
+ *
+ * The flash offset is rounded down to MTK_NOR_DMA_ALIGN, an aligned bounce
+ * buffer is carved out of a slightly oversized kmalloc() allocation, more
+ * data than requested is read into it, and the wanted bytes are then copied
+ * to @buffer from the matching offset. At most MTK_NOR_MAX_BBUF_READ bytes
+ * are handled per call.
+ *
+ * Return: number of bytes copied to @buffer (possibly fewer than the @length
+ * passed in), -ENOMEM if the bounce buffer cannot be allocated, or the error
+ * returned by mtk_nor_read_dma().
+ */
+static ssize_t mtk_nor_read_dma_bounce(struct mtk_nor *mtk_nor, loff_t from,
+				       size_t length, u_char *buffer)
+{
+	ssize_t nor_unaligned_len = from % MTK_NOR_DMA_ALIGN;
+	loff_t read_from = from & ~(MTK_NOR_DMA_ALIGN - 1);
+	ssize_t read_len;
+	u_char *buf;
+	u_char *bouncebuf;
+	size_t mem_unaligned_len;
+
+	if (length > MTK_NOR_MAX_BBUF_READ)
+		length = MTK_NOR_MAX_BBUF_READ;
+
+	/*
+	 * Cover the leading bytes introduced by rounding @from down, plus one
+	 * extra alignment unit because mtk_nor_read_dma() rounds the length
+	 * down again.
+	 */
+	read_len = length + nor_unaligned_len + MTK_NOR_DMA_ALIGN;
+
+	/* Over-allocate so that an aligned start address always fits. */
+	buf = kmalloc(read_len + MTK_NOR_DMA_ALIGN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	mem_unaligned_len = (ulong)buf % MTK_NOR_DMA_ALIGN;
+	bouncebuf = (buf + MTK_NOR_DMA_ALIGN) - mem_unaligned_len;
+
+	read_len = mtk_nor_read_dma(mtk_nor, read_from, read_len, bouncebuf);
+	if (read_len > 0) {
+		memcpy(buffer, bouncebuf + nor_unaligned_len, length);
+		read_len = length;
+	}
+
+	kfree(buf);
+
+	/*
+	 * On failure read_len still holds the error from mtk_nor_read_dma();
+	 * report it instead of claiming that @length bytes were read.
+	 */
+	return read_len;
+}
+
+/*
+ * Read entry point: pick PIO, bounce-buffered DMA or direct DMA.
+ *
+ * Requests shorter than MTK_NOR_DMA_ALIGN bytes are served by PIO. Longer
+ * requests use DMA; they go through the bounce buffer when @buffer is on
+ * the stack, fails virt_addr_valid(), or when either @buffer or @from is
+ * not a multiple of MTK_NOR_DMA_ALIGN.
+ *
+ * Return: the value returned by the selected read helper.
+ */
+static ssize_t mtk_nor_read(struct spi_nor *nor, loff_t from, size_t length,
+			    u_char *buffer)
+{
+	struct mtk_nor *mtk_nor = nor->priv;
+	bool need_bounce;
+
+	if (length < MTK_NOR_DMA_ALIGN)
+		return mtk_nor_read_pio(mtk_nor, from, length, buffer);
+
+	need_bounce = object_is_on_stack(buffer) || !virt_addr_valid(buffer) ||
+		      (ulong)buffer % MTK_NOR_DMA_ALIGN ||
+		      from % MTK_NOR_DMA_ALIGN;
+	if (!need_bounce)
+		return mtk_nor_read_dma(mtk_nor, from, length, buffer);
+
+	return mtk_nor_read_dma_bounce(mtk_nor, from, length, buffer);
+}
+
static int mtk_nor_write_single_byte(struct mtk_nor *mtk_nor,
int addr, int length, u8 *data)
{
--
2.24.1


2020-02-10 08:32:27

by Yingjoe Chen

[permalink] [raw]
Subject: Re: [PATCH v2] mtd: mtk-quadspi: add support for DMA reading

On Sat, 2020-02-08 at 16:40 +0800, Chuanhong Guo wrote:
> PIO reading mode on this controller is pretty inefficient
> (one cmd+addr+dummy sequence reads only one byte)
> This patch adds support for reading using DMA mode which increases
> reading speed from 1MB/s to 4MB/s
>
> DMA busy checking is implemented with readl_poll_timeout because
> I don't have access to IRQ-related docs. The speed increment comes
> from those saved cmd+addr+dummy clocks.

Hi Chuanhong,

Thanks for your patch, I'm checking with Guochun to see if we could
release IRQ related information to you.


> This controller requires that DMA source/destination address and
> reading length should be 16-byte aligned. We use a bounce buffer if
> one of them is not aligned, read more than what we need, and copy
> data from corresponding buffer offset.

I've checked with our HW guys. The limitation is on DRAM only.
So for read we should check buffer and length to make sure it is
aligned, but don't need to check from.

Joe.C

2020-02-11 07:30:35

by Chuanhong Guo

[permalink] [raw]
Subject: Re: [PATCH v2] mtd: mtk-quadspi: add support for DMA reading

Hi!

On Mon, Feb 10, 2020 at 4:31 PM Yingjoe Chen <[email protected]> wrote:
> > DMA busy checking is implemented with readl_poll_timeout because
> > I don't have access to IRQ-related docs. The speed increment comes
> > from those saved cmd+addr+dummy clocks.
>
> Hi Chuanhong,
>
> Thanks for your patch, I'm checking with Guochun to see if we could
> release IRQ related information to you.

Thanks for the info.
I'd like to keep using polling mode in this patch for easier reviewing.
It's already a pretty lengthy patch now. I may implement IRQ support
in future patches.

>
> > This controller requires that DMA source/destination address and
> > reading length should be 16-byte aligned. We use a bounce buffer if
> > one of them is not aligned, read more than what we need, and copy
> > data from corresponding buffer offset.
>
> I've checked with our HW guys. The limitation is on DRAM only.
> So for read we should check buffer and length to make sure it is
> aligned, but don't need to check from.

My previous test on mt7629 shows that the from address also needs to
be aligned, e.g. if I perform a DMA read from 0x2 I actually get data
starting from 0x0 instead.

Regards,
Chuanhong Guo

2020-02-15 07:06:40

by Chuanhong Guo

[permalink] [raw]
Subject: Re: [PATCH v2] mtd: mtk-quadspi: add support for DMA reading

Hi all!

On Sat, Feb 8, 2020 at 4:41 PM Chuanhong Guo <[email protected]> wrote:
>
> PIO reading mode on this controller is pretty inefficient
> (one cmd+addr+dummy sequence reads only one byte)
> This patch adds support for reading using DMA mode which increases
> reading speed from 1MB/s to 4MB/s
>
> DMA busy checking is implemented with readl_poll_timeout because
> I don't have access to IRQ-related docs. The speed increment comes
> from those saved cmd+addr+dummy clocks.
>
> This controller requires that DMA source/destination address and
> reading length should be 16-byte aligned. We use a bounce buffer if
> one of them is not aligned, read more than what we need, and copy
> data from corresponding buffer offset.
>
> Signed-off-by: Chuanhong Guo <[email protected]>

This patch is deprecated. I wrote a new spi-mem driver for this
controller:
https://patchwork.ozlabs.org/project/linux-mtd/list/?series=158701
and will focus on getting that one merged instead.
--
Regards,
Chuanhong Guo