2017-03-08 03:58:23

by Kuninori Morimoto

[permalink] [raw]
Subject: [PATCH v2] dmaengine: rcar-dmac: enable descriptor mode on 40bit

From: Kuninori Morimoto <[email protected]>

SYS-DMAC can use 40bit address transfer, and it supports Descriptor
Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
40bit address today. But it can use Descriptor Mode with 40bit if
transfer Source/Destination address are located in same 4GiB region
in the 40 bit address space.
This patch enables it if all condition was clear

Signed-off-by: Kuninori Morimoto <[email protected]>
---
v1 -> v2

- do high_dev_addr / high_mem_addr in out of sg loop
- reuse if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)

drivers/dma/sh/rcar-dmac.c | 52 ++++++++++++++++++++++++++++------------------
1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 48b22d5..a420921 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -344,13 +344,19 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);

if (desc->hwdescs.use) {
- struct rcar_dmac_xfer_chunk *chunk;
+ struct rcar_dmac_xfer_chunk *chunk =
+ list_first_entry(&desc->chunks,
+ struct rcar_dmac_xfer_chunk, node);

dev_dbg(chan->chan.device->dev,
"chan%u: queue desc %p: %u@%pad\n",
chan->index, desc, desc->nchunks, &desc->hwdescs.dma);

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+ chunk->src_addr >> 32);
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+ chunk->dst_addr >> 32);
rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
desc->hwdescs.dma >> 32);
#endif
@@ -368,8 +374,6 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
* should. Initialize it manually with the destination address
* of the first chunk.
*/
- chunk = list_first_entry(&desc->chunks,
- struct rcar_dmac_xfer_chunk, node);
rcar_dmac_chan_write(chan, RCAR_DMADAR,
chunk->dst_addr & 0xffffffff);

@@ -855,8 +859,12 @@ static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
unsigned int nchunks = 0;
unsigned int max_chunk_size;
unsigned int full_size = 0;
- bool highmem = false;
+ bool cross_boundary = false;
unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ u32 high_dev_addr = 0;
+ u32 high_mem_addr = 0;
+#endif

desc = rcar_dmac_desc_get(chan);
if (!desc)
@@ -882,6 +890,16 @@ static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,

full_size += len;

+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ if (i == 0) {
+ high_dev_addr = dev_addr >> 32;
+ high_mem_addr = mem_addr >> 32;
+ }
+
+ if ((dev_addr >> 32 != high_dev_addr) ||
+ (mem_addr >> 32 != high_mem_addr))
+ cross_boundary = true;
+#endif
while (len) {
unsigned int size = min(len, max_chunk_size);

@@ -890,18 +908,14 @@ static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
* Prevent individual transfers from crossing 4GB
* boundaries.
*/
- if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+ if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
- if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+ cross_boundary = true;
+ }
+ if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
-
- /*
- * Check if either of the source or destination address
- * can't be expressed in 32 bits. If so we can't use
- * hardware descriptor lists.
- */
- if (dev_addr >> 32 || mem_addr >> 32)
- highmem = true;
+ cross_boundary = true;
+ }
#endif

chunk = rcar_dmac_xfer_chunk_get(chan);
@@ -943,13 +957,11 @@ static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
* Use hardware descriptor lists if possible when more than one chunk
* needs to be transferred (otherwise they don't make much sense).
*
- * The highmem check currently covers the whole transfer. As an
- * optimization we could use descriptor lists for consecutive lowmem
- * chunks and direct manual mode for highmem chunks. Whether the
- * performance improvement would be significant enough compared to the
- * additional complexity remains to be investigated.
+ * Source/Destination address should be located in same 4GiB region
+ * in the 40bit address space when it uses Hardware descriptor,
+ * and cross_boundary is checking it.
*/
- desc->hwdescs.use = !highmem && nchunks > 1;
+ desc->hwdescs.use = !cross_boundary && nchunks > 1;
if (desc->hwdescs.use) {
if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
desc->hwdescs.use = false;
--
1.9.1


2017-03-08 08:18:20

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH v2] dmaengine: rcar-dmac: enable descriptor mode on 40bit

On Wed, Mar 8, 2017 at 1:42 AM, Kuninori Morimoto
<[email protected]> wrote:
> From: Kuninori Morimoto <[email protected]>
>
> SYS-DMAC can use 40bit address transfer, and it supports Descriptor
> Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
> 40bit address today. But it can use Descriptor Mode with 40bit if
> transfer Source/Destination address are located in same 4GiB region
> in the 40 bit address space.
> This patch enables it if all condition was clear
>
> Signed-off-by: Kuninori Morimoto <[email protected]>

Reviewed-by: Geert Uytterhoeven <[email protected]>

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2017-03-08 09:21:23

by Laurent Pinchart

[permalink] [raw]
Subject: Re: [PATCH v2] dmaengine: rcar-dmac: enable descriptor mode on 40bit

Hi Morimoto-san,

Thank you for the patch.

On Wednesday 08 Mar 2017 00:42:54 Kuninori Morimoto wrote:
> From: Kuninori Morimoto <[email protected]>
>
> SYS-DMAC can use 40bit address transfer, and it supports Descriptor
> Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
> 40bit address today. But it can use Descriptor Mode with 40bit if
> transfer Source/Destination address are located in same 4GiB region
> in the 40 bit address space.
> This patch enables it if all condition was clear
>
> Signed-off-by: Kuninori Morimoto <[email protected]>
> ---
> v1 -> v2
>
> - do high_dev_addr / high_mem_addr in out of sg loop
> - reuse if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
>
> drivers/dma/sh/rcar-dmac.c | 52 +++++++++++++++++++++++++------------------
> 1 file changed, 32 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 48b22d5..a420921 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -344,13 +344,19 @@ static void rcar_dmac_chan_start_xfer(struct
> rcar_dmac_chan *chan) rcar_dmac_chan_write(chan, RCAR_DMARS,
> chan->mid_rid);
>
> if (desc->hwdescs.use) {
> - struct rcar_dmac_xfer_chunk *chunk;
> + struct rcar_dmac_xfer_chunk *chunk =
> + list_first_entry(&desc->chunks,
> + struct rcar_dmac_xfer_chunk, node);
>
> dev_dbg(chan->chan.device->dev,
> "chan%u: queue desc %p: %u@%pad\n",
> chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
>
> #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
> + chunk->src_addr >> 32);
> + rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
> + chunk->dst_addr >> 32);
> rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
> desc->hwdescs.dma >> 32);
> #endif
> @@ -368,8 +374,6 @@ static void rcar_dmac_chan_start_xfer(struct
> rcar_dmac_chan *chan) * should. Initialize it manually with the destination
> address
> * of the first chunk.
> */
> - chunk = list_first_entry(&desc->chunks,
> - struct rcar_dmac_xfer_chunk, node);
> rcar_dmac_chan_write(chan, RCAR_DMADAR,
> chunk->dst_addr & 0xffffffff);
>
> @@ -855,8 +859,12 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, unsigned int nchunks = 0;
> unsigned int max_chunk_size;
> unsigned int full_size = 0;
> - bool highmem = false;
> + bool cross_boundary = false;
> unsigned int i;
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + u32 high_dev_addr = 0;
> + u32 high_mem_addr = 0;

No need to initialize these to 0 as you initialize them in the first iteration
of the loop.

Apart from that,

Reviewed-by: Laurent Pinchart <[email protected]>

> +#endif
>
> desc = rcar_dmac_desc_get(chan);
> if (!desc)
> @@ -882,6 +890,16 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan,
>
> full_size += len;
>
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + if (i == 0) {
> + high_dev_addr = dev_addr >> 32;
> + high_mem_addr = mem_addr >> 32;
> + }
> +
> + if ((dev_addr >> 32 != high_dev_addr) ||
> + (mem_addr >> 32 != high_mem_addr))
> + cross_boundary = true;
> +#endif
> while (len) {
> unsigned int size = min(len, max_chunk_size);
>
> @@ -890,18 +908,14 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, * Prevent individual transfers from crossing 4GB
> * boundaries.
> */
> - if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
> + if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
> size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
> - if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
> + cross_boundary = true;
> + }
> + if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
> size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
> -
> - /*
> - * Check if either of the source or destination
address
> - * can't be expressed in 32 bits. If so we can't use
> - * hardware descriptor lists.
> - */
> - if (dev_addr >> 32 || mem_addr >> 32)
> - highmem = true;
> + cross_boundary = true;
> + }
> #endif
>
> chunk = rcar_dmac_xfer_chunk_get(chan);
> @@ -943,13 +957,11 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, * Use hardware descriptor lists if possible when more
> than one chunk * needs to be transferred (otherwise they don't make much
> sense). *
> - * The highmem check currently covers the whole transfer. As an
> - * optimization we could use descriptor lists for consecutive lowmem
> - * chunks and direct manual mode for highmem chunks. Whether the
> - * performance improvement would be significant enough compared to the
> - * additional complexity remains to be investigated.
> + * Source/Destination address should be located in same 4GiB region
> + * in the 40bit address space when it uses Hardware descriptor,
> + * and cross_boundary is checking it.
> */
> - desc->hwdescs.use = !highmem && nchunks > 1;
> + desc->hwdescs.use = !cross_boundary && nchunks > 1;
> if (desc->hwdescs.use) {
> if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
> desc->hwdescs.use = false;

--
Regards,

Laurent Pinchart