2018-04-11 14:51:11

by Pierre-Yves MORDRET

[permalink] [raw]
Subject: [PATCH v2 0/2] Append some fixes and improvements

Fix an issue with FIFO Size and burst size.
Fix an incomplete allocator for Hardware descriptors: memory badly
allocated.
---
Version history:
v1:
* Initial
v2:
* Fix kbuild warning format: /0x%08x/%pad/
---

Pierre-Yves MORDRET (2):
dmaengine: stm32-mdma: align TLEN and buffer length on burst
dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

drivers/dma/stm32-mdma.c | 92 ++++++++++++++++++++++++++++++------------------
1 file changed, 57 insertions(+), 35 deletions(-)

--
2.7.4



2018-04-11 14:49:10

by Pierre-Yves MORDRET

[permalink] [raw]
Subject: [PATCH v2 1/2] dmaengine: stm32-mdma: align TLEN and buffer length on burst

Both buffer Transfer Length (TLEN if any) and transfer size have to be
aligned on burst size (burst beats*bus width).

Signed-off-by: Pierre-Yves MORDRET <[email protected]>
---
Version history:
v1:
* Initial
v2:
---
---
drivers/dma/stm32-mdma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index daa1602..fbcffa2 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -413,7 +413,7 @@ static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
u32 best_burst = max_burst;
u32 burst_len = best_burst * width;

- while ((burst_len > 0) && (tlen % burst_len)) {
+ while ((burst_len > 0) && (((tlen | buf_len) & (burst_len - 1)) != 0)) {
best_burst = best_burst >> 1;
burst_len = best_burst * width;
}
--
2.7.4


2018-04-11 14:49:19

by Pierre-Yves MORDRET

[permalink] [raw]
Subject: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

Only 1 Hw Descriptor is allocated. Loop over required Hw descriptor for
proper allocation.

Signed-off-by: Pierre-Yves MORDRET <[email protected]>
---
Version history:
v1:
* Initial
v2:
* Fix kbuild warning format: /0x%08x/%pad/
---
---
drivers/dma/stm32-mdma.c | 90 ++++++++++++++++++++++++++++++------------------
1 file changed, 56 insertions(+), 34 deletions(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index fbcffa2..3a477bc 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -252,13 +252,17 @@ struct stm32_mdma_hwdesc {
u32 cmdr;
} __aligned(64);

+struct stm32_mdma_desc_node {
+ struct stm32_mdma_hwdesc *hwdesc;
+ dma_addr_t hwdesc_phys;
+};
+
struct stm32_mdma_desc {
struct virt_dma_desc vdesc;
u32 ccr;
- struct stm32_mdma_hwdesc *hwdesc;
- dma_addr_t hwdesc_phys;
bool cyclic;
u32 count;
+ struct stm32_mdma_desc_node node[];
};

struct stm32_mdma_chan {
@@ -344,30 +348,43 @@ static struct stm32_mdma_desc *stm32_mdma_alloc_desc(
struct stm32_mdma_chan *chan, u32 count)
{
struct stm32_mdma_desc *desc;
+ int i;

- desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ desc = kzalloc(sizeof(*desc) +
+ sizeof(struct stm32_mdma_desc_node) * count, GFP_NOWAIT);
if (!desc)
return NULL;

- desc->hwdesc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
- &desc->hwdesc_phys);
- if (!desc->hwdesc) {
- dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
- kfree(desc);
- return NULL;
+ for (i = 0; i < count; i++) {
+ desc->node[i].hwdesc =
+ dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
+ &desc->node[i].hwdesc_phys);
+ if (!desc->node[i].hwdesc)
+ goto err;
}

desc->count = count;

return desc;
+
+err:
+ dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
+ while (--i >= 0)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
+ kfree(desc);
+ return NULL;
}

static void stm32_mdma_desc_free(struct virt_dma_desc *vdesc)
{
struct stm32_mdma_desc *desc = to_stm32_mdma_desc(vdesc);
struct stm32_mdma_chan *chan = to_stm32_mdma_chan(vdesc->tx.chan);
+ int i;

- dma_pool_free(chan->desc_pool, desc->hwdesc, desc->hwdesc_phys);
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
kfree(desc);
}

@@ -669,18 +686,18 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
}

static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan,
- struct stm32_mdma_hwdesc *hwdesc)
+ struct stm32_mdma_desc_node *node)
{
- dev_dbg(chan2dev(chan), "hwdesc: 0x%p\n", hwdesc);
- dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", hwdesc->ctcr);
- dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", hwdesc->cbndtr);
- dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", hwdesc->csar);
- dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", hwdesc->cdar);
- dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", hwdesc->cbrur);
- dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", hwdesc->clar);
- dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", hwdesc->ctbr);
- dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", hwdesc->cmar);
- dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", hwdesc->cmdr);
+ dev_dbg(chan2dev(chan), "hwdesc: %pad\n", &node->hwdesc_phys);
+ dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", node->hwdesc->ctcr);
+ dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", node->hwdesc->cbndtr);
+ dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", node->hwdesc->csar);
+ dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", node->hwdesc->cdar);
+ dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", node->hwdesc->cbrur);
+ dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", node->hwdesc->clar);
+ dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", node->hwdesc->ctbr);
+ dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", node->hwdesc->cmar);
+ dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", node->hwdesc->cmdr);
}

static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
@@ -694,7 +711,7 @@ static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
struct stm32_mdma_hwdesc *hwdesc;
u32 next = count + 1;

- hwdesc = &desc->hwdesc[count];
+ hwdesc = desc->node[count].hwdesc;
hwdesc->ctcr = ctcr;
hwdesc->cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK |
STM32_MDMA_CBNDTR_BRDUM |
@@ -704,19 +721,20 @@ static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
hwdesc->csar = src_addr;
hwdesc->cdar = dst_addr;
hwdesc->cbrur = 0;
- hwdesc->clar = desc->hwdesc_phys + next * sizeof(*hwdesc);
hwdesc->ctbr = ctbr;
hwdesc->cmar = config->mask_addr;
hwdesc->cmdr = config->mask_data;

if (is_last) {
if (is_cyclic)
- hwdesc->clar = desc->hwdesc_phys;
+ hwdesc->clar = desc->node[0].hwdesc_phys;
else
hwdesc->clar = 0;
+ } else {
+ hwdesc->clar = desc->node[next].hwdesc_phys;
}

- stm32_mdma_dump_hwdesc(chan, hwdesc);
+ stm32_mdma_dump_hwdesc(chan, &desc->node[count]);
}

static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
@@ -780,7 +798,7 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
{
struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
struct stm32_mdma_desc *desc;
- int ret;
+ int i, ret;

/*
* Once DMA is in setup cyclic mode the channel we cannot assign this
@@ -806,7 +824,9 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);

xfer_setup_err:
- dma_pool_free(chan->desc_pool, &desc->hwdesc, desc->hwdesc_phys);
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
kfree(desc);
return NULL;
}
@@ -895,7 +915,9 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);

xfer_setup_err:
- dma_pool_free(chan->desc_pool, &desc->hwdesc, desc->hwdesc_phys);
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
kfree(desc);
return NULL;
}
@@ -1009,7 +1031,7 @@ stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
ctcr |= STM32_MDMA_CTCR_PKE;

/* Prepare hardware descriptor */
- hwdesc = desc->hwdesc;
+ hwdesc = desc->node[0].hwdesc;
hwdesc->ctcr = ctcr;
hwdesc->cbndtr = cbndtr;
hwdesc->csar = src;
@@ -1020,7 +1042,7 @@ stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
hwdesc->cmar = 0;
hwdesc->cmdr = 0;

- stm32_mdma_dump_hwdesc(chan, hwdesc);
+ stm32_mdma_dump_hwdesc(chan, &desc->node[0]);
} else {
/* Setup a LLI transfer */
ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_LINKED_LIST) |
@@ -1120,7 +1142,7 @@ static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan)
}

chan->desc = to_stm32_mdma_desc(vdesc);
- hwdesc = chan->desc->hwdesc;
+ hwdesc = chan->desc->node[0].hwdesc;
chan->curr_hwdesc = 0;

stm32_mdma_write(dmadev, STM32_MDMA_CCR(id), chan->desc->ccr);
@@ -1198,7 +1220,7 @@ static int stm32_mdma_resume(struct dma_chan *c)
unsigned long flags;
u32 status, reg;

- hwdesc = &chan->desc->hwdesc[chan->curr_hwdesc];
+ hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc;

spin_lock_irqsave(&chan->vchan.lock, flags);

@@ -1268,13 +1290,13 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
u32 curr_hwdesc)
{
struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc;
u32 cbndtr, residue, modulo, burst_size;
int i;

residue = 0;
for (i = curr_hwdesc + 1; i < desc->count; i++) {
- struct stm32_mdma_hwdesc *hwdesc = &desc->hwdesc[i];
-
+ hwdesc = desc->node[i].hwdesc;
residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr);
}
cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
--
2.7.4


2018-04-11 15:21:05

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] dmaengine: stm32-mdma: align TLEN and buffer length on burst

On 11/04/18 15:44, Pierre-Yves MORDRET wrote:
> Both buffer Transfer Length (TLEN if any) and transfer size have to be
> aligned on burst size (burst beats*bus width).
>
> Signed-off-by: Pierre-Yves MORDRET <[email protected]>
> ---
> Version history:
> v1:
> * Initial
> v2:
> ---
> ---
> drivers/dma/stm32-mdma.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
> index daa1602..fbcffa2 100644
> --- a/drivers/dma/stm32-mdma.c
> +++ b/drivers/dma/stm32-mdma.c
> @@ -413,7 +413,7 @@ static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
> u32 best_burst = max_burst;
> u32 burst_len = best_burst * width;
>
> - while ((burst_len > 0) && (tlen % burst_len)) {
> + while ((burst_len > 0) && (((tlen | buf_len) & (burst_len - 1)) != 0)) {
> best_burst = best_burst >> 1;
> burst_len = best_burst * width;
> }

FWIW, doesn't that whole loop come down to just:

burst_len = min(ffs(tlen | buf_len), max_burst * width);

?

Robin.

2018-04-13 04:00:35

by Vinod Koul

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

On Wed, Apr 11, 2018 at 04:44:39PM +0200, Pierre-Yves MORDRET wrote:

> struct stm32_mdma_desc {
> struct virt_dma_desc vdesc;
> u32 ccr;
> - struct stm32_mdma_hwdesc *hwdesc;
> - dma_addr_t hwdesc_phys;
> bool cyclic;
> u32 count;
> + struct stm32_mdma_desc_node node[];

some ppl use node[0] for this but i think either is fine..

> static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan,
> - struct stm32_mdma_hwdesc *hwdesc)
> + struct stm32_mdma_desc_node *node)
> {
> - dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", hwdesc->ctcr);

> + dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", node->hwdesc->ctcr);

this is noise for this patch and IIUC you should be able to pass
node->hwdesc and keep fn same?

--
~Vinod

2018-04-13 09:57:53

by Pierre-Yves MORDRET

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] dmaengine: stm32-mdma: align TLEN and buffer length on burst

Hi Robin

On 04/11/2018 05:14 PM, Robin Murphy wrote:
> On 11/04/18 15:44, Pierre-Yves MORDRET wrote:
>> Both buffer Transfer Length (TLEN if any) and transfer size have to be
>> aligned on burst size (burst beats*bus width).
>>
>> Signed-off-by: Pierre-Yves MORDRET <[email protected]>
>> ---
>> Version history:
>> v1:
>> * Initial
>> v2:
>> ---
>> ---
>> drivers/dma/stm32-mdma.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
>> index daa1602..fbcffa2 100644
>> --- a/drivers/dma/stm32-mdma.c
>> +++ b/drivers/dma/stm32-mdma.c
>> @@ -413,7 +413,7 @@ static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
>> u32 best_burst = max_burst;
>> u32 burst_len = best_burst * width;
>>
>> - while ((burst_len > 0) && (tlen % burst_len)) {
>> + while ((burst_len > 0) && (((tlen | buf_len) & (burst_len - 1)) != 0)) {
>> best_burst = best_burst >> 1;
>> burst_len = best_burst * width;
>> }
>
> FWIW, doesn't that whole loop come down to just:
>
> burst_len = min(ffs(tlen | buf_len), max_burst * width);

No sure it ends as expected. or I miss something or don't understand this statement
I tried with "relevant value" : i.e. best_burst = 32, Tlen=128(default) and
buf_len = 64, width= 4. This statements gets me something wrong output => 7
instead of 16 * 4.
I doubt :)

>
> ?
>
> Robin.
>

2018-04-13 10:07:57

by Vinod Koul

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

On Fri, Apr 13, 2018 at 10:39:48AM +0200, Geert Uytterhoeven wrote:
> Hi Vinod,
>
> On Fri, Apr 13, 2018 at 6:02 AM, Vinod Koul <[email protected]> wrote:
> > On Wed, Apr 11, 2018 at 04:44:39PM +0200, Pierre-Yves MORDRET wrote:
> >
> >> struct stm32_mdma_desc {
> >> struct virt_dma_desc vdesc;
> >> u32 ccr;
> >> - struct stm32_mdma_hwdesc *hwdesc;
> >> - dma_addr_t hwdesc_phys;
> >> bool cyclic;
> >> u32 count;
> >> + struct stm32_mdma_desc_node node[];
> >
> > some ppl use node[0] for this but i think either is fine..
>
> node[] is the correct one, node[0] may hide future bugs, cfr. commit
> a158531f3c92467d ("gpio: 74x164: Fix crash during .remove()")

Yeah but it this case it is the last element. But yes it helps to avoid such
mistakes in future..

--
~Vinod

2018-04-13 10:23:38

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

Hi Vinod,

On Fri, Apr 13, 2018 at 6:02 AM, Vinod Koul <[email protected]> wrote:
> On Wed, Apr 11, 2018 at 04:44:39PM +0200, Pierre-Yves MORDRET wrote:
>
>> struct stm32_mdma_desc {
>> struct virt_dma_desc vdesc;
>> u32 ccr;
>> - struct stm32_mdma_hwdesc *hwdesc;
>> - dma_addr_t hwdesc_phys;
>> bool cyclic;
>> u32 count;
>> + struct stm32_mdma_desc_node node[];
>
> some ppl use node[0] for this but i think either is fine..

node[] is the correct one, node[0] may hide future bugs, cfr. commit
a158531f3c92467d ("gpio: 74x164: Fix crash during .remove()")

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2018-04-13 10:57:55

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

Hi Vinod,

On Fri, Apr 13, 2018 at 12:09 PM, Vinod Koul <[email protected]> wrote:
> On Fri, Apr 13, 2018 at 10:39:48AM +0200, Geert Uytterhoeven wrote:
>> On Fri, Apr 13, 2018 at 6:02 AM, Vinod Koul <[email protected]> wrote:
>> > On Wed, Apr 11, 2018 at 04:44:39PM +0200, Pierre-Yves MORDRET wrote:
>> >
>> >> struct stm32_mdma_desc {
>> >> struct virt_dma_desc vdesc;
>> >> u32 ccr;
>> >> - struct stm32_mdma_hwdesc *hwdesc;
>> >> - dma_addr_t hwdesc_phys;
>> >> bool cyclic;
>> >> u32 count;
>> >> + struct stm32_mdma_desc_node node[];
>> >
>> > some ppl use node[0] for this but i think either is fine..
>>
>> node[] is the correct one, node[0] may hide future bugs, cfr. commit
>> a158531f3c92467d ("gpio: 74x164: Fix crash during .remove()")
>
> Yeah but it this case it is the last element. But yes it helps to avoid such
> mistakes in future..

It was the last element in 74x164, too. Unless someone changed that.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2018-04-13 11:11:11

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] dmaengine: stm32-mdma: align TLEN and buffer length on burst

On 13/04/18 10:45, Pierre Yves MORDRET wrote:
> Hi Robin
>
> On 04/11/2018 05:14 PM, Robin Murphy wrote:
>> On 11/04/18 15:44, Pierre-Yves MORDRET wrote:
>>> Both buffer Transfer Length (TLEN if any) and transfer size have to be
>>> aligned on burst size (burst beats*bus width).
>>>
>>> Signed-off-by: Pierre-Yves MORDRET <[email protected]>
>>> ---
>>> Version history:
>>> v1:
>>> * Initial
>>> v2:
>>> ---
>>> ---
>>> drivers/dma/stm32-mdma.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
>>> index daa1602..fbcffa2 100644
>>> --- a/drivers/dma/stm32-mdma.c
>>> +++ b/drivers/dma/stm32-mdma.c
>>> @@ -413,7 +413,7 @@ static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
>>> u32 best_burst = max_burst;
>>> u32 burst_len = best_burst * width;
>>>
>>> - while ((burst_len > 0) && (tlen % burst_len)) {
>>> + while ((burst_len > 0) && (((tlen | buf_len) & (burst_len - 1)) != 0)) {
>>> best_burst = best_burst >> 1;
>>> burst_len = best_burst * width;
>>> }
>>
>> FWIW, doesn't that whole loop come down to just:
>>
>> burst_len = min(ffs(tlen | buf_len), max_burst * width);
>
> No sure it ends as expected. or I miss something or don't understand this statement
> I tried with "relevant value" : i.e. best_burst = 32, Tlen=128(default) and
> buf_len = 64, width= 4. This statements gets me something wrong output => 7
> instead of 16 * 4.
> I doubt :)

Heh, seems I confused myself halfway through and started thinking
max_burst and width were the exponents x rather than the values 2^x...

A more representative guess should be:

min(1 << __ffs(tlen | buf_len), max_burst * width);

but the general point I was trying to make is that a loop checking
whether the bottom n bits of something are zero for different values of
n is unnecessary when n can simply be calculated directly*.

Robin.


* in the case of this "just the lowest set bit" idiom there's also the
shift-free ((x & (x - 1)) ^ x), but as well as being unreadable it's
generally less efficient than (1 << __ffs(x)) for most modern ISAs.

2018-04-13 12:45:33

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] dmaengine: stm32-mdma: Fix incomplete Hw descriptors allocator

On 11/04/18 15:44, Pierre-Yves MORDRET wrote:
> Only 1 Hw Descriptor is allocated. Loop over required Hw descriptor for
> proper allocation.
>
> Signed-off-by: Pierre-Yves MORDRET <[email protected]>
> ---
> Version history:
> v1:
> * Initial
> v2:
> * Fix kbuild warning format: /0x%08x/%pad/
> ---
> ---
> drivers/dma/stm32-mdma.c | 90 ++++++++++++++++++++++++++++++------------------
> 1 file changed, 56 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
> index fbcffa2..3a477bc 100644
> --- a/drivers/dma/stm32-mdma.c
> +++ b/drivers/dma/stm32-mdma.c
> @@ -252,13 +252,17 @@ struct stm32_mdma_hwdesc {
> u32 cmdr;
> } __aligned(64);
>
> +struct stm32_mdma_desc_node {
> + struct stm32_mdma_hwdesc *hwdesc;
> + dma_addr_t hwdesc_phys;
> +};
> +
> struct stm32_mdma_desc {
> struct virt_dma_desc vdesc;
> u32 ccr;
> - struct stm32_mdma_hwdesc *hwdesc;
> - dma_addr_t hwdesc_phys;
> bool cyclic;
> u32 count;
> + struct stm32_mdma_desc_node node[];
> };
>
> struct stm32_mdma_chan {
> @@ -344,30 +348,43 @@ static struct stm32_mdma_desc *stm32_mdma_alloc_desc(
> struct stm32_mdma_chan *chan, u32 count)
> {
> struct stm32_mdma_desc *desc;
> + int i;
>
> - desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
> + desc = kzalloc(sizeof(*desc) +
> + sizeof(struct stm32_mdma_desc_node) * count, GFP_NOWAIT);

You could use "offsetof(typeof(*desc), node[count])" instead of the
explicit calculation here.

Robin.

> if (!desc)
> return NULL;
>
> - desc->hwdesc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
> - &desc->hwdesc_phys);
> - if (!desc->hwdesc) {
> - dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
> - kfree(desc);
> - return NULL;
> + for (i = 0; i < count; i++) {
> + desc->node[i].hwdesc =
> + dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
> + &desc->node[i].hwdesc_phys);
> + if (!desc->node[i].hwdesc)
> + goto err;
> }
>
> desc->count = count;
>
> return desc;
> +
> +err:
> + dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
> + while (--i >= 0)
> + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
> + desc->node[i].hwdesc_phys);
> + kfree(desc);
> + return NULL;
> }
>
> static void stm32_mdma_desc_free(struct virt_dma_desc *vdesc)
> {
> struct stm32_mdma_desc *desc = to_stm32_mdma_desc(vdesc);
> struct stm32_mdma_chan *chan = to_stm32_mdma_chan(vdesc->tx.chan);
> + int i;
>
> - dma_pool_free(chan->desc_pool, desc->hwdesc, desc->hwdesc_phys);
> + for (i = 0; i < desc->count; i++)
> + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
> + desc->node[i].hwdesc_phys);
> kfree(desc);
> }
>
> @@ -669,18 +686,18 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
> }
>
> static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan,
> - struct stm32_mdma_hwdesc *hwdesc)
> + struct stm32_mdma_desc_node *node)
> {
> - dev_dbg(chan2dev(chan), "hwdesc: 0x%p\n", hwdesc);
> - dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", hwdesc->ctcr);
> - dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", hwdesc->cbndtr);
> - dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", hwdesc->csar);
> - dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", hwdesc->cdar);
> - dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", hwdesc->cbrur);
> - dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", hwdesc->clar);
> - dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", hwdesc->ctbr);
> - dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", hwdesc->cmar);
> - dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", hwdesc->cmdr);
> + dev_dbg(chan2dev(chan), "hwdesc: %pad\n", &node->hwdesc_phys);
> + dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", node->hwdesc->ctcr);
> + dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", node->hwdesc->cbndtr);
> + dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", node->hwdesc->csar);
> + dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", node->hwdesc->cdar);
> + dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", node->hwdesc->cbrur);
> + dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", node->hwdesc->clar);
> + dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", node->hwdesc->ctbr);
> + dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", node->hwdesc->cmar);
> + dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", node->hwdesc->cmdr);
> }
>
> static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
> @@ -694,7 +711,7 @@ static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
> struct stm32_mdma_hwdesc *hwdesc;
> u32 next = count + 1;
>
> - hwdesc = &desc->hwdesc[count];
> + hwdesc = desc->node[count].hwdesc;
> hwdesc->ctcr = ctcr;
> hwdesc->cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK |
> STM32_MDMA_CBNDTR_BRDUM |
> @@ -704,19 +721,20 @@ static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
> hwdesc->csar = src_addr;
> hwdesc->cdar = dst_addr;
> hwdesc->cbrur = 0;
> - hwdesc->clar = desc->hwdesc_phys + next * sizeof(*hwdesc);
> hwdesc->ctbr = ctbr;
> hwdesc->cmar = config->mask_addr;
> hwdesc->cmdr = config->mask_data;
>
> if (is_last) {
> if (is_cyclic)
> - hwdesc->clar = desc->hwdesc_phys;
> + hwdesc->clar = desc->node[0].hwdesc_phys;
> else
> hwdesc->clar = 0;
> + } else {
> + hwdesc->clar = desc->node[next].hwdesc_phys;
> }
>
> - stm32_mdma_dump_hwdesc(chan, hwdesc);
> + stm32_mdma_dump_hwdesc(chan, &desc->node[count]);
> }
>
> static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
> @@ -780,7 +798,7 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
> {
> struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
> struct stm32_mdma_desc *desc;
> - int ret;
> + int i, ret;
>
> /*
> * Once DMA is in setup cyclic mode the channel we cannot assign this
> @@ -806,7 +824,9 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
> return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
>
> xfer_setup_err:
> - dma_pool_free(chan->desc_pool, &desc->hwdesc, desc->hwdesc_phys);
> + for (i = 0; i < desc->count; i++)
> + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
> + desc->node[i].hwdesc_phys);
> kfree(desc);
> return NULL;
> }
> @@ -895,7 +915,9 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
> return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
>
> xfer_setup_err:
> - dma_pool_free(chan->desc_pool, &desc->hwdesc, desc->hwdesc_phys);
> + for (i = 0; i < desc->count; i++)
> + dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
> + desc->node[i].hwdesc_phys);
> kfree(desc);
> return NULL;
> }
> @@ -1009,7 +1031,7 @@ stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
> ctcr |= STM32_MDMA_CTCR_PKE;
>
> /* Prepare hardware descriptor */
> - hwdesc = desc->hwdesc;
> + hwdesc = desc->node[0].hwdesc;
> hwdesc->ctcr = ctcr;
> hwdesc->cbndtr = cbndtr;
> hwdesc->csar = src;
> @@ -1020,7 +1042,7 @@ stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
> hwdesc->cmar = 0;
> hwdesc->cmdr = 0;
>
> - stm32_mdma_dump_hwdesc(chan, hwdesc);
> + stm32_mdma_dump_hwdesc(chan, &desc->node[0]);
> } else {
> /* Setup a LLI transfer */
> ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_LINKED_LIST) |
> @@ -1120,7 +1142,7 @@ static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan)
> }
>
> chan->desc = to_stm32_mdma_desc(vdesc);
> - hwdesc = chan->desc->hwdesc;
> + hwdesc = chan->desc->node[0].hwdesc;
> chan->curr_hwdesc = 0;
>
> stm32_mdma_write(dmadev, STM32_MDMA_CCR(id), chan->desc->ccr);
> @@ -1198,7 +1220,7 @@ static int stm32_mdma_resume(struct dma_chan *c)
> unsigned long flags;
> u32 status, reg;
>
> - hwdesc = &chan->desc->hwdesc[chan->curr_hwdesc];
> + hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc;
>
> spin_lock_irqsave(&chan->vchan.lock, flags);
>
> @@ -1268,13 +1290,13 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
> u32 curr_hwdesc)
> {
> struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
> + struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc;
> u32 cbndtr, residue, modulo, burst_size;
> int i;
>
> residue = 0;
> for (i = curr_hwdesc + 1; i < desc->count; i++) {
> - struct stm32_mdma_hwdesc *hwdesc = &desc->hwdesc[i];
> -
> + hwdesc = desc->node[i].hwdesc;
> residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr);
> }
> cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
>

2018-04-13 12:48:19

by Pierre-Yves MORDRET

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] dmaengine: stm32-mdma: align TLEN and buffer length on burst



On 04/13/2018 01:09 PM, Robin Murphy wrote:
> On 13/04/18 10:45, Pierre Yves MORDRET wrote:
>> Hi Robin
>>
>> On 04/11/2018 05:14 PM, Robin Murphy wrote:
>>> On 11/04/18 15:44, Pierre-Yves MORDRET wrote:
>>>> Both buffer Transfer Length (TLEN if any) and transfer size have to be
>>>> aligned on burst size (burst beats*bus width).
>>>>
>>>> Signed-off-by: Pierre-Yves MORDRET <[email protected]>
>>>> ---
>>>> Version history:
>>>> v1:
>>>> * Initial
>>>> v2:
>>>> ---
>>>> ---
>>>> drivers/dma/stm32-mdma.c | 2 +-
>>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
>>>> index daa1602..fbcffa2 100644
>>>> --- a/drivers/dma/stm32-mdma.c
>>>> +++ b/drivers/dma/stm32-mdma.c
>>>> @@ -413,7 +413,7 @@ static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
>>>> u32 best_burst = max_burst;
>>>> u32 burst_len = best_burst * width;
>>>>
>>>> - while ((burst_len > 0) && (tlen % burst_len)) {
>>>> + while ((burst_len > 0) && (((tlen | buf_len) & (burst_len - 1)) != 0)) {
>>>> best_burst = best_burst >> 1;
>>>> burst_len = best_burst * width;
>>>> }
>>>
>>> FWIW, doesn't that whole loop come down to just:
>>>
>>> burst_len = min(ffs(tlen | buf_len), max_burst * width);
>>
>> No sure it ends as expected. or I miss something or don't understand this statement
>> I tried with "relevant value" : i.e. best_burst = 32, Tlen=128(default) and
>> buf_len = 64, width= 4. This statements gets me something wrong output => 7
>> instead of 16 * 4.
>> I doubt :)
>
> Heh, seems I confused myself halfway through and started thinking
> max_burst and width were the exponents x rather than the values 2^x...
>
> A more representative guess should be:
>
> min(1 << __ffs(tlen | buf_len), max_burst * width);
>
> but the general point I was trying to make is that a loop checking
> whether the bottom n bits of something are zero for different values of
> n is unnecessary when n can simply be calculated directly*.
>
> Robin.

Got the point. I figure how to compute this value with __ffs. Your last
statement doesn't provide the good value, but the spirit is here. I just have to
adjust with what I want.

Thx

>
>
> * in the case of this "just the lowest set bit" idiom there's also the
> shift-free ((x & (x - 1)) ^ x), but as well as being unreadable it's
> generally less efficient than (1 << __ffs(x)) for most modern ISAs.
>