2023-05-12 14:30:36

by Joy Chakraborty

[permalink] [raw]
Subject: [PATCH v2 0/6] dmaengine: pl330: Updates and logical changes for peripheral usecases

This patch series makes some initial minor and cosmetic changes:
-Add variables and logic to handle separate source and destination
AxSize and AxLen.
-Use __ffs to calculate AxSize for consistency in the driver
-Use switch-case in prep_slave_sg() for consistency
-Change the arguments of get_burst_len(): remove the redundant "len" and
add burst_size so that the function can be used in multiple places.

These changes mainly prepare for the 2 logical changes in the last 2 patches:
-Allow transactions towards memory to use the maximum possible
bus width (AxSize) during a memory-to-peripheral DMA transfer or
vice versa.
-Add logic to copy the data left over after executing bursts to
the peripheral in singles instead of bursts.
---
V1->V2 changes: Remove the quirk logic and update the description texts.
---

Joy Chakraborty (6):
dmaengine: pl330: Separate SRC and DST burst size and len
dmaengine: pl330: Use FFS to calculate burst size
dmaengine: pl330: Change if-else to switch-case for consistency
dmaengine: pl330: Change unused arg "len" from get_burst_len()
dmaengine: pl330: Optimize AxSize for peripheral usecases
dmaengine: pl330: Use dma singles for peripheral _dregs

drivers/dma/pl330.c | 223 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 185 insertions(+), 38 deletions(-)

--
2.40.1.606.ga4b1b128d6-goog



2023-05-12 14:31:50

by Joy Chakraborty

[permalink] [raw]
Subject: [PATCH v2 1/6] dmaengine: pl330: Separate SRC and DST burst size and len

Add new variables in request configuration to handle source and
destination AxSize and AxLen separately and allow them to have different
values.

This allows further patches to configure different AxSize and AxLen for
optimum bus utilisation.

Signed-off-by: Joy Chakraborty <[email protected]>
---
drivers/dma/pl330.c | 71 +++++++++++++++++++++++++++++----------------
1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0d9257fbdfb0..c006e481b4c5 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -240,6 +240,12 @@ enum pl330_byteswap {
#define BYTE_TO_BURST(b, ccr) ((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
#define BURST_TO_BYTE(c, ccr) ((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))

+#define SRC_BRST_SIZE(ccr) BRST_SIZE(ccr)
+#define DST_BRST_SIZE(ccr) (1 << (((ccr) >> CC_DSTBRSTSIZE_SHFT) & 0x7))
+
+#define SRC_BRST_LEN(ccr) BRST_LEN(ccr)
+#define DST_BRST_LEN(ccr) ((((ccr) >> CC_DSTBRSTLEN_SHFT) & 0xf) + 1)
+
/*
* With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
* at 1byte/burst for P<->M and M<->M respectively.
@@ -305,8 +311,10 @@ struct pl330_reqcfg {
bool nonsecure;
bool privileged;
bool insnaccess;
- unsigned brst_len:5;
- unsigned brst_size:3; /* in power of 2 */
+ unsigned src_brst_size : 3; /* in power of 2 */
+ unsigned src_brst_len:5;
+ unsigned dst_brst_size : 3; /* in power of 2 */
+ unsigned dst_brst_len:5;

enum pl330_cachectrl dcctl;
enum pl330_cachectrl scctl;
@@ -1204,7 +1212,10 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
const struct _xfer_spec *pxs, int cyc)
{
int off = 0;
- enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE;
+ enum pl330_cond cond = SINGLE;
+
+ if (SRC_BRST_LEN(pxs->ccr) > 1 || DST_BRST_LEN(pxs->ccr) > 1)
+ cond = BURST;

if (pl330->quirks & PL330_QUIRK_PERIPH_BURST)
cond = BURST;
@@ -1235,12 +1246,12 @@ static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
* for mem-to-mem, mem-to-dev or dev-to-mem.
*/
static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
- const struct _xfer_spec *pxs, int transfer_length)
+ const struct _xfer_spec *pxs, int src_length, int dst_length)
{
int off = 0;
int dregs_ccr;

- if (transfer_length == 0)
+ if (src_length == 0 || dst_length == 0)
return off;

/*
@@ -1253,9 +1264,9 @@ static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
dregs_ccr = pxs->ccr;
dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
(0xf << CC_DSTBRSTLEN_SHFT));
- dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+ dregs_ccr |= (((src_length - 1) & 0xf) <<
CC_SRCBRSTLEN_SHFT);
- dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+ dregs_ccr |= (((dst_length - 1) & 0xf) <<
CC_DSTBRSTLEN_SHFT);

switch (pxs->desc->rqtype) {
@@ -1369,16 +1380,18 @@ static inline int _setup_loops(struct pl330_dmac *pl330,
struct pl330_xfer *x = &pxs->desc->px;
u32 ccr = pxs->ccr;
unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
- int num_dregs = (x->bytes - BURST_TO_BYTE(bursts, ccr)) /
- BRST_SIZE(ccr);
- int off = 0;
+ int num_dreg_bytes = x->bytes - BURST_TO_BYTE(bursts, ccr);
+ int num_src_dregs, num_dst_dregs, off = 0;
+
+ num_src_dregs = num_dreg_bytes / SRC_BRST_SIZE(ccr);
+ num_dst_dregs = num_dreg_bytes / DST_BRST_SIZE(ccr);

while (bursts) {
c = bursts;
off += _loop(pl330, dry_run, &buf[off], &c, pxs);
bursts -= c;
}
- off += _dregs(pl330, dry_run, &buf[off], pxs, num_dregs);
+ off += _dregs(pl330, dry_run, &buf[off], pxs, num_src_dregs, num_dst_dregs);

return off;
}
@@ -1446,11 +1459,11 @@ static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
if (rqc->insnaccess)
ccr |= CC_SRCIA | CC_DSTIA;

- ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
- ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+ ccr |= (((rqc->src_brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+ ccr |= (((rqc->dst_brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);

- ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
- ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+ ccr |= (rqc->src_brst_size << CC_SRCBRSTSIZE_SHFT);
+ ccr |= (rqc->dst_brst_size << CC_DSTBRSTSIZE_SHFT);

ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
@@ -2656,7 +2669,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)

burst_len = pl330->pcfg.data_bus_width / 8;
burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan;
- burst_len >>= desc->rqcfg.brst_size;
+ burst_len >>= desc->rqcfg.src_brst_size;

/* src/dst_burst_len can't be more than 16 */
if (burst_len > PL330_MAX_BURST)
@@ -2735,8 +2748,10 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
}

desc->rqtype = direction;
- desc->rqcfg.brst_size = pch->burst_sz;
- desc->rqcfg.brst_len = pch->burst_len;
+ desc->rqcfg.src_brst_size = pch->burst_sz;
+ desc->rqcfg.src_brst_len = pch->burst_len;
+ desc->rqcfg.dst_brst_size = pch->burst_sz;
+ desc->rqcfg.dst_brst_len = pch->burst_len;
desc->bytes_requested = period_len;
fill_px(&desc->px, dst, src, period_len);

@@ -2789,17 +2804,21 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
while ((src | dst | len) & (burst - 1))
burst /= 2;

- desc->rqcfg.brst_size = 0;
- while (burst != (1 << desc->rqcfg.brst_size))
- desc->rqcfg.brst_size++;
+ desc->rqcfg.src_brst_size = 0;
+ while (burst != (1 << desc->rqcfg.src_brst_size))
+ desc->rqcfg.src_brst_size++;

- desc->rqcfg.brst_len = get_burst_len(desc, len);
+ desc->rqcfg.src_brst_len = get_burst_len(desc, len);
/*
* If burst size is smaller than bus width then make sure we only
* transfer one at a time to avoid a burst stradling an MFIFO entry.
*/
if (burst * 8 < pl330->pcfg.data_bus_width)
- desc->rqcfg.brst_len = 1;
+ desc->rqcfg.src_brst_len = 1;
+
+ /* For Mem2Mem, set destination AxSize and AxLen the same as source */
+ desc->rqcfg.dst_brst_len = desc->rqcfg.src_brst_len;
+ desc->rqcfg.dst_brst_size = desc->rqcfg.src_brst_size;

desc->bytes_requested = len;

@@ -2879,8 +2898,10 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
sg_dma_len(sg));
}

- desc->rqcfg.brst_size = pch->burst_sz;
- desc->rqcfg.brst_len = pch->burst_len;
+ desc->rqcfg.src_brst_size = pch->burst_sz;
+ desc->rqcfg.src_brst_len = pch->burst_len;
+ desc->rqcfg.dst_brst_size = pch->burst_sz;
+ desc->rqcfg.dst_brst_len = pch->burst_len;
desc->rqtype = direction;
desc->bytes_requested = sg_dma_len(sg);
}
--
2.40.1.606.ga4b1b128d6-goog