2023-12-04 14:04:16

by Paul Cercueil

Subject: [PATCH 0/4] axi-dmac: Add support for scatter-gather transfers

Hi Vinod,

This patchset updates the dma-axi-dmac driver and introduces the
ability to use scatter-gather transfers, which are now supported by the
IP core.

When using an older version of the core, the driver will simply fall
back to using standard transfers.

The patchset was generated on top of today's linux-next (629a3b49f3f9).

Cheers,
-Paul

Paul Cercueil (4):
dmaengine: axi-dmac: Small code cleanup
dmaengine: axi-dmac: Allocate hardware descriptors
dmaengine: axi-dmac: Add support for scatter-gather transfers
dmaengine: axi-dmac: Use only EOT interrupts when doing scatter-gather

drivers/dma/dma-axi-dmac.c | 261 +++++++++++++++++++++++++------------
1 file changed, 178 insertions(+), 83 deletions(-)

--
2.42.0


2023-12-04 14:04:31

by Paul Cercueil

Subject: [PATCH 1/4] dmaengine: axi-dmac: Small code cleanup

Use a for() loop instead of a while() loop in axi_dmac_fill_linear_sg().

Signed-off-by: Paul Cercueil <[email protected]>
---
drivers/dma/dma-axi-dmac.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 2457a420c13d..760940b21eab 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -508,16 +508,13 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
segment_size = ((segment_size - 1) | chan->length_align_mask) + 1;

for (i = 0; i < num_periods; i++) {
- len = period_len;
-
- while (len > segment_size) {
+ for (len = period_len; len > segment_size; sg++) {
if (direction == DMA_DEV_TO_MEM)
sg->dest_addr = addr;
else
sg->src_addr = addr;
sg->x_len = segment_size;
sg->y_len = 1;
- sg++;
addr += segment_size;
len -= segment_size;
}
--
2.42.0

2023-12-04 14:04:53

by Paul Cercueil

Subject: [PATCH 2/4] dmaengine: axi-dmac: Allocate hardware descriptors

Change where and how the DMA transfer metadata is stored, to prepare
for the upcoming introduction of scatter-gather support.

Allocate hardware descriptors in the format that the HDL core expects
when the scatter-gather feature is enabled, and use their fields to
store the data that was previously kept in the axi_dmac_sg structure.

Note that the 'x_len' and 'y_len' fields now contain the transfer length
minus one, since that's what the hardware will expect in these fields.
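
To illustrate the encoding (a sketch based on the diff below; 'bytes'
is just an illustrative name):

    sg->hw->x_len = len - 1;    /* hardware expects length minus one */
    sg->hw->y_len = 0;          /* rows minus one: a single row */

    /* for a 2D transfer, the total byte count is recovered as: */
    bytes = (sg->hw->x_len + 1) * (sg->hw->y_len + 1);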

Signed-off-by: Paul Cercueil <[email protected]>
---
drivers/dma/dma-axi-dmac.c | 134 ++++++++++++++++++++++++-------------
1 file changed, 88 insertions(+), 46 deletions(-)

diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 760940b21eab..185230a769b9 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -97,20 +97,31 @@
/* The maximum ID allocated by the hardware is 31 */
#define AXI_DMAC_SG_UNUSED 32U

+struct axi_dmac_hw_desc {
+ u32 flags;
+ u32 id;
+ u64 dest_addr;
+ u64 src_addr;
+ u64 __unused;
+ u32 y_len;
+ u32 x_len;
+ u32 src_stride;
+ u32 dst_stride;
+ u64 __pad[2];
+};
+
struct axi_dmac_sg {
- dma_addr_t src_addr;
- dma_addr_t dest_addr;
- unsigned int x_len;
- unsigned int y_len;
- unsigned int dest_stride;
- unsigned int src_stride;
- unsigned int id;
unsigned int partial_len;
bool schedule_when_free;
+
+ struct axi_dmac_hw_desc *hw;
+ dma_addr_t hw_phys;
};

struct axi_dmac_desc {
struct virt_dma_desc vdesc;
+ struct axi_dmac_chan *chan;
+
bool cyclic;
bool have_partial_xfer;

@@ -229,7 +240,7 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
sg = &desc->sg[desc->num_submitted];

/* Already queued in cyclic mode. Wait for it to finish */
- if (sg->id != AXI_DMAC_SG_UNUSED) {
+ if (sg->hw->id != AXI_DMAC_SG_UNUSED) {
sg->schedule_when_free = true;
return;
}
@@ -246,16 +257,16 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
chan->next_desc = desc;
}

- sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
+ sg->hw->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);

if (axi_dmac_dest_is_mem(chan)) {
- axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr);
- axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride);
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride);
}

if (axi_dmac_src_is_mem(chan)) {
- axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr);
- axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride);
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride);
}

/*
@@ -270,8 +281,8 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
if (chan->hw_partial_xfer)
flags |= AXI_DMAC_FLAG_PARTIAL_REPORT;

- axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1);
- axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1);
+ axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len);
+ axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len);
axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags);
axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1);
}
@@ -286,9 +297,9 @@ static inline unsigned int axi_dmac_total_sg_bytes(struct axi_dmac_chan *chan,
struct axi_dmac_sg *sg)
{
if (chan->hw_2d)
- return sg->x_len * sg->y_len;
+ return (sg->hw->x_len + 1) * (sg->hw->y_len + 1);
else
- return sg->x_len;
+ return (sg->hw->x_len + 1);
}

static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan)
@@ -307,9 +318,9 @@ static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan)
list_for_each_entry(desc, &chan->active_descs, vdesc.node) {
for (i = 0; i < desc->num_sgs; i++) {
sg = &desc->sg[i];
- if (sg->id == AXI_DMAC_SG_UNUSED)
+ if (sg->hw->id == AXI_DMAC_SG_UNUSED)
continue;
- if (sg->id == id) {
+ if (sg->hw->id == id) {
desc->have_partial_xfer = true;
sg->partial_len = len;
found_sg = true;
@@ -376,12 +387,12 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,

do {
sg = &active->sg[active->num_completed];
- if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
+ if (sg->hw->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
break;
- if (!(BIT(sg->id) & completed_transfers))
+ if (!(BIT(sg->hw->id) & completed_transfers))
break;
active->num_completed++;
- sg->id = AXI_DMAC_SG_UNUSED;
+ sg->hw->id = AXI_DMAC_SG_UNUSED;
if (sg->schedule_when_free) {
sg->schedule_when_free = false;
start_next = true;
@@ -476,22 +487,52 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
spin_unlock_irqrestore(&chan->vchan.lock, flags);
}

-static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs)
+static struct axi_dmac_desc *
+axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
{
+ struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+ struct device *dev = dmac->dma_dev.dev;
+ struct axi_dmac_hw_desc *hws;
struct axi_dmac_desc *desc;
+ dma_addr_t hw_phys;
unsigned int i;

desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT);
if (!desc)
return NULL;
desc->num_sgs = num_sgs;
+ desc->chan = chan;

- for (i = 0; i < num_sgs; i++)
- desc->sg[i].id = AXI_DMAC_SG_UNUSED;
+ hws = dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)),
+ &hw_phys, GFP_ATOMIC);
+ if (!hws) {
+ kfree(desc);
+ return NULL;
+ }
+
+ for (i = 0; i < num_sgs; i++) {
+ desc->sg[i].hw = &hws[i];
+ desc->sg[i].hw_phys = hw_phys + i * sizeof(*hws);
+
+ hws[i].id = AXI_DMAC_SG_UNUSED;
+ hws[i].flags = 0;
+ }

return desc;
}

+static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
+{
+ struct axi_dmac *dmac = chan_to_axi_dmac(desc->chan);
+ struct device *dev = dmac->dma_dev.dev;
+ struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
+ dma_addr_t hw_phys = desc->sg[0].hw_phys;
+
+ dma_free_coherent(dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
+ hw, hw_phys);
+ kfree(desc);
+}
+
static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
enum dma_transfer_direction direction, dma_addr_t addr,
unsigned int num_periods, unsigned int period_len,
@@ -510,21 +551,22 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
for (i = 0; i < num_periods; i++) {
for (len = period_len; len > segment_size; sg++) {
if (direction == DMA_DEV_TO_MEM)
- sg->dest_addr = addr;
+ sg->hw->dest_addr = addr;
else
- sg->src_addr = addr;
- sg->x_len = segment_size;
- sg->y_len = 1;
+ sg->hw->src_addr = addr;
+ sg->hw->x_len = segment_size - 1;
+ sg->hw->y_len = 0;
+ sg->hw->flags = 0;
addr += segment_size;
len -= segment_size;
}

if (direction == DMA_DEV_TO_MEM)
- sg->dest_addr = addr;
+ sg->hw->dest_addr = addr;
else
- sg->src_addr = addr;
- sg->x_len = len;
- sg->y_len = 1;
+ sg->hw->src_addr = addr;
+ sg->hw->x_len = len - 1;
+ sg->hw->y_len = 0;
sg++;
addr += len;
}
@@ -551,7 +593,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
for_each_sg(sgl, sg, sg_len, i)
num_sgs += DIV_ROUND_UP(sg_dma_len(sg), chan->max_length);

- desc = axi_dmac_alloc_desc(num_sgs);
+ desc = axi_dmac_alloc_desc(chan, num_sgs);
if (!desc)
return NULL;

@@ -560,7 +602,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
for_each_sg(sgl, sg, sg_len, i) {
if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) ||
!axi_dmac_check_len(chan, sg_dma_len(sg))) {
- kfree(desc);
+ axi_dmac_free_desc(desc);
return NULL;
}

@@ -595,7 +637,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic(
num_periods = buf_len / period_len;
num_segments = DIV_ROUND_UP(period_len, chan->max_length);

- desc = axi_dmac_alloc_desc(num_periods * num_segments);
+ desc = axi_dmac_alloc_desc(chan, num_periods * num_segments);
if (!desc)
return NULL;

@@ -650,26 +692,26 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved(
return NULL;
}

- desc = axi_dmac_alloc_desc(1);
+ desc = axi_dmac_alloc_desc(chan, 1);
if (!desc)
return NULL;

if (axi_dmac_src_is_mem(chan)) {
- desc->sg[0].src_addr = xt->src_start;
- desc->sg[0].src_stride = xt->sgl[0].size + src_icg;
+ desc->sg[0].hw->src_addr = xt->src_start;
+ desc->sg[0].hw->src_stride = xt->sgl[0].size + src_icg;
}

if (axi_dmac_dest_is_mem(chan)) {
- desc->sg[0].dest_addr = xt->dst_start;
- desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg;
+ desc->sg[0].hw->dest_addr = xt->dst_start;
+ desc->sg[0].hw->dst_stride = xt->sgl[0].size + dst_icg;
}

if (chan->hw_2d) {
- desc->sg[0].x_len = xt->sgl[0].size;
- desc->sg[0].y_len = xt->numf;
+ desc->sg[0].hw->x_len = xt->sgl[0].size - 1;
+ desc->sg[0].hw->y_len = xt->numf - 1;
} else {
- desc->sg[0].x_len = xt->sgl[0].size * xt->numf;
- desc->sg[0].y_len = 1;
+ desc->sg[0].hw->x_len = xt->sgl[0].size * xt->numf - 1;
+ desc->sg[0].hw->y_len = 0;
}

if (flags & DMA_CYCLIC)
@@ -685,7 +727,7 @@ static void axi_dmac_free_chan_resources(struct dma_chan *c)

static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
{
- kfree(container_of(vdesc, struct axi_dmac_desc, vdesc));
+ axi_dmac_free_desc(to_axi_dmac_desc(vdesc));
}

static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
--
2.42.0

2023-12-04 14:04:55

by Paul Cercueil

Subject: [PATCH 4/4] dmaengine: axi-dmac: Use only EOT interrupts when doing scatter-gather

Instead of notifying userspace in the end-of-transfer (EOT) interrupt
and programming the hardware in the start-of-transfer (SOT) interrupt,
do both in the EOT interrupt. This allows the SOT interrupt to be
masked, halving the number of interrupts sent by the HDL core.
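
Concretely, when scatter-gather is in use, the SOT bit is set in the
interrupt mask register at probe time, leaving only EOT enabled
(excerpt from the diff below):

    if (dmac->chan.hw_sg)
        irq_mask |= AXI_DMAC_IRQ_SOT;

    axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask);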

Signed-off-by: Paul Cercueil <[email protected]>
---
drivers/dma/dma-axi-dmac.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 5109530b66de..beed91a8238c 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -415,6 +415,7 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
list_del(&active->vdesc.node);
vchan_cookie_complete(&active->vdesc);
active = axi_dmac_active_desc(chan);
+ start_next = !!active;
}
} else {
do {
@@ -1000,6 +1001,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
struct axi_dmac *dmac;
struct regmap *regmap;
unsigned int version;
+ u32 irq_mask = 0;
int ret;

dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
@@ -1067,7 +1069,10 @@ static int axi_dmac_probe(struct platform_device *pdev)

dma_dev->copy_align = (dmac->chan.address_align_mask + 1);

- axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
+ if (dmac->chan.hw_sg)
+ irq_mask |= AXI_DMAC_IRQ_SOT;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask);

if (of_dma_is_coherent(pdev->dev.of_node)) {
ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC);
--
2.42.0

2023-12-04 14:05:04

by Paul Cercueil

Subject: [PATCH 3/4] dmaengine: axi-dmac: Add support for scatter-gather transfers

Implement support for scatter-gather transfers. Build a chain of
hardware descriptors, each corresponding to one segment of the
transfer and linked to the next one. The hardware walks the chain and
only fires an interrupt once the whole chain has been transferred.
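
In code, the chain is built when the descriptor is allocated (excerpt
from the diff below):

    /* link each hardware descriptor to the next one in the array */
    hws[i].next_sg_addr = hw_phys + (i + 1) * sizeof(*hws);

    /* only the last descriptor ends the chain and raises an interrupt */
    desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ;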

Support for scatter-gather is enabled automatically when the driver
detects that the hardware supports it, by writing to and then reading
back the AXI_DMAC_REG_SG_ADDRESS register. If the feature is not
available, the driver falls back to standard DMA transfers.
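
The detection boils down to a write-then-read probe of that register
(excerpt from the diff below):

    axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, 0xffffffff);
    if (axi_dmac_read(dmac, AXI_DMAC_REG_SG_ADDRESS))
        chan->hw_sg = true;    /* register is present: core supports SG */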

Signed-off-by: Paul Cercueil <[email protected]>
---
drivers/dma/dma-axi-dmac.c | 135 +++++++++++++++++++++++++------------
1 file changed, 93 insertions(+), 42 deletions(-)

diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 185230a769b9..5109530b66de 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -81,9 +81,13 @@
#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438
#define AXI_DMAC_REG_PARTIAL_XFER_LEN 0x44c
#define AXI_DMAC_REG_PARTIAL_XFER_ID 0x450
+#define AXI_DMAC_REG_CURRENT_SG_ID 0x454
+#define AXI_DMAC_REG_SG_ADDRESS 0x47c
+#define AXI_DMAC_REG_SG_ADDRESS_HIGH 0x4bc

#define AXI_DMAC_CTRL_ENABLE BIT(0)
#define AXI_DMAC_CTRL_PAUSE BIT(1)
+#define AXI_DMAC_CTRL_ENABLE_SG BIT(2)

#define AXI_DMAC_IRQ_SOT BIT(0)
#define AXI_DMAC_IRQ_EOT BIT(1)
@@ -97,12 +101,16 @@
/* The maximum ID allocated by the hardware is 31 */
#define AXI_DMAC_SG_UNUSED 32U

+/* Flags for axi_dmac_hw_desc.flags */
+#define AXI_DMAC_HW_FLAG_LAST BIT(0)
+#define AXI_DMAC_HW_FLAG_IRQ BIT(1)
+
struct axi_dmac_hw_desc {
u32 flags;
u32 id;
u64 dest_addr;
u64 src_addr;
- u64 __unused;
+ u64 next_sg_addr;
u32 y_len;
u32 x_len;
u32 src_stride;
@@ -150,6 +158,7 @@ struct axi_dmac_chan {
bool hw_partial_xfer;
bool hw_cyclic;
bool hw_2d;
+ bool hw_sg;
};

struct axi_dmac {
@@ -224,9 +233,11 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
unsigned int flags = 0;
unsigned int val;

- val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
- if (val) /* Queue is full, wait for the next SOT IRQ */
- return;
+ if (!chan->hw_sg) {
+ val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
+ if (val) /* Queue is full, wait for the next SOT IRQ */
+ return;
+ }

desc = chan->next_desc;

@@ -245,9 +256,10 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
return;
}

- desc->num_submitted++;
- if (desc->num_submitted == desc->num_sgs ||
- desc->have_partial_xfer) {
+ if (chan->hw_sg) {
+ chan->next_desc = NULL;
+ } else if (++desc->num_submitted == desc->num_sgs ||
+ desc->have_partial_xfer) {
if (desc->cyclic)
desc->num_submitted = 0; /* Start again */
else
@@ -259,14 +271,16 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)

sg->hw->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);

- if (axi_dmac_dest_is_mem(chan)) {
- axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr);
- axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride);
- }
+ if (!chan->hw_sg) {
+ if (axi_dmac_dest_is_mem(chan)) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride);
+ }

- if (axi_dmac_src_is_mem(chan)) {
- axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr);
- axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride);
+ if (axi_dmac_src_is_mem(chan)) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride);
+ }
}

/*
@@ -281,8 +295,14 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
if (chan->hw_partial_xfer)
flags |= AXI_DMAC_FLAG_PARTIAL_REPORT;

- axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len);
- axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len);
+ if (chan->hw_sg) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, (u32)sg->hw_phys);
+ axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS_HIGH,
+ (u64)sg->hw_phys >> 32);
+ } else {
+ axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len);
+ axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len);
+ }
axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags);
axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1);
}
@@ -359,6 +379,9 @@ static void axi_dmac_compute_residue(struct axi_dmac_chan *chan,
rslt->result = DMA_TRANS_NOERROR;
rslt->residue = 0;

+ if (chan->hw_sg)
+ return;
+
/*
* We get here if the last completed segment is partial, which
* means we can compute the residue from that segment onwards
@@ -385,36 +408,46 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
(completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE))
axi_dmac_dequeue_partial_xfers(chan);

- do {
- sg = &active->sg[active->num_completed];
- if (sg->hw->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
- break;
- if (!(BIT(sg->hw->id) & completed_transfers))
- break;
- active->num_completed++;
- sg->hw->id = AXI_DMAC_SG_UNUSED;
- if (sg->schedule_when_free) {
- sg->schedule_when_free = false;
- start_next = true;
+ if (chan->hw_sg) {
+ if (active->cyclic) {
+ vchan_cyclic_callback(&active->vdesc);
+ } else {
+ list_del(&active->vdesc.node);
+ vchan_cookie_complete(&active->vdesc);
+ active = axi_dmac_active_desc(chan);
}
+ } else {
+ do {
+ sg = &active->sg[active->num_completed];
+ if (sg->hw->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
+ break;
+ if (!(BIT(sg->hw->id) & completed_transfers))
+ break;
+ active->num_completed++;
+ sg->hw->id = AXI_DMAC_SG_UNUSED;
+ if (sg->schedule_when_free) {
+ sg->schedule_when_free = false;
+ start_next = true;
+ }

- if (sg->partial_len)
- axi_dmac_compute_residue(chan, active);
+ if (sg->partial_len)
+ axi_dmac_compute_residue(chan, active);

- if (active->cyclic)
- vchan_cyclic_callback(&active->vdesc);
+ if (active->cyclic)
+ vchan_cyclic_callback(&active->vdesc);

- if (active->num_completed == active->num_sgs ||
- sg->partial_len) {
- if (active->cyclic) {
- active->num_completed = 0; /* wrap around */
- } else {
- list_del(&active->vdesc.node);
- vchan_cookie_complete(&active->vdesc);
- active = axi_dmac_active_desc(chan);
+ if (active->num_completed == active->num_sgs ||
+ sg->partial_len) {
+ if (active->cyclic) {
+ active->num_completed = 0; /* wrap around */
+ } else {
+ list_del(&active->vdesc.node);
+ vchan_cookie_complete(&active->vdesc);
+ active = axi_dmac_active_desc(chan);
+ }
}
- }
- } while (active);
+ } while (active);
+ }

return start_next;
}
@@ -478,8 +511,12 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
struct axi_dmac *dmac = chan_to_axi_dmac(chan);
unsigned long flags;
+ u32 ctrl = AXI_DMAC_CTRL_ENABLE;
+
+ if (chan->hw_sg)
+ ctrl |= AXI_DMAC_CTRL_ENABLE_SG;

- axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE);
+ axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, ctrl);

spin_lock_irqsave(&chan->vchan.lock, flags);
if (vchan_issue_pending(&chan->vchan))
@@ -516,8 +553,14 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)

hws[i].id = AXI_DMAC_SG_UNUSED;
hws[i].flags = 0;
+
+ /* Link hardware descriptors */
+ hws[i].next_sg_addr = hw_phys + (i + 1) * sizeof(*hws);
}

+ /* The last hardware descriptor will trigger an interrupt */
+ desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ;
+
return desc;
}

@@ -753,6 +796,9 @@ static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
case AXI_DMAC_REG_CURRENT_DEST_ADDR:
case AXI_DMAC_REG_PARTIAL_XFER_LEN:
case AXI_DMAC_REG_PARTIAL_XFER_ID:
+ case AXI_DMAC_REG_CURRENT_SG_ID:
+ case AXI_DMAC_REG_SG_ADDRESS:
+ case AXI_DMAC_REG_SG_ADDRESS_HIGH:
return true;
default:
return false;
@@ -905,6 +951,10 @@ static int axi_dmac_detect_caps(struct axi_dmac *dmac, unsigned int version)
if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) == AXI_DMAC_FLAG_CYCLIC)
chan->hw_cyclic = true;

+ axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, 0xffffffff);
+ if (axi_dmac_read(dmac, AXI_DMAC_REG_SG_ADDRESS))
+ chan->hw_sg = true;
+
axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1);
if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) == 1)
chan->hw_2d = true;
@@ -1005,6 +1055,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width);
dma_dev->directions = BIT(dmac->chan.direction);
dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ dma_dev->max_sg_burst = 31; /* 31 SGs maximum in one burst */
INIT_LIST_HEAD(&dma_dev->channels);

dmac->chan.vchan.desc_free = axi_dmac_desc_free;
--
2.42.0

2023-12-11 11:57:44

by Vinod Koul

Subject: Re: [PATCH 1/4] dmaengine: axi-dmac: Small code cleanup

On 04-12-23, 15:03, Paul Cercueil wrote:
> Use a for() loop instead of a while() loop in axi_dmac_fill_linear_sg().

Why?

>
> Signed-off-by: Paul Cercueil <[email protected]>
> ---
> drivers/dma/dma-axi-dmac.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> index 2457a420c13d..760940b21eab 100644
> --- a/drivers/dma/dma-axi-dmac.c
> +++ b/drivers/dma/dma-axi-dmac.c
> @@ -508,16 +508,13 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
> segment_size = ((segment_size - 1) | chan->length_align_mask) + 1;
>
> for (i = 0; i < num_periods; i++) {
> - len = period_len;
> -
> - while (len > segment_size) {
> + for (len = period_len; len > segment_size; sg++) {
> if (direction == DMA_DEV_TO_MEM)
> sg->dest_addr = addr;
> else
> sg->src_addr = addr;
> sg->x_len = segment_size;
> sg->y_len = 1;
> - sg++;
> addr += segment_size;
> len -= segment_size;
> }
> --
> 2.42.0
>

--
~Vinod

2023-12-11 12:02:58

by Vinod Koul

Subject: Re: [PATCH 4/4] dmaengine: axi-dmac: Use only EOT interrupts when doing scatter-gather

On 04-12-23, 15:03, Paul Cercueil wrote:
> Instead of notifying userspace in the end-of-transfer (EOT) interrupt
> and programming the hardware in the start-of-transfer (SOT) interrupt,
> do both in the EOT interrupt. This allows the SOT interrupt to be
> masked, halving the number of interrupts sent by the HDL core.
>
> Signed-off-by: Paul Cercueil <[email protected]>
> ---
> drivers/dma/dma-axi-dmac.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> index 5109530b66de..beed91a8238c 100644
> --- a/drivers/dma/dma-axi-dmac.c
> +++ b/drivers/dma/dma-axi-dmac.c
> @@ -415,6 +415,7 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
> list_del(&active->vdesc.node);
> vchan_cookie_complete(&active->vdesc);
> active = axi_dmac_active_desc(chan);
> + start_next = !!active;

Should this be in current patch, sounds like this should be a different
patch?

> }
> } else {
> do {
> @@ -1000,6 +1001,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
> struct axi_dmac *dmac;
> struct regmap *regmap;
> unsigned int version;
> + u32 irq_mask = 0;
> int ret;
>
> dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> @@ -1067,7 +1069,10 @@ static int axi_dmac_probe(struct platform_device *pdev)
>
> dma_dev->copy_align = (dmac->chan.address_align_mask + 1);
>
> - axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
> + if (dmac->chan.hw_sg)
> + irq_mask |= AXI_DMAC_IRQ_SOT;
> +
> + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask);
>
> if (of_dma_is_coherent(pdev->dev.of_node)) {
> ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC);
> --
> 2.42.0
>

--
~Vinod

2023-12-11 12:16:34

by Paul Cercueil

Subject: Re: [PATCH 1/4] dmaengine: axi-dmac: Small code cleanup

Hi Vinod,

On Monday, 11 December 2023 at 17:27 +0530, Vinod Koul wrote:
> On 04-12-23, 15:03, Paul Cercueil wrote:
> > Use a for() loop instead of a while() loop in
> > axi_dmac_fill_linear_sg().
>
> Why?

Simplicity? Code quality?

-Paul

>
> >
> > Signed-off-by: Paul Cercueil <[email protected]>
> > ---
> >  drivers/dma/dma-axi-dmac.c | 5 +----
> >  1 file changed, 1 insertion(+), 4 deletions(-)
> >
> > diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> > index 2457a420c13d..760940b21eab 100644
> > --- a/drivers/dma/dma-axi-dmac.c
> > +++ b/drivers/dma/dma-axi-dmac.c
> > @@ -508,16 +508,13 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
> >   segment_size = ((segment_size - 1) | chan->length_align_mask) + 1;
> >  
> >   for (i = 0; i < num_periods; i++) {
> > - len = period_len;
> > -
> > - while (len > segment_size) {
> > + for (len = period_len; len > segment_size; sg++) {
> >   if (direction == DMA_DEV_TO_MEM)
> >   sg->dest_addr = addr;
> >   else
> >   sg->src_addr = addr;
> >   sg->x_len = segment_size;
> >   sg->y_len = 1;
> > - sg++;
> >   addr += segment_size;
> >   len -= segment_size;
> >   }
> > --
> > 2.42.0
> >
>

2023-12-11 12:21:39

by Paul Cercueil

Subject: Re: [PATCH 4/4] dmaengine: axi-dmac: Use only EOT interrupts when doing scatter-gather

Hi Vinod,

On Monday, 11 December 2023 at 17:31 +0530, Vinod Koul wrote:
> On 04-12-23, 15:03, Paul Cercueil wrote:
> > Instead of notifying userspace in the end-of-transfer (EOT)
> > interrupt and programming the hardware in the start-of-transfer
> > (SOT) interrupt, do both in the EOT interrupt. This allows the SOT
> > interrupt to be masked, halving the number of interrupts sent by
> > the HDL core.
> >
> > Signed-off-by: Paul Cercueil <[email protected]>
> > ---
> >  drivers/dma/dma-axi-dmac.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> > index 5109530b66de..beed91a8238c 100644
> > --- a/drivers/dma/dma-axi-dmac.c
> > +++ b/drivers/dma/dma-axi-dmac.c
> > @@ -415,6 +415,7 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
> >   list_del(&active->vdesc.node);
> >   vchan_cookie_complete(&active->vdesc);
> >   active = axi_dmac_active_desc(chan);
> > + start_next = !!active;
>
> Should this be in current patch, sounds like this should be a
> different
> patch?

It belongs here. This line is what allows a new transfer to be
programmed from the EOT. Since we disable the SOT interrupt, if we
remove that line, the driver won't work.
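
For reference, the line in question (from the patch): once a
descriptor completes in axi_dmac_transfer_done(), the next active
descriptor is picked up and started from the EOT handler:

    active = axi_dmac_active_desc(chan);
    start_next = !!active;    /* program the next transfer from the EOT */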

Cheers,
-Paul

>
> >   }
> >   } else {
> >   do {
> > @@ -1000,6 +1001,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
> >   struct axi_dmac *dmac;
> >   struct regmap *regmap;
> >   unsigned int version;
> > + u32 irq_mask = 0;
> >   int ret;
> >  
> >   dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> > @@ -1067,7 +1069,10 @@ static int axi_dmac_probe(struct platform_device *pdev)
> >  
> >   dma_dev->copy_align = (dmac->chan.address_align_mask + 1);
> >  
> > - axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
> > + if (dmac->chan.hw_sg)
> > + irq_mask |= AXI_DMAC_IRQ_SOT;
> > +
> > + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask);
> >  
> >   if (of_dma_is_coherent(pdev->dev.of_node)) {
> >   ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC);
> > --
> > 2.42.0
> >
>

2023-12-12 04:35:53

by Vinod Koul

Subject: Re: [PATCH 1/4] dmaengine: axi-dmac: Small code cleanup

On 11-12-23, 13:15, Paul Cercueil wrote:
> Hi Vinod,
>
> On Monday, 11 December 2023 at 17:27 +0530, Vinod Koul wrote:
> > On 04-12-23, 15:03, Paul Cercueil wrote:
> > > Use a for() loop instead of a while() loop in
> > > axi_dmac_fill_linear_sg().
> >
> > Why?
>
> Simplicity? Code quality?

It would be great to mention the reason :-) right?

--
~Vinod