Here is a more improved approach for DMA support of SG lists of any length
in the EDMA DMA Engine driver.
In the previous approach [1] we depended on error interrupts to detect
missed events and manually retrigger them, however as discussed in [2],
there are concerns this can be trouble some for high speed peripherals
which may need a more real-time response from the DMA controller.
In this approach, we divide the total no of MAX slots per channel, into
2 linked sets which are cyclically linked to each other (the cyclic
link between the 2 sets make sure that the DMA is continuous till the whole
SG list has exhausted). We then enable completion interrupts on both linked
sets which results in recyling/preparing respective linked set with the
next set of SG entries. The interrupt handler executes in parallel while
the EDMA controller DMA's the next list. This results in no interruption.
Special handling is done for first linked set (as we set up both linked
sets initially before starting with the DMA), and last one where the cyclic
link has to be broken and a link to the Dummy slot has to be created.
Also we keep track of whether all pending DMA operations have completed
before we can mark it as complete.
[1] https://lkml.org/lkml/2013/7/29/312
[2] https://lkml.org/lkml/2013/7/30/54
Joel Fernandes (12):
dma: edma: Setup parameters to DMA MAX_NR_SG at a time
ARM: edma: Don't clear EMR of channel in edma_stop
dma: edma: remove limits on number of slots
dma: edma: Write out and handle MAX_NR_SG at a given time
ARM: edma: Add function to enable interrupt for a PaRAM slot
ARM: edma: Add pr_debug in edma_link
dma: edma: Add function to dump a PaRAM set from PaRAM
dma: edma: Add one more required slot to MAX slots
dma: edma: Implement multiple linked sets for continuity
dma: edma: Check if MAX_NR_SG is even in prep function
dma: edma: Keep tracking of Pending interrupts (pending_acks)
dma: edma: Return if nothing left todo in edma_execute
arch/arm/common/edma.c | 18 ++-
drivers/dma/edma.c | 279 +++++++++++++++++++++++++++++-------
include/linux/platform_data/edma.h | 1 +
3 files changed, 243 insertions(+), 55 deletions(-)
--
1.7.9.5
With this series, this check is no longer required and
we shouldn't need to reject drivers DMA'ing more than the
MAX number of slots.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 7b0853c..b6d609c 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -247,12 +247,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
return NULL;
}
- if (sg_len > MAX_NR_SG) {
- dev_err(dev, "Exceeded max SG segments %d > %d\n",
- sg_len, MAX_NR_SG);
- return NULL;
- }
-
edesc = kzalloc(sizeof(*edesc) + sg_len *
sizeof(edesc->pset[0]), GFP_ATOMIC);
if (!edesc) {
--
1.7.9.5
Splitting of MAX available slots into 2 sets of size MAX_NR_LS
requires to the MAX_NR_SG function to be even. We ensure the same
in prep function.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 70923a2..061f0cf 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -354,6 +354,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
int src_bidx, dst_bidx, src_cidx, dst_cidx;
int i, num_slots_needed;
+ if (MAX_NR_SG & 1)
+ dev_err(dev, "%s: MAX_NR_SG must be even\n", __func__);
+
if (unlikely(!echan || !sgl || !sg_len))
return NULL;
--
1.7.9.5
We'd now need a separate slot just for the channel and separate
ones for the 2 linked sets, so we make adjustments to allocate
an extra channel accordingly.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index a242269..df50a04 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -48,7 +48,7 @@
/* Max of 16 segments per channel to conserve PaRAM slots */
#define MAX_NR_SG 16
-#define EDMA_MAX_SLOTS MAX_NR_SG
+#define EDMA_MAX_SLOTS (MAX_NR_SG+1)
#define EDMA_DESCRIPTORS 16
struct edma_desc {
@@ -311,6 +311,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
+ /* Allocate one extra to account for the channel itself */
+ num_slots_needed++;
+
for (i = 0; i < num_slots_needed; i++) {
if (echan->slot[i] < 0) {
echan->slot[i] =
--
1.7.9.5
To prevent common programming errors, add a function to enable
interrupts correctly. Also keeps calling code more readable.
Signed-off-by: Joel Fernandes <[email protected]>
---
arch/arm/common/edma.c | 15 +++++++++++++++
include/linux/platform_data/edma.h | 1 +
2 files changed, 16 insertions(+)
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 6433b6c..aa43c49 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1095,6 +1095,21 @@ void edma_set_transfer_params(unsigned slot,
EXPORT_SYMBOL(edma_set_transfer_params);
/**
+ * edma_enable_interrupt - enable interrupt on parameter RAM slot
+ * @slot: parameter RAM slot which is the link target
+ *
+ * The originating slot should not be part of any active DMA transfer.
+ */
+void edma_enable_interrupt(unsigned slot)
+{
+ pr_debug("Enabling interrupt on slot %d\n", slot);
+
+ edma_parm_modify(EDMA_CTLR(slot), PARM_OPT, EDMA_CHAN_SLOT(slot),
+ ~TCINTEN, TCINTEN);
+}
+EXPORT_SYMBOL(edma_enable_interrupt);
+
+/**
* edma_link - link one parameter RAM slot to another
* @from: parameter RAM slot originating the link
* @to: parameter RAM slot which is the link target
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 57300fd..184ff5f 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -136,6 +136,7 @@ void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx);
void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt,
u16 bcnt_rld, enum sync_dimension sync_mode);
void edma_link(unsigned from, unsigned to);
+void edma_enable_interrupt(unsigned slot);
void edma_unlink(unsigned from);
/* calls that operate on an entire parameter RAM slot */
--
1.7.9.5
The total_processed variable was being used to keep track of when
DMA is completed for a particular descriptor.
This doesn't work anymore, as the interrupt for completion can come
in much later than when the total_processed variable is updated to
reflect that all SG entries have been issues. Introduce another
variable that reflects the actual value. This will serve a purpose
different from what total_processed was required to help with.
Also add code that give special treatment for the first linked set
interrupt as pending_acks is handled a bit differently for that case.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 061f0cf..eca1b47 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -59,6 +59,10 @@ struct edma_desc {
int pset_nr;
int total_processed;
int next_setup_linkset;
+
+ int no_first_ls_ack;
+ int pending_acks;
+
struct edmacc_param pset[0];
};
@@ -147,8 +151,7 @@ static void edma_execute(struct edma_chan *echan)
struct edmacc_param tmp_param;
/* If either we processed all psets or we're still not started */
- if (!echan->edesc ||
- echan->edesc->pset_nr == echan->edesc->total_processed) {
+ if (!echan->edesc || !echan->edesc->pending_acks) {
/* Get next vdesc */
vdesc = vchan_next_desc(&echan->vchan);
if (!vdesc) {
@@ -180,6 +183,8 @@ static void edma_execute(struct edma_chan *echan)
edesc->total_processed += total_link_set;
+ edesc->pending_acks += total_link_set;
+
total_left = edesc->pset_nr - edesc->total_processed;
total_link_set = total_left > MAX_NR_LS ?
@@ -190,6 +195,8 @@ static void edma_execute(struct edma_chan *echan)
where total pset_nr is strictly within MAX_NR size */
if (total_left > total_link_set)
edma_enable_interrupt(echan->slot[i]);
+ else
+ edesc->no_first_ls_ack = 1;
/* Setup link between linked set 0 to set 1 */
edma_link(echan->slot[i], echan->slot[i+1]);
@@ -210,6 +217,9 @@ static void edma_execute(struct edma_chan *echan)
}
edesc->total_processed += total_link_set;
+
+ edesc->pending_acks += total_link_set;
+
total_left = edesc->pset_nr - edesc->total_processed;
if (total_left)
@@ -267,6 +277,8 @@ static void edma_execute(struct edma_chan *echan)
edesc->total_processed += total_link_set;
+ edesc->pending_acks += total_link_set;
+
slot_off = total_link_set + ls_cur_off - 1;
if (edesc->total_processed == edesc->pset_nr)
@@ -498,8 +510,22 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
edesc = echan->edesc;
+ if (edesc->pending_acks < MAX_NR_LS)
+ edesc->pending_acks = 0;
+ else
+ edesc->pending_acks -= MAX_NR_LS;
+
+ if (edesc->no_first_ls_ack) {
+ if (edesc->pending_acks < MAX_NR_LS)
+ edesc->pending_acks = 0;
+ else
+ edesc->pending_acks -= MAX_NR_LS;
+
+ edesc->no_first_ls_ack = 0;
+ }
+
if (edesc) {
- if (edesc->total_processed == edesc->pset_nr) {
+ if (!edesc->pending_acks) {
dev_dbg(dev, "Transfer complete,"
" stopping channel %d\n", ch_num);
edma_stop(echan->ch_num);
--
1.7.9.5
Process SG-elements in batches of MAX_NR_SG if they are greater
than MAX_NR_SG. Due to this, at any given time only those many
slots will be used in the given channel no matter how long the
scatter list is. We keep track of how much has been written
inorder to process the next batch of elements in the scatter-list
and detect completion.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 76 ++++++++++++++++++++++++++++++++++------------------
1 file changed, 50 insertions(+), 26 deletions(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index b6d609c..080d669 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -56,6 +56,7 @@ struct edma_desc {
struct list_head node;
int absync;
int pset_nr;
+ int total_processed;
struct edmacc_param pset[0];
};
@@ -104,22 +105,36 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
/* Dispatch a queued descriptor to the controller (caller holds lock) */
static void edma_execute(struct edma_chan *echan)
{
- struct virt_dma_desc *vdesc = vchan_next_desc(&echan->vchan);
+ struct virt_dma_desc *vdesc;
struct edma_desc *edesc;
- int i;
+ struct device *dev = echan->vchan.chan.device->dev;
- if (!vdesc) {
- echan->edesc = NULL;
- return;
+ int i, j, total_left, total_process;
+
+ /* If either we processed all psets or we're still not started */
+ if (!echan->edesc ||
+ echan->edesc->pset_nr == echan->edesc->total_processed) {
+ /* Get next vdesc */
+ vdesc = vchan_next_desc(&echan->vchan);
+ if (!vdesc) {
+ echan->edesc = NULL;
+ return;
+ }
+ list_del(&vdesc->node);
+ echan->edesc = to_edma_desc(&vdesc->tx);
}
- list_del(&vdesc->node);
+ edesc = echan->edesc;
+
+ /* Find out how many left */
+ total_left = edesc->pset_nr - edesc->total_processed;
+ total_process = total_left > MAX_NR_SG ? MAX_NR_SG : total_left;
- echan->edesc = edesc = to_edma_desc(&vdesc->tx);
/* Write descriptor PaRAM set(s) */
- for (i = 0; i < edesc->pset_nr; i++) {
- edma_write_slot(echan->slot[i], &edesc->pset[i]);
+ for (i = 0; i < total_process; i++) {
+ j = i + edesc->total_processed;
+ edma_write_slot(echan->slot[i], &edesc->pset[j]);
dev_dbg(echan->vchan.chan.device->dev,
"\n pset[%d]:\n"
" chnum\t%d\n"
@@ -132,24 +147,29 @@ static void edma_execute(struct edma_chan *echan)
" bidx\t%08x\n"
" cidx\t%08x\n"
" lkrld\t%08x\n",
- i, echan->ch_num, echan->slot[i],
- edesc->pset[i].opt,
- edesc->pset[i].src,
- edesc->pset[i].dst,
- edesc->pset[i].a_b_cnt,
- edesc->pset[i].ccnt,
- edesc->pset[i].src_dst_bidx,
- edesc->pset[i].src_dst_cidx,
- edesc->pset[i].link_bcntrld);
+ j, echan->ch_num, echan->slot[i],
+ edesc->pset[j].opt,
+ edesc->pset[j].src,
+ edesc->pset[j].dst,
+ edesc->pset[j].a_b_cnt,
+ edesc->pset[j].ccnt,
+ edesc->pset[j].src_dst_bidx,
+ edesc->pset[j].src_dst_cidx,
+ edesc->pset[j].link_bcntrld);
/* Link to the previous slot if not the last set */
- if (i != (edesc->pset_nr - 1))
+ if (i != (total_process - 1))
edma_link(echan->slot[i], echan->slot[i+1]);
/* Final pset links to the dummy pset */
else
edma_link(echan->slot[i], echan->ecc->dummy_slot);
}
- edma_start(echan->ch_num);
+ edesc->total_processed += total_process;
+
+ if (edesc->total_processed <= MAX_NR_SG) {
+ dev_dbg(dev, "first transfer starting %d\n", echan->ch_num);
+ edma_start(echan->ch_num);
+ }
}
static int edma_terminate_all(struct edma_chan *echan)
@@ -358,19 +378,23 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
struct edma_desc *edesc;
unsigned long flags;
- /* Stop the channel */
- edma_stop(echan->ch_num);
-
switch (ch_status) {
case DMA_COMPLETE:
- dev_dbg(dev, "transfer complete on channel %d\n", ch_num);
-
spin_lock_irqsave(&echan->vchan.lock, flags);
edesc = echan->edesc;
if (edesc) {
+ if (edesc->total_processed == edesc->pset_nr) {
+ dev_dbg(dev, "transfer complete." \
+ " stopping channel %d\n", ch_num);
+ edma_stop(echan->ch_num);
+ vchan_cookie_complete(&edesc->vdesc);
+ } else {
+ dev_dbg(dev, "Intermediate transfer complete" \
+ " on channel %d\n", ch_num);
+ }
+
edma_execute(echan);
- vchan_cookie_complete(&edesc->vdesc);
}
spin_unlock_irqrestore(&echan->vchan.lock, flags);
--
1.7.9.5
Useful for visualizing linking of PaRAM slots
Signed-off-by: Joel Fernandes <[email protected]>
---
arch/arm/common/edma.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index aa43c49..34d3fc9 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1125,6 +1125,8 @@ void edma_link(unsigned from, unsigned to)
ctlr_to = EDMA_CTLR(to);
to = EDMA_CHAN_SLOT(to);
+ pr_debug("Setting up link %d -> %d\n", from, to);
+
if (from >= edma_cc[ctlr_from]->num_slots)
return;
if (to >= edma_cc[ctlr_to]->num_slots)
--
1.7.9.5
It is possible edma_execute is called even when all the SG
elements have been submitted for transmission, we add a check
for the same and avoid executing the rest of the function.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index eca1b47..62987fc 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -168,6 +168,9 @@ static void edma_execute(struct edma_chan *echan)
total_left = edesc->pset_nr - edesc->total_processed;
total_link_set = total_left > MAX_NR_LS ? MAX_NR_LS : total_left;
+ if (!total_left)
+ return;
+
/* First time, setup 2 cyclically linked sets, each containing half
the slots allocated for this channel */
if (edesc->total_processed == 0) {
--
1.7.9.5
Changes are made here for configuring existing parameters to support
DMA'ing them out in batches as needed.
Also allocate as many as slots as needed by the SG list, but not more
than MAX_NR_SG. Then these slots will be reused accordingly.
For ex, if MAX_NR_SG=10, and number of SG entries is 40, still only
10 slots will be allocated to DMA the entire SG list of size 40.
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 5f3e532..7b0853c 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -222,9 +222,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
enum dma_slave_buswidth dev_width;
u32 burst;
struct scatterlist *sg;
- int i;
int acnt, bcnt, ccnt, src, dst, cidx;
int src_bidx, dst_bidx, src_cidx, dst_cidx;
+ int i, num_slots_needed;
if (unlikely(!echan || !sgl || !sg_len))
return NULL;
@@ -262,8 +262,11 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
edesc->pset_nr = sg_len;
- for_each_sg(sgl, sg, sg_len, i) {
- /* Allocate a PaRAM slot, if needed */
+ /* Allocate a PaRAM slot, if needed */
+
+ num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
+
+ for (i = 0; i < num_slots_needed; i++) {
if (echan->slot[i] < 0) {
echan->slot[i] =
edma_alloc_slot(EDMA_CTLR(echan->ch_num),
@@ -273,6 +276,10 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
return NULL;
}
}
+ }
+
+ /* Configure PaRAM sets for each SG */
+ for_each_sg(sgl, sg, sg_len, i) {
acnt = dev_width;
@@ -330,6 +337,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
/* Configure A or AB synchronized transfers */
if (edesc->absync)
edesc->pset[i].opt |= SYNCDIM;
+
/* If this is the last set, enable completion interrupt flag */
if (i == sg_len - 1)
edesc->pset[i].opt |= TCINTEN;
--
1.7.9.5
We certainly don't want error conditions to be cleared any other
place but the EDMA error handler, as this will make us 'forget'
about missed events we might need to know errors have occurred.
This fixes a race condition where the EMR was being cleared
by the transfer completion interrupt handler.
Basically, what was happening was:
Missed event
|
|
V
SG1-SG2-SG3-Null
\
\__TC Interrupt (Almost same time as ARM is executing
TC interrupt handler, an event got missed and also forgotten
by clearing the EMR).
This causes the following problems:
1.
If error interrupt is also pending and TC interrupt clears the EMR
by calling edma_stop as has been observed in the edma_callback function,
the ARM will execute the error interrupt even though the EMR is clear.
As a result, the dma_ccerr_handler returns IRQ_NONE. If this happens
enough number of times, IRQ subsystem disables the interrupt thinking
its spurious which makes error handler never execute again.
2.
Also even if error handler doesn't return IRQ_NONE, the removing of EMR
removes the knowledge about which channel had a missed event, and thus
a manual trigger on such channels cannot be performed.
The EMR is ultimately being cleared by the Error interrupt handler
once it is handled so we remove code that does it in edma_stop and
allow it to happen there.
Signed-off-by: Joel Fernandes <[email protected]>
---
arch/arm/common/edma.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 3567ba1..6433b6c 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1307,7 +1307,6 @@ void edma_stop(unsigned channel)
edma_shadow0_write_array(ctlr, SH_EECR, j, mask);
edma_shadow0_write_array(ctlr, SH_ECR, j, mask);
edma_shadow0_write_array(ctlr, SH_SECR, j, mask);
- edma_write_array(ctlr, EDMA_EMCR, j, mask);
pr_debug("EDMA: EER%d %08x\n", j,
edma_shadow0_read_array(ctlr, SH_EER, j));
--
1.7.9.5
Previously, such a dump function was used but it wasn't reading
from the PaRAM, rather just from a edmacc_param structure, we
add a helpful function for debugging that directly reads from
the PaRAM and gives the current state correctly (including links
and interrupt information).
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 080d669..a242269 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -102,6 +102,37 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
kfree(container_of(vdesc, struct edma_desc, vdesc));
}
+static inline void dump_pset(struct edma_chan *echan, int slot,
+ struct edmacc_param *pset_unused, int pset_idx)
+{
+ struct edmacc_param pset_local, *pset;
+ pset = &pset_local;
+
+ edma_read_slot(slot, pset);
+
+ dev_dbg(echan->vchan.chan.device->dev,
+ "\n pset[%d]:\n"
+ " chnum\t%d\n"
+ " slot\t%d\n"
+ " opt\t%08x\n"
+ " src\t%08x\n"
+ " dst\t%08x\n"
+ " abcnt\t%08x\n"
+ " ccnt\t%08x\n"
+ " bidx\t%08x\n"
+ " cidx\t%08x\n"
+ " lkrld\t%08x\n",
+ pset_idx, echan->ch_num, slot,
+ pset[0].opt,
+ pset[0].src,
+ pset[0].dst,
+ pset[0].a_b_cnt,
+ pset[0].ccnt,
+ pset[0].src_dst_bidx,
+ pset[0].src_dst_cidx,
+ pset[0].link_bcntrld);
+}
+
/* Dispatch a queued descriptor to the controller (caller holds lock) */
static void edma_execute(struct edma_chan *echan)
{
--
1.7.9.5
Here we implement splitting up of the total MAX number of slots
available for a channel into 2 cyclically linked sets. Transfer
completion Interrupts are enabled on both linked sets and respective
handler recycles them on completion to process the next linked set.
Both linked sets are cyclically linked to each other to ensure
continuity of DMA operations. Interrupt handlers execute asynchronously
to the EDMA events and recycles the linked sets at the right time,
as a result EDMA is not blocked or dependent on interrupts and DMA
continues till the end of the SG-lists without any interruption.
Suggested-by: Sekhar Nori <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/dma/edma.c | 157 +++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 118 insertions(+), 39 deletions(-)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index df50a04..70923a2 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -48,6 +48,7 @@
/* Max of 16 segments per channel to conserve PaRAM slots */
#define MAX_NR_SG 16
+#define MAX_NR_LS (MAX_NR_SG >> 1)
#define EDMA_MAX_SLOTS (MAX_NR_SG+1)
#define EDMA_DESCRIPTORS 16
@@ -57,6 +58,7 @@ struct edma_desc {
int absync;
int pset_nr;
int total_processed;
+ int next_setup_linkset;
struct edmacc_param pset[0];
};
@@ -140,7 +142,9 @@ static void edma_execute(struct edma_chan *echan)
struct edma_desc *edesc;
struct device *dev = echan->vchan.chan.device->dev;
- int i, j, total_left, total_process;
+ int i, total_left, total_link_set;
+ int ls_cur_off, ls_next_off, slot_off;
+ struct edmacc_param tmp_param;
/* If either we processed all psets or we're still not started */
if (!echan->edesc ||
@@ -159,48 +163,121 @@ static void edma_execute(struct edma_chan *echan)
/* Find out how many left */
total_left = edesc->pset_nr - edesc->total_processed;
- total_process = total_left > MAX_NR_SG ? MAX_NR_SG : total_left;
-
-
- /* Write descriptor PaRAM set(s) */
- for (i = 0; i < total_process; i++) {
- j = i + edesc->total_processed;
- edma_write_slot(echan->slot[i], &edesc->pset[j]);
- dev_dbg(echan->vchan.chan.device->dev,
- "\n pset[%d]:\n"
- " chnum\t%d\n"
- " slot\t%d\n"
- " opt\t%08x\n"
- " src\t%08x\n"
- " dst\t%08x\n"
- " abcnt\t%08x\n"
- " ccnt\t%08x\n"
- " bidx\t%08x\n"
- " cidx\t%08x\n"
- " lkrld\t%08x\n",
- j, echan->ch_num, echan->slot[i],
- edesc->pset[j].opt,
- edesc->pset[j].src,
- edesc->pset[j].dst,
- edesc->pset[j].a_b_cnt,
- edesc->pset[j].ccnt,
- edesc->pset[j].src_dst_bidx,
- edesc->pset[j].src_dst_cidx,
- edesc->pset[j].link_bcntrld);
- /* Link to the previous slot if not the last set */
- if (i != (total_process - 1))
+ total_link_set = total_left > MAX_NR_LS ? MAX_NR_LS : total_left;
+
+ /* First time, setup 2 cyclically linked sets, each containing half
+ the slots allocated for this channel */
+ if (edesc->total_processed == 0) {
+ for (i = 0; i < total_link_set; i++) {
+ edma_write_slot(echan->slot[i+1], &edesc->pset[i]);
+
+ if (i != total_link_set - 1) {
+ edma_link(echan->slot[i+1], echan->slot[i+2]);
+ dump_pset(echan, echan->slot[i+1],
+ edesc->pset, i);
+ }
+ }
+
+ edesc->total_processed += total_link_set;
+
+ total_left = edesc->pset_nr - edesc->total_processed;
+
+ total_link_set = total_left > MAX_NR_LS ?
+ MAX_NR_LS : total_left;
+
+ if (total_link_set) {
+ /* Don't setup interrupt for first linked set for cases
+ where total pset_nr is strictly within MAX_NR size */
+ if (total_left > total_link_set)
+ edma_enable_interrupt(echan->slot[i]);
+
+ /* Setup link between linked set 0 to set 1 */
edma_link(echan->slot[i], echan->slot[i+1]);
- /* Final pset links to the dummy pset */
- else
+
+ dump_pset(echan, echan->slot[i], edesc->pset, i-1);
+
+ /* Write out linked set 1 */
+ for (; i < total_link_set + MAX_NR_LS; i++) {
+ edma_write_slot(echan->slot[i+1],
+ &edesc->pset[i]);
+
+ if (i != total_link_set + MAX_NR_LS - 1) {
+ edma_link(echan->slot[i+1],
+ echan->slot[i+2]);
+ dump_pset(echan, echan->slot[i+1],
+ edesc->pset, i);
+ }
+ }
+
+ edesc->total_processed += total_link_set;
+ total_left = edesc->pset_nr - edesc->total_processed;
+
+ if (total_left)
+ /* Setup a link from linked set 1 to set 0 */
+ edma_link(echan->slot[i], echan->slot[1]);
+ else
+ /* Setup a link between linked set 1 to dummy */
+ edma_link(echan->slot[i], echan->ecc->dummy_slot);
+ } else {
+ /* First linked set was enough, simply link to dummy */
edma_link(echan->slot[i], echan->ecc->dummy_slot);
- }
+ }
+
+ edma_enable_interrupt(echan->slot[i]);
+ dump_pset(echan, echan->slot[i], edesc->pset, i-1);
- edesc->total_processed += total_process;
+ edesc->next_setup_linkset = 0;
- if (edesc->total_processed <= MAX_NR_SG) {
+ /* Start the ball rolling... */
dev_dbg(dev, "first transfer starting %d\n", echan->ch_num);
+
+ edma_read_slot(echan->slot[1], &tmp_param);
+ edma_write_slot(echan->slot[0], &tmp_param);
edma_start(echan->ch_num);
+
+ return;
+ }
+
+ /* We got called in the middle of an SG-list transaction as one of the
+ linked sets completed */
+
+ /* Setup offsets into echan_slot, +1 is as slot 0 is for chan */
+ if (edesc->next_setup_linkset == 1) {
+ edesc->next_setup_linkset = 0;
+ ls_cur_off = MAX_NR_LS + 1;
+ ls_next_off = 1;
+ } else {
+ edesc->next_setup_linkset = 1;
+ ls_cur_off = 1;
+ ls_next_off = MAX_NR_LS + 1;
+ }
+
+ for (i = 0; i < total_link_set; i++) {
+ edma_write_slot(echan->slot[i + ls_cur_off],
+ &edesc->pset[i + edesc->total_processed]);
+
+ if (i != total_link_set - 1) {
+ edma_link(echan->slot[i + ls_cur_off],
+ echan->slot[i + ls_cur_off + 1]);
+
+ dump_pset(echan, echan->slot[i + ls_cur_off],
+ edesc->pset, i + edesc->total_processed);
+ }
}
+
+ edesc->total_processed += total_link_set;
+
+ slot_off = total_link_set + ls_cur_off - 1;
+
+ if (edesc->total_processed == edesc->pset_nr)
+ edma_link(echan->slot[slot_off], echan->ecc->dummy_slot);
+ else
+ edma_link(echan->slot[slot_off], echan->slot[ls_next_off]);
+
+ edma_enable_interrupt(echan->slot[slot_off]);
+
+ dump_pset(echan, echan->slot[slot_off],
+ edesc->pset, edesc->total_processed-1);
}
static int edma_terminate_all(struct edma_chan *echan)
@@ -417,15 +494,17 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
spin_lock_irqsave(&echan->vchan.lock, flags);
edesc = echan->edesc;
+
if (edesc) {
if (edesc->total_processed == edesc->pset_nr) {
- dev_dbg(dev, "transfer complete." \
+ dev_dbg(dev, "Transfer complete,"
" stopping channel %d\n", ch_num);
edma_stop(echan->ch_num);
vchan_cookie_complete(&edesc->vdesc);
} else {
- dev_dbg(dev, "Intermediate transfer complete" \
- " on channel %d\n", ch_num);
+ dev_dbg(dev, "Intermediate transfer "
+ "complete, setup next linked set on "
+ "%d\n ", ch_num);
}
edma_execute(echan);
--
1.7.9.5
On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
> We certainly don't want error conditions to be cleared any other
> place but the EDMA error handler, as this will make us 'forget'
> about missed events we might need to know errors have occurred.
>
> This fixes a race condition where the EMR was being cleared
> by the transfer completion interrupt handler.
>
> Basically, what was happening was:
>
> Missed event
> |
> |
> V
> SG1-SG2-SG3-Null
> \
> \__TC Interrupt (Almost same time as ARM is executing
> TC interrupt handler, an event got missed and also forgotten
> by clearing the EMR).
>
> This causes the following problems:
>
> 1.
> If error interrupt is also pending and TC interrupt clears the EMR
> by calling edma_stop as has been observed in the edma_callback function,
> the ARM will execute the error interrupt even though the EMR is clear.
> As a result, the dma_ccerr_handler returns IRQ_NONE. If this happens
> enough number of times, IRQ subsystem disables the interrupt thinking
> its spurious which makes error handler never execute again.
>
> 2.
> Also even if error handler doesn't return IRQ_NONE, the removing of EMR
> removes the knowledge about which channel had a missed event, and thus
> a manual trigger on such channels cannot be performed.
>
> The EMR is ultimately being cleared by the Error interrupt handler
> once it is handled so we remove code that does it in edma_stop and
> allow it to happen there.
>
> Signed-off-by: Joel Fernandes <[email protected]>
Queuing this for v3.11 fixes. While committing, I changed the headline
to remove capitalization and made it more readable by removing register
level details. The new headline is:
ARM: edma: don't clear missed events in edma_stop()
Thanks,
Sekhar
On 8/8/2013 5:19 PM, Sekhar Nori wrote:
> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>> We certainly don't want error conditions to be cleared any other
>> place but the EDMA error handler, as this will make us 'forget'
>> about missed events we might need to know errors have occurred.
>>
>> This fixes a race condition where the EMR was being cleared
>> by the transfer completion interrupt handler.
>>
>> Basically, what was happening was:
>>
>> Missed event
>> |
>> |
>> V
>> SG1-SG2-SG3-Null
>> \
>> \__TC Interrupt (Almost same time as ARM is executing
>> TC interrupt handler, an event got missed and also forgotten
>> by clearing the EMR).
>>
>> This causes the following problems:
>>
>> 1.
>> If error interrupt is also pending and TC interrupt clears the EMR
>> by calling edma_stop as has been observed in the edma_callback function,
>> the ARM will execute the error interrupt even though the EMR is clear.
>> As a result, the dma_ccerr_handler returns IRQ_NONE. If this happens
>> enough number of times, IRQ subsystem disables the interrupt thinking
>> its spurious which makes error handler never execute again.
>>
>> 2.
>> Also even if error handler doesn't return IRQ_NONE, the removing of EMR
>> removes the knowledge about which channel had a missed event, and thus
>> a manual trigger on such channels cannot be performed.
>>
>> The EMR is ultimately being cleared by the Error interrupt handler
>> once it is handled so we remove code that does it in edma_stop and
>> allow it to happen there.
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>
> Queuing this for v3.11 fixes. While committing, I changed the headline
> to remove capitalization and made it more readable by removing register
> level details. The new headline is:
>
> ARM: edma: don't clear missed events in edma_stop()
Forgot to ask, should this be tagged for stable? IOW, how serious is
this race in current kernel (without the entire series applied)? I have
never observed it myself - so please provide details how easy/difficult
it is to hit this condition.
Thanks,
Sekhar
On Sun, Aug 11, 2013 at 11:25 PM, Sekhar Nori <[email protected]> wrote:
> On 8/8/2013 5:19 PM, Sekhar Nori wrote:
>> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>>> We certainly don't want error conditions to be cleared any other
>>> place but the EDMA error handler, as this will make us 'forget'
>>> about missed events we might need to know errors have occurred.
>>>
>>> This fixes a race condition where the EMR was being cleared
>>> by the transfer completion interrupt handler.
>>>
>>> Basically, what was happening was:
>>>
>>> Missed event
>>> |
>>> |
>>> V
>>> SG1-SG2-SG3-Null
>>> \
>>> \__TC Interrupt (Almost same time as ARM is executing
>>> TC interrupt handler, an event got missed and also forgotten
>>> by clearing the EMR).
>>>
>>> This causes the following problems:
>>>
>>> 1.
>>> If error interrupt is also pending and TC interrupt clears the EMR
>>> by calling edma_stop as has been observed in the edma_callback function,
>>> the ARM will execute the error interrupt even though the EMR is clear.
>>> As a result, the dma_ccerr_handler returns IRQ_NONE. If this happens
>>> enough number of times, IRQ subsystem disables the interrupt thinking
>>> its spurious which makes error handler never execute again.
>>>
>>> 2.
>>> Also even if error handler doesn't return IRQ_NONE, the removing of EMR
>>> removes the knowledge about which channel had a missed event, and thus
>>> a manual trigger on such channels cannot be performed.
>>>
>>> The EMR is ultimately being cleared by the Error interrupt handler
>>> once it is handled so we remove code that does it in edma_stop and
>>> allow it to happen there.
>>>
>>> Signed-off-by: Joel Fernandes <[email protected]>
>>
>> Queuing this for v3.11 fixes. While committing, I changed the headline
>> to remove capitalization and made it more readable by removing register
>> level details. The new headline is:
>>
>> ARM: edma: don't clear missed events in edma_stop()
>
> Forgot to ask, should this be tagged for stable? IOW, how serious is
> this race in current kernel (without the entire series applied)? I have
> never observed it myself - so please provide details how easy/difficult
> it is to hit this condition.
The race was uncovered by recent EDMA patch series, So this patch can
go in for next kernel release as such, I am not aware of any other DMA
user that maybe uncovering the race condition.
Thanks,
-Joel
On Monday 12 August 2013 09:59 AM, Joel Fernandes wrote:
> On Sun, Aug 11, 2013 at 11:25 PM, Sekhar Nori <[email protected]> wrote:
>> On 8/8/2013 5:19 PM, Sekhar Nori wrote:
>>> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>>>> We certainly don't want error conditions to be cleared any other
>>>> place but the EDMA error handler, as this will make us 'forget'
>>>> about missed events we might need to know errors have occurred.
>>>>
>>>> This fixes a race condition where the EMR was being cleared
>>>> by the transfer completion interrupt handler.
>>>>
>>>> Basically, what was happening was:
>>>>
>>>> Missed event
>>>> |
>>>> |
>>>> V
>>>> SG1-SG2-SG3-Null
>>>> \
>>>> \__TC Interrupt (Almost same time as ARM is executing
>>>> TC interrupt handler, an event got missed and also forgotten
>>>> by clearing the EMR).
>>>>
>>>> This causes the following problems:
>>>>
>>>> 1.
>>>> If error interrupt is also pending and TC interrupt clears the EMR
>>>> by calling edma_stop as has been observed in the edma_callback function,
>>>> the ARM will execute the error interrupt even though the EMR is clear.
>>>> As a result, the dma_ccerr_handler returns IRQ_NONE. If this happens
>>>> enough number of times, IRQ subsystem disables the interrupt thinking
>>>> its spurious which makes error handler never execute again.
>>>>
>>>> 2.
>>>> Also even if error handler doesn't return IRQ_NONE, the removing of EMR
>>>> removes the knowledge about which channel had a missed event, and thus
>>>> a manual trigger on such channels cannot be performed.
>>>>
>>>> The EMR is ultimately being cleared by the Error interrupt handler
>>>> once it is handled so we remove code that does it in edma_stop and
>>>> allow it to happen there.
>>>>
>>>> Signed-off-by: Joel Fernandes <[email protected]>
>>>
>>> Queuing this for v3.11 fixes. While committing, I changed the headline
>>> to remove capitalization and made it more readable by removing register
>>> level details. The new headline is:
>>>
>>> ARM: edma: don't clear missed events in edma_stop()
>>
>> Forgot to ask, should this be tagged for stable? IOW, how serious is
>> this race in current kernel (without the entire series applied)? I have
>> never observed it myself - so please provide details how easy/difficult
>> it is to hit this condition.
>
> The race was uncovered by recent EDMA patch series, So this patch can
> go in for next kernel release as such, I am not aware of any other DMA
> user that maybe uncovering the race condition.
Okay, I wont queue for -rc then. If Vinod wants to take this along with
rest of the series, you can add my:
Acked-by: Sekhar Nori <[email protected]>
Thanks,
Sekhar
On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
> Changes are made here for configuring existing parameters to support
> DMA'ing them out in batches as needed.
>
> Also allocate as many as slots as needed by the SG list, but not more
> than MAX_NR_SG. Then these slots will be reused accordingly.
> For ex, if MAX_NR_SG=10, and number of SG entries is 40, still only
> 10 slots will be allocated to DMA the entire SG list of size 40.
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> drivers/dma/edma.c | 14 +++++++++++---
> 1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
> index 5f3e532..7b0853c 100644
> --- a/drivers/dma/edma.c
> +++ b/drivers/dma/edma.c
> @@ -222,9 +222,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
> enum dma_slave_buswidth dev_width;
> u32 burst;
> struct scatterlist *sg;
> - int i;
> int acnt, bcnt, ccnt, src, dst, cidx;
> int src_bidx, dst_bidx, src_cidx, dst_cidx;
> + int i, num_slots_needed;
'nslots' is more to my liking. Better keep variable names short.
>
> if (unlikely(!echan || !sgl || !sg_len))
> return NULL;
> @@ -262,8 +262,11 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>
> edesc->pset_nr = sg_len;
>
> - for_each_sg(sgl, sg, sg_len, i) {
> - /* Allocate a PaRAM slot, if needed */
> + /* Allocate a PaRAM slot, if needed */
> +
> + num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
nslots = min(MAX_NR_SG, sg_len);
> +
> + for (i = 0; i < num_slots_needed; i++) {
> if (echan->slot[i] < 0) {
> echan->slot[i] =
> edma_alloc_slot(EDMA_CTLR(echan->ch_num),
> @@ -273,6 +276,10 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
> return NULL;
> }
> }
> + }
> +
> + /* Configure PaRAM sets for each SG */
> + for_each_sg(sgl, sg, sg_len, i) {
>
> acnt = dev_width;
>
> @@ -330,6 +337,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
> /* Configure A or AB synchronized transfers */
> if (edesc->absync)
> edesc->pset[i].opt |= SYNCDIM;
> +
Random extra newline.
The patch as such is fine, but I dont think it makes lot of sense
standalone. This needs to be merged into the patch where you actually
handle the entire SG list in batches.
Thanks,
Sekhar
On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
> With this series, this check is no longer required and
> we shouldn't need to reject drivers DMA'ing more than the
> MAX number of slots.
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> drivers/dma/edma.c | 6 ------
> 1 file changed, 6 deletions(-)
>
> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
> index 7b0853c..b6d609c 100644
> --- a/drivers/dma/edma.c
> +++ b/drivers/dma/edma.c
> @@ -247,12 +247,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
> return NULL;
> }
>
> - if (sg_len > MAX_NR_SG) {
> - dev_err(dev, "Exceeded max SG segments %d > %d\n",
> - sg_len, MAX_NR_SG);
> - return NULL;
> - }
This patch comes too early. Should be moved to the end of the series
when the support you rely on is actually present.
Thanks,
Sekhar
On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
> Previously, such a dump function was used but it wasn't reading
> from the PaRAM, rather just from a edmacc_param structure, we
> add a helpful function for debugging that directly reads from
> the PaRAM and gives the current state correctly (including links
> and interrupt information).
>
> Signed-off-by: Joel Fernandes <[email protected]>
You should convert existing instances of PaRAM set dump to use this new
function along with introducing it.
Thanks,
Sekhar
> ---
> drivers/dma/edma.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
> index 080d669..a242269 100644
> --- a/drivers/dma/edma.c
> +++ b/drivers/dma/edma.c
> @@ -102,6 +102,37 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
> kfree(container_of(vdesc, struct edma_desc, vdesc));
> }
>
> +static inline void dump_pset(struct edma_chan *echan, int slot,
> + struct edmacc_param *pset_unused, int pset_idx)
> +{
> + struct edmacc_param pset_local, *pset;
> + pset = &pset_local;
> +
> + edma_read_slot(slot, pset);
> +
> + dev_dbg(echan->vchan.chan.device->dev,
> + "\n pset[%d]:\n"
> + " chnum\t%d\n"
> + " slot\t%d\n"
> + " opt\t%08x\n"
> + " src\t%08x\n"
> + " dst\t%08x\n"
> + " abcnt\t%08x\n"
> + " ccnt\t%08x\n"
> + " bidx\t%08x\n"
> + " cidx\t%08x\n"
> + " lkrld\t%08x\n",
> + pset_idx, echan->ch_num, slot,
> + pset[0].opt,
> + pset[0].src,
> + pset[0].dst,
> + pset[0].a_b_cnt,
> + pset[0].ccnt,
> + pset[0].src_dst_bidx,
> + pset[0].src_dst_cidx,
> + pset[0].link_bcntrld);
> +}
> +
> /* Dispatch a queued descriptor to the controller (caller holds lock) */
> static void edma_execute(struct edma_chan *echan)
> {
>
On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
> We'd now need a separate slot just for the channel and separate
> ones for the 2 linked sets, so we make adjustments to allocate
> an extra channel accordingly.
>
> Signed-off-by: Joel Fernandes <[email protected]>
No need for a separate patch for this, just do this in the patch where
you include the two linked sets.
> ---
> drivers/dma/edma.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
> index a242269..df50a04 100644
> --- a/drivers/dma/edma.c
> +++ b/drivers/dma/edma.c
> @@ -48,7 +48,7 @@
>
> /* Max of 16 segments per channel to conserve PaRAM slots */
> #define MAX_NR_SG 16
> -#define EDMA_MAX_SLOTS MAX_NR_SG
> +#define EDMA_MAX_SLOTS (MAX_NR_SG+1)
> #define EDMA_DESCRIPTORS 16
>
> struct edma_desc {
> @@ -311,6 +311,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>
> num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
>
> + /* Allocate one extra to account for the channel itself */
> + num_slots_needed++;
You can do:
nslots = min(MAX_NR_SG, sg_len) + 1;
Thanks,
Sekhar
On Monday 05 August 2013 04:14 PM, Joel Fernandes wrote:
> Here we implement splitting up of the total MAX number of slots
> available for a channel into 2 cyclically linked sets. Transfer
> completion Interrupts are enabled on both linked sets and respective
> handler recycles them on completion to process the next linked set.
> Both linked sets are cyclically linked to each other to ensure
> continuity of DMA operations. Interrupt handlers execute asynchronously
> to the EDMA events and recycles the linked sets at the right time,
> as a result EDMA is not blocked or dependent on interrupts and DMA
> continues till the end of the SG-lists without any interruption.
>
> Suggested-by: Sekhar Nori <[email protected]>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> drivers/dma/edma.c | 157 +++++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 118 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
> index df50a04..70923a2 100644
> --- a/drivers/dma/edma.c
> +++ b/drivers/dma/edma.c
> @@ -48,6 +48,7 @@
>
> /* Max of 16 segments per channel to conserve PaRAM slots */
> #define MAX_NR_SG 16
> +#define MAX_NR_LS (MAX_NR_SG >> 1)
> #define EDMA_MAX_SLOTS (MAX_NR_SG+1)
> #define EDMA_DESCRIPTORS 16
>
> @@ -57,6 +58,7 @@ struct edma_desc {
> int absync;
> int pset_nr;
> int total_processed;
> + int next_setup_linkset;
> struct edmacc_param pset[0];
> };
>
> @@ -140,7 +142,9 @@ static void edma_execute(struct edma_chan *echan)
> struct edma_desc *edesc;
> struct device *dev = echan->vchan.chan.device->dev;
>
> - int i, j, total_left, total_process;
> + int i, total_left, total_link_set;
> + int ls_cur_off, ls_next_off, slot_off;
> + struct edmacc_param tmp_param;
>
> /* If either we processed all psets or we're still not started */
> if (!echan->edesc ||
> @@ -159,48 +163,121 @@ static void edma_execute(struct edma_chan *echan)
>
> /* Find out how many left */
> total_left = edesc->pset_nr - edesc->total_processed;
> - total_process = total_left > MAX_NR_SG ? MAX_NR_SG : total_left;
> -
> -
> - /* Write descriptor PaRAM set(s) */
> - for (i = 0; i < total_process; i++) {
> - j = i + edesc->total_processed;
> - edma_write_slot(echan->slot[i], &edesc->pset[j]);
> - dev_dbg(echan->vchan.chan.device->dev,
> - "\n pset[%d]:\n"
> - " chnum\t%d\n"
> - " slot\t%d\n"
> - " opt\t%08x\n"
> - " src\t%08x\n"
> - " dst\t%08x\n"
> - " abcnt\t%08x\n"
> - " ccnt\t%08x\n"
> - " bidx\t%08x\n"
> - " cidx\t%08x\n"
> - " lkrld\t%08x\n",
> - j, echan->ch_num, echan->slot[i],
> - edesc->pset[j].opt,
> - edesc->pset[j].src,
> - edesc->pset[j].dst,
> - edesc->pset[j].a_b_cnt,
> - edesc->pset[j].ccnt,
> - edesc->pset[j].src_dst_bidx,
> - edesc->pset[j].src_dst_cidx,
> - edesc->pset[j].link_bcntrld);
> - /* Link to the previous slot if not the last set */
> - if (i != (total_process - 1))
> + total_link_set = total_left > MAX_NR_LS ? MAX_NR_LS : total_left;
The name you gave here sounds like this is defining total number of
linked PaRAM sets. Rather this is actually tracking the number of PaRAM
sets (slots) in current linked set, correct? Then may be just call it
'nslots' or even 'num_slots'? There are just too many variables with
"total" prefix to keep track of in this function!
> +
> + /* First time, setup 2 cyclically linked sets, each containing half
> + the slots allocated for this channel */
> + if (edesc->total_processed == 0) {
We dont need to check for this case for every DMA_COMPLETE interrupt.
May be move the initial setup to another function called from
edma_issue_pending()?
> + for (i = 0; i < total_link_set; i++) {
> + edma_write_slot(echan->slot[i+1], &edesc->pset[i]);
> +
> + if (i != total_link_set - 1) {
> + edma_link(echan->slot[i+1], echan->slot[i+2]);
> + dump_pset(echan, echan->slot[i+1],
> + edesc->pset, i);
> + }
> + }
> +
> + edesc->total_processed += total_link_set;
> +
> + total_left = edesc->pset_nr - edesc->total_processed;
> +
> + total_link_set = total_left > MAX_NR_LS ?
> + MAX_NR_LS : total_left;
> +
> + if (total_link_set) {
> + /* Don't setup interrupt for first linked set for cases
> + where total pset_nr is strictly within MAX_NR size */
See Documentation/CodingStyle for multi-line commenting style.
> + if (total_left > total_link_set)
> + edma_enable_interrupt(echan->slot[i]);
> +
> + /* Setup link between linked set 0 to set 1 */
> edma_link(echan->slot[i], echan->slot[i+1]);
> - /* Final pset links to the dummy pset */
> - else
> +
> + dump_pset(echan, echan->slot[i], edesc->pset, i-1);
> +
> + /* Write out linked set 1 */
> + for (; i < total_link_set + MAX_NR_LS; i++) {
> + edma_write_slot(echan->slot[i+1],
> + &edesc->pset[i]);
> +
> + if (i != total_link_set + MAX_NR_LS - 1) {
> + edma_link(echan->slot[i+1],
> + echan->slot[i+2]);
> + dump_pset(echan, echan->slot[i+1],
> + edesc->pset, i);
> + }
> + }
> +
> + edesc->total_processed += total_link_set;
> + total_left = edesc->pset_nr - edesc->total_processed;
There is way too much duplication of code here mainly because you
decided not to loop twice in the course of setting up the two linked
sets. Can you use a loop instead?
> +
> + if (total_left)
> + /* Setup a link from linked set 1 to set 0 */
> + edma_link(echan->slot[i], echan->slot[1]);
If you have more SGs to service at the end of setting up the two linked
sets, you should stop right there and wait for CPU to recycle the linked
sets. Right now you are setup for re-DMAing old data.
You wont hit this issue in testing because you have setup an interrupt
for LS0 and that will most likely service before LS1 completes but we
cannot rely on that timing.
Just link to dummy at end of LS1 to stall the DMA and wait for the
completion handler to come-in and restart the DMA after recycling LS0.
I haven't reviewed rest of the patch. Lets make sure we have a common
understanding here.
Thanks,
Sekhar
On Mon, Aug 12, 2013 at 11:26:49AM +0530, Sekhar Nori wrote:
> No need for a separate patch for this, just do this in the patch where
> you include the two linked sets.
Can you guys please think about the CC lists you're using for these
patch serieses? I've certainly no interest in random patches to the
DaVinci DMA controllers, and I suspect the same is true for most of the
people on the CC list.
On 08/12/2013 08:31 AM, Mark Brown wrote:
> On Mon, Aug 12, 2013 at 11:26:49AM +0530, Sekhar Nori wrote:
>
>> No need for a separate patch for this, just do this in the patch where
>> you include the two linked sets.
>
> Can you guys please think about the CC lists you're using for these
> patch serieses? I've certainly no interest in random patches to the
> DaVinci DMA controllers, and I suspect the same is true for most of the
> people on the CC list.
>
Sorry. I was actually getting a sense of this. What I did initially was
I copied the CC list from last years work on the same series hoping that
it would be a complete mail chain. Bad judgement on my part.
I should have paid more attention to Mark's rants about his inbox
getting spammed with this series in those old threads ;)
I guess moving forward I will use get_maintainer.pl and refine my
git-send scripts with only interested folks. Any words of advice on when
to and when not to CC someone remotely connected to a series is welcome.
@Sekhar, let's keep only linux-omap, linux-davinci and few other
interested folks on the CC chain for the rest of review on the series.
Thanks,
-Joel
Dropped quite a few from the CC list...
On 08/12/2013 02:15 AM, Sekhar Nori wrote:
> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>> Changes are made here for configuring existing parameters to support
>> DMA'ing them out in batches as needed.
>>
>> Also allocate as many as slots as needed by the SG list, but not more
>> than MAX_NR_SG. Then these slots will be reused accordingly.
>> For ex, if MAX_NR_SG=10, and number of SG entries is 40, still only
>> 10 slots will be allocated to DMA the entire SG list of size 40.
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>> ---
>> drivers/dma/edma.c | 14 +++++++++++---
>> 1 file changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
>> index 5f3e532..7b0853c 100644
>> --- a/drivers/dma/edma.c
>> +++ b/drivers/dma/edma.c
>> @@ -222,9 +222,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>> enum dma_slave_buswidth dev_width;
>> u32 burst;
>> struct scatterlist *sg;
>> - int i;
>> int acnt, bcnt, ccnt, src, dst, cidx;
>> int src_bidx, dst_bidx, src_cidx, dst_cidx;
>> + int i, num_slots_needed;
>
> 'nslots' is more to my liking. Better keep variable names short.
>
>>
>> if (unlikely(!echan || !sgl || !sg_len))
>> return NULL;
>> @@ -262,8 +262,11 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>>
>> edesc->pset_nr = sg_len;
>>
>> - for_each_sg(sgl, sg, sg_len, i) {
>> - /* Allocate a PaRAM slot, if needed */
>> + /* Allocate a PaRAM slot, if needed */
>> +
>> + num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
>
> nslots = min(MAX_NR_SG, sg_len);
I agree the original naming was quite long. I would rather using
something more descriptive though than nslots. How does slots_needed sound?
Thanks,
-Joel
Responding to other comments in this post,
On 08/12/2013 02:15 AM, Sekhar Nori wrote:
[..]
>> if (unlikely(!echan || !sgl || !sg_len))
>> return NULL;
>> @@ -262,8 +262,11 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>>
>> edesc->pset_nr = sg_len;
>>
>> - for_each_sg(sgl, sg, sg_len, i) {
>> - /* Allocate a PaRAM slot, if needed */
>> + /* Allocate a PaRAM slot, if needed */
>> +
>> + num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
>
> nslots = min(MAX_NR_SG, sg_len);
Changed to this, with the +1
>
>> +
>> + for (i = 0; i < num_slots_needed; i++) {
>> if (echan->slot[i] < 0) {
>> echan->slot[i] =
>> edma_alloc_slot(EDMA_CTLR(echan->ch_num),
>> @@ -273,6 +276,10 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>> return NULL;
>> }
>> }
>> + }
>> +
>> + /* Configure PaRAM sets for each SG */
>> + for_each_sg(sgl, sg, sg_len, i) {
>>
>> acnt = dev_width;
>>
>> @@ -330,6 +337,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>> /* Configure A or AB synchronized transfers */
>> if (edesc->absync)
>> edesc->pset[i].opt |= SYNCDIM;
>> +
>
> Random extra newline.
Removing..
>
> The patch as such is fine, but I dont think it makes lot of sense
> standalone. This needs to be merged into the patch where you actually
> handle the entire SG list in batches.
I think it does actually, this patch just takes care of preparing the
param set list correctly and allocating slots. It doesn't the actual DMA
or take part in the algorithm. As a result, the patch can be reused
incase in future the main algorithm is rewritten in a subsequent series.
Further this patch was reused straight from old implementation so it
proved to be useful being a separate patch last time. I also plan to
rewrite just this functionality in the future.
Thanks,
-Joel
On 08/12/2013 06:55 PM, Joel Fernandes wrote:
> Dropped quite a few from the CC list...
>
> On 08/12/2013 02:15 AM, Sekhar Nori wrote:
>> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>>> Changes are made here for configuring existing parameters to support
>>> DMA'ing them out in batches as needed.
>>>
>>> Also allocate as many as slots as needed by the SG list, but not more
>>> than MAX_NR_SG. Then these slots will be reused accordingly.
>>> For ex, if MAX_NR_SG=10, and number of SG entries is 40, still only
>>> 10 slots will be allocated to DMA the entire SG list of size 40.
>>>
>>> Signed-off-by: Joel Fernandes <[email protected]>
>>> ---
>>> drivers/dma/edma.c | 14 +++++++++++---
>>> 1 file changed, 11 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
>>> index 5f3e532..7b0853c 100644
>>> --- a/drivers/dma/edma.c
>>> +++ b/drivers/dma/edma.c
>>> @@ -222,9 +222,9 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>>> enum dma_slave_buswidth dev_width;
>>> u32 burst;
>>> struct scatterlist *sg;
>>> - int i;
>>> int acnt, bcnt, ccnt, src, dst, cidx;
>>> int src_bidx, dst_bidx, src_cidx, dst_cidx;
>>> + int i, num_slots_needed;
>>
>> 'nslots' is more to my liking. Better keep variable names short.
>>
>>>
>>> if (unlikely(!echan || !sgl || !sg_len))
>>> return NULL;
>>> @@ -262,8 +262,11 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
>>>
>>> edesc->pset_nr = sg_len;
>>>
>>> - for_each_sg(sgl, sg, sg_len, i) {
>>> - /* Allocate a PaRAM slot, if needed */
>>> + /* Allocate a PaRAM slot, if needed */
>>> +
>>> + num_slots_needed = sg_len > MAX_NR_SG ? MAX_NR_SG : sg_len;
>>
>> nslots = min(MAX_NR_SG, sg_len);
>
> I agree the original naming was quite long. I would rather using
> something more descriptive though than nslots. How does slots_needed sound?
Sorry for the noise, nslots is fine and I've changed it to the same.
-Joel
On 08/12/2013 01:56 PM, Sekhar Nori wrote:
> On Monday 05 August 2013 04:14 PM, Joel Fernandes wrote:
>> Here we implement splitting up of the total MAX number of slots
>> available for a channel into 2 cyclically linked sets. Transfer
>> completion Interrupts are enabled on both linked sets and respective
>> handler recycles them on completion to process the next linked set.
>> Both linked sets are cyclically linked to each other to ensure
>> continuity of DMA operations. Interrupt handlers execute asynchronously
>> to the EDMA events and recycles the linked sets at the right time,
>> as a result EDMA is not blocked or dependent on interrupts and DMA
>> continues till the end of the SG-lists without any interruption.
>>
>> Suggested-by: Sekhar Nori <[email protected]>
>> Signed-off-by: Joel Fernandes <[email protected]>
>> ---
>> drivers/dma/edma.c | 157 +++++++++++++++++++++++++++++++++++++++-------------
>> 1 file changed, 118 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
>> index df50a04..70923a2 100644
>> --- a/drivers/dma/edma.c
>> +++ b/drivers/dma/edma.c
>> @@ -48,6 +48,7 @@
>>
>> /* Max of 16 segments per channel to conserve PaRAM slots */
>> #define MAX_NR_SG 16
>> +#define MAX_NR_LS (MAX_NR_SG >> 1)
>> #define EDMA_MAX_SLOTS (MAX_NR_SG+1)
>> #define EDMA_DESCRIPTORS 16
>>
>> @@ -57,6 +58,7 @@ struct edma_desc {
>> int absync;
>> int pset_nr;
>> int total_processed;
>> + int next_setup_linkset;
>> struct edmacc_param pset[0];
>> };
>>
>> @@ -140,7 +142,9 @@ static void edma_execute(struct edma_chan *echan)
>> struct edma_desc *edesc;
>> struct device *dev = echan->vchan.chan.device->dev;
>>
>> - int i, j, total_left, total_process;
>> + int i, total_left, total_link_set;
>> + int ls_cur_off, ls_next_off, slot_off;
>> + struct edmacc_param tmp_param;
>>
>> /* If either we processed all psets or we're still not started */
>> if (!echan->edesc ||
>> @@ -159,48 +163,121 @@ static void edma_execute(struct edma_chan *echan)
>>
>> /* Find out how many left */
>> total_left = edesc->pset_nr - edesc->total_processed;
>> - total_process = total_left > MAX_NR_SG ? MAX_NR_SG : total_left;
>> -
>> -
>> - /* Write descriptor PaRAM set(s) */
>> - for (i = 0; i < total_process; i++) {
>> - j = i + edesc->total_processed;
>> - edma_write_slot(echan->slot[i], &edesc->pset[j]);
>> - dev_dbg(echan->vchan.chan.device->dev,
>> - "\n pset[%d]:\n"
>> - " chnum\t%d\n"
>> - " slot\t%d\n"
>> - " opt\t%08x\n"
>> - " src\t%08x\n"
>> - " dst\t%08x\n"
>> - " abcnt\t%08x\n"
>> - " ccnt\t%08x\n"
>> - " bidx\t%08x\n"
>> - " cidx\t%08x\n"
>> - " lkrld\t%08x\n",
>> - j, echan->ch_num, echan->slot[i],
>> - edesc->pset[j].opt,
>> - edesc->pset[j].src,
>> - edesc->pset[j].dst,
>> - edesc->pset[j].a_b_cnt,
>> - edesc->pset[j].ccnt,
>> - edesc->pset[j].src_dst_bidx,
>> - edesc->pset[j].src_dst_cidx,
>> - edesc->pset[j].link_bcntrld);
>> - /* Link to the previous slot if not the last set */
>> - if (i != (total_process - 1))
>
>> + total_link_set = total_left > MAX_NR_LS ? MAX_NR_LS : total_left;
>
> The name you gave here sounds like this is defining total number of
> linked PaRAM sets. Rather this is actually tracking the number of PaRAM
> sets (slots) in current linked set, correct? Then may be just call it
> 'nslots' or even 'num_slots'? There are just too many variables with
> "total" prefix to keep track of in this function!
I would rather just leave this naming alone. The code is quite self
documenting: total_link_set means "Calculate what's the total size of a
Linkset, or total no.of slots in a linkset we need". This naming is fine
in my opinion and doesn't hurt line size at all, instead improving code
readability. I could dump the _ between link and set to make it:
total_linkset if that makes it any easier.
I agree there are too many variables in this function, but they each
serve a different purpose and required to implement the algorithm, which
is precisely I made them naming a bit more descriptive.
>
>> +
>> + /* First time, setup 2 cyclically linked sets, each containing half
>> + the slots allocated for this channel */
>> + if (edesc->total_processed == 0) {
>
> We dont need to check for this case for every DMA_COMPLETE interrupt.
> May be move the initial setup to another function called from
> edma_issue_pending()?
But how? That would only change the code to (?):
if (edesc->total_processed == 0) {
issue_pending();
}
Further it maybe appear that this case is uncommon, but it is a very
common case. Most SG transfers are within the SG limit, though at times
the else case can execute a lot too.
>> + for (i = 0; i < total_link_set; i++) {
>> + edma_write_slot(echan->slot[i+1], &edesc->pset[i]);
>> +
>> + if (i != total_link_set - 1) {
>> + edma_link(echan->slot[i+1], echan->slot[i+2]);
>> + dump_pset(echan, echan->slot[i+1],
>> + edesc->pset, i);
>> + }
>> + }
>> +
>> + edesc->total_processed += total_link_set;
>> +
>> + total_left = edesc->pset_nr - edesc->total_processed;
>> +
>> + total_link_set = total_left > MAX_NR_LS ?
>> + MAX_NR_LS : total_left;
>> +
>> + if (total_link_set) {
>> + /* Don't setup interrupt for first linked set for cases
>> + where total pset_nr is strictly within MAX_NR size */
>
> See Documentation/CodingStyle for multi-line commenting style.
Ok thanks, changed accordingly.
>> + if (total_left > total_link_set)
>> + edma_enable_interrupt(echan->slot[i]);
>> +
>> + /* Setup link between linked set 0 to set 1 */
>> edma_link(echan->slot[i], echan->slot[i+1]);
>> - /* Final pset links to the dummy pset */
>> - else
>> +
>> + dump_pset(echan, echan->slot[i], edesc->pset, i-1);
>> +
>> + /* Write out linked set 1 */
>> + for (; i < total_link_set + MAX_NR_LS; i++) {
>> + edma_write_slot(echan->slot[i+1],
>> + &edesc->pset[i]);
>> +
>> + if (i != total_link_set + MAX_NR_LS - 1) {
>> + edma_link(echan->slot[i+1],
>> + echan->slot[i+2]);
>> + dump_pset(echan, echan->slot[i+1],
>> + edesc->pset, i);
>> + }
>> + }
>> +
>> + edesc->total_processed += total_link_set;
>> + total_left = edesc->pset_nr - edesc->total_processed;
>
> There is way too much duplication of code here mainly because you
> decided not to loop twice in the course of setting up the two linked
> sets. Can you use a loop instead?
I tried to do this in a loop, its not possible without making the code
more unreadable and introducing more variables.
Further the follow 3 conditions have to be incorporated into the loop
some how which kind of makes it messy.. right now it is linearly
determined which case to execute.
/* Setup a link from linked set 1 to set 0 */
/* Setup a link between linked set 1 to dummy */
/* First linked set was enough, simply link to dummy */
Since it is just a couple of lines more, I am more to the favor of
keeping the code readable than saving a few lines (for a loop of only 2
iterations) introducing more variables and making it look hackish. There
is a good chance in future that if implemented in such a way that I have
to spend quite a bit of time deciphering it.
>> +
>> + if (total_left)
>> + /* Setup a link from linked set 1 to set 0 */
>> + edma_link(echan->slot[i], echan->slot[1]);
>
> If you have more SGs to service at the end of setting up the two linked
> sets, you should stop right there and wait for CPU to recycle the linked
> sets. Right now you are setup for re-DMAing old data.
The above linking you're quoting is done in advance _but_, before the
link is traversed, it is _guaranteed_ that the linkset being traversed
into will be recycled. This is the basis of the whole algorithm and
making sure that we never stall. There never ever will be a case where
we re-DMA old data because of the guarantee that the recycling will take
place before the traversal.
Further FWIW, interrupt takes few 100s microseconds to execute, where as
DMA is seen to take milliseconds from 1 SG entry to another in my testing.
> You wont hit this issue in testing because you have setup an interrupt
> for LS0 and that will most likely service before LS1 completes but we
> cannot rely on that timing.
This goes back to my first patch series where we stall. That doesn't
make any sense. In this patch series, we don't want DMA to stall at any
cost.
> Just link to dummy at end of LS1 to stall the DMA and wait for the
> completion handler to come-in and restart the DMA after recycling LS0.
Nope! Linking to dummy will absorb the events and the events will never
get triggered again. Trust me I have already done what you are saying
and it doesn't work.
> I haven't reviewed rest of the patch. Lets make sure we have a common
> understanding here.
Sure, thanks.
-Joel
On 08/12/2013 12:52 AM, Sekhar Nori wrote:
> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>> Previously, such a dump function was used but it wasn't reading
>> from the PaRAM, rather just from a edmacc_param structure, we
>> add a helpful function for debugging that directly reads from
>> the PaRAM and gives the current state correctly (including links
>> and interrupt information).
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>
> You should convert existing instances of PaRAM set dump to use this new
> function along with introducing it.
Well, the old dump callers were thrown out with completely rewritten
code. This rewritten code already contains the dump calls. Are you
saying pull out those dump pset calls into a separate patch?
Thanks,
-Joel
> Sekhar
>
>> ---
>> drivers/dma/edma.c | 31 +++++++++++++++++++++++++++++++
>> 1 file changed, 31 insertions(+)
>>
>> diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
>> index 080d669..a242269 100644
>> --- a/drivers/dma/edma.c
>> +++ b/drivers/dma/edma.c
>> @@ -102,6 +102,37 @@ static void edma_desc_free(struct virt_dma_desc *vdesc)
>> kfree(container_of(vdesc, struct edma_desc, vdesc));
>> }
>>
>> +static inline void dump_pset(struct edma_chan *echan, int slot,
>> + struct edmacc_param *pset_unused, int pset_idx)
>> +{
>> + struct edmacc_param pset_local, *pset;
>> + pset = &pset_local;
>> +
>> + edma_read_slot(slot, pset);
>> +
>> + dev_dbg(echan->vchan.chan.device->dev,
>> + "\n pset[%d]:\n"
>> + " chnum\t%d\n"
>> + " slot\t%d\n"
>> + " opt\t%08x\n"
>> + " src\t%08x\n"
>> + " dst\t%08x\n"
>> + " abcnt\t%08x\n"
>> + " ccnt\t%08x\n"
>> + " bidx\t%08x\n"
>> + " cidx\t%08x\n"
>> + " lkrld\t%08x\n",
>> + pset_idx, echan->ch_num, slot,
>> + pset[0].opt,
>> + pset[0].src,
>> + pset[0].dst,
>> + pset[0].a_b_cnt,
>> + pset[0].ccnt,
>> + pset[0].src_dst_bidx,
>> + pset[0].src_dst_cidx,
>> + pset[0].link_bcntrld);
>> +}
>> +
>> /* Dispatch a queued descriptor to the controller (caller holds lock) */
>> static void edma_execute(struct edma_chan *echan)
>> {
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On 08/12/2013 12:56 AM, Sekhar Nori wrote:
> On Monday 05 August 2013 09:44 PM, Joel Fernandes wrote:
>> We'd now need a separate slot just for the channel and separate
>> ones for the 2 linked sets, so we make adjustments to allocate
>> an extra channel accordingly.
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>
> No need for a separate patch for this, just do this in the patch where
> you include the two linked sets.
Ok, I dropped this patch. Anyway, most of this patch has gone away now
because of other changes you suggested.
Thanks,
-Joel
Following offline discussions with Sekhar, we discussed some ideas to
change a few things in this patch series to make it fail-safe. As such,
the only changes we are making for v4 will be to not cyclically link
immediately but doing so only once the ISR has finished setup (apart
from other style cleanups).
Any conditions where we are not able to modify the link in time (due to
heavily loaded system) will be detected and reported by the use of
linking to a NULL set.
The new approach will be fast because of no requirement to stall or
wait, and any DMA issues with heavily loaded systems can be detected as
error conditions.
This should architecturally be the final version of the patch series to
add DMA support for SG lists of any length.
Thanks,
-Joel
On 08/05/2013 11:14 AM, Joel Fernandes wrote:
> Here is a more improved approach for DMA support of SG lists of any length
> in the EDMA DMA Engine driver.
>
> In the previous approach [1] we depended on error interrupts to detect
> missed events and manually retrigger them, however as discussed in [2],
> there are concerns this can be trouble some for high speed peripherals
> which may need a more real-time response from the DMA controller.
>
> In this approach, we divide the total no of MAX slots per channel, into
> 2 linked sets which are cyclically linked to each other (the cyclic
> link between the 2 sets make sure that the DMA is continuous till the whole
> SG list has exhausted). We then enable completion interrupts on both linked
> sets which results in recyling/preparing respective linked set with the
> next set of SG entries. The interrupt handler executes in parallel while
> the EDMA controller DMA's the next list. This results in no interruption.
>
> Special handling is done for first linked set (as we set up both linked
> sets initially before starting with the DMA), and last one where the cyclic
> link has to be broken and a link to the Dummy slot has to be created.
> Also we keep track of whether all pending DMA operations have completed
> before we can mark it as complete.
>
> [1] https://lkml.org/lkml/2013/7/29/312
> [2] https://lkml.org/lkml/2013/7/30/54
>
> Joel Fernandes (12):
> dma: edma: Setup parameters to DMA MAX_NR_SG at a time
> ARM: edma: Don't clear EMR of channel in edma_stop
> dma: edma: remove limits on number of slots
> dma: edma: Write out and handle MAX_NR_SG at a given time
> ARM: edma: Add function to enable interrupt for a PaRAM slot
> ARM: edma: Add pr_debug in edma_link
> dma: edma: Add function to dump a PaRAM set from PaRAM
> dma: edma: Add one more required slot to MAX slots
> dma: edma: Implement multiple linked sets for continuity
> dma: edma: Check if MAX_NR_SG is even in prep function
> dma: edma: Keep tracking of Pending interrupts (pending_acks)
> dma: edma: Return if nothing left todo in edma_execute
>
> arch/arm/common/edma.c | 18 ++-
> drivers/dma/edma.c | 279 +++++++++++++++++++++++++++++-------
> include/linux/platform_data/edma.h | 1 +
> 3 files changed, 243 insertions(+), 55 deletions(-)
>