2021-04-09 17:58:58

by Radhey Shyam Pandey

[permalink] [raw]
Subject: [RFC v2 PATCH 0/7] Xilinx DMA enhancements and optimization


Some background about the patch series: Xilinx Axi Ethernet device driver
(xilinx_axienet_main.c) currently has axi-dma code inside it. The goal
is to refactor axiethernet driver and use existing AXI DMA driver using
DMAEngine API.

This patchset does feature addition and optimization to support
axidma integration with axiethernet network driver. Once axidma
version is accepted mcdma specific changes will be added in
followup version.

This series is based on dmaengine tree commit: #a38fd8748464

Changes for v2:
- Use metadata API[1] for passing metadata from dma to netdev client.
- Read irq-delay from DT.
- Remove desc_callback_valid check.
- Addressed RFC v1 comments[2].
- Minor code refactoring.

Comments, suggestions are very welcome!


[1] https://www.spinics.net/lists/dmaengine/msg16583.html
[2] https://www.spinics.net/lists/dmaengine/msg15208.html

Radhey Shyam Pandey (7):
dt-bindings: dmaengine: xilinx_dma: Add xlnx,axistream-connected
property
dt-bindings: dmaengine: xilinx_dma: Add xlnx,irq-delay property
dmaengine: xilinx_dma: Pass AXI4-Stream control words to dma client
dmaengine: xilinx_dma: Increase AXI DMA transaction segment count
dmaengine: xilinx_dma: Freeup active list based on descriptor
completion bit
dmaengine: xilinx_dma: Use tasklet_hi_schedule for timing critical
usecase
dmaengine: xilinx_dma: Program interrupt delay timeout

.../devicetree/bindings/dma/xilinx/xilinx_dma.txt | 4 ++
drivers/dma/xilinx/xilinx_dma.c | 68 ++++++++++++++++++----
2 files changed, 61 insertions(+), 11 deletions(-)

--
2.7.4


2021-04-09 17:59:12

by Radhey Shyam Pandey

[permalink] [raw]
Subject: [RFC v2 PATCH 7/7] dmaengine: xilinx_dma: Program interrupt delay timeout

Program IRQDelay for AXI DMA. The interrupt timeout mechanism causes
the DMA engine to generate an interrupt after the delay time period
has expired. It enables dmaengine to respond in real-time even though
interrupt coalescing is configured. It also remove the placeholder
for delay interrupt and merge it with frame completion interrupt.
Since by default interrupt delay timeout is disabled this feature
addition has no functional impact on VDMA and CDMA IP's.

Signed-off-by: Radhey Shyam Pandey <[email protected]>
---
Changes for v2:
- Read irq delay timeout value from DT.
- Merge interrupt processing for frame done and delay interrupt.
---
drivers/dma/xilinx/xilinx_dma.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index a2ea2d649332..0c0dc9882a01 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -173,8 +173,10 @@
#define XILINX_DMA_MAX_TRANS_LEN_MAX 23
#define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26
#define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16)
+#define XILINX_DMA_CR_DELAY_MAX GENMASK(31, 24)
#define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4)
#define XILINX_DMA_CR_COALESCE_SHIFT 16
+#define XILINX_DMA_CR_DELAY_SHIFT 24
#define XILINX_DMA_BD_SOP BIT(27)
#define XILINX_DMA_BD_EOP BIT(26)
#define XILINX_DMA_BD_COMP_MASK BIT(31)
@@ -410,6 +412,7 @@ struct xilinx_dma_tx_descriptor {
* @stop_transfer: Differentiate b/w DMA IP's quiesce
* @tdest: TDEST value for mcdma
* @has_vflip: S2MM vertical flip
+ * @irq_delay: Interrupt delay timeout
*/
struct xilinx_dma_chan {
struct xilinx_dma_device *xdev;
@@ -447,6 +450,7 @@ struct xilinx_dma_chan {
int (*stop_transfer)(struct xilinx_dma_chan *chan);
u16 tdest;
bool has_vflip;
+ u8 irq_delay;
};

/**
@@ -1555,6 +1559,9 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
if (chan->has_sg)
xilinx_write(chan, XILINX_DMA_REG_CURDESC,
head_desc->async_tx.phys);
+ reg &= ~XILINX_DMA_CR_DELAY_MAX;
+ reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT;
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);

xilinx_dma_start(chan);

@@ -1877,15 +1884,8 @@ static irqreturn_t xilinx_dma_irq_handler(int irq, void *data)
}
}

- if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
- /*
- * Device takes too long to do the transfer when user requires
- * responsiveness.
- */
- dev_dbg(chan->dev, "Inter-packet latency too long\n");
- }
-
- if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
+ if (status & (XILINX_DMA_DMASR_FRM_CNT_IRQ |
+ XILINX_DMA_DMASR_DLY_CNT_IRQ)) {
spin_lock(&chan->lock);
xilinx_dma_complete_descriptor(chan);
chan->idle = true;
@@ -2802,6 +2802,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
/* Retrieve the channel properties from the device tree */
has_dre = of_property_read_bool(node, "xlnx,include-dre");

+ of_property_read_u8(node, "xlnx,irq-delay", &chan->irq_delay);
+
chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");

err = of_property_read_u32(node, "xlnx,datawidth", &value);
--
2.7.4

2021-04-09 17:59:30

by Radhey Shyam Pandey

[permalink] [raw]
Subject: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list based on descriptor completion bit

AXIDMA IP in SG mode sets completion bit to 1 when the transfer is
completed. Read this bit to move descriptor from active list to the
done list. This feature is needed when interrupt delay timeout and
IRQThreshold is enabled i.e Dly_IrqEn is triggered w/o completing
interrupt threshold.

Signed-off-by: Radhey Shyam Pandey <[email protected]>
---
- Check BD completion bit only for SG mode.
- Modify the logic to have early return path.
---
drivers/dma/xilinx/xilinx_dma.c | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 890bf46b36e5..f2305a73cb91 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -177,6 +177,7 @@
#define XILINX_DMA_CR_COALESCE_SHIFT 16
#define XILINX_DMA_BD_SOP BIT(27)
#define XILINX_DMA_BD_EOP BIT(26)
+#define XILINX_DMA_BD_COMP_MASK BIT(31)
#define XILINX_DMA_COALESCE_MAX 255
#define XILINX_DMA_NUM_DESCS 512
#define XILINX_DMA_NUM_APP_WORDS 5
@@ -1683,12 +1684,18 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
{
struct xilinx_dma_tx_descriptor *desc, *next;
+ struct xilinx_axidma_tx_segment *seg;

/* This function was invoked with lock held */
if (list_empty(&chan->active_list))
return;

list_for_each_entry_safe(desc, next, &chan->active_list, node) {
+ /* TODO: remove hardcoding for axidma_tx_segment */
+ seg = list_last_entry(&desc->segments,
+ struct xilinx_axidma_tx_segment, node);
+ if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) && chan->has_sg)
+ break;
if (chan->has_sg && chan->xdev->dma_config->dmatype !=
XDMA_TYPE_VDMA)
desc->residue = xilinx_dma_get_residue(chan, desc);
--
2.7.4

2021-04-09 18:00:49

by Radhey Shyam Pandey

[permalink] [raw]
Subject: [RFC v2 PATCH 6/7] dmaengine: xilinx_dma: Use tasklet_hi_schedule for timing critical usecase

Schedule tasklet with high priority to ensure that callback processing
is prioritized. It improves throughput for netdev dma clients.

Signed-off-by: Radhey Shyam Pandey <[email protected]>
--
Changes for v2:
- None
---
drivers/dma/xilinx/xilinx_dma.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index f2305a73cb91..a2ea2d649332 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1829,7 +1829,7 @@ static irqreturn_t xilinx_mcdma_irq_handler(int irq, void *data)
spin_unlock(&chan->lock);
}

- tasklet_schedule(&chan->tasklet);
+ tasklet_hi_schedule(&chan->tasklet);
return IRQ_HANDLED;
}

--
2.7.4

2021-04-15 07:07:36

by Lars-Peter Clausen

[permalink] [raw]
Subject: Re: [RFC v2 PATCH 0/7] Xilinx DMA enhancements and optimization

On 4/9/21 7:55 PM, Radhey Shyam Pandey wrote:
> Some background about the patch series: Xilinx Axi Ethernet device driver
> (xilinx_axienet_main.c) currently has axi-dma code inside it. The goal
> is to refactor axiethernet driver and use existing AXI DMA driver using
> DMAEngine API.

This is pretty neat! Do you have the patches that modify the AXI
Ethernet driver in a public tree somewhere, so this series can be seen
in context?

2021-04-15 07:13:02

by Lars-Peter Clausen

[permalink] [raw]
Subject: Re: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list based on descriptor completion bit

On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> AXIDMA IP in SG mode sets completion bit to 1 when the transfer is
> completed. Read this bit to move descriptor from active list to the
> done list. This feature is needed when interrupt delay timeout and
> IRQThreshold is enabled i.e Dly_IrqEn is triggered w/o completing
> interrupt threshold.
>
> Signed-off-by: Radhey Shyam Pandey <[email protected]>
> ---
> - Check BD completion bit only for SG mode.
> - Modify the logic to have early return path.
> ---
> drivers/dma/xilinx/xilinx_dma.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 890bf46b36e5..f2305a73cb91 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -177,6 +177,7 @@
> #define XILINX_DMA_CR_COALESCE_SHIFT 16
> #define XILINX_DMA_BD_SOP BIT(27)
> #define XILINX_DMA_BD_EOP BIT(26)
> +#define XILINX_DMA_BD_COMP_MASK BIT(31)
> #define XILINX_DMA_COALESCE_MAX 255
> #define XILINX_DMA_NUM_DESCS 512
> #define XILINX_DMA_NUM_APP_WORDS 5
> @@ -1683,12 +1684,18 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
> static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
> {
> struct xilinx_dma_tx_descriptor *desc, *next;
> + struct xilinx_axidma_tx_segment *seg;
>
> /* This function was invoked with lock held */
> if (list_empty(&chan->active_list))
> return;
>
> list_for_each_entry_safe(desc, next, &chan->active_list, node) {
> + /* TODO: remove hardcoding for axidma_tx_segment */
> + seg = list_last_entry(&desc->segments,
> + struct xilinx_axidma_tx_segment, node);
This needs to be fixed before this can be merged as it right now will
break the non AXIDMA variants.
> + if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) && chan->has_sg)
> + break;
> if (chan->has_sg && chan->xdev->dma_config->dmatype !=
> XDMA_TYPE_VDMA)
> desc->residue = xilinx_dma_get_residue(chan, desc);


2021-04-15 07:13:26

by Lars-Peter Clausen

[permalink] [raw]
Subject: Re: [RFC v2 PATCH 6/7] dmaengine: xilinx_dma: Use tasklet_hi_schedule for timing critical usecase

On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> Schedule tasklet with high priority to ensure that callback processing
> is prioritized. It improves throughput for netdev dma clients.
Do you have specific numbers on the throughput improvement?

2021-04-15 07:26:48

by Lars-Peter Clausen

[permalink] [raw]
Subject: Re: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list based on descriptor completion bit

On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> AXIDMA IP in SG mode sets completion bit to 1 when the transfer is
> completed. Read this bit to move descriptor from active list to the
> done list. This feature is needed when interrupt delay timeout and
> IRQThreshold is enabled i.e Dly_IrqEn is triggered w/o completing
> interrupt threshold.
>
> Signed-off-by: Radhey Shyam Pandey <[email protected]>
> ---
> - Check BD completion bit only for SG mode.
> - Modify the logic to have early return path.
> ---
> drivers/dma/xilinx/xilinx_dma.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 890bf46b36e5..f2305a73cb91 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -177,6 +177,7 @@
> #define XILINX_DMA_CR_COALESCE_SHIFT 16
> #define XILINX_DMA_BD_SOP BIT(27)
> #define XILINX_DMA_BD_EOP BIT(26)
> +#define XILINX_DMA_BD_COMP_MASK BIT(31)
> #define XILINX_DMA_COALESCE_MAX 255
> #define XILINX_DMA_NUM_DESCS 512
> #define XILINX_DMA_NUM_APP_WORDS 5
> @@ -1683,12 +1684,18 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
> static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
> {
> struct xilinx_dma_tx_descriptor *desc, *next;
> + struct xilinx_axidma_tx_segment *seg;
>
> /* This function was invoked with lock held */
> if (list_empty(&chan->active_list))
> return;
>
> list_for_each_entry_safe(desc, next, &chan->active_list, node) {
> + /* TODO: remove hardcoding for axidma_tx_segment */
> + seg = list_last_entry(&desc->segments,
> + struct xilinx_axidma_tx_segment, node);
> + if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) && chan->has_sg)
> + break;
> if (chan->has_sg && chan->xdev->dma_config->dmatype !=
> XDMA_TYPE_VDMA)
> desc->residue = xilinx_dma_get_residue(chan, desc);

Since not all descriptors will be completed in this function the
`chan->idle = true;` in xilinx_dma_irq_handler() needs to be gated on
the active_list being empty.

xilinx_dma_complete_descriptor

2021-04-15 07:35:19

by Lars-Peter Clausen

[permalink] [raw]
Subject: Re: [RFC v2 PATCH 7/7] dmaengine: xilinx_dma: Program interrupt delay timeout

On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> Program IRQDelay for AXI DMA. The interrupt timeout mechanism causes
> the DMA engine to generate an interrupt after the delay time period
> has expired. It enables dmaengine to respond in real-time even though
> interrupt coalescing is configured. It also remove the placeholder
> for delay interrupt and merge it with frame completion interrupt.
> Since by default interrupt delay timeout is disabled this feature
> addition has no functional impact on VDMA and CDMA IP's.

In my opinion this should not come from the devicetree. This setting is
application specific and should be configured through a runtime API.

For the VDMA there is already xilinx_vdma_channel_set_config() which
allows to configure the maximum number of IRQs that can be coalesced and
the IRQ delay. Something similar is probably needed for the AXIDMA.

>
> Signed-off-by: Radhey Shyam Pandey <[email protected]>
> ---
> Changes for v2:
> - Read irq delay timeout value from DT.
> - Merge interrupt processing for frame done and delay interrupt.
> ---
> drivers/dma/xilinx/xilinx_dma.c | 20 +++++++++++---------
> 1 file changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index a2ea2d649332..0c0dc9882a01 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -173,8 +173,10 @@
> #define XILINX_DMA_MAX_TRANS_LEN_MAX 23
> #define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26
> #define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16)
> +#define XILINX_DMA_CR_DELAY_MAX GENMASK(31, 24)
> #define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4)
> #define XILINX_DMA_CR_COALESCE_SHIFT 16
> +#define XILINX_DMA_CR_DELAY_SHIFT 24
> #define XILINX_DMA_BD_SOP BIT(27)
> #define XILINX_DMA_BD_EOP BIT(26)
> #define XILINX_DMA_BD_COMP_MASK BIT(31)
> @@ -410,6 +412,7 @@ struct xilinx_dma_tx_descriptor {
> * @stop_transfer: Differentiate b/w DMA IP's quiesce
> * @tdest: TDEST value for mcdma
> * @has_vflip: S2MM vertical flip
> + * @irq_delay: Interrupt delay timeout
> */
> struct xilinx_dma_chan {
> struct xilinx_dma_device *xdev;
> @@ -447,6 +450,7 @@ struct xilinx_dma_chan {
> int (*stop_transfer)(struct xilinx_dma_chan *chan);
> u16 tdest;
> bool has_vflip;
> + u8 irq_delay;
> };
>
> /**
> @@ -1555,6 +1559,9 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
> if (chan->has_sg)
> xilinx_write(chan, XILINX_DMA_REG_CURDESC,
> head_desc->async_tx.phys);
> + reg &= ~XILINX_DMA_CR_DELAY_MAX;
> + reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT;
> + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
>
> xilinx_dma_start(chan);
>
> @@ -1877,15 +1884,8 @@ static irqreturn_t xilinx_dma_irq_handler(int irq, void *data)
> }
> }
>
> - if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
> - /*
> - * Device takes too long to do the transfer when user requires
> - * responsiveness.
> - */
> - dev_dbg(chan->dev, "Inter-packet latency too long\n");
> - }
> -
> - if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
> + if (status & (XILINX_DMA_DMASR_FRM_CNT_IRQ |
> + XILINX_DMA_DMASR_DLY_CNT_IRQ)) {
> spin_lock(&chan->lock);
> xilinx_dma_complete_descriptor(chan);
> chan->idle = true;
> @@ -2802,6 +2802,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
> /* Retrieve the channel properties from the device tree */
> has_dre = of_property_read_bool(node, "xlnx,include-dre");
>
> + of_property_read_u8(node, "xlnx,irq-delay", &chan->irq_delay);
> +
> chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
>
> err = of_property_read_u32(node, "xlnx,datawidth", &value);


2021-06-11 16:15:23

by Radhey Shyam Pandey

[permalink] [raw]
Subject: RE: [RFC v2 PATCH 0/7] Xilinx DMA enhancements and optimization

> -----Original Message-----
> From: Lars-Peter Clausen <[email protected]>
> Sent: Thursday, April 15, 2021 12:36 PM
> To: Radhey Shyam Pandey <[email protected]>; [email protected];
> [email protected]; Michal Simek <[email protected]>
> Cc: [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]; git
> <[email protected]>
> Subject: Re: [RFC v2 PATCH 0/7] Xilinx DMA enhancements and optimization
>
> On 4/9/21 7:55 PM, Radhey Shyam Pandey wrote:
> > Some background about the patch series: Xilinx Axi Ethernet device
> > driver
> > (xilinx_axienet_main.c) currently has axi-dma code inside it. The goal
> > is to refactor axiethernet driver and use existing AXI DMA driver
> > using DMAEngine API.
>
> This is pretty neat! Do you have the patches that modify the AXI Ethernet
> driver in a public tree somewhere, so this series can be seen in context?
Yes, I sent the axiethernet RFC series to the netdev mailing list. Here is
the link: https://www.spinics.net/lists/netdev/msg734173.html

Thanks,
Radhey

2021-06-11 16:19:40

by Radhey Shyam Pandey

[permalink] [raw]
Subject: RE: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list based on descriptor completion bit

> -----Original Message-----
> From: Lars-Peter Clausen <[email protected]>
> Sent: Thursday, April 15, 2021 12:39 PM
> To: Radhey Shyam Pandey <[email protected]>; [email protected];
> [email protected]; Michal Simek <[email protected]>
> Cc: [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]; git
> <[email protected]>
> Subject: Re: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list
> based on descriptor completion bit
>
> On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> > AXIDMA IP in SG mode sets completion bit to 1 when the transfer is
> > completed. Read this bit to move descriptor from active list to the
> > done list. This feature is needed when interrupt delay timeout and
> > IRQThreshold is enabled i.e Dly_IrqEn is triggered w/o completing
> > interrupt threshold.
> >
> > Signed-off-by: Radhey Shyam Pandey <[email protected]>
> > ---
> > - Check BD completion bit only for SG mode.
> > - Modify the logic to have early return path.
> > ---
> > drivers/dma/xilinx/xilinx_dma.c | 7 +++++++
> > 1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/dma/xilinx/xilinx_dma.c
> > b/drivers/dma/xilinx/xilinx_dma.c index 890bf46b36e5..f2305a73cb91
> > 100644
> > --- a/drivers/dma/xilinx/xilinx_dma.c
> > +++ b/drivers/dma/xilinx/xilinx_dma.c
> > @@ -177,6 +177,7 @@
> > #define XILINX_DMA_CR_COALESCE_SHIFT 16
> > #define XILINX_DMA_BD_SOP BIT(27)
> > #define XILINX_DMA_BD_EOP BIT(26)
> > +#define XILINX_DMA_BD_COMP_MASK BIT(31)
> > #define XILINX_DMA_COALESCE_MAX 255
> > #define XILINX_DMA_NUM_DESCS 512
> > #define XILINX_DMA_NUM_APP_WORDS 5
> > @@ -1683,12 +1684,18 @@ static void xilinx_dma_issue_pending(struct
> dma_chan *dchan)
> > static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan
> *chan)
> > {
> > struct xilinx_dma_tx_descriptor *desc, *next;
> > + struct xilinx_axidma_tx_segment *seg;
> >
> > /* This function was invoked with lock held */
> > if (list_empty(&chan->active_list))
> > return;
> >
> > list_for_each_entry_safe(desc, next, &chan->active_list, node) {
> > + /* TODO: remove hardcoding for axidma_tx_segment */
> > + seg = list_last_entry(&desc->segments,
> > + struct xilinx_axidma_tx_segment, node);
> This needs to be fixed before this can be merged as it right now will break
> the non AXIDMA variants.

I agree, mentioned it in TODO to remove axidma specific hardcoding.
Will fix it next version.

Thanks,
Radhey
> > + if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) &&
> chan->has_sg)
> > + break;
> > if (chan->has_sg && chan->xdev->dma_config->dmatype !=
> > XDMA_TYPE_VDMA)
> > desc->residue = xilinx_dma_get_residue(chan, desc);
>

2021-06-11 18:32:43

by Radhey Shyam Pandey

[permalink] [raw]
Subject: RE: [RFC v2 PATCH 6/7] dmaengine: xilinx_dma: Use tasklet_hi_schedule for timing critical usecase

> -----Original Message-----
> From: Lars-Peter Clausen <[email protected]>
> Sent: Thursday, April 15, 2021 12:41 PM
> To: Radhey Shyam Pandey <[email protected]>; [email protected];
> [email protected]; Michal Simek <[email protected]>
> Cc: [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]; git
> <[email protected]>
> Subject: Re: [RFC v2 PATCH 6/7] dmaengine: xilinx_dma: Use
> tasklet_hi_schedule for timing critical usecase
>
> On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> > Schedule tasklet with high priority to ensure that callback processing
> > is prioritized. It improves throughput for netdev dma clients.
> Do you have specific numbers on the throughput improvement?
IIRC there was ~5% performance improvement but I did that a long back
on an older kernel 4.8 and after that onward I always checked overall
performance (having all optimization applied). In next version i will
redo incremental profiling and capture improvement % in the commit
description.

Thanks,
Radhey

2021-06-11 19:00:42

by Radhey Shyam Pandey

[permalink] [raw]
Subject: RE: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list based on descriptor completion bit

> -----Original Message-----
> From: Lars-Peter Clausen <[email protected]>
> Sent: Thursday, April 15, 2021 12:56 PM
> To: Radhey Shyam Pandey <[email protected]>; [email protected];
> [email protected]; Michal Simek <[email protected]>
> Cc: [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]; git
> <[email protected]>
> Subject: Re: [RFC v2 PATCH 5/7] dmaengine: xilinx_dma: Freeup active list
> based on descriptor completion bit
>
> On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> > AXIDMA IP in SG mode sets completion bit to 1 when the transfer is
> > completed. Read this bit to move descriptor from active list to the
> > done list. This feature is needed when interrupt delay timeout and
> > IRQThreshold is enabled i.e Dly_IrqEn is triggered w/o completing
> > interrupt threshold.
> >
> > Signed-off-by: Radhey Shyam Pandey <[email protected]>
> > ---
> > - Check BD completion bit only for SG mode.
> > - Modify the logic to have early return path.
> > ---
> > drivers/dma/xilinx/xilinx_dma.c | 7 +++++++
> > 1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/dma/xilinx/xilinx_dma.c
> > b/drivers/dma/xilinx/xilinx_dma.c index 890bf46b36e5..f2305a73cb91
> > 100644
> > --- a/drivers/dma/xilinx/xilinx_dma.c
> > +++ b/drivers/dma/xilinx/xilinx_dma.c
> > @@ -177,6 +177,7 @@
> > #define XILINX_DMA_CR_COALESCE_SHIFT 16
> > #define XILINX_DMA_BD_SOP BIT(27)
> > #define XILINX_DMA_BD_EOP BIT(26)
> > +#define XILINX_DMA_BD_COMP_MASK BIT(31)
> > #define XILINX_DMA_COALESCE_MAX 255
> > #define XILINX_DMA_NUM_DESCS 512
> > #define XILINX_DMA_NUM_APP_WORDS 5
> > @@ -1683,12 +1684,18 @@ static void xilinx_dma_issue_pending(struct
> dma_chan *dchan)
> > static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan
> *chan)
> > {
> > struct xilinx_dma_tx_descriptor *desc, *next;
> > + struct xilinx_axidma_tx_segment *seg;
> >
> > /* This function was invoked with lock held */
> > if (list_empty(&chan->active_list))
> > return;
> >
> > list_for_each_entry_safe(desc, next, &chan->active_list, node) {
> > + /* TODO: remove hardcoding for axidma_tx_segment */
> > + seg = list_last_entry(&desc->segments,
> > + struct xilinx_axidma_tx_segment, node);
> > + if (!(seg->hw.status & XILINX_DMA_BD_COMP_MASK) &&
> chan->has_sg)
> > + break;
> > if (chan->has_sg && chan->xdev->dma_config->dmatype !=
> > XDMA_TYPE_VDMA)
> > desc->residue = xilinx_dma_get_residue(chan, desc);
>
> Since not all descriptors will be completed in this function the `chan->idle =
> true;` in xilinx_dma_irq_handler() needs to be gated on the active_list being
> empty.

Thanks for pointing it out. Agree to it, will fix it in the next version.
>
> xilinx_dma_complete_descriptor

2021-06-11 19:37:31

by Radhey Shyam Pandey

[permalink] [raw]
Subject: RE: [RFC v2 PATCH 7/7] dmaengine: xilinx_dma: Program interrupt delay timeout

-----Original Message-----
> From: Lars-Peter Clausen <[email protected]>
> Sent: Thursday, April 15, 2021 1:03 PM
> To: Radhey Shyam Pandey <[email protected]>; [email protected];
> [email protected]; Michal Simek <[email protected]>
> Cc: [email protected]; [email protected]; linux-arm-
> [email protected]; [email protected]; git
> <[email protected]>
> Subject: Re: [RFC v2 PATCH 7/7] dmaengine: xilinx_dma: Program interrupt
> delay timeout
>
> On 4/9/21 7:56 PM, Radhey Shyam Pandey wrote:
> > Program IRQDelay for AXI DMA. The interrupt timeout mechanism causes
> > the DMA engine to generate an interrupt after the delay time period
> > has expired. It enables dmaengine to respond in real-time even though
> > interrupt coalescing is configured. It also remove the placeholder
> > for delay interrupt and merge it with frame completion interrupt.
> > Since by default interrupt delay timeout is disabled this feature
> > addition has no functional impact on VDMA and CDMA IP's.
>
> In my opinion this should not come from the devicetree. This setting is
> application specific and should be configured through a runtime API.

The inclination for reading irq delay from DT was to minimize creating custom
interface for clients. For example - If we use xilinx_vdma_channel_set_config
API in ethernet driver it won't be then generic enough to be hooked to any
other dmaengine client. Any thoughts on it? With DT only limitation is it's
not runtime programmable.

Thanks,
Radhey
>
> For the VDMA there is already xilinx_vdma_channel_set_config() which
> allows to configure the maximum number of IRQs that can be coalesced and
> the IRQ delay. Something similar is probably needed for the AXIDMA.
>
> >
> > Signed-off-by: Radhey Shyam Pandey <[email protected]>
> > ---
> > Changes for v2:
> > - Read irq delay timeout value from DT.
> > - Merge interrupt processing for frame done and delay interrupt.
> > ---
> > drivers/dma/xilinx/xilinx_dma.c | 20 +++++++++++---------
> > 1 file changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/dma/xilinx/xilinx_dma.c
> b/drivers/dma/xilinx/xilinx_dma.c
> > index a2ea2d649332..0c0dc9882a01 100644
> > --- a/drivers/dma/xilinx/xilinx_dma.c
> > +++ b/drivers/dma/xilinx/xilinx_dma.c
> > @@ -173,8 +173,10 @@
> > #define XILINX_DMA_MAX_TRANS_LEN_MAX 23
> > #define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26
> > #define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16)
> > +#define XILINX_DMA_CR_DELAY_MAX GENMASK(31, 24)
> > #define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4)
> > #define XILINX_DMA_CR_COALESCE_SHIFT 16
> > +#define XILINX_DMA_CR_DELAY_SHIFT 24
> > #define XILINX_DMA_BD_SOP BIT(27)
> > #define XILINX_DMA_BD_EOP BIT(26)
> > #define XILINX_DMA_BD_COMP_MASK BIT(31)
> > @@ -410,6 +412,7 @@ struct xilinx_dma_tx_descriptor {
> > * @stop_transfer: Differentiate b/w DMA IP's quiesce
> > * @tdest: TDEST value for mcdma
> > * @has_vflip: S2MM vertical flip
> > + * @irq_delay: Interrupt delay timeout
> > */
> > struct xilinx_dma_chan {
> > struct xilinx_dma_device *xdev;
> > @@ -447,6 +450,7 @@ struct xilinx_dma_chan {
> > int (*stop_transfer)(struct xilinx_dma_chan *chan);
> > u16 tdest;
> > bool has_vflip;
> > + u8 irq_delay;
> > };
> >
> > /**
> > @@ -1555,6 +1559,9 @@ static void xilinx_dma_start_transfer(struct
> xilinx_dma_chan *chan)
> > if (chan->has_sg)
> > xilinx_write(chan, XILINX_DMA_REG_CURDESC,
> > head_desc->async_tx.phys);
> > + reg &= ~XILINX_DMA_CR_DELAY_MAX;
> > + reg |= chan->irq_delay << XILINX_DMA_CR_DELAY_SHIFT;
> > + dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
> >
> > xilinx_dma_start(chan);
> >
> > @@ -1877,15 +1884,8 @@ static irqreturn_t xilinx_dma_irq_handler(int
> irq, void *data)
> > }
> > }
> >
> > - if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
> > - /*
> > - * Device takes too long to do the transfer when user
> requires
> > - * responsiveness.
> > - */
> > - dev_dbg(chan->dev, "Inter-packet latency too long\n");
> > - }
> > -
> > - if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
> > + if (status & (XILINX_DMA_DMASR_FRM_CNT_IRQ |
> > + XILINX_DMA_DMASR_DLY_CNT_IRQ)) {
> > spin_lock(&chan->lock);
> > xilinx_dma_complete_descriptor(chan);
> > chan->idle = true;
> > @@ -2802,6 +2802,8 @@ static int xilinx_dma_chan_probe(struct
> xilinx_dma_device *xdev,
> > /* Retrieve the channel properties from the device tree */
> > has_dre = of_property_read_bool(node, "xlnx,include-dre");
> >
> > + of_property_read_u8(node, "xlnx,irq-delay", &chan->irq_delay);
> > +
> > chan->genlock = of_property_read_bool(node, "xlnx,genlock-
> mode");
> >
> > err = of_property_read_u32(node, "xlnx,datawidth", &value);
>