For some years and since many kernel versions there are reports that
RX UART DMA channel stops working at one point. So far the usual
workaround was to disable RX DMA. This patch series fixes the underlying
problem.
When a running sdma script does not find any usable destination buffer
to put its data into, the channel stops being scheduled again. As a
solution we manually retrigger the sdma script for this channel and
thereby resolve the freeze.
While this seems to work fine so far, buffer overruns may occur while
the channel is stopped, even temporarily. This case has to be
addressed by device drivers by increasing the number of DMA periods.
This patch series was tested with the current kernel and backported to
kernel 4.15 with a special use case using a WL1837MOD via UART and
provoking the hanging of UART RX DMA within seconds after starting a
test application. It resulted in the well-known
"Bluetooth: hci0: command 0x0408 tx timeout"
errors and complete stop of UART data reception. Our Bluetooth traffic
consists of many independent small packets, mostly only a few bytes,
causing high usage of periods.
Changelog v4:
- fixed the fixes tags
Changelog v3:
- fixes typo in dma_wmb
- add fixes tags
Changelog v2:
- adapt title (these patches are not only for i.MX6)
- improve some comments and patch descriptions
- add a dma_wb() around BD_DONE flag
- add Reviewed-by tags
- split off "serial: imx: adapt rx buffer and dma periods"
Philipp Puschmann (3):
dmaengine: imx-sdma: fix buffer ownership
dmaengine: imx-sdma: fix dma freezes
dmaengine: imx-sdma: drop redundant variable
drivers/dma/imx-sdma.c | 32 ++++++++++++++++++++++----------
1 file changed, 22 insertions(+), 10 deletions(-)
--
2.23.0
In sdma_prep_dma_cyclic buf is redundant. Drop it.
Signed-off-by: Philipp Puschmann <[email protected]>
Reviewed-by: Lucas Stach <[email protected]>
---
Changelog v3,v4:
- no changes
Changelog v2:
- add Reviewed-by tag
drivers/dma/imx-sdma.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index a32b5962630e..17961451941a 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1544,7 +1544,7 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
struct sdma_engine *sdma = sdmac->sdma;
int num_periods = buf_len / period_len;
int channel = sdmac->channel;
- int i = 0, buf = 0;
+ int i;
struct sdma_desc *desc;
dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
@@ -1565,7 +1565,7 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
goto err_bd_out;
}
- while (buf < buf_len) {
+ for (i = 0; i < num_periods; i++) {
struct sdma_buffer_descriptor *bd = &desc->bd[i];
int param;
@@ -1592,9 +1592,6 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
bd->mode.status = param;
dma_addr += period_len;
- buf += period_len;
-
- i++;
}
return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
--
2.23.0
For some years and since many kernel versions there are reports that the
RX UART SDMA channel stops working at some point. The workaround was to
disable DMA for RX. This commit tries to fix the problem itself.
Due to its license I wasn't able to debug the sdma script itself, but it
somehow leads to the scheduling of the channel script being blocked when a
running sdma script does not find any free descriptor in the ring to put
its data into.
If we detect such a potential case we manually restart the channel.
As sdmac->desc is constant we can move desc out of the loop.
Fixes: 1ec1e82f2510 ("dmaengine: Add Freescale i.MX SDMA support")
Signed-off-by: Philipp Puschmann <[email protected]>
Reviewed-by: Lucas Stach <[email protected]>
---
Changelog v4:
- fixed the fixes tag
Changelog v3:
- use correct dma_wmb() instead of dma_wb()
- add fixes tag
Changelog v2:
- clarify comment and commit description
drivers/dma/imx-sdma.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index e029a2443cfc..a32b5962630e 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -775,21 +775,23 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
static void sdma_update_channel_loop(struct sdma_channel *sdmac)
{
struct sdma_buffer_descriptor *bd;
- int error = 0;
- enum dma_status old_status = sdmac->status;
+ struct sdma_desc *desc = sdmac->desc;
+ int error = 0, cnt = 0;
+ enum dma_status old_status = sdmac->status;
/*
* loop mode. Iterate over descriptors, re-setup them and
* call callback function.
*/
- while (sdmac->desc) {
- struct sdma_desc *desc = sdmac->desc;
+ while (desc) {
bd = &desc->bd[desc->buf_tail];
if (bd->mode.status & BD_DONE)
break;
+ cnt++;
+
if (bd->mode.status & BD_RROR) {
bd->mode.status &= ~BD_RROR;
sdmac->status = DMA_ERROR;
@@ -822,6 +824,17 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
if (error)
sdmac->status = old_status;
}
+
/* In some situations it may happen that the sdma does not find any
+ * usable descriptor in the ring to put data into. The channel is
+ * stopped then. While there is no specific error condition we can
+ * check for, a necessary condition is that all available buffers for
+ * the current channel have been written to by the sdma script. In
+ * this case and after we have made the buffers available again,
+ * we restart the channel.
+ */
+ if (cnt >= desc->num_bd)
+ sdma_enable_channel(sdmac->sdma, sdmac->channel);
}
static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
--
2.23.0
From: Philipp Puschmann <[email protected]> Sent: Thursday, September 19, 2019 10:30 PM
> For some years and since many kernel versions there are reports that RX
> UART DMA channel stops working at one point. So far the usual workaround
> was to disable RX DMA. This patches fix the underlying problem.
>
> When a running sdma script does not find any usable destination buffer to put
> its data into it just leads to stopping the channel being scheduled again. As
> solution we manually retrigger the sdma script for this channel and by this
> dissolve the freeze.
>
> While this seems to work fine so far, it may come to buffer overruns when the
> channel - even temporary - is stopped. This case has to be addressed by
> device drivers by increasing the number of DMA periods.
>
> This patch series was tested with the current kernel and backported to kernel
> 4.15 with a special use case using a WL1837MOD via UART and provoking the
> hanging of UART RX DMA within seconds after starting a test application. It
> resulted in well known
> "Bluetooth: hci0: command 0x0408 tx timeout"
> errors and complete stop of UART data reception. Our Bluetooth traffic
> consists of many independent small packets, mostly only a few bytes, causing
> high usage of periods.
>
> Changelog v4:
> - fixed the fixes tags
>
> Changelog v3:
> - fixes typo in dma_wmb
> - add fixes tags
>
> Changelog v2:
> - adapt title (this patches are not only for i.MX6)
> - improve some comments and patch descriptions
> - add a dma_wb() around BD_DONE flag
> - add Reviewed-by tags
> - split off "serial: imx: adapt rx buffer and dma periods"
>
> Philipp Puschmann (3):
> dmaengine: imx-sdma: fix buffer ownership
> dmaengine: imx-sdma: fix dma freezes
> dmaengine: imx-sdma: drop redundant variable
>
> drivers/dma/imx-sdma.c | 32 ++++++++++++++++++++++----------
> 1 file changed, 22 insertions(+), 10 deletions(-)
>
> --
> 2.23.0
The patch set looks fine; from a logical point of view it really does fix a corner-case issue.
Reviewed-by: Fugang Duan <[email protected]>
On 2019-9-19 22:30 Philipp Puschmann <[email protected]> wrote
> For some years and since many kernel versions there are reports that the RX
> UART SDMA channel stops working at some point. The workaround was to
> disable DMA for RX. This commit tries to fix the problem itself.
>
> Due to its license i wasn't able to debug the sdma script itself but it somehow
> leads to blocking the scheduling of the channel script when a running sdma
> script does not find any free descriptor in the ring to put its data into.
>
> If we detect such a potential case we manually restart the channel.
>
> As sdmac->desc is constant we can move desc out of the loop.
>
> Fixes: 1ec1e82f2510 ("dmaengine: Add Freescale i.MX SDMA support")
In fact, it's a refinement patch rather than a bug fix; it just restores the
cyclic transfer in the corner case. There are two causes for such a 'corner case':
1. An improper number of BDs or BD length setting for cyclic mode, so that BDs
are consumed very quickly. The worst case is the UART aging timer, where one byte
may consume one BD. For such a case, adding more BDs is the right way, as in
your UART patch.
2. High CPU load, so that the SDMA interrupt handler can't run in time to set the
BD_DONE flag back again, and eventually all BDs are consumed. In such a case, this
patch may hide other coding issues, such as a long time window with IRQs disabled
(spin_lock_irq). So I think this patch is more of a refine/restore patch, and it
would be better to add a clear print message to tell the user that the channel is
being restored and that unexpectedly high CPU load is occurring...
> Signed-off-by: Philipp Puschmann <[email protected]>
> Reviewed-by: Lucas Stach <[email protected]>
> ---
>
> Changelog v4:
> - fixed the fixes tag
>
> Changelog v3:
> - use correct dma_wmb() instead of dma_wb()
> - add fixes tag
>
> Changelog v2:
> - clarify comment and commit description
>
> drivers/dma/imx-sdma.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index
> e029a2443cfc..a32b5962630e 100644
> --- a/drivers/dma/imx-sdma.c
> +++ b/drivers/dma/imx-sdma.c
> @@ -775,21 +775,23 @@ static void sdma_start_desc(struct sdma_channel
> *sdmac) static void sdma_update_channel_loop(struct sdma_channel
> *sdmac) {
> struct sdma_buffer_descriptor *bd;
> - int error = 0;
> - enum dma_status old_status = sdmac->status;
> + struct sdma_desc *desc = sdmac->desc;
> + int error = 0, cnt = 0;
> + enum dma_status old_status = sdmac->status;
>
> /*
> * loop mode. Iterate over descriptors, re-setup them and
> * call callback function.
> */
> - while (sdmac->desc) {
> - struct sdma_desc *desc = sdmac->desc;
> + while (desc) {
>
> bd = &desc->bd[desc->buf_tail];
>
> if (bd->mode.status & BD_DONE)
> break;
>
> + cnt++;
> +
> if (bd->mode.status & BD_RROR) {
> bd->mode.status &= ~BD_RROR;
> sdmac->status = DMA_ERROR;
> @@ -822,6 +824,17 @@ static void sdma_update_channel_loop(struct
> sdma_channel *sdmac)
> if (error)
> sdmac->status = old_status;
> }
> +
> + /* In some situations it may happen that the sdma does not found any
> + * usable descriptor in the ring to put data into. The channel is
> + * stopped then. While there is no specific error condition we can
> + * check for, a necessary condition is that all available buffers for
> + * the current channel have been written to by the sdma script. In
> + * this case and after we have made the buffers available again,
> + * we restart the channel.
> + */
> + if (cnt >= desc->num_bd)
> + sdma_enable_channel(sdmac->sdma, sdmac->channel);
> }
>
> static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
> --
> 2.23.0