2022-03-28 22:11:44

by Yann Gautier

Subject: [PATCH v2] mmc: mmci: stm32: use a buffer for unaligned DMA requests

In SDIO mode, the sg list for requests can be unaligned with what the
STM32 SDMMC internal DMA supports. In that case, instead of failing,
use a temporary bounce buffer to copy from/to the sg list.
This buffer is limited to 1MB, which requires max_req_size to be
limited to 1MB as well. This has shown no throughput penalty for
SD cards or eMMC.

Signed-off-by: Yann Gautier <[email protected]>
---
Changes since v1:
- allocate bounce buffer in sdmmc_idma_validate_data()
- realign on top of mmc/devel branch
(25e14a52d35928a1831ca98889a8a25ac3017990)
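
For reviewers, the alignment rule enforced by sdmmc_idma_validate_data()
reduces to the sketch below. This is an illustrative, self-contained
condensation, not part of the patch: the helper name sg_needs_bounce()
and its burst parameter are invented here, while for_each_sg(),
IS_ALIGNED() and the sg_copy_*_buffer() routines are the kernel APIs the
patch actually uses.

/* Illustrative sketch, not part of the patch. */
#include <linux/align.h>
#include <linux/scatterlist.h>

/*
 * Mirror of the patch's validation rule: every sg element needs a
 * 32-bit aligned offset, and every element except the last one also
 * needs a burst-aligned length. Returns true when the IDMA cannot
 * use the sg list directly, i.e. when a bounce buffer is needed.
 */
static bool sg_needs_bounce(struct scatterlist *sgl, unsigned int nents,
			    unsigned int burst)
{
	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!IS_ALIGNED(sg->offset, sizeof(u32)))
			return true;
		if (i != nents - 1 && !IS_ALIGNED(sg->length, burst))
			return true;
	}

	return false;
}

When bouncing, the write path fills the coherent buffer with
sg_copy_to_buffer() before the IDMA starts, and the read path copies
back to the sg list with sg_copy_from_buffer() in unprep.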

drivers/mmc/host/mmci_stm32_sdmmc.c | 88 +++++++++++++++++++++++------
1 file changed, 71 insertions(+), 17 deletions(-)

diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 4566d7fc9055..60bca78a72b1 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -43,6 +43,9 @@ struct sdmmc_lli_desc {
 struct sdmmc_idma {
 	dma_addr_t sg_dma;
 	void *sg_cpu;
+	dma_addr_t bounce_dma_addr;
+	void *bounce_buf;
+	bool use_bounce_buffer;
 };
 
 struct sdmmc_dlyb {
@@ -54,6 +57,8 @@ struct sdmmc_dlyb {
 static int sdmmc_idma_validate_data(struct mmci_host *host,
 				    struct mmc_data *data)
 {
+	struct sdmmc_idma *idma = host->dma_priv;
+	struct device *dev = mmc_dev(host->mmc);
 	struct scatterlist *sg;
 	int i;
 
@@ -61,41 +66,69 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
 	 * idma has constraints on idmabase & idmasize for each element
 	 * excepted the last element which has no constraint on idmasize
 	 */
+	idma->use_bounce_buffer = false;
 	for_each_sg(data->sg, sg, data->sg_len - 1, i) {
 		if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
 		    !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
-			dev_err(mmc_dev(host->mmc),
+			dev_dbg(mmc_dev(host->mmc),
 				"unaligned scatterlist: ofst:%x length:%d\n",
 				data->sg->offset, data->sg->length);
-			return -EINVAL;
+			goto use_bounce_buffer;
 		}
 	}
 
 	if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
-		dev_err(mmc_dev(host->mmc),
+		dev_dbg(mmc_dev(host->mmc),
 			"unaligned last scatterlist: ofst:%x length:%d\n",
 			data->sg->offset, data->sg->length);
-		return -EINVAL;
+		goto use_bounce_buffer;
 	}
 
+	return 0;
+
+use_bounce_buffer:
+	if (!idma->bounce_buf) {
+		idma->bounce_buf = dmam_alloc_coherent(dev,
+						       host->mmc->max_req_size,
+						       &idma->bounce_dma_addr,
+						       GFP_KERNEL);
+		if (!idma->bounce_buf) {
+			dev_err(dev, "Unable to allocate DMA bounce buffer.\n");
+			return -ENOMEM;
+		}
+	}
+
+	idma->use_bounce_buffer = true;
+
 	return 0;
 }
 
 static int _sdmmc_idma_prep_data(struct mmci_host *host,
 				 struct mmc_data *data)
 {
-	int n_elem;
+	struct sdmmc_idma *idma = host->dma_priv;
 
-	n_elem = dma_map_sg(mmc_dev(host->mmc),
-			    data->sg,
-			    data->sg_len,
-			    mmc_get_dma_dir(data));
+	if (idma->use_bounce_buffer) {
+		if (data->flags & MMC_DATA_WRITE) {
+			unsigned int xfer_bytes = data->blksz * data->blocks;
 
-	if (!n_elem) {
-		dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
-		return -EINVAL;
-	}
+			sg_copy_to_buffer(data->sg, data->sg_len,
+					  idma->bounce_buf, xfer_bytes);
+			dma_wmb();
+		}
+	} else {
+		int n_elem;
+
+		n_elem = dma_map_sg(mmc_dev(host->mmc),
+				    data->sg,
+				    data->sg_len,
+				    mmc_get_dma_dir(data));
 
+		if (!n_elem) {
+			dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
+			return -EINVAL;
+		}
+	}
 	return 0;
 }
 
@@ -112,8 +145,19 @@ static int sdmmc_idma_prep_data(struct mmci_host *host,
 static void sdmmc_idma_unprep_data(struct mmci_host *host,
 				   struct mmc_data *data, int err)
 {
-	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-		     mmc_get_dma_dir(data));
+	struct sdmmc_idma *idma = host->dma_priv;
+
+	if (idma->use_bounce_buffer) {
+		if (data->flags & MMC_DATA_READ) {
+			unsigned int xfer_bytes = data->blksz * data->blocks;
+
+			sg_copy_from_buffer(data->sg, data->sg_len,
+					    idma->bounce_buf, xfer_bytes);
+		}
+	} else {
+		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     mmc_get_dma_dir(data));
+	}
 }
 
 static int sdmmc_idma_setup(struct mmci_host *host)
@@ -137,6 +181,8 @@ static int sdmmc_idma_setup(struct mmci_host *host)
 		host->mmc->max_segs = SDMMC_LLI_BUF_LEN /
 			sizeof(struct sdmmc_lli_desc);
 		host->mmc->max_seg_size = host->variant->stm32_idmabsize_mask;
+
+		host->mmc->max_req_size = SZ_1M;
 	} else {
 		host->mmc->max_segs = 1;
 		host->mmc->max_seg_size = host->mmc->max_req_size;
@@ -154,8 +200,16 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
 	struct scatterlist *sg;
 	int i;
 
-	if (!host->variant->dma_lli || data->sg_len == 1) {
-		writel_relaxed(sg_dma_address(data->sg),
+	if (!host->variant->dma_lli || data->sg_len == 1 ||
+	    idma->use_bounce_buffer) {
+		u32 dma_addr;
+
+		if (idma->use_bounce_buffer)
+			dma_addr = idma->bounce_dma_addr;
+		else
+			dma_addr = sg_dma_address(data->sg);
+
+		writel_relaxed(dma_addr,
 			       host->base + MMCI_STM32_IDMABASE0R);
 		writel_relaxed(MMCI_STM32_IDMAEN,
 			       host->base + MMCI_STM32_IDMACTRLR);
--
2.25.1


2022-04-04 23:51:27

by Ulf Hansson

Subject: Re: [PATCH v2] mmc: mmci: stm32: use a buffer for unaligned DMA requests

On Mon, 28 Mar 2022 at 16:51, Yann Gautier <[email protected]> wrote:
>
> In SDIO mode, the sg list for requests can be unaligned with what the
> STM32 SDMMC internal DMA supports. In that case, instead of failing,
> use a temporary bounce buffer to copy from/to the sg list.
> This buffer is limited to 1MB, which requires max_req_size to be
> limited to 1MB as well. This has shown no throughput penalty for
> SD cards or eMMC.
>
> Signed-off-by: Yann Gautier <[email protected]>

Queued up for v5.19 on the devel branch, thanks!

Kind regards
Uffe

