The following patches add driver support for porting TI's am335x and am437x
devices to the sdhci-omap driver.
This involves adding external DMA support to sdhci (first 3 patches from
Chunyan) plus some miscellaneous patches to take care of deviations of
the controllers from the sdhci model.
DT changes will be posted in a separate series.
Untested versions of Chunyan's patches were posted before[1].
Tested on: am335x-evm, am335x-boneblack, am335x-sk, am437x-gpevm,
am43xx-gpevm, am437x-idk, dra7xx-evm, dra72x-evm
v2:
1. sdhci uses two bottom halves: a threaded_irq for card detect and a
tasklet for finishing mmc requests. Patch 1 removes the tasklet and
moves its function to the threaded_irq. This makes it possible to call
dmaengine_terminate_sync() in sdhci_request_done().
2. Factored out code common between the normal and external DMA cases.
3. Used the existing sdhci_set_data_timeout_irq() API to disable DTO during
erase commands.
4. Fixed subject line for dt-bindings patch.
[1] https://patchwork.kernel.org/project/linux-mmc/list/?series=54897
Chunyan Zhang (3):
mmc: sdhci: add support for using external DMA devices
dt-bindings: sdhci-omap: Add properties for using external dma
mmc: sdhci-omap: Add using external dma
Faiz Abbas (5):
mmc: sdhci: Get rid of finish_tasklet
mmc: sdhci: Add quirk for disabling DTO during erase command
mmc: sdhci-omap: Add DISABLE_DTO_FOR_ERASE Quirk
dt-bindings: sdhci-omap: Add am335x and am437x specific bindings
mmc: sdhci-omap: Add am335x and am437x specific compatibles
.../devicetree/bindings/mmc/sdhci-omap.txt | 9 +
drivers/mmc/host/Kconfig | 4 +
drivers/mmc/host/sdhci-omap.c | 27 +-
drivers/mmc/host/sdhci.c | 333 +++++++++++++++---
drivers/mmc/host/sdhci.h | 12 +-
5 files changed, 328 insertions(+), 57 deletions(-)
--
2.19.2
sdhci.c has two bottom halves implemented: a threaded_irq for handling
card insert/remove operations and a tasklet for finishing mmc requests.
With the addition of external dma support, dmaengine APIs need to
terminate in non-atomic context before unmapping the dma buffers.
To facilitate this, remove the finish_tasklet and move the call of
sdhci_request_done() to the threaded_irq() callback. Also move the
interrupt result variable to sdhci_host so it can be populated from
anywhere inside the sdhci_irq handler.
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/sdhci.c | 43 +++++++++++++++-------------------------
drivers/mmc/host/sdhci.h | 2 +-
2 files changed, 17 insertions(+), 28 deletions(-)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index eba9bcc92ad3..20ed09b896d7 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1232,7 +1232,7 @@ static void __sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
WARN_ON(i >= SDHCI_MAX_MRQS);
- tasklet_schedule(&host->finish_tasklet);
+ host->result = IRQ_WAKE_THREAD;
}
static void sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
@@ -2705,14 +2705,6 @@ static bool sdhci_request_done(struct sdhci_host *host)
return false;
}
-static void sdhci_tasklet_finish(unsigned long param)
-{
- struct sdhci_host *host = (struct sdhci_host *)param;
-
- while (!sdhci_request_done(host))
- ;
-}
-
static void sdhci_timeout_timer(struct timer_list *t)
{
struct sdhci_host *host;
@@ -2995,11 +2987,12 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
static irqreturn_t sdhci_irq(int irq, void *dev_id)
{
- irqreturn_t result = IRQ_NONE;
struct sdhci_host *host = dev_id;
u32 intmask, mask, unexpected = 0;
int max_loops = 16;
+ host->result = IRQ_NONE;
+
spin_lock(&host->lock);
if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
@@ -3009,7 +3002,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
intmask = sdhci_readl(host, SDHCI_INT_STATUS);
if (!intmask || intmask == 0xffffffff) {
- result = IRQ_NONE;
+ host->result = IRQ_NONE;
goto out;
}
@@ -3054,7 +3047,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
SDHCI_INT_CARD_REMOVE);
- result = IRQ_WAKE_THREAD;
+ host->result = IRQ_WAKE_THREAD;
}
if (intmask & SDHCI_INT_CMD_MASK)
@@ -3074,7 +3067,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
(host->ier & SDHCI_INT_CARD_INT)) {
sdhci_enable_sdio_irq_nolock(host, false);
host->thread_isr |= SDHCI_INT_CARD_INT;
- result = IRQ_WAKE_THREAD;
+ host->result = IRQ_WAKE_THREAD;
}
intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
@@ -3087,8 +3080,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
sdhci_writel(host, intmask, SDHCI_INT_STATUS);
}
cont:
- if (result == IRQ_NONE)
- result = IRQ_HANDLED;
+ if (host->result == IRQ_NONE)
+ host->result = IRQ_HANDLED;
intmask = sdhci_readl(host, SDHCI_INT_STATUS);
} while (intmask && --max_loops);
@@ -3101,7 +3094,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
sdhci_dumpregs(host);
}
- return result;
+ return host->result;
}
static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
@@ -3131,6 +3124,12 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
spin_unlock_irqrestore(&host->lock, flags);
}
+ if (!isr) {
+ do {
+ isr = !sdhci_request_done(host);
+ } while (isr);
+ }
+
return isr ? IRQ_HANDLED : IRQ_NONE;
}
@@ -4212,12 +4211,6 @@ int __sdhci_add_host(struct sdhci_host *host)
struct mmc_host *mmc = host->mmc;
int ret;
- /*
- * Init tasklets.
- */
- tasklet_init(&host->finish_tasklet,
- sdhci_tasklet_finish, (unsigned long)host);
-
timer_setup(&host->timer, sdhci_timeout_timer, 0);
timer_setup(&host->data_timer, sdhci_timeout_data_timer, 0);
@@ -4230,7 +4223,7 @@ int __sdhci_add_host(struct sdhci_host *host)
if (ret) {
pr_err("%s: Failed to request IRQ %d: %d\n",
mmc_hostname(mmc), host->irq, ret);
- goto untasklet;
+ return ret;
}
ret = sdhci_led_register(host);
@@ -4263,8 +4256,6 @@ int __sdhci_add_host(struct sdhci_host *host)
sdhci_writel(host, 0, SDHCI_INT_ENABLE);
sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
free_irq(host->irq, host);
-untasklet:
- tasklet_kill(&host->finish_tasklet);
return ret;
}
@@ -4326,8 +4317,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
del_timer_sync(&host->timer);
del_timer_sync(&host->data_timer);
- tasklet_kill(&host->finish_tasklet);
-
if (!IS_ERR(mmc->supply.vqmmc))
regulator_disable(mmc->supply.vqmmc);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 6cc9a3c2ac66..624d5aa01995 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -554,7 +554,7 @@ struct sdhci_host {
unsigned int desc_sz; /* ADMA descriptor size */
- struct tasklet_struct finish_tasklet; /* Tasklet structures */
+ irqreturn_t result; /* Result of IRQ handler */
struct timer_list timer; /* Timer for timeouts */
struct timer_list data_timer; /* Timer for data timeouts */
--
2.19.2
From: Chunyan Zhang <[email protected]>
Some standard SD host controllers can support both external DMA
controllers and ADMA/SDMA, in which the SD host controller
acts as the DMA master. TI's OMAP controller is one such example.
Currently the generic SDHCI code supports ADMA/SDMA integrated in
the host controller but does not have any support for external DMA
controllers implemented using dmaengine, meaning that custom code is
needed for any systems that use an external DMA controller with SDHCI.
Fixes by Faiz Abbas <[email protected]>:
1. Map scatterlists before dmaengine_prep_slave_sg()
2. Use the dma_async_*() functions inside the send_command() path and call
dmaengine_terminate_sync() in non-atomic context in case of an error.
Signed-off-by: Chunyan Zhang <[email protected]>
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/Kconfig | 3 +
drivers/mmc/host/sdhci.c | 285 +++++++++++++++++++++++++++++++++++----
drivers/mmc/host/sdhci.h | 8 ++
3 files changed, 268 insertions(+), 28 deletions(-)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a44ec8bb5418..f3b442528ed8 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -999,3 +999,6 @@ config MMC_SDHCI_AM654
If you have a controller with this interface, say Y or M here.
If unsure, say N.
+
+config MMC_SDHCI_EXTERNAL_DMA
+ bool
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 20ed09b896d7..2f5f64efad16 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -14,6 +14,7 @@
*/
#include <linux/delay.h>
+#include <linux/dmaengine.h>
#include <linux/ktime.h>
#include <linux/highmem.h>
#include <linux/io.h>
@@ -34,6 +35,7 @@
#include <linux/mmc/card.h>
#include <linux/mmc/sdio.h>
#include <linux/mmc/slot-gpio.h>
+#include <linux/workqueue.h>
#include "sdhci.h"
@@ -987,18 +989,9 @@ static void sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
}
}
-static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
+static inline void sdhci_reset_data(struct sdhci_host *host,
+ struct mmc_data *data)
{
- struct mmc_data *data = cmd->data;
-
- host->data_timeout = 0;
-
- if (sdhci_data_line_cmd(cmd))
- sdhci_set_timeout(host, cmd);
-
- if (!data)
- return;
-
WARN_ON(host->data);
/* Sanity checks */
@@ -1009,6 +1002,34 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
host->data = data;
host->data_early = 0;
host->data->bytes_xfered = 0;
+}
+
+static inline void sdhci_set_block_info(struct sdhci_host *host)
+{
+
+ /* Set the DMA boundary value and block size */
+ sdhci_writew(host,
+ SDHCI_MAKE_BLKSZ(host->sdma_boundary, host->data->blksz),
+ SDHCI_BLOCK_SIZE);
+ /*
+ * For Version 4.10 onwards, if v4 mode is enabled, 32-bit Block Count
+ * can be supported, in that case 16-bit block count register must be 0.
+ */
+ if (host->version >= SDHCI_SPEC_410 && host->v4_mode &&
+ (host->quirks2 & SDHCI_QUIRK2_USE_32BIT_BLK_CNT)) {
+ if (sdhci_readw(host, SDHCI_BLOCK_COUNT))
+ sdhci_writew(host, 0, SDHCI_BLOCK_COUNT);
+ sdhci_writew(host, host->data->blocks, SDHCI_32BIT_BLK_CNT);
+ } else {
+ sdhci_writew(host, host->data->blocks, SDHCI_BLOCK_COUNT);
+ }
+}
+
+static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
+{
+ struct mmc_data *data = cmd->data;
+
+ sdhci_reset_data(host, data);
if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
struct scatterlist *sg;
@@ -1100,24 +1121,186 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
sdhci_set_transfer_irqs(host);
- /* Set the DMA boundary value and block size */
- sdhci_writew(host, SDHCI_MAKE_BLKSZ(host->sdma_boundary, data->blksz),
- SDHCI_BLOCK_SIZE);
+ sdhci_set_block_info(host);
+}
- /*
- * For Version 4.10 onwards, if v4 mode is enabled, 32-bit Block Count
- * can be supported, in that case 16-bit block count register must be 0.
- */
- if (host->version >= SDHCI_SPEC_410 && host->v4_mode &&
- (host->quirks2 & SDHCI_QUIRK2_USE_32BIT_BLK_CNT)) {
- if (sdhci_readw(host, SDHCI_BLOCK_COUNT))
- sdhci_writew(host, 0, SDHCI_BLOCK_COUNT);
- sdhci_writew(host, data->blocks, SDHCI_32BIT_BLK_CNT);
+#if IS_ENABLED(CONFIG_MMC_SDHCI_EXTERNAL_DMA)
+static int sdhci_external_dma_init(struct sdhci_host *host)
+{
+ int ret = 0;
+ struct mmc_host *mmc = host->mmc;
+
+ host->tx_chan = dma_request_chan(mmc->parent, "tx");
+ if (IS_ERR(host->tx_chan)) {
+ ret = PTR_ERR(host->tx_chan);
+ if (ret != -EPROBE_DEFER)
+ pr_warn("Failed to request TX DMA channel.\n");
+ host->tx_chan = NULL;
+ return ret;
+ }
+
+ host->rx_chan = dma_request_chan(mmc->parent, "rx");
+ if (IS_ERR(host->rx_chan)) {
+ if (host->tx_chan) {
+ dma_release_channel(host->tx_chan);
+ host->tx_chan = NULL;
+ }
+
+ ret = PTR_ERR(host->rx_chan);
+ if (ret != -EPROBE_DEFER)
+ pr_warn("Failed to request RX DMA channel.\n");
+ host->rx_chan = NULL;
+ }
+
+ return ret;
+}
+
+static inline struct dma_chan *
+sdhci_external_dma_channel(struct sdhci_host *host, struct mmc_data *data)
+{
+ return data->flags & MMC_DATA_WRITE ? host->tx_chan : host->rx_chan;
+}
+
+static int sdhci_external_dma_setup(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ int ret, i;
+ struct dma_async_tx_descriptor *desc;
+ struct mmc_data *data = cmd->data;
+ struct dma_chan *chan;
+ struct dma_slave_config cfg;
+ dma_cookie_t cookie;
+ int sg_cnt;
+
+ if (!host->mapbase)
+ return -EINVAL;
+
+ cfg.src_addr = host->mapbase + SDHCI_BUFFER;
+ cfg.dst_addr = host->mapbase + SDHCI_BUFFER;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cfg.src_maxburst = data->blksz / 4;
+ cfg.dst_maxburst = data->blksz / 4;
+
+ /* Sanity check: all the SG entries must be aligned by block size. */
+ for (i = 0; i < data->sg_len; i++) {
+ if ((data->sg + i)->length % data->blksz)
+ return -EINVAL;
+ }
+
+ chan = sdhci_external_dma_channel(host, data);
+
+ ret = dmaengine_slave_config(chan, &cfg);
+ if (ret)
+ return ret;
+
+ sg_cnt = sdhci_pre_dma_transfer(host, data, COOKIE_MAPPED);
+ if (sg_cnt <= 0)
+ return -EINVAL;
+
+ desc = dmaengine_prep_slave_sg(chan, data->sg, data->sg_len,
+ mmc_get_dma_dir(data),
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!desc)
+ return -EINVAL;
+
+ desc->callback = NULL;
+ desc->callback_param = NULL;
+
+ cookie = dmaengine_submit(desc);
+ if (cookie < 0)
+ ret = cookie;
+
+ return ret;
+}
+
+static void sdhci_external_dma_release(struct sdhci_host *host)
+{
+ if (host->tx_chan) {
+ dma_release_channel(host->tx_chan);
+ host->tx_chan = NULL;
+ }
+
+ if (host->rx_chan) {
+ dma_release_channel(host->rx_chan);
+ host->rx_chan = NULL;
+ }
+
+ sdhci_switch_external_dma(host, false);
+}
+
+static void __sdhci_external_dma_prepare_data(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ struct mmc_data *data = cmd->data;
+
+ sdhci_reset_data(host, data);
+
+ host->flags |= SDHCI_REQ_USE_DMA;
+ sdhci_set_transfer_irqs(host);
+
+ sdhci_set_block_info(host);
+}
+
+static void sdhci_external_dma_prepare_data(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ if (!sdhci_external_dma_setup(host, cmd)) {
+ __sdhci_external_dma_prepare_data(host, cmd);
} else {
- sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
+ sdhci_external_dma_release(host);
+ pr_err("%s: Cannot use external DMA, switch to the DMA/PIO which standard SDHCI provides.\n",
+ mmc_hostname(host->mmc));
+ sdhci_prepare_data(host, cmd);
}
}
+static void sdhci_external_dma_pre_transfer(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ struct dma_chan *chan;
+
+ if (!cmd->data)
+ return;
+
+ chan = sdhci_external_dma_channel(host, cmd->data);
+ if (chan)
+ dma_async_issue_pending(chan);
+}
+
+#else
+static int sdhci_external_dma_init(struct sdhci_host *host)
+{
+ return -EOPNOTSUPP;
+}
+
+static void sdhci_external_dma_release(struct sdhci_host *host)
+{}
+
+static void sdhci_external_dma_prepare_data(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{
+ /* If MMC_SDHCI_EXTERNAL_DMA not supported, PIO will be used */
+ sdhci_prepare_data(host, cmd);
+}
+
+static void sdhci_external_dma_pre_transfer(struct sdhci_host *host,
+ struct mmc_command *cmd)
+{}
+
+static inline struct dma_chan *
+sdhci_external_dma_channel(struct sdhci_host *host, struct mmc_data *data)
+{
+ return NULL;
+}
+#endif
+
+void sdhci_switch_external_dma(struct sdhci_host *host, bool en)
+{
+ host->use_external_dma = en;
+}
+EXPORT_SYMBOL_GPL(sdhci_switch_external_dma);
+
static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
struct mmc_request *mrq)
{
@@ -1369,12 +1552,19 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
}
host->cmd = cmd;
+ host->data_timeout = 0;
if (sdhci_data_line_cmd(cmd)) {
WARN_ON(host->data_cmd);
host->data_cmd = cmd;
+ sdhci_set_timeout(host, cmd);
}
- sdhci_prepare_data(host, cmd);
+ if (cmd->data) {
+ if (host->use_external_dma)
+ sdhci_external_dma_prepare_data(host, cmd);
+ else
+ sdhci_prepare_data(host, cmd);
+ }
sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT);
@@ -1416,6 +1606,9 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
timeout += 10 * HZ;
sdhci_mod_timer(host, cmd->mrq, timeout);
+ if (host->use_external_dma)
+ sdhci_external_dma_pre_transfer(host, cmd);
+
sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
}
EXPORT_SYMBOL_GPL(sdhci_send_command);
@@ -2619,6 +2812,18 @@ static bool sdhci_request_done(struct sdhci_host *host)
if (host->flags & SDHCI_REQ_USE_DMA) {
struct mmc_data *data = mrq->data;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ /* Terminate and synchronize dma in case of an error */
+ if (data && (mrq->cmd->error || data->error) &&
+ host->use_external_dma) {
+ struct dma_chan *chan = sdhci_external_dma_channel(host,
+ data);
+ dmaengine_terminate_sync(chan);
+ }
+
+ spin_lock_irqsave(&host->lock, flags);
+
if (data && data->host_cookie == COOKIE_MAPPED) {
if (host->bounce_buffer) {
/*
@@ -3691,12 +3896,28 @@ int sdhci_setup_host(struct sdhci_host *host)
mmc_hostname(mmc), host->version);
}
- if (host->quirks & SDHCI_QUIRK_FORCE_DMA)
+ if (host->use_external_dma) {
+ ret = sdhci_external_dma_init(host);
+ if (ret == -EPROBE_DEFER)
+ goto unreg;
+
+ /*
+ * Fall back to use the DMA/PIO integrated in standard SDHCI
+ * instead of external DMA devices.
+ */
+ if (ret)
+ sdhci_switch_external_dma(host, false);
+ }
+
+ if (host->quirks & SDHCI_QUIRK_FORCE_DMA) {
host->flags |= SDHCI_USE_SDMA;
- else if (!(host->caps & SDHCI_CAN_DO_SDMA))
+ } else if (!(host->caps & SDHCI_CAN_DO_SDMA)) {
DBG("Controller doesn't have SDMA capability\n");
- else
+ } else if (host->use_external_dma) {
+ /* Using dma-names to detect external dma capability */
+ } else {
host->flags |= SDHCI_USE_SDMA;
+ }
if ((host->quirks & SDHCI_QUIRK_BROKEN_DMA) &&
(host->flags & SDHCI_USE_SDMA)) {
@@ -4201,6 +4422,10 @@ void sdhci_cleanup_host(struct sdhci_host *host)
dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
host->adma_table_sz, host->align_buffer,
host->align_addr);
+
+ if (host->use_external_dma)
+ sdhci_external_dma_release(host);
+
host->adma_table = NULL;
host->align_buffer = NULL;
}
@@ -4241,6 +4466,7 @@ int __sdhci_add_host(struct sdhci_host *host)
pr_info("%s: SDHCI controller on %s [%s] using %s\n",
mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
+ host->use_external_dma ? "External DMA" :
(host->flags & SDHCI_USE_ADMA) ?
(host->flags & SDHCI_USE_64_BIT_DMA) ? "ADMA 64-bit" : "ADMA" :
(host->flags & SDHCI_USE_SDMA) ? "DMA" : "PIO");
@@ -4325,6 +4551,9 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
host->adma_table_sz, host->align_buffer,
host->align_addr);
+ if (host->use_external_dma)
+ sdhci_external_dma_release(host);
+
host->adma_table = NULL;
host->align_buffer = NULL;
}
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 624d5aa01995..6ff3e29791ee 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -482,6 +482,7 @@ struct sdhci_host {
int irq; /* Device IRQ */
void __iomem *ioaddr; /* Mapped address */
+ phys_addr_t mapbase; /* physical address base */
char *bounce_buffer; /* For packing SDMA reads/writes */
dma_addr_t bounce_addr;
unsigned int bounce_buffer_size;
@@ -531,6 +532,7 @@ struct sdhci_host {
bool pending_reset; /* Cmd/data reset is pending */
bool irq_wake_enabled; /* IRQ wakeup is enabled */
bool v4_mode; /* Host Version 4 Enable */
+ bool use_external_dma; /* Host selects to use external DMA */
struct mmc_request *mrqs_done[SDHCI_MAX_MRQS]; /* Requests done */
struct mmc_command *cmd; /* Current command */
@@ -559,6 +561,11 @@ struct sdhci_host {
struct timer_list timer; /* Timer for timeouts */
struct timer_list data_timer; /* Timer for data timeouts */
+#if IS_ENABLED(CONFIG_MMC_SDHCI_EXTERNAL_DMA)
+ struct dma_chan *rx_chan;
+ struct dma_chan *tx_chan;
+#endif
+
u32 caps; /* CAPABILITY_0 */
u32 caps1; /* CAPABILITY_1 */
bool read_caps; /* Capability flags have been read */
@@ -792,5 +799,6 @@ void sdhci_start_tuning(struct sdhci_host *host);
void sdhci_end_tuning(struct sdhci_host *host);
void sdhci_reset_tuning(struct sdhci_host *host);
void sdhci_send_tuning(struct sdhci_host *host, u32 opcode);
+void sdhci_switch_external_dma(struct sdhci_host *host, bool en);
#endif /* __SDHCI_HW_H */
--
2.19.2
From: Chunyan Zhang <[email protected]>
sdhci-omap can support both an external DMA controller via the dmaengine
framework and the ADMA support that the standard SD host controller
provides.
Signed-off-by: Chunyan Zhang <[email protected]>
Signed-off-by: Faiz Abbas <[email protected]>
---
Documentation/devicetree/bindings/mmc/sdhci-omap.txt | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
index 72c4dec7e1db..4485dbceb373 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
@@ -14,6 +14,11 @@ Required properties:
"ddr_1_8v-rev11", "ddr_1_8v" or "ddr_3_3v", "hs200_1_8v-rev11",
"hs200_1_8v",
- pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
+- dmas: List of DMA specifiers with the controller specific format as described
+ in the generic DMA client binding. A tx and rx specifier is required.
+- dma-names: List of DMA request names. These strings correspond 1:1 with the
+ DMA specifiers listed in dmas. The string naming is to be "tx"
+ and "rx" for TX and RX DMA requests, respectively.
Example:
mmc1: mmc@4809c000 {
@@ -22,4 +27,6 @@ Example:
ti,hwmods = "mmc1";
bus-width = <4>;
vmmc-supply = <&vmmc>; /* phandle to regulator node */
+ dmas = <&sdma 61 &sdma 62>;
+ dma-names = "tx", "rx";
};
--
2.19.2
From: Chunyan Zhang <[email protected]>
sdhci-omap can support both an external DMA controller via the dmaengine
framework and the ADMA support that the standard SD host controller provides.
Fixes by Faiz Abbas <[email protected]>:
1. Switch to DMA slave mode when using external DMA
2. Add offset to mapbase
Signed-off-by: Chunyan Zhang <[email protected]>
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/Kconfig | 1 +
drivers/mmc/host/sdhci-omap.c | 16 +++++++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index f3b442528ed8..5d1189478ee0 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -979,6 +979,7 @@ config MMC_SDHCI_OMAP
depends on MMC_SDHCI_PLTFM && OF
select THERMAL
imply TI_SOC_THERMAL
+ select MMC_SDHCI_EXTERNAL_DMA if DMA_ENGINE
help
This selects the Secure Digital Host Controller Interface (SDHCI)
support present in TI's DRA7 SOCs. The controller supports
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index c11c18a9aacb..8a05e94fe612 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -696,7 +696,11 @@ static int sdhci_omap_enable_dma(struct sdhci_host *host)
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_CON);
- reg |= CON_DMA_MASTER;
+ reg &= ~CON_DMA_MASTER;
+ /* Switch to DMA slave mode when using external DMA */
+ if (!host->use_external_dma)
+ reg |= CON_DMA_MASTER;
+
sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, reg);
return 0;
@@ -1010,6 +1014,7 @@ static int sdhci_omap_probe(struct platform_device *pdev)
const struct of_device_id *match;
struct sdhci_omap_data *data;
const struct soc_device_attribute *soc;
+ struct resource *regs;
match = of_match_device(omap_sdhci_match, dev);
if (!match)
@@ -1022,6 +1027,10 @@ static int sdhci_omap_probe(struct platform_device *pdev)
}
offset = data->offset;
+ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!regs)
+ return -ENXIO;
+
host = sdhci_pltfm_init(pdev, &sdhci_omap_pdata,
sizeof(*omap_host));
if (IS_ERR(host)) {
@@ -1038,6 +1047,7 @@ static int sdhci_omap_probe(struct platform_device *pdev)
omap_host->timing = MMC_TIMING_LEGACY;
omap_host->flags = data->flags;
host->ioaddr += offset;
+ host->mapbase = regs->start + offset;
mmc = host->mmc;
sdhci_get_of_property(pdev);
@@ -1105,6 +1115,10 @@ static int sdhci_omap_probe(struct platform_device *pdev)
host->mmc_host_ops.execute_tuning = sdhci_omap_execute_tuning;
host->mmc_host_ops.enable_sdio_irq = sdhci_omap_enable_sdio_irq;
+ /* Switch to external DMA only if there is the "dmas" property */
+ if (of_find_property(dev->of_node, "dmas", NULL))
+ sdhci_switch_external_dma(host, true);
+
ret = sdhci_setup_host(host);
if (ret)
goto err_put_sync;
--
2.19.2
Add a quirk to disable the data timeout interrupt (DTO) during an erase command.
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/sdhci-omap.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 8a05e94fe612..ae9701d198d5 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -852,6 +852,7 @@ static const struct sdhci_pltfm_data sdhci_omap_pdata = {
.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN |
SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_RSP_136_HAS_CRC |
+ SDHCI_QUIRK2_DISABLE_DTO_FOR_ERASE |
SDHCI_QUIRK2_DISABLE_HW_TIMEOUT,
.ops = &sdhci_omap_ops,
};
--
2.19.2
Some controllers might prematurely issue a data timeout during an erase
command. Add a quirk to disable the interrupt when an erase command is
issued.
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/sdhci.c | 5 +++++
drivers/mmc/host/sdhci.h | 2 ++
2 files changed, 7 insertions(+)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 2f5f64efad16..608f7306742c 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1522,6 +1522,11 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
/* Initially, a command has no error */
cmd->error = 0;
+ if (cmd->opcode == MMC_ERASE &&
+ (host->quirks2 & SDHCI_QUIRK2_DISABLE_DTO_FOR_ERASE)) {
+ sdhci_set_data_timeout_irq(host, false);
+ }
+
if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) &&
cmd->opcode == MMC_STOP_TRANSMISSION)
cmd->flags |= MMC_RSP_BUSY;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 6ff3e29791ee..60282721f827 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -479,6 +479,8 @@ struct sdhci_host {
* block count.
*/
#define SDHCI_QUIRK2_USE_32BIT_BLK_CNT (1<<18)
+/* Controller needs to disable DTO for erase command */
+#define SDHCI_QUIRK2_DISABLE_DTO_FOR_ERASE (1<<19)
int irq; /* Device IRQ */
void __iomem *ioaddr; /* Mapped address */
--
2.19.2
Add bindings for TI's sdhci-omap controller present in am335x and
am437x devices.
Signed-off-by: Faiz Abbas <[email protected]>
Reviewed-by: Rob Herring <[email protected]>
---
Documentation/devicetree/bindings/mmc/sdhci-omap.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
index 4485dbceb373..859291e83964 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
@@ -7,6 +7,8 @@ For UHS devices which require tuning, the device tree should have a "cpu_thermal
Required properties:
- compatible: Should be "ti,dra7-sdhci" for DRA7 and DRA72 controllers
Should be "ti,k2g-sdhci" for K2G
+ Should be "ti,am335-sdhci" for am335x controllers
+ Should be "ti,am437-sdhci" for am437x controllers
- ti,hwmods: Must be "mmc<n>", <n> is controller instance starting 1
(Not required for K2G).
- pinctrl-names: Should be subset of "default", "hs", "sdr12", "sdr25", "sdr50",
--
2.19.2
Add support for new compatibles for TI's am335x and am437x devices.
Signed-off-by: Faiz Abbas <[email protected]>
---
drivers/mmc/host/sdhci-omap.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index ae9701d198d5..2e246b5b744c 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -861,6 +861,14 @@ static const struct sdhci_omap_data k2g_data = {
.offset = 0x200,
};
+static const struct sdhci_omap_data am335_data = {
+ .offset = 0x200,
+};
+
+static const struct sdhci_omap_data am437_data = {
+ .offset = 0x200,
+};
+
static const struct sdhci_omap_data dra7_data = {
.offset = 0x200,
.flags = SDHCI_OMAP_REQUIRE_IODELAY,
@@ -869,6 +877,8 @@ static const struct sdhci_omap_data dra7_data = {
static const struct of_device_id omap_sdhci_match[] = {
{ .compatible = "ti,dra7-sdhci", .data = &dra7_data },
{ .compatible = "ti,k2g-sdhci", .data = &k2g_data },
+ { .compatible = "ti,am335-sdhci", .data = &am335_data },
+ { .compatible = "ti,am437-sdhci", .data = &am437_data },
{},
};
MODULE_DEVICE_TABLE(of, omap_sdhci_match);
--
2.19.2
* Faiz Abbas <[email protected]> [190215 19:17]:
> The following add driver patches for porting TI's am335x and am437x
> devices to the sdhci-omap driver.
>
> This involves adding external DMA support to sdhci (first 3 patches from
> Chunyan) plus some miscellaneous patches to take care of deviations of
> the controllers from the sdhci model.
Good to see this happening :)
I think am437x should also have ADMA as it's mostly
omap4 based? See the old omap4 ADMA series at [0] below.
Are you seeing any improvement in SDIO card read/write speeds
between external DMA and ADMA, BTW?
Regards,
Tony
[0] https://linux-omap.vger.kernel.narkive.com/H4EbLW96/patch-0-4-omap4-hsmmc-adding-adma-support
* Faiz Abbas <[email protected]> [190215 19:18]:
> From: Chunyan Zhang <[email protected]>
>
> sdhci-omap can support both external dma controller via dmaengine
> framework as well as ADMA which standard SD host controller
> provides.
Care to describe here also how to configure things for ADMA
instead of external DMA? Just leave out the dmas property?
Regards,
Tony
Hi Tony,
On 16/02/19 1:37 AM, Tony Lindgren wrote:
> * Faiz Abbas <[email protected]> [190215 19:18]:
>> From: Chunyan Zhang <[email protected]>
>>
>> sdhci-omap can support both external dma controller via dmaengine
>> framework as well as ADMA which standard SD host controller
>> provides.
>
> Care to describe here also how to configure things for ADMA and
> instead of DMA? Just leave out dmas property?
>
That's correct. If dmas property is populated, then use external DMA.
Otherwise use ADMA/SDMA depending on what you read from the CAPS register.
Thanks,
Faiz
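For illustration, the selection flow boils down to something like the
following condensed sketch (taken from the patches in this series, with
error handling and the -EPROBE_DEFER case left out):

	/* probe: opt in to external DMA only when the DT node has "dmas" */
	if (of_find_property(dev->of_node, "dmas", NULL))
		sdhci_switch_external_dma(host, true);

	/* sdhci_setup_host(): request the "tx"/"rx" channels, else fall back */
	if (host->use_external_dma) {
		ret = sdhci_external_dma_init(host);
		if (ret)
			sdhci_switch_external_dma(host, false);
	}
	/* with external DMA off, the usual CAPS-based ADMA/SDMA/PIO setup runs */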
Hi Tony,
On 16/02/19 1:32 AM, Tony Lindgren wrote:
> * Faiz Abbas <[email protected]> [190215 19:17]:
>> The following add driver patches for porting TI's am335x and am437x
>> devices to the sdhci-omap driver.
>>
>> This involves adding external DMA support to sdhci (first 3 patches from
>> Chunyan) plus some miscellaneous patches to take care of deviations of
>> the controllers from the sdhci model.
>
> Good to see this happening :)
>
> I think am437x should have also the ADMA as it's mostly
> omap4 based? See the old omap4 ADMA series at [0] below.
>
> Are you seeing any improvment in SDIO card read/write speeds
> between external DMA and ADM BTW
am437 is using dmaengine even in the old omap_hsmmc driver. I haven't
tried to enable ADMA in am437x.
Thanks,
Faiz
* Tony Lindgren <[email protected]> [190218 16:20]:
> * Faiz Abbas <[email protected]> [190218 13:38]:
> > Hi Tony,
> >
> > On 16/02/19 1:37 AM, Tony Lindgren wrote:
> > > * Faiz Abbas <[email protected]> [190215 19:18]:
> > >> From: Chunyan Zhang <[email protected]>
> > >>
> > >> sdhci-omap can support both external dma controller via dmaengine
> > >> framework as well as ADMA which standard SD host controller
> > >> provides.
> > >
> > > Care to describe here also how to configure things for ADMA and
> > > instead of DMA? Just leave out dmas property?
> > >
> > That's correct. If dmas property is populated, then use external DMA.
> > Otherwise use ADMA/SDMA depending on what you read from the CAPS register.
>
> OK thanks.
So I guess the dma properties should be under optional
properties in the binding doc then.
Regards,
Tony
* Faiz Abbas <[email protected]> [190218 13:38]:
> Hi Tony,
>
> On 16/02/19 1:37 AM, Tony Lindgren wrote:
> > * Faiz Abbas <[email protected]> [190215 19:18]:
> >> From: Chunyan Zhang <[email protected]>
> >>
> >> sdhci-omap can support both external dma controller via dmaengine
> >> framework as well as ADMA which standard SD host controller
> >> provides.
> >
> > Care to describe here also how to configure things for ADMA and
> > instead of DMA? Just leave out dmas property?
> >
> That's correct. If dmas property is populated, then use external DMA.
> Otherwise use ADMA/SDMA depending on what you read from the CAPS register.
OK thanks.
Tony
* Faiz Abbas <[email protected]> [190218 13:46]:
> Hi Tony,
>
> On 16/02/19 1:32 AM, Tony Lindgren wrote:
> > * Faiz Abbas <[email protected]> [190215 19:17]:
> >> The following add driver patches for porting TI's am335x and am437x
> >> devices to the sdhci-omap driver.
> >>
> >> This involves adding external DMA support to sdhci (first 3 patches from
> >> Chunyan) plus some miscellaneous patches to take care of deviations of
> >> the controllers from the sdhci model.
> >
> > Good to see this happening :)
> >
> > I think am437x should have also the ADMA as it's mostly
> > omap4 based? See the old omap4 ADMA series at [0] below.
> >
> > Are you seeing any improvment in SDIO card read/write speeds
> > between external DMA and ADM BTW
>
> am437 is using dmaengine even in the old omap_hsmmc driver. I haven't
> tried to enable ADMA in am437x.
Right, it's probably safest to keep it that way at least initially
to avoid changing multiple things at once. Then ADMA can be
configured later on if needed.
Regards,
Tony
On Mon, Feb 18, 2019 at 07:11:32PM +0530, Faiz Abbas wrote:
> Hi Tony,
>
> On 16/02/19 1:37 AM, Tony Lindgren wrote:
> > * Faiz Abbas <[email protected]> [190215 19:18]:
> >> From: Chunyan Zhang <[email protected]>
> >>
> >> sdhci-omap can support both external dma controller via dmaengine
> >> framework as well as ADMA which standard SD host controller
> >> provides.
> >
> > Care to describe here also how to configure things for ADMA and
> > instead of DMA? Just leave out dmas property?
> >
>
> That's correct. If dmas property is populated, then use external DMA.
> Otherwise use ADMA/SDMA depending on what you read from the CAPS register.
Then the properties should be optional.
Rob
Hi Rob,
On 19/02/19 1:42 AM, Rob Herring wrote:
> On Mon, Feb 18, 2019 at 07:11:32PM +0530, Faiz Abbas wrote:
>> Hi Tony,
>>
>> On 16/02/19 1:37 AM, Tony Lindgren wrote:
>>> * Faiz Abbas <[email protected]> [190215 19:18]:
>>>> From: Chunyan Zhang <[email protected]>
>>>>
>>>> sdhci-omap can support both external dma controller via dmaengine
>>>> framework as well as ADMA which standard SD host controller
>>>> provides.
>>>
>>> Care to describe here also how to configure things for ADMA and
>>> instead of DMA? Just leave out dmas property?
>>>
>>
>> That's correct. If dmas property is populated, then use external DMA.
>> Otherwise use ADMA/SDMA depending on what you read from the CAPS register.
>
> Then the properties should be optional.
>
Will move it to optional in v3.
Thanks,
Faiz
On 15/02/19 9:20 PM, Faiz Abbas wrote:
> sdhci.c has two bottom halves implemented. A threaded_irq for handling
> card insert/remove operations and a tasklet for finishing mmc requests.
> With the addition of external dma support, dmaengine APIs need to
> terminate in non-atomic context before unmapping the dma buffers.
>
> To facilitate this, remove the finish_tasklet and move the call of
> sdhci_request_done() to the threaded_irq() callback.
The irq thread has a higher latency than the tasklet. The performance drop
is measurable on the system I tried:
Before:
# dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
1+0 records in
1+0 records out
1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
After:
# dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
1+0 records in
1+0 records out
1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
So we only want to resort to the thread for the error case.
Adrian,
On 25/02/19 1:47 PM, Adrian Hunter wrote:
> On 15/02/19 9:20 PM, Faiz Abbas wrote:
>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>> card insert/remove operations and a tasklet for finishing mmc requests.
>> With the addition of external dma support, dmaengine APIs need to
>> terminate in non-atomic context before unmapping the dma buffers.
>>
>> To facilitate this, remove the finish_tasklet and move the call of
>> sdhci_request_done() to the threaded_irq() callback.
>
> The irq thread has a higher latency than the tasklet. The performance drop
> is measurable on the system I tried:
>
> Before:
>
> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
> 1+0 records in
> 1+0 records out
> 1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
>
> After:
>
> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
> 1+0 records in
> 1+0 records out
> 1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
>
> So we only want to resort to the thread for the error case.
>
Sorry for the late response here, but this is about 1.6% decrease. I
tried out the same commands on a dra7xx board here (with about 5
consecutive dd of 1GB) and the average decrease was 0.3%. I believe you
will also find a lesser percentage change if you average over multiple
dd commands.
Is this really so significant that we have to maintain two different
bottom halves and keep having difficulty with adding APIs that can sleep?
Also I am not sure how to implement only the error handling part in the
threaded_irq. We need to enter sdhci_request_done() and get the current
mrq before we can check for error conditions like I've done in patch 2:
/* Terminate and synchronize dma in case of an error */
if (data && (mrq->cmd->error || data->error) &&
host->use_external_dma) {
struct dma_chan *chan = sdhci_external_dma_channel(host, data);
dmaengine_terminate_sync(chan);
}
On a related note, do we really need to protect everything in
sdhci_request_done() with spinlocks? In patch 2 I have only removed the lock
for the terminate_sync() parts that I added, but the whole
dma_unmap/dma_sync parts should be left unprotected IMO.
Thanks,
Faiz
On 6/03/19 12:00 PM, Faiz Abbas wrote:
> Adrian,
>
> On 25/02/19 1:47 PM, Adrian Hunter wrote:
>> On 15/02/19 9:20 PM, Faiz Abbas wrote:
>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>> With the addition of external dma support, dmaengine APIs need to
>>> terminate in non-atomic context before unmapping the dma buffers.
>>>
>>> To facilitate this, remove the finish_tasklet and move the call of
>>> sdhci_request_done() to the threaded_irq() callback.
>>
>> The irq thread has a higher latency than the tasklet. The performance drop
>> is measurable on the system I tried:
>>
>> Before:
>>
>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>> 1+0 records in
>> 1+0 records out
>> 1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
>>
>> After:
>>
>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>> 1+0 records in
>> 1+0 records out
>> 1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
>>
>> So we only want to resort to the thread for the error case.
>>
>
> Sorry for the late response here, but this is about 1.6% decrease. I
> tried out the same commands on a dra7xx board here (with about 5
> consecutive dd of 1GB) and the average decrease was 0.3%. I believe you
> will also find a lesser percentage change if you average over multiple
> dd commands.
>
> Is this really so significant that we have to maintain two different
> bottom halves and keep having difficulty with adding APIs that can sleep?
It is a performance drop that can be avoided, so it might as well be.
Splitting the success path from the failure path is common for I/O drivers
for similar reasons as here: the success path can be optimized whereas the
failure path potentially needs to sleep.
>
> Also I am not sure how to implement only the error handling part in the
> threaded_irq. We need to enter sdhci_request_done() and get the current
> mrq before we can check for error conditions like I've done in patch 2:
>
> /* Terminate and synchronize dma in case of an error */
> if (data && (mrq->cmd->error || data->error) &&
> host->use_external_dma) {
> struct dma_chan *chan = sdhci_external_dma_channel(host, data);
> dmaengine_terminate_sync(chan);
> }
>
> On a related note, do we really need to protect everything in
> sdhci_request_done() with spinlocks?
> In patch 2 I have only removed lock
> for the terminate_sync() parts that I added but the whole
> dma_unmap/dma_sync parts should be left unprotected IMO.
As it is written, synchronization is needed to stop the same mrq being
finished twice.
I suggest doing the dmaengine_terminate_sync() before calling
sdhci_request_done(). Perhaps you could record the channel that needs to be
sync'd and then do:
struct dma_chan *chan = READ_ONCE(host->sync_chan);
if (chan) {
dmaengine_terminate_sync(chan);
host->sync_chan = NULL;
}
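A minimal sketch of how that could be wired up (host->sync_chan is the
hypothetical field suggested above, and the helper names below are
illustrative, not from this series):

	/* error path: remember which channel will need a terminate */
	static void sdhci_external_dma_mark_sync(struct sdhci_host *host,
						 struct mmc_data *data)
	{
		host->sync_chan = sdhci_external_dma_channel(host, data);
	}

	/* run before sdhci_request_done(), outside the host lock */
	static void sdhci_external_dma_do_sync(struct sdhci_host *host)
	{
		struct dma_chan *chan = READ_ONCE(host->sync_chan);

		if (chan) {
			dmaengine_terminate_sync(chan);
			host->sync_chan = NULL;
		}
	}

That keeps the terminate out of the spinlocked section while leaving the
success path untouched.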
Hi Adrian,
On 3/8/2019 7:06 PM, Adrian Hunter wrote:
> On 6/03/19 12:00 PM, Faiz Abbas wrote:
>> Adrian,
>>
>> On 25/02/19 1:47 PM, Adrian Hunter wrote:
>>> On 15/02/19 9:20 PM, Faiz Abbas wrote:
>>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>>> With the addition of external dma support, dmaengine APIs need to
>>>> terminate in non-atomic context before unmapping the dma buffers.
>>>>
>>>> To facilitate this, remove the finish_tasklet and move the call of
>>>> sdhci_request_done() to the threaded_irq() callback.
>>>
>>> The irq thread has a higher latency than the tasklet. The performance drop
>>> is measurable on the system I tried:
>>>
>>> Before:
>>>
>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>> 1+0 records in
>>> 1+0 records out
>>> 1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
>>>
>>> After:
>>>
>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>> 1+0 records in
>>> 1+0 records out
>>> 1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
>>>
>>> So we only want to resort to the thread for the error case.
>>>
>>
>> Sorry for the late response here, but this is about 1.6% decrease. I
>> tried out the same commands on a dra7xx board here (with about 5
>> consecutive dd of 1GB) and the average decrease was 0.3%. I believe you
>> will also find a lesser percentage change if you average over multiple
>> dd commands.
>>
>> Is this really so significant that we have to maintain two different
>> bottom halves and keep having difficulty with adding APIs that can sleep?
>
> It is a performance drop that can be avoided, so it might as well be.
> Splitting the success path from the failure path is common for I/O drivers
> for similar reasons as here: the success path can be optimized whereas the
> failure path potentially needs to sleep.
Understood. You wanna keep the success path as fast as possible.
>
>>
>> Also I am not sure how to implement only the error handling part in the
>> threaded_irq. We need to enter sdhci_request_done() and get the current
>> mrq before we can check for error conditions like I've done in patch 2:
>>
>> /* Terminate and synchronize dma in case of an error */
>> if (data && (mrq->cmd->error || data->error) &&
>> host->use_external_dma) {
>> struct dma_chan *chan = sdhci_external_dma_channel(host, data);
>> dmaengine_terminate_sync(chan);
>> }
>>
>> On a related note, do we really need to protect everything in
>> sdhci_request_done() with spinlocks?
>> In patch 2 I have only removed lock
>> for the terminate_sync() parts that I added but the whole
>> dma_unmap/dma_sync parts should be left unprotected IMO.
>
> As it is written, synchronization is needed to stop the same mrq being
> finished twice.
>
> I suggest doing the dmaengine_terminate_sync() before calling
> sdhci_request_done(). Perhaps you could record the channel that needs to be
> sync'd and then do:
>
> struct dma_chan *chan = READ_ONCE(host->sync_chan);
>
> if (chan) {
> dmaengine_terminate_sync(chan);
> host->sync_chan = NULL;
> }
>
Will try this out and send next version.
Thanks,
Faiz
On 12.03.19 19:30, Rizvi, Mohammad Faiz Abbas wrote:
> Hi Adrian,
>
> On 3/8/2019 7:06 PM, Adrian Hunter wrote:
>> On 6/03/19 12:00 PM, Faiz Abbas wrote:
>>> Adrian,
>>>
>>> On 25/02/19 1:47 PM, Adrian Hunter wrote:
>>>> On 15/02/19 9:20 PM, Faiz Abbas wrote:
>>>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>>>> With the addition of external dma support, dmaengine APIs need to
>>>>> terminate in non-atomic context before unmapping the dma buffers.
>>>>>
>>>>> To facilitate this, remove the finish_tasklet and move the call of
>>>>> sdhci_request_done() to the threaded_irq() callback.
>>>>
>>>> The irq thread has a higher latency than the tasklet. The performance drop
>>>> is measurable on the system I tried:
>>>>
>>>> Before:
>>>>
>>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>>> 1+0 records in
>>>> 1+0 records out
>>>> 1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
>>>>
>>>> After:
>>>>
>>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>>> 1+0 records in
>>>> 1+0 records out
>>>> 1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
>>>>
>>>> So we only want to resort to the thread for the error case.
>>>>
>>>
>>> Sorry for the late response here, but this is about 1.6% decrease. I
>>> tried out the same commands on a dra7xx board here (with about 5
>>> consecutive dd of 1GB) and the average decrease was 0.3%. I believe you
>>> will also find a lesser percentage change if you average over multiple
>>> dd commands.
>>>
>>> Is this really so significant that we have to maintain two different
>>> bottom halves and keep having difficulty with adding APIs that can sleep?
>>
>> It is a performance drop that can be avoided, so it might as well be.
>> Splitting the success path from the failure path is common for I/O drivers
>> for similar reasons as here: the success path can be optimized whereas the
>> failure path potentially needs to sleep.
>
> Understood. You wanna keep the success path as fast as possible.
Sorry, I've not completely followed this series, but I'd like to add my 5 cents.
It's a good thing to get rid of tasklets, since the RT Linux kernel is actively moving towards mainline,
and there everything is handled in threads (even networking is trying to get rid of softirqs).
Performance is a pretty relative thing here - just try to run network traffic in parallel, and
there is no control over it compared to threads. No way to assign a priority or pin it to a CPU.
--
Best regards,
grygorii
On Tue, Mar 12, 2019 at 6:32 PM Rizvi, Mohammad Faiz Abbas
<[email protected]> wrote:
> On 3/8/2019 7:06 PM, Adrian Hunter wrote:
> > On 6/03/19 12:00 PM, Faiz Abbas wrote:
> > It is a performance drop that can be avoided, so it might as well be.
> > Splitting the success path from the failure path is common for I/O drivers
> > for similar reasons as here: the success path can be optimized whereas the
> > failure path potentially needs to sleep.
>
> Understood. You wanna keep the success path as fast as possible.
I looked at the sdhci_request_done() function and found that almost all
of it is executed inside a 'spin_lock_irqsave()', including the potentially
expensive dma_sync_single_for_cpu() calls.
This means there is very little benefit in using the tasklet in the first place;
it could just as well run in the hwirq context that triggered it.
The part that is actually run with interrupts enabled in the tasklet
is mmc_blk_cqe_req_done(), but other drivers also call this from
IRQ context.
Arnd
Hi,
On 14/03/19 4:45 PM, Grygorii Strashko wrote:
>
>
> On 12.03.19 19:30, Rizvi, Mohammad Faiz Abbas wrote:
>> Hi Adrian,
>>
>> On 3/8/2019 7:06 PM, Adrian Hunter wrote:
>>> On 6/03/19 12:00 PM, Faiz Abbas wrote:
>>>> Adrian,
>>>>
>>>> On 25/02/19 1:47 PM, Adrian Hunter wrote:
>>>>> On 15/02/19 9:20 PM, Faiz Abbas wrote:
>>>>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>>>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>>>>> With the addition of external dma support, dmaengine APIs need to
>>>>>> terminate in non-atomic context before unmapping the dma buffers.
>>>>>>
>>>>>> To facilitate this, remove the finish_tasklet and move the call of
>>>>>> sdhci_request_done() to the threaded_irq() callback.
>>>>>
>>>>> The irq thread has a higher latency than the tasklet. The performance drop
>>>>> is measurable on the system I tried:
>>>>>
>>>>> Before:
>>>>>
>>>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>>>> 1+0 records in
>>>>> 1+0 records out
>>>>> 1073741824 bytes (1.1 GB) copied, 4.44502 s, 242 MB/s
>>>>>
>>>>> After:
>>>>>
>>>>> # dd if=/dev/mmcblk1 of=/dev/null bs=1G count=1 &
>>>>> 1+0 records in
>>>>> 1+0 records out
>>>>> 1073741824 bytes (1.1 GB) copied, 4.50898 s, 238 MB/s
>>>>>
>>>>> So we only want to resort to the thread for the error case.
>>>>>
>>>>
>>>> Sorry for the late response here, but this is about 1.6% decrease. I
>>>> tried out the same commands on a dra7xx board here (with about 5
>>>> consecutive dd of 1GB) and the average decrease was 0.3%. I believe you
>>>> will also find a lesser percentage change if you average over multiple
>>>> dd commands.
>>>>
>>>> Is this really so significant that we have to maintain two different
>>>> bottom halves and keep having difficulty with adding APIs that can sleep?
>>>
>>> It is a performance drop that can be avoided, so it might as well be.
>>> Splitting the success path from the failure path is common for I/O drivers
>>> for similar reasons as here: the success path can be optimized whereas the
>>> failure path potentially needs to sleep.
>>
>> Understood. You wanna keep the success path as fast as possible.
>
> Sry, I've not completely followed this series, but I'd like to add 5c
>
> It's good thing to get rid of tasklets hence RT Linux kernel is actively moving towards to LKML
> and there everything handled in threads (even networking trying to get rid of softirqs).
>
> Performance is pretty relative thing here - just try to run network traffic in parallel, and
> there are no control over it comparing to threads. Now way to assign priority or pin to CPU.
There is a 2007 LWN article (https://lwn.net/Articles/239633/) which
talks about removing tasklets altogether. I wonder what happened after that.
Thanks,
Faiz
+ Arnd, Grygorii
On Fri, 15 Feb 2019 at 20:17, Faiz Abbas <[email protected]> wrote:
>
> sdhci.c has two bottom halves implemented. A threaded_irq for handling
> card insert/remove operations and a tasklet for finishing mmc requests.
> With the addition of external dma support, dmaengine APIs need to
> terminate in non-atomic context before unmapping the dma buffers.
>
> To facilitate this, remove the finish_tasklet and move the call of
> sdhci_request_done() to the threaded_irq() callback. Also move the
> interrupt result variable to sdhci_host so it can be populated from
> anywhere inside the sdhci_irq handler.
>
> Signed-off-by: Faiz Abbas <[email protected]>
Adrian, I think it makes sense to apply this patch, even if there is
a very minor negative impact throughput-wise.
To me, it doesn't seem like MMC/SD/SDIO has a good justification for
using tasklets, besides from the legacy point of view, of course.
Instead, I think we should try to move all mmc hosts into using
threaded IRQs.
So, what do you think? Can you overlook the throughput drop and
instead we can try to recover this on top with other optimizations?
Kind regards
Uffe
> ---
> drivers/mmc/host/sdhci.c | 43 +++++++++++++++-------------------------
> drivers/mmc/host/sdhci.h | 2 +-
> 2 files changed, 17 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index eba9bcc92ad3..20ed09b896d7 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -1232,7 +1232,7 @@ static void __sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
>
> WARN_ON(i >= SDHCI_MAX_MRQS);
>
> - tasklet_schedule(&host->finish_tasklet);
> + host->result = IRQ_WAKE_THREAD;
> }
>
> static void sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
> @@ -2705,14 +2705,6 @@ static bool sdhci_request_done(struct sdhci_host *host)
> return false;
> }
>
> -static void sdhci_tasklet_finish(unsigned long param)
> -{
> - struct sdhci_host *host = (struct sdhci_host *)param;
> -
> - while (!sdhci_request_done(host))
> - ;
> -}
> -
> static void sdhci_timeout_timer(struct timer_list *t)
> {
> struct sdhci_host *host;
> @@ -2995,11 +2987,12 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>
> static irqreturn_t sdhci_irq(int irq, void *dev_id)
> {
> - irqreturn_t result = IRQ_NONE;
> struct sdhci_host *host = dev_id;
> u32 intmask, mask, unexpected = 0;
> int max_loops = 16;
>
> + host->result = IRQ_NONE;
> +
> spin_lock(&host->lock);
>
> if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
> @@ -3009,7 +3002,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>
> intmask = sdhci_readl(host, SDHCI_INT_STATUS);
> if (!intmask || intmask == 0xffffffff) {
> - result = IRQ_NONE;
> + host->result = IRQ_NONE;
> goto out;
> }
>
> @@ -3054,7 +3047,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>
> host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
> SDHCI_INT_CARD_REMOVE);
> - result = IRQ_WAKE_THREAD;
> + host->result = IRQ_WAKE_THREAD;
> }
>
> if (intmask & SDHCI_INT_CMD_MASK)
> @@ -3074,7 +3067,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
> (host->ier & SDHCI_INT_CARD_INT)) {
> sdhci_enable_sdio_irq_nolock(host, false);
> host->thread_isr |= SDHCI_INT_CARD_INT;
> - result = IRQ_WAKE_THREAD;
> + host->result = IRQ_WAKE_THREAD;
> }
>
> intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
> @@ -3087,8 +3080,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
> sdhci_writel(host, intmask, SDHCI_INT_STATUS);
> }
> cont:
> - if (result == IRQ_NONE)
> - result = IRQ_HANDLED;
> + if (host->result == IRQ_NONE)
> + host->result = IRQ_HANDLED;
>
> intmask = sdhci_readl(host, SDHCI_INT_STATUS);
> } while (intmask && --max_loops);
> @@ -3101,7 +3094,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
> sdhci_dumpregs(host);
> }
>
> - return result;
> + return host->result;
> }
>
> static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
> @@ -3131,6 +3124,12 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
> spin_unlock_irqrestore(&host->lock, flags);
> }
>
> + if (!isr) {
> + do {
> + isr = !sdhci_request_done(host);
> + } while (isr);
> + }
> +
> return isr ? IRQ_HANDLED : IRQ_NONE;
> }
>
> @@ -4212,12 +4211,6 @@ int __sdhci_add_host(struct sdhci_host *host)
> struct mmc_host *mmc = host->mmc;
> int ret;
>
> - /*
> - * Init tasklets.
> - */
> - tasklet_init(&host->finish_tasklet,
> - sdhci_tasklet_finish, (unsigned long)host);
> -
> timer_setup(&host->timer, sdhci_timeout_timer, 0);
> timer_setup(&host->data_timer, sdhci_timeout_data_timer, 0);
>
> @@ -4230,7 +4223,7 @@ int __sdhci_add_host(struct sdhci_host *host)
> if (ret) {
> pr_err("%s: Failed to request IRQ %d: %d\n",
> mmc_hostname(mmc), host->irq, ret);
> - goto untasklet;
> + return ret;
> }
>
> ret = sdhci_led_register(host);
> @@ -4263,8 +4256,6 @@ int __sdhci_add_host(struct sdhci_host *host)
> sdhci_writel(host, 0, SDHCI_INT_ENABLE);
> sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
> free_irq(host->irq, host);
> -untasklet:
> - tasklet_kill(&host->finish_tasklet);
>
> return ret;
> }
> @@ -4326,8 +4317,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
> del_timer_sync(&host->timer);
> del_timer_sync(&host->data_timer);
>
> - tasklet_kill(&host->finish_tasklet);
> -
> if (!IS_ERR(mmc->supply.vqmmc))
> regulator_disable(mmc->supply.vqmmc);
>
> diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
> index 6cc9a3c2ac66..624d5aa01995 100644
> --- a/drivers/mmc/host/sdhci.h
> +++ b/drivers/mmc/host/sdhci.h
> @@ -554,7 +554,7 @@ struct sdhci_host {
>
> unsigned int desc_sz; /* ADMA descriptor size */
>
> - struct tasklet_struct finish_tasklet; /* Tasklet structures */
> + irqreturn_t result; /* Result of IRQ handler */
>
> struct timer_list timer; /* Timer for timeouts */
> struct timer_list data_timer; /* Timer for data timeouts */
> --
> 2.19.2
>
On 18/03/19 11:33 AM, Ulf Hansson wrote:
> + Arnd, Grygorii
>
> On Fri, 15 Feb 2019 at 20:17, Faiz Abbas <[email protected]> wrote:
>>
>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>> card insert/remove operations and a tasklet for finishing mmc requests.
>> With the addition of external dma support, dmaengine APIs need to
>> terminate in non-atomic context before unmapping the dma buffers.
>>
>> To facilitate this, remove the finish_tasklet and move the call of
>> sdhci_request_done() to the threaded_irq() callback. Also move the
>> interrupt result variable to sdhci_host so it can be populated from
>> anywhere inside the sdhci_irq handler.
>>
>> Signed-off-by: Faiz Abbas <[email protected]>
>
> Adrian, I think it makes sense to apply this patch, even if there is
> a very minor negative impact throughput-wise.
>
> To me, it doesn't seem like MMC/SD/SDIO has a good justification for
> using tasklets, apart from the legacy point of view, of course.
> Instead, I think we should try to move all mmc hosts into using
> threaded IRQs.
>
> So, what do you think? Can you overlook the throughput drop, so that
> we can try to recover it on top with other optimizations?
I tend to favour good results as expressed here:

https://lkml.org/lkml/2007/6/22/360

So I want to do optimization first.

But performance is not the only problem with the patch. Give me a few
days and I will see what I can come up with.
>
> Kind regards
> Uffe
>
>> ---
>> drivers/mmc/host/sdhci.c | 43 +++++++++++++++-------------------------
>> drivers/mmc/host/sdhci.h | 2 +-
>> 2 files changed, 17 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>> index eba9bcc92ad3..20ed09b896d7 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -1232,7 +1232,7 @@ static void __sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
>>
>> WARN_ON(i >= SDHCI_MAX_MRQS);
>>
>> - tasklet_schedule(&host->finish_tasklet);
>> + host->result = IRQ_WAKE_THREAD;
>> }
>>
>> static void sdhci_finish_mrq(struct sdhci_host *host, struct mmc_request *mrq)
>> @@ -2705,14 +2705,6 @@ static bool sdhci_request_done(struct sdhci_host *host)
>> return false;
>> }
>>
>> -static void sdhci_tasklet_finish(unsigned long param)
>> -{
>> - struct sdhci_host *host = (struct sdhci_host *)param;
>> -
>> - while (!sdhci_request_done(host))
>> - ;
>> -}
>> -
>> static void sdhci_timeout_timer(struct timer_list *t)
>> {
>> struct sdhci_host *host;
>> @@ -2995,11 +2987,12 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>
>> static irqreturn_t sdhci_irq(int irq, void *dev_id)
>> {
>> - irqreturn_t result = IRQ_NONE;
>> struct sdhci_host *host = dev_id;
>> u32 intmask, mask, unexpected = 0;
>> int max_loops = 16;
>>
>> + host->result = IRQ_NONE;
>> +
>> spin_lock(&host->lock);
>>
>> if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
>> @@ -3009,7 +3002,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>>
>> intmask = sdhci_readl(host, SDHCI_INT_STATUS);
>> if (!intmask || intmask == 0xffffffff) {
>> - result = IRQ_NONE;
>> + host->result = IRQ_NONE;
>> goto out;
>> }
>>
>> @@ -3054,7 +3047,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>>
>> host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
>> SDHCI_INT_CARD_REMOVE);
>> - result = IRQ_WAKE_THREAD;
>> + host->result = IRQ_WAKE_THREAD;
>> }
>>
>> if (intmask & SDHCI_INT_CMD_MASK)
>> @@ -3074,7 +3067,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>> (host->ier & SDHCI_INT_CARD_INT)) {
>> sdhci_enable_sdio_irq_nolock(host, false);
>> host->thread_isr |= SDHCI_INT_CARD_INT;
>> - result = IRQ_WAKE_THREAD;
>> + host->result = IRQ_WAKE_THREAD;
>> }
>>
>> intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
>> @@ -3087,8 +3080,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>> sdhci_writel(host, intmask, SDHCI_INT_STATUS);
>> }
>> cont:
>> - if (result == IRQ_NONE)
>> - result = IRQ_HANDLED;
>> + if (host->result == IRQ_NONE)
>> + host->result = IRQ_HANDLED;
>>
>> intmask = sdhci_readl(host, SDHCI_INT_STATUS);
>> } while (intmask && --max_loops);
>> @@ -3101,7 +3094,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
>> sdhci_dumpregs(host);
>> }
>>
>> - return result;
>> + return host->result;
>> }
>>
>> static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
>> @@ -3131,6 +3124,12 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
>> spin_unlock_irqrestore(&host->lock, flags);
>> }
>>
>> + if (!isr) {
>> + do {
>> + isr = !sdhci_request_done(host);
>> + } while (isr);
>> + }
>> +
>> return isr ? IRQ_HANDLED : IRQ_NONE;
>> }
>>
>> @@ -4212,12 +4211,6 @@ int __sdhci_add_host(struct sdhci_host *host)
>> struct mmc_host *mmc = host->mmc;
>> int ret;
>>
>> - /*
>> - * Init tasklets.
>> - */
>> - tasklet_init(&host->finish_tasklet,
>> - sdhci_tasklet_finish, (unsigned long)host);
>> -
>> timer_setup(&host->timer, sdhci_timeout_timer, 0);
>> timer_setup(&host->data_timer, sdhci_timeout_data_timer, 0);
>>
>> @@ -4230,7 +4223,7 @@ int __sdhci_add_host(struct sdhci_host *host)
>> if (ret) {
>> pr_err("%s: Failed to request IRQ %d: %d\n",
>> mmc_hostname(mmc), host->irq, ret);
>> - goto untasklet;
>> + return ret;
>> }
>>
>> ret = sdhci_led_register(host);
>> @@ -4263,8 +4256,6 @@ int __sdhci_add_host(struct sdhci_host *host)
>> sdhci_writel(host, 0, SDHCI_INT_ENABLE);
>> sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
>> free_irq(host->irq, host);
>> -untasklet:
>> - tasklet_kill(&host->finish_tasklet);
>>
>> return ret;
>> }
>> @@ -4326,8 +4317,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
>> del_timer_sync(&host->timer);
>> del_timer_sync(&host->data_timer);
>>
>> - tasklet_kill(&host->finish_tasklet);
>> -
>> if (!IS_ERR(mmc->supply.vqmmc))
>> regulator_disable(mmc->supply.vqmmc);
>>
>> diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
>> index 6cc9a3c2ac66..624d5aa01995 100644
>> --- a/drivers/mmc/host/sdhci.h
>> +++ b/drivers/mmc/host/sdhci.h
>> @@ -554,7 +554,7 @@ struct sdhci_host {
>>
>> unsigned int desc_sz; /* ADMA descriptor size */
>>
>> - struct tasklet_struct finish_tasklet; /* Tasklet structures */
>> + irqreturn_t result; /* Result of IRQ handler */
>>
>> struct timer_list timer; /* Timer for timeouts */
>> struct timer_list data_timer; /* Timer for data timeouts */
>> --
>> 2.19.2
>>
>
Hi Adrian,
On 26/03/19 1:03 PM, Adrian Hunter wrote:
> On 18/03/19 11:33 AM, Ulf Hansson wrote:
>> + Arnd, Grygorii
>>
>> On Fri, 15 Feb 2019 at 20:17, Faiz Abbas <[email protected]> wrote:
>>>
>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>> With the addition of external dma support, dmaengine APIs need to
>>> terminate in non-atomic context before unmapping the dma buffers.
>>>
>>> To facilitate this, remove the finish_tasklet and move the call of
>>> sdhci_request_done() to the threaded_irq() callback. Also move the
>>> interrupt result variable to sdhci_host so it can be populated from
>>> anywhere inside the sdhci_irq handler.
>>>
>>> Signed-off-by: Faiz Abbas <[email protected]>
>>
>> Adrian, I think it makes sense to apply this patch, even if there is
>> a very minor negative impact throughput-wise.
>>
>> To me, it doesn't seem like MMC/SD/SDIO has a good justification for
>> using tasklets, apart from the legacy point of view, of course.
>> Instead, I think we should try to move all mmc hosts into using
>> threaded IRQs.
>>
>> So, what do you think? Can you overlook the throughput drop, so that
>> we can try to recover it on top with other optimizations?
>
> I tend to favour good results as expressed here:
>
> https://lkml.org/lkml/2007/6/22/360
>
> So I want to do optimization first.
>
> But performance is not the only problem with the patch. Give me a few
> days and I will see what I can come up with.
>
Gentle ping on this.
Thanks,
Faiz
On 2/04/19 10:59 AM, Faiz Abbas wrote:
> Hi Adrian,
>
> On 26/03/19 1:03 PM, Adrian Hunter wrote:
>> On 18/03/19 11:33 AM, Ulf Hansson wrote:
>>> + Arnd, Grygorii
>>>
>>> On Fri, 15 Feb 2019 at 20:17, Faiz Abbas <[email protected]> wrote:
>>>>
>>>> sdhci.c has two bottom halves implemented. A threaded_irq for handling
>>>> card insert/remove operations and a tasklet for finishing mmc requests.
>>>> With the addition of external dma support, dmaengine APIs need to
>>>> terminate in non-atomic context before unmapping the dma buffers.
>>>>
>>>> To facilitate this, remove the finish_tasklet and move the call of
>>>> sdhci_request_done() to the threaded_irq() callback. Also move the
>>>> interrupt result variable to sdhci_host so it can be populated from
>>>> anywhere inside the sdhci_irq handler.
>>>>
>>>> Signed-off-by: Faiz Abbas <[email protected]>
>>>
>>> Adrian, I think it makes sense to apply this patch, even if there is
>>> a very minor negative impact throughput-wise.
>>>
>>> To me, it doesn't seem like MMC/SD/SDIO has a good justification for
>>> using tasklets, apart from the legacy point of view, of course.
>>> Instead, I think we should try to move all mmc hosts into using
>>> threaded IRQs.
>>>
>>> So, what do you think? Can you overlook the throughput drop, so that
>>> we can try to recover it on top with other optimizations?
>>
>> I tend to favour good results as expressed here:
>>
>> https://lkml.org/lkml/2007/6/22/360
>>
>> So I want to do optimization first.
>>
>> But performance is not the only problem with the patch. Give me a few
>> days and I will see what I can come up with.
>>
>
> Gentle ping on this.
Working on it :-)