2022-01-21 22:27:10

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: [PATCH V3 0/4] mmc: add error statistics for eMMC and SD card

Changes since V2:
-Removed userspace error stats clear debug fs entry as suggested by Adrain Hunter.
-Split patch into 4 patches
[PATCH V3 1/4] : sdhci driver
[PATCH V3 2/4] : debug fs entries
[PATCH V3 3/4] : core driver
[PATCH V3 4/4] : cqhci driver
-Used for loop to print error messages instead of using printf
statements for all error messages as suggested by Adrain Hunter.
-Introduced one flag to enable error stats feature, if any other
client wants to use this feature, they need to enable that flag.
-Moved reset command timeout error statement to card init flow
as suggested by Adrain Hunter.

Changes since V1:
-Removed sysfs entry for eMMC and SD card error statistics and added
debugfs entry as suggested by Adrian Hunter and Ulf Hansson.

Shaik Sajida Bhanu (4):
mmc: sdhci: Capture eMMC and SD card errors
mmc: debugfs: Add debug fs entry for mmc driver
mmc: core: Capture eMMC and SD card errors
mmc: cqhci: Capture eMMC and SD card errors

drivers/mmc/core/core.c | 8 +++++
drivers/mmc/core/debugfs.c | 81 +++++++++++++++++++++++++++++++++++++++++++
drivers/mmc/core/queue.c | 3 ++
drivers/mmc/host/cqhci-core.c | 9 ++++-
drivers/mmc/host/sdhci-msm.c | 3 ++
drivers/mmc/host/sdhci.c | 72 +++++++++++++++++++++++++++++++-------
include/linux/mmc/host.h | 31 +++++++++++++++++
7 files changed, 194 insertions(+), 13 deletions(-)

--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


2022-01-21 22:27:13

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: [PATCH V3 4/4] mmc: cqhci: Capture eMMC and SD card errors

Add changes to capture eMMC and SD card errors.
This is useful for debug and testing.

Signed-off-by: Shaik Sajida Bhanu <[email protected]>
Signed-off-by: Liangliang Lu <[email protected]>
Signed-off-by: Sayali Lokhande <[email protected]>
Signed-off-by: Bao D. Nguyen <[email protected]>
Signed-off-by: Ram Prakash Gupta <[email protected]>
---
drivers/mmc/host/cqhci-core.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index b0d30c3..2908d30 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -822,8 +822,15 @@ irqreturn_t cqhci_irq(struct mmc_host *mmc, u32 intmask, int cmd_error,
pr_debug("%s: cqhci: IRQ status: 0x%08x\n", mmc_hostname(mmc), status);

if ((status & (CQHCI_IS_RED | CQHCI_IS_GCE | CQHCI_IS_ICCE)) ||
- cmd_error || data_error)
+ cmd_error || data_error) {
+ if ((status & CQHCI_IS_RED) && mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_RED);
+ if ((status & CQHCI_IS_GCE) && (mmc->err_stats_enabled))
+ mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_GCE);
+ if ((status & CQHCI_IS_ICCE) && mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_ICCE);
cqhci_error_irq(mmc, status, cmd_error, data_error);
+ }

if (status & CQHCI_IS_TCC) {
/* read TCN and complete the request */
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

2022-01-21 22:27:19

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

Add changes to capture eMMC and SD card errors.
This is useful for debug and testing.

Signed-off-by: Shaik Sajida Bhanu <[email protected]>
Signed-off-by: Liangliang Lu <[email protected]>
Signed-off-by: Sayali Lokhande <[email protected]>
Signed-off-by: Bao D. Nguyen <[email protected]>
---
drivers/mmc/host/sdhci-msm.c | 3 ++
drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
include/linux/mmc/host.h | 31 +++++++++++++++++++
3 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 50c71e0..309eb7b 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -128,6 +128,8 @@

#define MSM_MMC_AUTOSUSPEND_DELAY_MS 50

+#define MSM_MMC_ERR_STATS_ENABLE 1
+
/* Timeout value to avoid infinite waiting for pwr_irq */
#define MSM_PWR_IRQ_TIMEOUT_MS 5000

@@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
if (ret)
goto pm_runtime_disable;

+ host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
pm_runtime_mark_last_busy(&pdev->dev);
pm_runtime_put_autosuspend(&pdev->dev);

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 07c6da1..74b356e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
if (host->ops->dump_vendor_regs)
host->ops->dump_vendor_regs(host);

+ if (host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_enable(host->mmc);
SDHCI_DUMP("============================================\n");
}
EXPORT_SYMBOL_GPL(sdhci_dumpregs);
@@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
spin_lock_irqsave(&host->lock, flags);

if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
mmc_hostname(host->mmc));
sdhci_dumpregs(host);
@@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct timer_list *t)

if (host->data || host->data_cmd ||
(host->cmd && sdhci_data_line_cmd(host->cmd))) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
pr_err("%s: Timeout waiting for hardware interrupt.\n",
mmc_hostname(host->mmc));
sdhci_dumpregs(host);
@@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)

if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
- if (intmask & SDHCI_INT_TIMEOUT)
+ if (intmask & SDHCI_INT_TIMEOUT) {
host->cmd->error = -ETIMEDOUT;
- else
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+ } else {
host->cmd->error = -EILSEQ;
-
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+ }
+ }
/* Treat data command CRC error the same as data CRC error */
if (host->cmd->data &&
(intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
@@ -3265,6 +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
-ETIMEDOUT :
-EILSEQ;
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);

if (sdhci_auto_cmd23(host, mrq)) {
mrq->sbc->error = err;
@@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
if (intmask & SDHCI_INT_DATA_TIMEOUT) {
host->data_cmd = NULL;
data_cmd->error = -ETIMEDOUT;
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
__sdhci_finish_mrq(host, data_cmd->mrq);
return;
}
@@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
return;
}

- if (intmask & SDHCI_INT_DATA_TIMEOUT)
+ if (intmask & SDHCI_INT_DATA_TIMEOUT) {
host->data->error = -ETIMEDOUT;
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+ }
else if (intmask & SDHCI_INT_DATA_END_BIT)
host->data->error = -EILSEQ;
else if ((intmask & SDHCI_INT_DATA_CRC) &&
SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
- != MMC_BUS_TEST_R)
+ != MMC_BUS_TEST_R) {
host->data->error = -EILSEQ;
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+ }
+ }
else if (intmask & SDHCI_INT_ADMA_ERROR) {
pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
intmask);
sdhci_adma_show_error(host);
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
host->data->error = -EIO;
if (host->ops->adma_workaround)
host->ops->adma_workaround(host, intmask);
@@ -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
if (!host->cqe_on)
return false;

- if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
+ if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
*cmd_error = -EILSEQ;
- else if (intmask & SDHCI_INT_TIMEOUT)
+ if (intmask & SDHCI_INT_CRC) {
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+ }
+ }
+ } else if (intmask & SDHCI_INT_TIMEOUT) {
*cmd_error = -ETIMEDOUT;
- else
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+ } else
*cmd_error = 0;

- if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+ if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
*data_error = -EILSEQ;
- else if (intmask & SDHCI_INT_DATA_TIMEOUT)
+ if (intmask & SDHCI_INT_DATA_CRC) {
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+ }
+ }
+ } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
*data_error = -ETIMEDOUT;
- else if (intmask & SDHCI_INT_ADMA_ERROR)
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+ } else if (intmask & SDHCI_INT_ADMA_ERROR) {
*data_error = -EIO;
- else
+ if (host->mmc && host->mmc->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
+ } else
*data_error = 0;

/* Clear selected interrupts. */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7afb57c..883b50b 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,6 +93,23 @@ struct mmc_clk_phase_map {

struct mmc_host;

+enum mmc_err_stat {
+ MMC_ERR_CMD_TIMEOUT,
+ MMC_ERR_CMD_CRC,
+ MMC_ERR_DAT_TIMEOUT,
+ MMC_ERR_DAT_CRC,
+ MMC_ERR_AUTO_CMD,
+ MMC_ERR_ADMA,
+ MMC_ERR_TUNING,
+ MMC_ERR_CMDQ_RED,
+ MMC_ERR_CMDQ_GCE,
+ MMC_ERR_CMDQ_ICCE,
+ MMC_ERR_REQ_TIMEOUT,
+ MMC_ERR_CMDQ_REQ_TIMEOUT,
+ MMC_ERR_ICE_CFG,
+ MMC_ERR_MAX,
+};
+
struct mmc_host_ops {
/*
* It is optional for the host to implement pre_req and post_req in
@@ -500,6 +517,9 @@ struct mmc_host {

/* Host Software Queue support */
bool hsq_enabled;
+ u32 err_stats[MMC_ERR_MAX];
+ bool err_stats_enabled;
+ bool err_state;

unsigned long private[] ____cacheline_aligned;
};
@@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
}

+static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
+{
+ mmc->err_state = true;
+}
+
+static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
+ enum mmc_err_stat stat) {
+
+ mmc->err_stats[stat] += 1;
+}
+
int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

2022-01-21 22:28:18

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: [PATCH V3 3/4] mmc: core: Capture eMMC and SD card errors

Add changes to capture eMMC and SD card errors.
This is useful for debug and testing.

Signed-off-by: Shaik Sajida Bhanu <[email protected]>
Signed-off-by: Liangliang Lu <[email protected]>
Signed-off-by: Sayali Lokhande <[email protected]>
Signed-off-by: Bao D. Nguyen <[email protected]>
Signed-off-by: Ram Prakash Gupta <[email protected]>
---
drivers/mmc/core/core.c | 8 ++++++++
drivers/mmc/core/queue.c | 3 +++
2 files changed, 11 insertions(+)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 368f104..c586d69 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2242,6 +2242,14 @@ void mmc_rescan(struct work_struct *work)
if (freqs[i] <= host->f_min)
break;
}
+
+ /*
+ * Ignore the command timeout errors observed during
+ * the card init as those are excepted.
+ */
+
+ if (host && host->err_stats_enabled)
+ host->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
mmc_release_host(host);

out:
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index c69b2d9..7dc9dfb 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -100,6 +100,9 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
bool recovery_needed = false;

+ if (host->err_stats_enabled)
+ mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
+
switch (issue_type) {
case MMC_ISSUE_ASYNC:
case MMC_ISSUE_DCMD:
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

2022-01-21 22:28:25

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver

Add debug fs entry to query eMMC and SD card errors statistics

Signed-off-by: Shaik Sajida Bhanu <[email protected]>
Signed-off-by: Liangliang Lu <[email protected]>
Signed-off-by: Sayali Lokhande <[email protected]>
Signed-off-by: Bao D. Nguyen <[email protected]>
---
drivers/mmc/core/debugfs.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3fdbc80..f4cb594 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -223,6 +223,82 @@ static int mmc_clock_opt_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
"%llu\n");

+static int mmc_err_state_get(void *data, u64 *val)
+{
+ struct mmc_host *host = data;
+
+ if (!host)
+ return -EINVAL;
+
+ *val = host->err_state ? 1 : 0;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get, NULL, "%llu\n");
+
+static int mmc_err_stats_show(struct seq_file *file, void *data)
+{
+ struct mmc_host *host = (struct mmc_host *)file->private;
+ const char *desc[MMC_ERR_MAX] = {
+ [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
+ [MMC_ERR_CMD_CRC] = "Command CRC Errors Occurred",
+ [MMC_ERR_DAT_TIMEOUT] = "Data Timeout Occurred",
+ [MMC_ERR_DAT_CRC] = "Data CRC Errors Occurred",
+ [MMC_ERR_AUTO_CMD] = "Auto-Cmd Error Occurred",
+ [MMC_ERR_ADMA] = "ADMA Error Occurred",
+ [MMC_ERR_TUNING] = "Tuning Error Occurred",
+ [MMC_ERR_CMDQ_RED] = "CMDQ RED Errors",
+ [MMC_ERR_CMDQ_GCE] = "CMDQ GCE Errors",
+ [MMC_ERR_CMDQ_ICCE] = "CMDQ ICCE Errors",
+ [MMC_ERR_REQ_TIMEOUT] = "Request Timedout",
+ [MMC_ERR_CMDQ_REQ_TIMEOUT] = "CMDQ Request Timedout",
+ [MMC_ERR_ICE_CFG] = "ICE Config Errors",
+ };
+ int i;
+
+ if (!host)
+ return -EINVAL;
+
+ if (!host->err_stats_enabled) {
+ seq_printf(file, "Not supported by driver\n");
+ return 0;
+ }
+
+ for (i = 0; i < MMC_ERR_MAX; i++) {
+ if (desc[i])
+ seq_printf(file, "# %s:\t %d\n",
+ desc[i], host->err_stats[i]);
+ }
+
+ return 0;
+}
+
+static int mmc_err_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mmc_err_stats_show, inode->i_private);
+}
+
+static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct mmc_host *host = filp->f_mapping->host->i_private;
+
+ if (!host)
+ return -EINVAL;
+
+ pr_debug("%s: Resetting MMC error statistics\n", __func__);
+ memset(host->err_stats, 0, sizeof(host->err_stats));
+
+ return cnt;
+}
+
+static const struct file_operations mmc_err_stats_fops = {
+ .open = mmc_err_stats_open,
+ .read = seq_read,
+ .write = mmc_err_stats_write,
+};
+
void mmc_add_host_debugfs(struct mmc_host *host)
{
struct dentry *root;
@@ -236,6 +312,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
&mmc_clock_fops);

+ debugfs_create_file("err_state", 0600, root, host,
+ &mmc_err_state);
+ debugfs_create_file("err_stats", 0600, root, host,
+ &mmc_err_stats_fops);
+
#ifdef CONFIG_FAIL_MMC_REQUEST
if (fail_request)
setup_fault_attr(&fail_default_attr, fail_request);
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

2022-01-22 00:35:48

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/mmc/host/sdhci-msm.c | 3 ++
> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
> include/linux/mmc/host.h | 31 +++++++++++++++++++
> 3 files changed, 94 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
> index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>
> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
> /* Timeout value to avoid infinite waiting for pwr_irq */
> #define MSM_PWR_IRQ_TIMEOUT_MS 5000
>
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
> if (ret)
> goto pm_runtime_disable;
>
> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;

Please remove this. SDHCI will enable error stats.

> pm_runtime_mark_last_busy(&pdev->dev);
> pm_runtime_put_autosuspend(&pdev->dev);
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + if (host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_enable(host->mmc);

Please move this to sdhci_setup_host() and call it unconditionally i.e. just

mmc_debugfs_err_stats_enable(host->mmc);

> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Please remove the 'if ()', i.e. just make it, unconditionally:

mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Same for other calls to mmc_debugfs_err_stats_inc()

> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3265,6 +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
> -ETIMEDOUT :
> -EILSEQ;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

Changes to host.h are core changes and belong in patch 3,
which should be the first patch.

> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
> + bool err_stats_enabled;
> + bool err_state;

Please drop err_state for now

>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)

Please use 'host' as the mmc_host parameter in this file.

> +{
> + mmc->err_state = true;

Let's make this:

host->err_stats_enabled = true;

> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +

Please remove blank line here

> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
> int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
> int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
>

2022-01-22 00:36:36

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add debug fs entry to query eMMC and SD card errors statistics
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/mmc/core/debugfs.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 81 insertions(+)
>
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..f4cb594 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,82 @@ static int mmc_clock_opt_set(void *data, u64 val)
> DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
> "%llu\n");
>
> +static int mmc_err_state_get(void *data, u64 *val)
> +{
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + *val = host->err_state ? 1 : 0;
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get, NULL, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data)
> +{
> + struct mmc_host *host = (struct mmc_host *)file->private;
> + const char *desc[MMC_ERR_MAX] = {
> + [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> + [MMC_ERR_CMD_CRC] = "Command CRC Errors Occurred",
> + [MMC_ERR_DAT_TIMEOUT] = "Data Timeout Occurred",
> + [MMC_ERR_DAT_CRC] = "Data CRC Errors Occurred",
> + [MMC_ERR_AUTO_CMD] = "Auto-Cmd Error Occurred",
> + [MMC_ERR_ADMA] = "ADMA Error Occurred",
> + [MMC_ERR_TUNING] = "Tuning Error Occurred",
> + [MMC_ERR_CMDQ_RED] = "CMDQ RED Errors",
> + [MMC_ERR_CMDQ_GCE] = "CMDQ GCE Errors",
> + [MMC_ERR_CMDQ_ICCE] = "CMDQ ICCE Errors",
> + [MMC_ERR_REQ_TIMEOUT] = "Request Timedout",
> + [MMC_ERR_CMDQ_REQ_TIMEOUT] = "CMDQ Request Timedout",
> + [MMC_ERR_ICE_CFG] = "ICE Config Errors",
> + };
> + int i;
> +
> + if (!host)
> + return -EINVAL;
> +
> + if (!host->err_stats_enabled) {
> + seq_printf(file, "Not supported by driver\n");
> + return 0;
> + }
> +
> + for (i = 0; i < MMC_ERR_MAX; i++) {
> + if (desc[i])
> + seq_printf(file, "# %s:\t %d\n",
> + desc[i], host->err_stats[i]);
> + }
> +
> + return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, mmc_err_stats_show, inode->i_private);
> +}
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> + if (!host)
> + return -EINVAL;
> +
> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
> + memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> + return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> + .open = mmc_err_stats_open,
> + .read = seq_read,
> + .write = mmc_err_stats_write,
> +};
> +
> void mmc_add_host_debugfs(struct mmc_host *host)
> {
> struct dentry *root;
> @@ -236,6 +312,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
> &mmc_clock_fops);
>
> + debugfs_create_file("err_state", 0600, root, host,
> + &mmc_err_state);

Please, let's drop err_state for now

> + debugfs_create_file("err_stats", 0600, root, host,
> + &mmc_err_stats_fops);
> +
> #ifdef CONFIG_FAIL_MMC_REQUEST
> if (fail_request)
> setup_fault_attr(&fail_default_attr, fail_request);
>

2022-01-22 00:40:40

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 3/4] mmc: core: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> Signed-off-by: Ram Prakash Gupta <[email protected]>
> ---
> drivers/mmc/core/core.c | 8 ++++++++
> drivers/mmc/core/queue.c | 3 +++
> 2 files changed, 11 insertions(+)
>
> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
> index 368f104..c586d69 100644
> --- a/drivers/mmc/core/core.c
> +++ b/drivers/mmc/core/core.c
> @@ -2242,6 +2242,14 @@ void mmc_rescan(struct work_struct *work)
> if (freqs[i] <= host->f_min)
> break;
> }
> +
> + /*
> + * Ignore the command timeout errors observed during
> + * the card init as those are excepted.
> + */
> +

Please remove blank line here.

> + if (host && host->err_stats_enabled)

The condition is not needed.

> + host->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;

Please put this after successful call to mmc_rescan_try_freq

> mmc_release_host(host);
>
> out:
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index c69b2d9..7dc9dfb 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,9 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
> bool recovery_needed = false;
>
> + if (host->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);

Doesn't this get covered by the drivers. It seems like this should not be needed.

> +
> switch (issue_type) {
> case MMC_ISSUE_ASYNC:
> case MMC_ISSUE_DCMD:
>

2022-01-22 00:40:47

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 4/4] mmc: cqhci: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> Signed-off-by: Ram Prakash Gupta <[email protected]>
> ---
> drivers/mmc/host/cqhci-core.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
> index b0d30c3..2908d30 100644
> --- a/drivers/mmc/host/cqhci-core.c
> +++ b/drivers/mmc/host/cqhci-core.c
> @@ -822,8 +822,15 @@ irqreturn_t cqhci_irq(struct mmc_host *mmc, u32 intmask, int cmd_error,
> pr_debug("%s: cqhci: IRQ status: 0x%08x\n", mmc_hostname(mmc), status);
>
> if ((status & (CQHCI_IS_RED | CQHCI_IS_GCE | CQHCI_IS_ICCE)) ||
> - cmd_error || data_error)
> + cmd_error || data_error) {
> + if ((status & CQHCI_IS_RED) && mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_RED);
> + if ((status & CQHCI_IS_GCE) && (mmc->err_stats_enabled))
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_GCE);
> + if ((status & CQHCI_IS_ICCE) && mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_ICCE);

Please don't check mmc->err_stats_enabled

> cqhci_error_irq(mmc, status, cmd_error, data_error);
> + }
>
> if (status & CQHCI_IS_TCC) {
> /* read TCN and complete the request */
>

2022-01-26 03:25:45

by Sajida Bhanu (Temp)

[permalink] [raw]
Subject: RE: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

Hi,

Thanks for the Review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Friday, January 21, 2022 12:38 PM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/mmc/host/sdhci-msm.c | 3 ++
> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
> include/linux/mmc/host.h | 31 +++++++++++++++++++
> 3 files changed, 94 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci-msm.c
> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>
> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
> MSM_PWR_IRQ_TIMEOUT_MS 5000
>
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
> if (ret)
> goto pm_runtime_disable;
>
> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;

Please remove this. SDHCI will enable error stats.

>>>>>> Sure.

> pm_runtime_mark_last_busy(&pdev->dev);
> pm_runtime_put_autosuspend(&pdev->dev);
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index
> 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + if (host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_enable(host->mmc);

Please move this to sdhci_setup_host() and call it unconditionally i.e. just

mmc_debugfs_err_stats_enable(host->mmc);


>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
If we move this call to sdhci_setup_host(), then it will set if no errors also right?


> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Please remove the 'if ()', i.e. just make it, unconditionally:

mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);

Same for other calls to mmc_debugfs_err_stats_inc()

>>>>>>>>Sure.

> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
> timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
> *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
> -ETIMEDOUT :
> -EILSEQ;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +{
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

Changes to host.h are core changes and belong in patch 3, which should be the first patch.

>>>>>Sure.

> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
> + bool err_stats_enabled;
> + bool err_state;

Please drop err_state for now

>>>>>>>first we can check this variable right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
Please let me know your opinion on this.

>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
> DMA_FROM_DEVICE; }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)

Please use 'host' as the mmc_host parameter in this file.

> +{
> + mmc->err_state = true;

Let's make this:

host->err_stats_enabled = true;

>>>>>>Sure.

> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +

Please remove blank line here

>>>>>>sure.

> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
> **new_ext_csd);
>

2022-01-26 03:29:20

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: RE: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver

Hi,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Friday, January 21, 2022 12:40 PM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add debug fs entry to query eMMC and SD card errors statistics
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/mmc/core/debugfs.c | 81
> ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 81 insertions(+)
>
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..f4cb594 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,82 @@ static int mmc_clock_opt_set(void *data, u64 val)
> DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
> "%llu\n");
>
> +static int mmc_err_state_get(void *data, u64 *val) {
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + *val = host->err_state ? 1 : 0;
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get, NULL,
> +"%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
> + struct mmc_host *host = (struct mmc_host *)file->private;
> + const char *desc[MMC_ERR_MAX] = {
> + [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> + [MMC_ERR_CMD_CRC] = "Command CRC Errors Occurred",
> + [MMC_ERR_DAT_TIMEOUT] = "Data Timeout Occurred",
> + [MMC_ERR_DAT_CRC] = "Data CRC Errors Occurred",
> + [MMC_ERR_AUTO_CMD] = "Auto-Cmd Error Occurred",
> + [MMC_ERR_ADMA] = "ADMA Error Occurred",
> + [MMC_ERR_TUNING] = "Tuning Error Occurred",
> + [MMC_ERR_CMDQ_RED] = "CMDQ RED Errors",
> + [MMC_ERR_CMDQ_GCE] = "CMDQ GCE Errors",
> + [MMC_ERR_CMDQ_ICCE] = "CMDQ ICCE Errors",
> + [MMC_ERR_REQ_TIMEOUT] = "Request Timedout",
> + [MMC_ERR_CMDQ_REQ_TIMEOUT] = "CMDQ Request Timedout",
> + [MMC_ERR_ICE_CFG] = "ICE Config Errors",
> + };
> + int i;
> +
> + if (!host)
> + return -EINVAL;
> +
> + if (!host->err_stats_enabled) {
> + seq_printf(file, "Not supported by driver\n");
> + return 0;
> + }
> +
> + for (i = 0; i < MMC_ERR_MAX; i++) {
> + if (desc[i])
> + seq_printf(file, "# %s:\t %d\n",
> + desc[i], host->err_stats[i]);
> + }
> +
> + return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, mmc_err_stats_show, inode->i_private); }
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> + if (!host)
> + return -EINVAL;
> +
> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
> + memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> + return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> + .open = mmc_err_stats_open,
> + .read = seq_read,
> + .write = mmc_err_stats_write,
> +};
> +
> void mmc_add_host_debugfs(struct mmc_host *host) {
> struct dentry *root;
> @@ -236,6 +312,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
> &mmc_clock_fops);
>
> + debugfs_create_file("err_state", 0600, root, host,
> + &mmc_err_state);

Please, let's drop err_state for now

>>>>> first we can check this right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
Please let me know your opinion on this.( Same as patch set (V3 1/4).

> + debugfs_create_file("err_stats", 0600, root, host,
> + &mmc_err_stats_fops);
> +
> #ifdef CONFIG_FAIL_MMC_REQUEST
> if (fail_request)
> setup_fault_attr(&fail_default_attr, fail_request);
>

2022-01-26 03:36:24

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: RE: [PATCH V3 3/4] mmc: core: Capture eMMC and SD card errors

Hi,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Friday, January 21, 2022 1:50 PM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 3/4] mmc: core: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> Signed-off-by: Ram Prakash Gupta <[email protected]>
> ---
> drivers/mmc/core/core.c | 8 ++++++++ drivers/mmc/core/queue.c | 3
> +++
> 2 files changed, 11 insertions(+)
>
> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index
> 368f104..c586d69 100644
> --- a/drivers/mmc/core/core.c
> +++ b/drivers/mmc/core/core.c
> @@ -2242,6 +2242,14 @@ void mmc_rescan(struct work_struct *work)
> if (freqs[i] <= host->f_min)
> break;
> }
> +
> + /*
> + * Ignore the command timeout errors observed during
> + * the card init as those are excepted.
> + */
> +

Please remove blank line here.

>>>>Sure.

> + if (host && host->err_stats_enabled)

The condition is not needed.

>>>>Sure .

> + host->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;

Please put this after successful call to mmc_rescan_try_freq

>>>>>Sure.

> mmc_release_host(host);
>
> out:
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
> c69b2d9..7dc9dfb 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,9 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
> bool recovery_needed = false;
>
> + if (host->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);

Doesn't this get covered by the drivers. It seems like this should not be needed.

>>>>>>Okay

> +
> switch (issue_type) {
> case MMC_ISSUE_ASYNC:
> case MMC_ISSUE_DCMD:
>

2022-01-26 06:33:41

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: RE: [PATCH V3 4/4] mmc: cqhci: Capture eMMC and SD card errors

Hi,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Friday, January 21, 2022 1:52 PM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 4/4] mmc: cqhci: Capture eMMC and SD card errors

On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> Signed-off-by: Ram Prakash Gupta <[email protected]>
> ---
> drivers/mmc/host/cqhci-core.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/mmc/host/cqhci-core.c
> b/drivers/mmc/host/cqhci-core.c index b0d30c3..2908d30 100644
> --- a/drivers/mmc/host/cqhci-core.c
> +++ b/drivers/mmc/host/cqhci-core.c
> @@ -822,8 +822,15 @@ irqreturn_t cqhci_irq(struct mmc_host *mmc, u32 intmask, int cmd_error,
> pr_debug("%s: cqhci: IRQ status: 0x%08x\n", mmc_hostname(mmc),
> status);
>
> if ((status & (CQHCI_IS_RED | CQHCI_IS_GCE | CQHCI_IS_ICCE)) ||
> - cmd_error || data_error)
> + cmd_error || data_error) {
> + if ((status & CQHCI_IS_RED) && mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_RED);
> + if ((status & CQHCI_IS_GCE) && (mmc->err_stats_enabled))
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_GCE);
> + if ((status & CQHCI_IS_ICCE) && mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(mmc, MMC_ERR_CMDQ_ICCE);

Please don't check mmc->err_stats_enabled

>>>>>>>Sure.

> cqhci_error_irq(mmc, status, cmd_error, data_error);
> + }
>
> if (status & CQHCI_IS_TCC) {
> /* read TCN and complete the request */
>

2022-02-02 08:03:34

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
> Hi,
>
> Thanks for the Review.
>
> Please find the inline comments.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <[email protected]>
> Sent: Friday, January 21, 2022 12:38 PM
> To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors
>
> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>> Add changes to capture eMMC and SD card errors.
>> This is useful for debug and testing.
>>
>> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
>> Signed-off-by: Liangliang Lu <[email protected]>
>> Signed-off-by: Sayali Lokhande <[email protected]>
>> Signed-off-by: Bao D. Nguyen <[email protected]>
>> ---
>> drivers/mmc/host/sdhci-msm.c | 3 ++
>> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
>> include/linux/mmc/host.h | 31 +++++++++++++++++++
>> 3 files changed, 94 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/host/sdhci-msm.c
>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>> --- a/drivers/mmc/host/sdhci-msm.c
>> +++ b/drivers/mmc/host/sdhci-msm.c
>> @@ -128,6 +128,8 @@
>>
>> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>>
>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>> +
>> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>
>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>> if (ret)
>> goto pm_runtime_disable;
>>
>> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>
> Please remove this. SDHCI will enable error stats.
>
>>>>>>> Sure.
>
>> pm_runtime_mark_last_busy(&pdev->dev);
>> pm_runtime_put_autosuspend(&pdev->dev);
>>
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index
>> 07c6da1..74b356e 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>> if (host->ops->dump_vendor_regs)
>> host->ops->dump_vendor_regs(host);
>>
>> + if (host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_enable(host->mmc);
>
> Please move this to sdhci_setup_host() and call it unconditionally i.e. just
>
> mmc_debugfs_err_stats_enable(host->mmc);
>
>
>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
> If we move this call to sdhci_setup_host(), then it will set if no errors also right?

Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()

>
>
>> SDHCI_DUMP("============================================\n");
>> }
>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>> spin_lock_irqsave(&host->lock, flags);
>>
>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>
> Please remove the 'if ()', i.e. just make it, unconditionally:
>
> mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>
> Same for other calls to mmc_debugfs_err_stats_inc()
>
>>>>>>>>> Sure.
>
>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
>> timer_list *t)
>>
>> if (host->data || host->data_cmd ||
>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
>> *host, u32 intmask, u32 *intmask_p)
>>
>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> - if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_TIMEOUT) {
>> host->cmd->error = -ETIMEDOUT;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else {
>> host->cmd->error = -EILSEQ;
>> -
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + }
>> /* Treat data command CRC error the same as data CRC error */
>> if (host->cmd->data &&
>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>> -ETIMEDOUT :
>> -EILSEQ;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>
>> if (sdhci_auto_cmd23(host, mrq)) {
>> mrq->sbc->error = err;
>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data_cmd = NULL;
>> data_cmd->error = -ETIMEDOUT;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> __sdhci_finish_mrq(host, data_cmd->mrq);
>> return;
>> }
>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> return;
>> }
>>
>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data->error = -ETIMEDOUT;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + }
>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>> host->data->error = -EILSEQ;
>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> - != MMC_BUS_TEST_R)
>> + != MMC_BUS_TEST_R) {
>> host->data->error = -EILSEQ;
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + }
>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>> intmask);
>> sdhci_adma_show_error(host);
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> host->data->error = -EIO;
>> if (host->ops->adma_workaround)
>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@
>> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>> if (!host->cqe_on)
>> return false;
>>
>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +{
>> *cmd_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + }
>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>> *cmd_error = -ETIMEDOUT;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else
>> *cmd_error = 0;
>>
>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>> *data_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + }
>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> *data_error = -ETIMEDOUT;
>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> *data_error = -EIO;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> + } else
>> *data_error = 0;
>>
>> /* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>
> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>
>>>>>> Sure.
>
>> index 7afb57c..883b50b 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>
>> struct mmc_host;
>>
>> +enum mmc_err_stat {
>> + MMC_ERR_CMD_TIMEOUT,
>> + MMC_ERR_CMD_CRC,
>> + MMC_ERR_DAT_TIMEOUT,
>> + MMC_ERR_DAT_CRC,
>> + MMC_ERR_AUTO_CMD,
>> + MMC_ERR_ADMA,
>> + MMC_ERR_TUNING,
>> + MMC_ERR_CMDQ_RED,
>> + MMC_ERR_CMDQ_GCE,
>> + MMC_ERR_CMDQ_ICCE,
>> + MMC_ERR_REQ_TIMEOUT,
>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>> + MMC_ERR_ICE_CFG,
>> + MMC_ERR_MAX,
>> +};
>> +
>> struct mmc_host_ops {
>> /*
>> * It is optional for the host to implement pre_req and post_req in
>> @@ -500,6 +517,9 @@ struct mmc_host {
>>
>> /* Host Software Queue support */
>> bool hsq_enabled;
>> + u32 err_stats[MMC_ERR_MAX];
>> + bool err_stats_enabled;
>> + bool err_state;
>
> Please drop err_state for now
>
>>>>>>>> first we can check this variable right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
> Please let me know your opinion on this.

As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
But maybe make the err_state addition a separate patch so it is easy to see how it works.

>
>>
>> unsigned long private[] ____cacheline_aligned;
>> };
>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>> DMA_FROM_DEVICE; }
>>
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
>
> Please use 'host' as the mmc_host parameter in this file.
>
>> +{
>> + mmc->err_state = true;
>
> Let's make this:
>
> host->err_stats_enabled = true;
>
>>>>>>> Sure.
>
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> + enum mmc_err_stat stat) {
>> +
>
> Please remove blank line here
>
>>>>>>> sure.
>
>> + mmc->err_stats[stat] += 1;
>> +}
>> +
>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>> **new_ext_csd);
>>
>

2022-02-02 21:26:53

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver

On 25/01/2022 20:19, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi,
>
> Thanks for the review.
>
> Please find the inline comments.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <[email protected]>
> Sent: Friday, January 21, 2022 12:40 PM
> To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
> Subject: Re: [PATCH V3 2/4] mmc: debugfs: Add debug fs entry for mmc driver
>
> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>> Add debug fs entry to query eMMC and SD card errors statistics
>>
>> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
>> Signed-off-by: Liangliang Lu <[email protected]>
>> Signed-off-by: Sayali Lokhande <[email protected]>
>> Signed-off-by: Bao D. Nguyen <[email protected]>
>> ---
>> drivers/mmc/core/debugfs.c | 81
>> ++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 81 insertions(+)
>>
>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>> index 3fdbc80..f4cb594 100644
>> --- a/drivers/mmc/core/debugfs.c
>> +++ b/drivers/mmc/core/debugfs.c
>> @@ -223,6 +223,82 @@ static int mmc_clock_opt_set(void *data, u64 val)
>> DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>> "%llu\n");
>>
>> +static int mmc_err_state_get(void *data, u64 *val) {
>> + struct mmc_host *host = data;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + *val = host->err_state ? 1 : 0;
>> +
>> + return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get, NULL,
>> +"%llu\n");
>> +
>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>> + struct mmc_host *host = (struct mmc_host *)file->private;
>> + const char *desc[MMC_ERR_MAX] = {
>> + [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>> + [MMC_ERR_CMD_CRC] = "Command CRC Errors Occurred",
>> + [MMC_ERR_DAT_TIMEOUT] = "Data Timeout Occurred",
>> + [MMC_ERR_DAT_CRC] = "Data CRC Errors Occurred",
>> + [MMC_ERR_AUTO_CMD] = "Auto-Cmd Error Occurred",
>> + [MMC_ERR_ADMA] = "ADMA Error Occurred",
>> + [MMC_ERR_TUNING] = "Tuning Error Occurred",
>> + [MMC_ERR_CMDQ_RED] = "CMDQ RED Errors",
>> + [MMC_ERR_CMDQ_GCE] = "CMDQ GCE Errors",
>> + [MMC_ERR_CMDQ_ICCE] = "CMDQ ICCE Errors",
>> + [MMC_ERR_REQ_TIMEOUT] = "Request Timedout",
>> + [MMC_ERR_CMDQ_REQ_TIMEOUT] = "CMDQ Request Timedout",
>> + [MMC_ERR_ICE_CFG] = "ICE Config Errors",
>> + };
>> + int i;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + if (!host->err_stats_enabled) {
>> + seq_printf(file, "Not supported by driver\n");
>> + return 0;
>> + }
>> +
>> + for (i = 0; i < MMC_ERR_MAX; i++) {
>> + if (desc[i])
>> + seq_printf(file, "# %s:\t %d\n",
>> + desc[i], host->err_stats[i]);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
>> +{
>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>> +
>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>> + size_t cnt, loff_t *ppos)
>> +{
>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>> +
>> + return cnt;
>> +}
>> +
>> +static const struct file_operations mmc_err_stats_fops = {
>> + .open = mmc_err_stats_open,
>> + .read = seq_read,
>> + .write = mmc_err_stats_write,
>> +};
>> +
>> void mmc_add_host_debugfs(struct mmc_host *host) {
>> struct dentry *root;
>> @@ -236,6 +312,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>> &mmc_clock_fops);
>>
>> + debugfs_create_file("err_state", 0600, root, host,
>> + &mmc_err_state);
>
> Please, let's drop err_state for now
>
>>>>>> first we can check this right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
> Please let me know your opinion on this.( Same as patch set (V3 1/4).

Please see my comments for patch 1

>
>> + debugfs_create_file("err_stats", 0600, root, host,
>> + &mmc_err_stats_fops);
>> +
>> #ifdef CONFIG_FAIL_MMC_REQUEST
>> if (fail_request)
>> setup_fault_attr(&fail_default_attr, fail_request);
>>
>

2022-02-09 06:21:07

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: RE: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

Hi,

Thanks for the review.

Please find the inline comments

Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Tuesday, February 1, 2022 7:28 PM
To: Sajida Bhanu (Temp) <[email protected]>; Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
> Hi,
>
> Thanks for the Review.
>
> Please find the inline comments.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <[email protected]>
> Sent: Friday, January 21, 2022 12:38 PM
> To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh
> Das (QUIC) <[email protected]>; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]; Ram Prakash
> Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC)
> <[email protected]>; [email protected];
> [email protected]; [email protected]; Liangliang Lu
> <[email protected]>; Bao D . Nguyen <[email protected]>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card
> errors
>
> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>> Add changes to capture eMMC and SD card errors.
>> This is useful for debug and testing.
>>
>> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
>> Signed-off-by: Liangliang Lu <[email protected]>
>> Signed-off-by: Sayali Lokhande <[email protected]>
>> Signed-off-by: Bao D. Nguyen <[email protected]>
>> ---
>> drivers/mmc/host/sdhci-msm.c | 3 ++
>> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
>> include/linux/mmc/host.h | 31 +++++++++++++++++++
>> 3 files changed, 94 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/host/sdhci-msm.c
>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>> --- a/drivers/mmc/host/sdhci-msm.c
>> +++ b/drivers/mmc/host/sdhci-msm.c
>> @@ -128,6 +128,8 @@
>>
>> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>>
>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>> +
>> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>
>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>> if (ret)
>> goto pm_runtime_disable;
>>
>> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>
> Please remove this. SDHCI will enable error stats.
>
>>>>>>> Sure.
>
>> pm_runtime_mark_last_busy(&pdev->dev);
>> pm_runtime_put_autosuspend(&pdev->dev);
>>
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>> index 07c6da1..74b356e 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>> if (host->ops->dump_vendor_regs)
>> host->ops->dump_vendor_regs(host);
>>
>> + if (host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_enable(host->mmc);
>
> Please move this to sdhci_setup_host() and call it unconditionally
> i.e. just
>
> mmc_debugfs_err_stats_enable(host->mmc);
>
>
>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
> If we move this call to sdhci_setup_host(), then it will set if no errors also right?

Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()

>>>>>No ..I have updated err_state = true in sdhci_dumpregs() because if any errors (serious) in driver, we are calling sdhci_dumpregs().

>
>
>> SDHCI_DUMP("============================================\n");
>> }
>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>> spin_lock_irqsave(&host->lock, flags);
>>
>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>
> Please remove the 'if ()', i.e. just make it, unconditionally:
>
> mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>
> Same for other calls to mmc_debugfs_err_stats_inc()
>
>>>>>>>>> Sure.
>
>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
>> timer_list *t)
>>
>> if (host->data || host->data_cmd ||
>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
>> *host, u32 intmask, u32 *intmask_p)
>>
>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> - if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_TIMEOUT) {
>> host->cmd->error = -ETIMEDOUT;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else {
>> host->cmd->error = -EILSEQ;
>> -
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + }
>> /* Treat data command CRC error the same as data CRC error */
>> if (host->cmd->data &&
>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>> +intmask, u32 *intmask_p)
>> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>> -ETIMEDOUT :
>> -EILSEQ;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>
>> if (sdhci_auto_cmd23(host, mrq)) {
>> mrq->sbc->error = err;
>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data_cmd = NULL;
>> data_cmd->error = -ETIMEDOUT;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> __sdhci_finish_mrq(host, data_cmd->mrq);
>> return;
>> }
>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> return;
>> }
>>
>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data->error = -ETIMEDOUT;
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + }
>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>> host->data->error = -EILSEQ;
>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> - != MMC_BUS_TEST_R)
>> + != MMC_BUS_TEST_R) {
>> host->data->error = -EILSEQ;
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + }
>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>> intmask);
>> sdhci_adma_show_error(host);
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> host->data->error = -EIO;
>> if (host->ops->adma_workaround)
>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40
>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>> if (!host->cqe_on)
>> return false;
>>
>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>> +SDHCI_INT_CRC)) {
>> *cmd_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + }
>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>> *cmd_error = -ETIMEDOUT;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else
>> *cmd_error = 0;
>>
>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>> *data_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + }
>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> *data_error = -ETIMEDOUT;
>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> *data_error = -EIO;
>> - else
>> + if (host->mmc && host->mmc->err_stats_enabled)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> + } else
>> *data_error = 0;
>>
>> /* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>
> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>
>>>>>> Sure.
>
>> index 7afb57c..883b50b 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>
>> struct mmc_host;
>>
>> +enum mmc_err_stat {
>> + MMC_ERR_CMD_TIMEOUT,
>> + MMC_ERR_CMD_CRC,
>> + MMC_ERR_DAT_TIMEOUT,
>> + MMC_ERR_DAT_CRC,
>> + MMC_ERR_AUTO_CMD,
>> + MMC_ERR_ADMA,
>> + MMC_ERR_TUNING,
>> + MMC_ERR_CMDQ_RED,
>> + MMC_ERR_CMDQ_GCE,
>> + MMC_ERR_CMDQ_ICCE,
>> + MMC_ERR_REQ_TIMEOUT,
>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>> + MMC_ERR_ICE_CFG,
>> + MMC_ERR_MAX,
>> +};
>> +
>> struct mmc_host_ops {
>> /*
>> * It is optional for the host to implement pre_req and post_req in
>> @@ -500,6 +517,9 @@ struct mmc_host {
>>
>> /* Host Software Queue support */
>> bool hsq_enabled;
>> + u32 err_stats[MMC_ERR_MAX];
>> + bool err_stats_enabled;
>> + bool err_state;
>
> Please drop err_state for now
>
>>>>>>>> first we can check this variable right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
> Please let me know your opinion on this.

As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
But maybe make the err_state addition a separate patch so it is easy to see how it works.

>>>>> Sure will post separate patch for err_state settings.

>
>>
>> unsigned long private[] ____cacheline_aligned;
>> };
>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>> DMA_FROM_DEVICE; }
>>
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>> +*mmc)
>
> Please use 'host' as the mmc_host parameter in this file.
>
>> +{
>> + mmc->err_state = true;
>
> Let's make this:
>
> host->err_stats_enabled = true;
>
>>>>>>> Sure.
>
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> + enum mmc_err_stat stat) {
>> +
>
> Please remove blank line here
>
>>>>>>> sure.
>
>> + mmc->err_stats[stat] += 1;
>> +}
>> +
>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>> **new_ext_csd);
>>
>

2022-02-11 09:55:17

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On 08/02/2022 21:04, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi,
>
> Thanks for the review.
>
> Please find the inline comments
>
> Thanks,
> Sajida
> -----Original Message-----
> From: Adrian Hunter <[email protected]>
> Sent: Tuesday, February 1, 2022 7:28 PM
> To: Sajida Bhanu (Temp) <[email protected]>; Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors
>
> On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
>> Hi,
>>
>> Thanks for the Review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <[email protected]>
>> Sent: Friday, January 21, 2022 12:38 PM
>> To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh
>> Das (QUIC) <[email protected]>; [email protected];
>> [email protected]; [email protected];
>> [email protected]; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]; Ram Prakash
>> Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC)
>> <[email protected]>; [email protected];
>> [email protected]; [email protected]; Liangliang Lu
>> <[email protected]>; Bao D . Nguyen <[email protected]>
>> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card
>> errors
>>
>> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>>> Add changes to capture eMMC and SD card errors.
>>> This is useful for debug and testing.
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
>>> Signed-off-by: Liangliang Lu <[email protected]>
>>> Signed-off-by: Sayali Lokhande <[email protected]>
>>> Signed-off-by: Bao D. Nguyen <[email protected]>
>>> ---
>>> drivers/mmc/host/sdhci-msm.c | 3 ++
>>> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
>>> include/linux/mmc/host.h | 31 +++++++++++++++++++
>>> 3 files changed, 94 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/sdhci-msm.c
>>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>>> --- a/drivers/mmc/host/sdhci-msm.c
>>> +++ b/drivers/mmc/host/sdhci-msm.c
>>> @@ -128,6 +128,8 @@
>>>
>>> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>>>
>>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>>> +
>>> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
>>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>>
>>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>> if (ret)
>>> goto pm_runtime_disable;
>>>
>>> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>>
>> Please remove this. SDHCI will enable error stats.
>>
>>>>>>>> Sure.
>>
>>> pm_runtime_mark_last_busy(&pdev->dev);
>>> pm_runtime_put_autosuspend(&pdev->dev);
>>>
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>> index 07c6da1..74b356e 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>> if (host->ops->dump_vendor_regs)
>>> host->ops->dump_vendor_regs(host);
>>>
>>> + if (host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Please move this to sdhci_setup_host() and call it unconditionally
>> i.e. just
>>
>> mmc_debugfs_err_stats_enable(host->mmc);
>>
>>
>>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
>> If we move this call to sdhci_setup_host(), then it will set if no errors also right?
>
> Then it seems like you want to set err_state = true in mmc_debugfs_err_stats_inc()
>
>>>>>> No ..I have updated err_state = true in sdhci_dumpregs() because if any errors (serious) in driver, we are calling sdhci_dumpregs().

I see, but it is not OK to mix up the register dump with error logging.
Perhaps add another error type and increment that when needed.

>
>>
>>
>>> SDHCI_DUMP("============================================\n");
>>> }
>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>> spin_lock_irqsave(&host->lock, flags);
>>>
>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>
>> Please remove the 'if ()', i.e. just make it, unconditionally:
>>
>> mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>
>> Same for other calls to mmc_debugfs_err_stats_inc()
>>
>>>>>>>>>> Sure.
>>
>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
>>> timer_list *t)
>>>
>>> if (host->data || host->data_cmd ||
>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
>>> *host, u32 intmask, u32 *intmask_p)
>>>
>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>> host->cmd->error = -ETIMEDOUT;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else {
>>> host->cmd->error = -EILSEQ;
>>> -
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + }
>>> /* Treat data command CRC error the same as data CRC error */
>>> if (host->cmd->data &&
>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
>>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>> +intmask, u32 *intmask_p)
>>> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>> -ETIMEDOUT :
>>> -EILSEQ;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>
>>> if (sdhci_auto_cmd23(host, mrq)) {
>>> mrq->sbc->error = err;
>>> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data_cmd = NULL;
>>> data_cmd->error = -ETIMEDOUT;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>> return;
>>> }
>>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> return;
>>> }
>>>
>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data->error = -ETIMEDOUT;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + }
>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>> host->data->error = -EILSEQ;
>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> - != MMC_BUS_TEST_R)
>>> + != MMC_BUS_TEST_R) {
>>> host->data->error = -EILSEQ;
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + }
>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>> intmask);
>>> sdhci_adma_show_error(host);
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> host->data->error = -EIO;
>>> if (host->ops->adma_workaround)
>>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>> if (!host->cqe_on)
>>> return false;
>>>
>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>> *cmd_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + }
>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>> *cmd_error = -ETIMEDOUT;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else
>>> *cmd_error = 0;
>>>
>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>> *data_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + }
>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> *data_error = -ETIMEDOUT;
>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> *data_error = -EIO;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> + } else
>>> *data_error = 0;
>>>
>>> /* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>
>> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>>
>>>>>>> Sure.
>>
>>> index 7afb57c..883b50b 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>
>>> struct mmc_host;
>>>
>>> +enum mmc_err_stat {
>>> + MMC_ERR_CMD_TIMEOUT,
>>> + MMC_ERR_CMD_CRC,
>>> + MMC_ERR_DAT_TIMEOUT,
>>> + MMC_ERR_DAT_CRC,
>>> + MMC_ERR_AUTO_CMD,
>>> + MMC_ERR_ADMA,
>>> + MMC_ERR_TUNING,
>>> + MMC_ERR_CMDQ_RED,
>>> + MMC_ERR_CMDQ_GCE,
>>> + MMC_ERR_CMDQ_ICCE,
>>> + MMC_ERR_REQ_TIMEOUT,
>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> + MMC_ERR_ICE_CFG,
>>> + MMC_ERR_MAX,
>>> +};
>>> +
>>> struct mmc_host_ops {
>>> /*
>>> * It is optional for the host to implement pre_req and post_req in
>>> @@ -500,6 +517,9 @@ struct mmc_host {
>>>
>>> /* Host Software Queue support */
>>> bool hsq_enabled;
>>> + u32 err_stats[MMC_ERR_MAX];
>>> + bool err_stats_enabled;
>>> + bool err_state;
>>
>> Please drop err_state for now
>>
>>>>>>>>> first we can check this variable right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
>> Please let me know your opinion on this.
>
> As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
> But maybe make the err_state addition a separate patch so it is easy to see how it works.
>
>>>>>> Sure will post separate patch for err_state settings.
>
>>
>>>
>>> unsigned long private[] ____cacheline_aligned;
>>> };
>>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>> DMA_FROM_DEVICE; }
>>>
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc)
>>
>> Please use 'host' as the mmc_host parameter in this file.
>>
>>> +{
>>> + mmc->err_state = true;
>>
>> Let's make this:
>>
>> host->err_stats_enabled = true;
>>
>>>>>>>> Sure.
>>
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> + enum mmc_err_stat stat) {
>>> +
>>
>> Please remove blank line here
>>
>>>>>>>> sure.
>>
>>> + mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>> **new_ext_csd);
>>>
>>
>


2022-02-15 14:55:04

by Sajida Bhanu (Temp)

[permalink] [raw]
Subject: RE: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

Hi Adrian,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <[email protected]>
Sent: Friday, February 11, 2022 11:21 AM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Cc: [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

WARNING: This email originated from outside of Qualcomm. Please be wary of any links or attachments, and do not enable macros.

On 08/02/2022 21:04, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi,
>
> Thanks for the review.
>
> Please find the inline comments
>
> Thanks,
> Sajida
> -----Original Message-----
> From: Adrian Hunter <[email protected]>
> Sent: Tuesday, February 1, 2022 7:28 PM
> To: Sajida Bhanu (Temp) <[email protected]>; Sajida Bhanu
> (Temp) (QUIC) <[email protected]>; Asutosh Das (QUIC)
> <[email protected]>; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]
> Cc: [email protected]; [email protected]; Ram Prakash
> Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC)
> <[email protected]>; [email protected];
> [email protected]; [email protected]; Liangliang Lu
> <[email protected]>; Bao D . Nguyen <[email protected]>
> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card
> errors
>
> On 25/01/2022 20:17, Sajida Bhanu (Temp) wrote:
>> Hi,
>>
>> Thanks for the Review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <[email protected]>
>> Sent: Friday, January 21, 2022 12:38 PM
>> To: Sajida Bhanu (Temp) (QUIC) <[email protected]>; Asutosh
>> Das (QUIC) <[email protected]>; [email protected];
>> [email protected]; [email protected];
>> [email protected]; [email protected];
>> [email protected]
>> Cc: [email protected]; [email protected]; Ram Prakash
>> Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC)
>> <[email protected]>; [email protected];
>> [email protected]; [email protected]; Liangliang Lu
>> <[email protected]>; Bao D . Nguyen <[email protected]>
>> Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card
>> errors
>>
>> On 20/01/2022 19:26, Shaik Sajida Bhanu wrote:
>>> Add changes to capture eMMC and SD card errors.
>>> This is useful for debug and testing.
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
>>> Signed-off-by: Liangliang Lu <[email protected]>
>>> Signed-off-by: Sayali Lokhande <[email protected]>
>>> Signed-off-by: Bao D. Nguyen <[email protected]>
>>> ---
>>> drivers/mmc/host/sdhci-msm.c | 3 ++
>>> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
>>> include/linux/mmc/host.h | 31 +++++++++++++++++++
>>> 3 files changed, 94 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/host/sdhci-msm.c
>>> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
>>> --- a/drivers/mmc/host/sdhci-msm.c
>>> +++ b/drivers/mmc/host/sdhci-msm.c
>>> @@ -128,6 +128,8 @@
>>>
>>> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>>>
>>> +#define MSM_MMC_ERR_STATS_ENABLE 1
>>> +
>>> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
>>> MSM_PWR_IRQ_TIMEOUT_MS 5000
>>>
>>> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
>>> if (ret)
>>> goto pm_runtime_disable;
>>>
>>> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
>>
>> Please remove this. SDHCI will enable error stats.
>>
>>>>>>>> Sure.
>>
>>> pm_runtime_mark_last_busy(&pdev->dev);
>>> pm_runtime_put_autosuspend(&pdev->dev);
>>>
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>> index 07c6da1..74b356e 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>> if (host->ops->dump_vendor_regs)
>>> host->ops->dump_vendor_regs(host);
>>>
>>> + if (host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Please move this to sdhci_setup_host() and call it unconditionally
>> i.e. just
>>
>> mmc_debugfs_err_stats_enable(host->mmc);
>>
>>
>>>>>>>>> mmc_debugfs_err_stats_enable() will set err_state , that means some errors occurred in driver level.
>> If we move this call to sdhci_setup_host(), then it will set if no errors also right?
>
> Then it seems like you want to set err_state = true in
> mmc_debugfs_err_stats_inc()
>
>>>>>> No ..I have updated err_state = true in sdhci_dumpregs() because if any errors (serious) in driver, we are calling sdhci_dumpregs().

I see, but it is not OK to mix up the register dump with error logging.
Perhaps add another error type and increment that when needed.

>>>>> okay sure.
>
>>
>>
>>> SDHCI_DUMP("============================================\n");
>>> }
>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>> spin_lock_irqsave(&host->lock, flags);
>>>
>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc,
>>> + MMC_ERR_REQ_TIMEOUT);
>>
>> Please remove the 'if ()', i.e. just make it, unconditionally:
>>
>> mmc_debugfs_err_stats_inc(host->mmc,
>> MMC_ERR_REQ_TIMEOUT);
>>
>> Same for other calls to mmc_debugfs_err_stats_inc()
>>
>>>>>>>>>> Sure.
>>
>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
>>> timer_list *t)
>>>
>>> if (host->data || host->data_cmd ||
>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc,
>>> + MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
>>> *host, u32 intmask, u32 *intmask_p)
>>>
>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>> host->cmd->error = -ETIMEDOUT;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else {
>>> host->cmd->error = -EILSEQ;
>>> -
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + }
>>> /* Treat data command CRC error the same as data CRC error */
>>> if (host->cmd->data &&
>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
>>> @@ -3265,6
>>> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>> +intmask, u32 *intmask_p)
>>> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
>>> -ETIMEDOUT :
>>> -EILSEQ;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc,
>>> + MMC_ERR_AUTO_CMD);
>>>
>>> if (sdhci_auto_cmd23(host, mrq)) {
>>> mrq->sbc->error = err; @@ -3342,6 +3357,8 @@
>>> static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data_cmd = NULL;
>>> data_cmd->error = -ETIMEDOUT;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> +
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>> return;
>>> }
>>> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> return;
>>> }
>>>
>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data->error = -ETIMEDOUT;
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + }
>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>> host->data->error = -EILSEQ;
>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> - != MMC_BUS_TEST_R)
>>> + != MMC_BUS_TEST_R) {
>>> host->data->error = -EILSEQ;
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + }
>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>> intmask);
>>> sdhci_adma_show_error(host);
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc,
>>> + MMC_ERR_ADMA);
>>> host->data->error = -EIO;
>>> if (host->ops->adma_workaround)
>>> host->ops->adma_workaround(host, intmask); @@
>>> -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>> if (!host->cqe_on)
>>> return false;
>>>
>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>> *cmd_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + }
>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>> *cmd_error = -ETIMEDOUT;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else
>>> *cmd_error = 0;
>>>
>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>> *data_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + }
>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> *data_error = -ETIMEDOUT;
>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> *data_error = -EIO;
>>> - else
>>> + if (host->mmc && host->mmc->err_stats_enabled)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> + } else
>>> *data_error = 0;
>>>
>>> /* Clear selected interrupts. */ diff --git
>>> a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>
>> Changes to host.h are core changes and belong in patch 3, which should be the first patch.
>>
>>>>>>> Sure.
>>
>>> index 7afb57c..883b50b 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>
>>> struct mmc_host;
>>>
>>> +enum mmc_err_stat {
>>> + MMC_ERR_CMD_TIMEOUT,
>>> + MMC_ERR_CMD_CRC,
>>> + MMC_ERR_DAT_TIMEOUT,
>>> + MMC_ERR_DAT_CRC,
>>> + MMC_ERR_AUTO_CMD,
>>> + MMC_ERR_ADMA,
>>> + MMC_ERR_TUNING,
>>> + MMC_ERR_CMDQ_RED,
>>> + MMC_ERR_CMDQ_GCE,
>>> + MMC_ERR_CMDQ_ICCE,
>>> + MMC_ERR_REQ_TIMEOUT,
>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> + MMC_ERR_ICE_CFG,
>>> + MMC_ERR_MAX,
>>> +};
>>> +
>>> struct mmc_host_ops {
>>> /*
>>> * It is optional for the host to implement pre_req and post_req
>>> in @@ -500,6 +517,9 @@ struct mmc_host {
>>>
>>> /* Host Software Queue support */
>>> bool hsq_enabled;
>>> + u32 err_stats[MMC_ERR_MAX];
>>> + bool err_stats_enabled;
>>> + bool err_state;
>>
>> Please drop err_state for now
>>
>>>>>>>>> first we can check this variable right, if it is set then we can go and check err_stats[] to know more on type of error (data /cmd timeout or CRC errors etc.).
>> Please let me know your opinion on this.
>
> As I wrote above, you could set err_state in mmc_debugfs_err_stats_inc().
> But maybe make the err_state addition a separate patch so it is easy to see how it works.
>
>>>>>> Sure will post separate patch for err_state settings.
>
>>
>>>
>>> unsigned long private[] ____cacheline_aligned;
>>> };
>>> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>> DMA_FROM_DEVICE; }
>>>
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc)
>>
>> Please use 'host' as the mmc_host parameter in this file.
>>
>>> +{
>>> + mmc->err_state = true;
>>
>> Let's make this:
>>
>> host->err_stats_enabled = true;
>>
>>>>>>>> Sure.
>>
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> + enum mmc_err_stat stat) {
>>> +
>>
>> Please remove blank line here
>>
>>>>>>>> sure.
>>
>>> + mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>> **new_ext_csd);
>>>
>>
>

2022-02-16 07:39:04

by Bjorn Andersson

[permalink] [raw]
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On Thu 20 Jan 11:26 CST 2022, Shaik Sajida Bhanu wrote:

> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>

Please read
https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
and the one section below on what your S-o-b actually means.

In particular this does not say "the four of us authored this patch", it
documents the path the patch took to this point. In which case Bao is
the last one stating that he _handled_ the patch - but then somehow it
came out of your mailbox.

You're probably looking for Co-developed-by, which is described just
below that.

Regards,
Bjorn

> ---
> drivers/mmc/host/sdhci-msm.c | 3 ++
> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
> include/linux/mmc/host.h | 31 +++++++++++++++++++
> 3 files changed, 94 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
> index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>
> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
> /* Timeout value to avoid infinite waiting for pwr_irq */
> #define MSM_PWR_IRQ_TIMEOUT_MS 5000
>
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
> if (ret)
> goto pm_runtime_disable;
>
> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
> pm_runtime_mark_last_busy(&pdev->dev);
> pm_runtime_put_autosuspend(&pdev->dev);
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + if (host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_enable(host->mmc);
> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3265,6 +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
> -ETIMEDOUT :
> -EILSEQ;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3933,40 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
> + bool err_stats_enabled;
> + bool err_state;
>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> + mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +
> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
> int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
> int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>

2022-02-16 08:01:13

by Shaik Sajida Bhanu

[permalink] [raw]
Subject: RE: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

Hi Bjorn,

Thank you.

Sure will address this in patch set.

Thanks,
Sajida
-----Original Message-----
From: Bjorn Andersson <[email protected]>
Sent: Tuesday, February 15, 2022 10:29 PM
To: Sajida Bhanu (Temp) (QUIC) <[email protected]>
Cc: [email protected]; Asutosh Das (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Ram Prakash Gupta (QUIC) <[email protected]>; Pradeep Pragallapati (QUIC) <[email protected]>; [email protected]; [email protected]; [email protected]; Liangliang Lu <[email protected]>; Bao D . Nguyen <[email protected]>
Subject: Re: [PATCH V3 1/4] mmc: sdhci: Capture eMMC and SD card errors

On Thu 20 Jan 11:26 CST 2022, Shaik Sajida Bhanu wrote:

> Add changes to capture eMMC and SD card errors.
> This is useful for debug and testing.
>
> Signed-off-by: Shaik Sajida Bhanu <[email protected]>
> Signed-off-by: Liangliang Lu <[email protected]>
> Signed-off-by: Sayali Lokhande <[email protected]>
> Signed-off-by: Bao D. Nguyen <[email protected]>

Please read
https://docs.kernel.org/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
and the one section below on what your S-o-b actually means.

In particular this does not say "the four of us authored this patch", it documents the path the patch took to this point. In which case Bao is the last one stating that he _handled_ the patch - but then somehow it came out of your mailbox.

You're probably looking for Co-developed-by, which is described just below that.

Regards,
Bjorn

> ---
> drivers/mmc/host/sdhci-msm.c | 3 ++
> drivers/mmc/host/sdhci.c | 72 ++++++++++++++++++++++++++++++++++++--------
> include/linux/mmc/host.h | 31 +++++++++++++++++++
> 3 files changed, 94 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci-msm.c
> b/drivers/mmc/host/sdhci-msm.c index 50c71e0..309eb7b 100644
> --- a/drivers/mmc/host/sdhci-msm.c
> +++ b/drivers/mmc/host/sdhci-msm.c
> @@ -128,6 +128,8 @@
>
> #define MSM_MMC_AUTOSUSPEND_DELAY_MS 50
>
> +#define MSM_MMC_ERR_STATS_ENABLE 1
> +
> /* Timeout value to avoid infinite waiting for pwr_irq */ #define
> MSM_PWR_IRQ_TIMEOUT_MS 5000
>
> @@ -2734,6 +2736,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
> if (ret)
> goto pm_runtime_disable;
>
> + host->mmc->err_stats_enabled = MSM_MMC_ERR_STATS_ENABLE;
> pm_runtime_mark_last_busy(&pdev->dev);
> pm_runtime_put_autosuspend(&pdev->dev);
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index
> 07c6da1..74b356e 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,8 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + if (host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_enable(host->mmc);
> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3161,8 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3185,8 @@ static void sdhci_timeout_data_timer(struct
> timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3246,18 @@ static void sdhci_cmd_irq(struct sdhci_host
> *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3265,6
> +3278,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
> -ETIMEDOUT :
> -EILSEQ;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> @@ -3342,6 +3357,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3392,29 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3933,40 @@
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +{
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) {
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + if (host->mmc && host->mmc->err_stats_enabled)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index
> 7afb57c..883b50b 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,9 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
> + bool err_stats_enabled;
> + bool err_state;
>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +655,17 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
> DMA_FROM_DEVICE; }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> + mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +
> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
> **new_ext_csd);
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member of Code Aurora Forum, hosted by The Linux Foundation
>