2021-07-01 08:09:24

by Vincent Whitchurch

[permalink] [raw]
Subject: [PATCH v2] mmc: dw_mmc: Add data CRC error injection

This driver has had problems when handling data errors. Add fault
injection support so that the abort handling can be easily triggered and
regression-tested. A hrtimer is used to indicate a data CRC error at
various points during the data transfer.

Note that for the recent problem with hangs in the case of some data CRC
errors, a udelay(10) inserted at the start of send_stop_abort() greatly
helped in triggering the hang, but I've not included this as part of
the fault injection support since it seemed too specific.

Signed-off-by: Vincent Whitchurch <[email protected]>
---

Notes:
v2: Add missing includes.

drivers/mmc/host/dw_mmc.c | 73 +++++++++++++++++++++++++++++++++++++++
drivers/mmc/host/dw_mmc.h | 7 ++++
2 files changed, 80 insertions(+)

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index d333130d1531..dbbb94e6ff4b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -17,9 +17,11 @@
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/ioport.h>
+#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/prandom.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/stat.h>
@@ -181,6 +183,9 @@ static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
&host->pending_events);
debugfs_create_xul("completed_events", S_IRUSR, root,
&host->completed_events);
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_data_crc", root, &host->fail_data_crc);
+#endif
}
#endif /* defined(CONFIG_DEBUG_FS) */

@@ -1788,6 +1793,68 @@ static const struct mmc_host_ops dw_mci_ops = {
.prepare_hs400_tuning = dw_mci_prepare_hs400_tuning,
};

+#ifdef CONFIG_FAULT_INJECTION
+static enum hrtimer_restart dw_mci_fault_timer(struct hrtimer *t)
+{
+ struct dw_mci *host = container_of(t, struct dw_mci, fault_timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->irq_lock, flags);
+
+ if (!host->data_status)
+ host->data_status = SDMMC_INT_DCRC;
+ set_bit(EVENT_DATA_ERROR, &host->pending_events);
+ tasklet_schedule(&host->tasklet);
+
+ spin_unlock_irqrestore(&host->irq_lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+static void dw_mci_start_fault_timer(struct dw_mci *host)
+{
+ struct mmc_data *data = host->data;
+
+ if (!data || data->blocks <= 1)
+ return;
+
+ if (!should_fail(&host->fail_data_crc, 1))
+ return;
+
+ /*
+ * Try to inject the error at random points during the data transfer.
+ */
+ hrtimer_start(&host->fault_timer,
+ ms_to_ktime(prandom_u32() % 25),
+ HRTIMER_MODE_REL);
+}
+
+static void dw_mci_stop_fault_timer(struct dw_mci *host)
+{
+ hrtimer_cancel(&host->fault_timer);
+}
+
+static void dw_mci_init_fault(struct dw_mci *host)
+{
+ host->fail_data_crc = (struct fault_attr) FAULT_ATTR_INITIALIZER;
+
+ hrtimer_init(&host->fault_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ host->fault_timer.function = dw_mci_fault_timer;
+}
+#else
+static void dw_mci_init_fault(struct dw_mci *host)
+{
+}
+
+static void dw_mci_start_fault_timer(struct dw_mci *host)
+{
+}
+
+static void dw_mci_stop_fault_timer(struct dw_mci *host)
+{
+}
+#endif
+
static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
__releases(&host->lock)
__acquires(&host->lock)
@@ -2102,6 +2169,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
break;
}

+ dw_mci_stop_fault_timer(host);
host->data = NULL;
set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
err = dw_mci_data_complete(host, data);
@@ -2151,6 +2219,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
if (mrq->cmd->error && mrq->data)
dw_mci_reset(host);

+ dw_mci_stop_fault_timer(host);
host->cmd = NULL;
host->data = NULL;

@@ -2600,6 +2669,8 @@ static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)

set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
tasklet_schedule(&host->tasklet);
+
+ dw_mci_start_fault_timer(host);
}

static void dw_mci_handle_cd(struct dw_mci *host)
@@ -3223,6 +3294,8 @@ int dw_mci_probe(struct dw_mci *host)
spin_lock_init(&host->irq_lock);
INIT_LIST_HEAD(&host->queue);

+ dw_mci_init_fault(host);
+
/*
* Get the host data width - this assumes that HCON has been set with
* the correct values.
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index da5923a92e60..ce05d81477d9 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -14,6 +14,8 @@
#include <linux/mmc/core.h>
#include <linux/dmaengine.h>
#include <linux/reset.h>
+#include <linux/fault-inject.h>
+#include <linux/hrtimer.h>
#include <linux/interrupt.h>

enum dw_mci_state {
@@ -230,6 +232,11 @@ struct dw_mci {
struct timer_list cmd11_timer;
struct timer_list cto_timer;
struct timer_list dto_timer;
+
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_attr fail_data_crc;
+ struct hrtimer fault_timer;
+#endif
};

/* DMA ops for Internal/External DMAC interface */
--
2.28.0


2021-07-25 21:44:27

by Jaehoon Chung

[permalink] [raw]
Subject: Re: [PATCH v2] mmc: dw_mmc: Add data CRC error injection

On 7/1/21 5:05 PM, Vincent Whitchurch wrote:
> This driver has had problems when handling data errors. Add fault
> injection support so that the abort handling can be easily triggered and
> regression-tested. A hrtimer is used to indicate a data CRC error at
> various points during the data transfer.
>
> Note that for the recent problem with hangs in the case of some data CRC
> errors, a udelay(10) inserted at the start of send_stop_abort() greatly
> helped in triggering the error, but I've not included this as part of
> the fault injection support since it seemed too specific.
>
> Signed-off-by: Vincent Whitchurch <[email protected]>

Reviewed-by: Jaehoon Chung <[email protected]>

Best Regards,
Jaehoon Chung

> ---
>
> Notes:
> v2: Add missing includes.
>
> drivers/mmc/host/dw_mmc.c | 73 +++++++++++++++++++++++++++++++++++++++
> drivers/mmc/host/dw_mmc.h | 7 ++++
> 2 files changed, 80 insertions(+)
>
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index d333130d1531..dbbb94e6ff4b 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -17,9 +17,11 @@
> #include <linux/interrupt.h>
> #include <linux/iopoll.h>
> #include <linux/ioport.h>
> +#include <linux/ktime.h>
> #include <linux/module.h>
> #include <linux/platform_device.h>
> #include <linux/pm_runtime.h>
> +#include <linux/prandom.h>
> #include <linux/seq_file.h>
> #include <linux/slab.h>
> #include <linux/stat.h>
> @@ -181,6 +183,9 @@ static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
> &host->pending_events);
> debugfs_create_xul("completed_events", S_IRUSR, root,
> &host->completed_events);
> +#ifdef CONFIG_FAULT_INJECTION
> + fault_create_debugfs_attr("fail_data_crc", root, &host->fail_data_crc);
> +#endif
> }
> #endif /* defined(CONFIG_DEBUG_FS) */
>
> @@ -1788,6 +1793,68 @@ static const struct mmc_host_ops dw_mci_ops = {
> .prepare_hs400_tuning = dw_mci_prepare_hs400_tuning,
> };
>
> +#ifdef CONFIG_FAULT_INJECTION
> +static enum hrtimer_restart dw_mci_fault_timer(struct hrtimer *t)
> +{
> + struct dw_mci *host = container_of(t, struct dw_mci, fault_timer);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->irq_lock, flags);
> +
> + if (!host->data_status)
> + host->data_status = SDMMC_INT_DCRC;
> + set_bit(EVENT_DATA_ERROR, &host->pending_events);
> + tasklet_schedule(&host->tasklet);
> +
> + spin_unlock_irqrestore(&host->irq_lock, flags);
> +
> + return HRTIMER_NORESTART;
> +}
> +
> +static void dw_mci_start_fault_timer(struct dw_mci *host)
> +{
> + struct mmc_data *data = host->data;
> +
> + if (!data || data->blocks <= 1)
> + return;
> +
> + if (!should_fail(&host->fail_data_crc, 1))
> + return;
> +
> + /*
> + * Try to inject the error at random points during the data transfer.
> + */
> + hrtimer_start(&host->fault_timer,
> + ms_to_ktime(prandom_u32() % 25),
> + HRTIMER_MODE_REL);
> +}
> +
> +static void dw_mci_stop_fault_timer(struct dw_mci *host)
> +{
> + hrtimer_cancel(&host->fault_timer);
> +}
> +
> +static void dw_mci_init_fault(struct dw_mci *host)
> +{
> + host->fail_data_crc = (struct fault_attr) FAULT_ATTR_INITIALIZER;
> +
> + hrtimer_init(&host->fault_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + host->fault_timer.function = dw_mci_fault_timer;
> +}
> +#else
> +static void dw_mci_init_fault(struct dw_mci *host)
> +{
> +}
> +
> +static void dw_mci_start_fault_timer(struct dw_mci *host)
> +{
> +}
> +
> +static void dw_mci_stop_fault_timer(struct dw_mci *host)
> +{
> +}
> +#endif
> +
> static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
> __releases(&host->lock)
> __acquires(&host->lock)
> @@ -2102,6 +2169,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
> break;
> }
>
> + dw_mci_stop_fault_timer(host);
> host->data = NULL;
> set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
> err = dw_mci_data_complete(host, data);
> @@ -2151,6 +2219,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
> if (mrq->cmd->error && mrq->data)
> dw_mci_reset(host);
>
> + dw_mci_stop_fault_timer(host);
> host->cmd = NULL;
> host->data = NULL;
>
> @@ -2600,6 +2669,8 @@ static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
>
> set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
> tasklet_schedule(&host->tasklet);
> +
> + dw_mci_start_fault_timer(host);
> }
>
> static void dw_mci_handle_cd(struct dw_mci *host)
> @@ -3223,6 +3294,8 @@ int dw_mci_probe(struct dw_mci *host)
> spin_lock_init(&host->irq_lock);
> INIT_LIST_HEAD(&host->queue);
>
> + dw_mci_init_fault(host);
> +
> /*
> * Get the host data width - this assumes that HCON has been set with
> * the correct values.
> diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
> index da5923a92e60..ce05d81477d9 100644
> --- a/drivers/mmc/host/dw_mmc.h
> +++ b/drivers/mmc/host/dw_mmc.h
> @@ -14,6 +14,8 @@
> #include <linux/mmc/core.h>
> #include <linux/dmaengine.h>
> #include <linux/reset.h>
> +#include <linux/fault-inject.h>
> +#include <linux/hrtimer.h>
> #include <linux/interrupt.h>
>
> enum dw_mci_state {
> @@ -230,6 +232,11 @@ struct dw_mci {
> struct timer_list cmd11_timer;
> struct timer_list cto_timer;
> struct timer_list dto_timer;
> +
> +#ifdef CONFIG_FAULT_INJECTION
> + struct fault_attr fail_data_crc;
> + struct hrtimer fault_timer;
> +#endif
> };
>
> /* DMA ops for Internal/External DMAC interface */
>

2021-08-04 13:04:37

by Ulf Hansson

[permalink] [raw]
Subject: Re: [PATCH v2] mmc: dw_mmc: Add data CRC error injection

On Thu, 1 Jul 2021 at 10:05, Vincent Whitchurch
<[email protected]> wrote:
>
> This driver has had problems when handling data errors. Add fault
> injection support so that the abort handling can be easily triggered and
> regression-tested. A hrtimer is used to indicate a data CRC error at
> various points during the data transfer.
>
> Note that for the recent problem with hangs in the case of some data CRC
> errors, a udelay(10) inserted at the start of send_stop_abort() greatly
> helped in triggering the error, but I've not included this as part of
> the fault injection support since it seemed too specific.
>
> Signed-off-by: Vincent Whitchurch <[email protected]>

Applied for next, thanks!

Kind regards
Uffe


> ---
>
> Notes:
> v2: Add missing includes.
>
> drivers/mmc/host/dw_mmc.c | 73 +++++++++++++++++++++++++++++++++++++++
> drivers/mmc/host/dw_mmc.h | 7 ++++
> 2 files changed, 80 insertions(+)
>
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index d333130d1531..dbbb94e6ff4b 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -17,9 +17,11 @@
> #include <linux/interrupt.h>
> #include <linux/iopoll.h>
> #include <linux/ioport.h>
> +#include <linux/ktime.h>
> #include <linux/module.h>
> #include <linux/platform_device.h>
> #include <linux/pm_runtime.h>
> +#include <linux/prandom.h>
> #include <linux/seq_file.h>
> #include <linux/slab.h>
> #include <linux/stat.h>
> @@ -181,6 +183,9 @@ static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
> &host->pending_events);
> debugfs_create_xul("completed_events", S_IRUSR, root,
> &host->completed_events);
> +#ifdef CONFIG_FAULT_INJECTION
> + fault_create_debugfs_attr("fail_data_crc", root, &host->fail_data_crc);
> +#endif
> }
> #endif /* defined(CONFIG_DEBUG_FS) */
>
> @@ -1788,6 +1793,68 @@ static const struct mmc_host_ops dw_mci_ops = {
> .prepare_hs400_tuning = dw_mci_prepare_hs400_tuning,
> };
>
> +#ifdef CONFIG_FAULT_INJECTION
> +static enum hrtimer_restart dw_mci_fault_timer(struct hrtimer *t)
> +{
> + struct dw_mci *host = container_of(t, struct dw_mci, fault_timer);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->irq_lock, flags);
> +
> + if (!host->data_status)
> + host->data_status = SDMMC_INT_DCRC;
> + set_bit(EVENT_DATA_ERROR, &host->pending_events);
> + tasklet_schedule(&host->tasklet);
> +
> + spin_unlock_irqrestore(&host->irq_lock, flags);
> +
> + return HRTIMER_NORESTART;
> +}
> +
> +static void dw_mci_start_fault_timer(struct dw_mci *host)
> +{
> + struct mmc_data *data = host->data;
> +
> + if (!data || data->blocks <= 1)
> + return;
> +
> + if (!should_fail(&host->fail_data_crc, 1))
> + return;
> +
> + /*
> + * Try to inject the error at random points during the data transfer.
> + */
> + hrtimer_start(&host->fault_timer,
> + ms_to_ktime(prandom_u32() % 25),
> + HRTIMER_MODE_REL);
> +}
> +
> +static void dw_mci_stop_fault_timer(struct dw_mci *host)
> +{
> + hrtimer_cancel(&host->fault_timer);
> +}
> +
> +static void dw_mci_init_fault(struct dw_mci *host)
> +{
> + host->fail_data_crc = (struct fault_attr) FAULT_ATTR_INITIALIZER;
> +
> + hrtimer_init(&host->fault_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + host->fault_timer.function = dw_mci_fault_timer;
> +}
> +#else
> +static void dw_mci_init_fault(struct dw_mci *host)
> +{
> +}
> +
> +static void dw_mci_start_fault_timer(struct dw_mci *host)
> +{
> +}
> +
> +static void dw_mci_stop_fault_timer(struct dw_mci *host)
> +{
> +}
> +#endif
> +
> static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
> __releases(&host->lock)
> __acquires(&host->lock)
> @@ -2102,6 +2169,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
> break;
> }
>
> + dw_mci_stop_fault_timer(host);
> host->data = NULL;
> set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
> err = dw_mci_data_complete(host, data);
> @@ -2151,6 +2219,7 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t)
> if (mrq->cmd->error && mrq->data)
> dw_mci_reset(host);
>
> + dw_mci_stop_fault_timer(host);
> host->cmd = NULL;
> host->data = NULL;
>
> @@ -2600,6 +2669,8 @@ static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
>
> set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
> tasklet_schedule(&host->tasklet);
> +
> + dw_mci_start_fault_timer(host);
> }
>
> static void dw_mci_handle_cd(struct dw_mci *host)
> @@ -3223,6 +3294,8 @@ int dw_mci_probe(struct dw_mci *host)
> spin_lock_init(&host->irq_lock);
> INIT_LIST_HEAD(&host->queue);
>
> + dw_mci_init_fault(host);
> +
> /*
> * Get the host data width - this assumes that HCON has been set with
> * the correct values.
> diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
> index da5923a92e60..ce05d81477d9 100644
> --- a/drivers/mmc/host/dw_mmc.h
> +++ b/drivers/mmc/host/dw_mmc.h
> @@ -14,6 +14,8 @@
> #include <linux/mmc/core.h>
> #include <linux/dmaengine.h>
> #include <linux/reset.h>
> +#include <linux/fault-inject.h>
> +#include <linux/hrtimer.h>
> #include <linux/interrupt.h>
>
> enum dw_mci_state {
> @@ -230,6 +232,11 @@ struct dw_mci {
> struct timer_list cmd11_timer;
> struct timer_list cto_timer;
> struct timer_list dto_timer;
> +
> +#ifdef CONFIG_FAULT_INJECTION
> + struct fault_attr fail_data_crc;
> + struct hrtimer fault_timer;
> +#endif
> };
>
> /* DMA ops for Internal/External DMAC interface */
> --
> 2.28.0
>