2013-05-10 06:46:05

by KOBAYASHI Yoshitake

[permalink] [raw]
Subject: [PATCH] mmc: block: Fix the error handling of write command response

Current MMC driver does not recognize the general error (bit19 of device
status) in write command sequence.
So the host might ignore the general error.
This patch adds the code which checks the response of CMD12/CMD13 in write
command sequence and retry the write command sequence if the error is
recognized.

Signed-off-by: Yoshitake Kobayashi <[email protected]>
---
drivers/mmc/card/block.c | 26 +++++++++++++++++++++++---
1 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index e12a03c..44f55d1 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -770,7 +770,7 @@ static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
* Otherwise we don't understand what happened, so abort.
*/
static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
- struct mmc_blk_request *brq, int *ecc_err)
+ struct mmc_blk_request *brq, int *ecc_err, int *gen_err)
{
bool prev_cmd_status_valid = true;
u32 status, stop_status = 0;
@@ -808,6 +808,12 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
(brq->cmd.resp[0] & R1_CARD_ECC_FAILED))
*ecc_err = 1;

+ /* Flag General error */
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if ((status & R1_ERROR) ||
+ (brq->stop.resp[0] & R1_ERROR))
+ *gen_err = 1;
+
/*
* Check the current card state. If it is in some data transfer
* mode, tell it to stop (and hopefully transition back to TRAN.)
@@ -827,6 +833,9 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
return ERR_ABORT;
if (stop_status & R1_CARD_ECC_FAILED)
*ecc_err = 1;
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if (stop_status & R1_ERROR)
+ *gen_err = 1;
}

/* Check for set block count errors */
@@ -1070,7 +1079,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
mmc_active);
struct mmc_blk_request *brq = &mq_mrq->brq;
struct request *req = mq_mrq->req;
- int ecc_err = 0;
+ int ecc_err = 0, gen_err = 0;

/*
* sbc.error indicates a problem with the set block count
@@ -1084,7 +1093,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
*/
if (brq->sbc.error || brq->cmd.error || brq->stop.error ||
brq->data.error) {
- switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err)) {
+ switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
case ERR_RETRY:
return MMC_BLK_RETRY;
case ERR_ABORT:
@@ -1116,6 +1125,10 @@ static int mmc_blk_err_check(struct mmc_card *card,
u32 status;
unsigned long timeout;

+ /* Check stop command response */
+ if (brq->stop.resp[0] & R1_ERROR)
+ gen_err = 1;
+
timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS);
do {
int err = get_card_status(card, &status, 5);
@@ -1125,6 +1138,9 @@ static int mmc_blk_err_check(struct mmc_card *card,
return MMC_BLK_CMD_ERR;
}

+ if (status & R1_ERROR)
+ gen_err = 1;
+
/* Timeout if the device never becomes ready for data
* and never leaves the program state.
*/
@@ -1144,6 +1160,10 @@ static int mmc_blk_err_check(struct mmc_card *card,
(R1_CURRENT_STATE(status) == R1_STATE_PRG));
}

+ /* if error occur, retry operation executes */
+ if (gen_err)
+ return MMC_BLK_RETRY;
+
if (brq->data.error) {
pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, brq->data.error,
--
1.7.0.4


2013-07-01 08:26:06

by KOBAYASHI Yoshitake

[permalink] [raw]
Subject: [PATCH 3.4-stable] mmc: block: fix a bug of error handling in MMC driver

Backport for 3.4-stable for the following patch:
https://lkml.org/lkml/2013/5/10/20

This patch fixes a bug of error handling in Linux MMC driver.

Current MMC driver doesn't handle generic error (bit19 of device
status) in write sequence. As a result, write data gets lost when
generic error occurs. For example, a generic error when updating a
filesystem management information causes a loss of write data and
corrupts the filesystem. In the worst case, the system will never
boot.

This patch includes the following functionality:
1. To enable error checking for the response of CMD12 and CMD13
in write command sequence
2. To retry write sequence when a generic error occures

This patch was also posted to 3.4.46-LTSI.
https://patchwork.kernel.org/patch/2726801/

Signed-off-by: KOBAYASHI Yoshitake <[email protected]>
---
drivers/mmc/card/block.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 833ff16..e77053f 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -701,7 +701,7 @@ static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
* Otherwise we don't understand what happened, so abort.
*/
static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
- struct mmc_blk_request *brq, int *ecc_err)
+ struct mmc_blk_request *brq, int *ecc_err, int *gen_err)
{
bool prev_cmd_status_valid = true;
u32 status, stop_status = 0;
@@ -739,6 +739,16 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
(brq->cmd.resp[0] & R1_CARD_ECC_FAILED))
*ecc_err = 1;

+ /* Flag General errors */
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if ((status & R1_ERROR) ||
+ (brq->stop.resp[0] & R1_ERROR)) {
+ pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n",
+ req->rq_disk->disk_name, __func__,
+ brq->stop.resp[0], status);
+ *gen_err = 1;
+ }
+
/*
* Check the current card state. If it is in some data transfer
* mode, tell it to stop (and hopefully transition back to TRAN.)
@@ -758,6 +768,13 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
return ERR_ABORT;
if (stop_status & R1_CARD_ECC_FAILED)
*ecc_err = 1;
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if (stop_status & R1_ERROR) {
+ pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+ req->rq_disk->disk_name, __func__,
+ stop_status);
+ *gen_err = 1;
+ }
}

/* Check for set block count errors */
@@ -1007,7 +1024,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
mmc_active);
struct mmc_blk_request *brq = &mq_mrq->brq;
struct request *req = mq_mrq->req;
- int ecc_err = 0;
+ int ecc_err = 0, gen_err = 0;

/*
* sbc.error indicates a problem with the set block count
@@ -1021,7 +1038,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
*/
if (brq->sbc.error || brq->cmd.error || brq->stop.error ||
brq->data.error) {
- switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err)) {
+ switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
case ERR_RETRY:
return MMC_BLK_RETRY;
case ERR_ABORT:
@@ -1051,6 +1068,15 @@ static int mmc_blk_err_check(struct mmc_card *card,
*/
if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
u32 status;
+
+ /* Check stop command response */
+ if (brq->stop.resp[0] & R1_ERROR) {
+ pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+ req->rq_disk->disk_name, __func__,
+ brq->stop.resp[0]);
+ gen_err = 1;
+ }
+
do {
int err = get_card_status(card, &status, 5);
if (err) {
@@ -1058,6 +1084,13 @@ static int mmc_blk_err_check(struct mmc_card *card,
req->rq_disk->disk_name, err);
return MMC_BLK_CMD_ERR;
}
+
+ if (status & R1_ERROR) {
+ pr_err("%s: %s: general error sending status command, card status %#x\n",
+ req->rq_disk->disk_name, __func__,
+ status);
+ gen_err = 1;
+ }
/*
* Some cards mishandle the status bits,
* so make sure to check both the busy
@@ -1067,6 +1100,13 @@ static int mmc_blk_err_check(struct mmc_card *card,
(R1_CURRENT_STATE(status) == R1_STATE_PRG));
}

+ /* if general error occurs, retry the write operation. */
+ if (gen_err) {
+ pr_warning("%s: retrying write for general error\n",
+ req->rq_disk->disk_name);
+ return MMC_BLK_RETRY;
+ }
+
if (brq->data.error) {
pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, brq->data.error,
--
1.7.0.4

2013-07-01 18:42:15

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH 3.4-stable] mmc: block: fix a bug of error handling in MMC driver

On Mon, Jul 01, 2013 at 04:56:25PM +0900, KOBAYASHI Yoshitake wrote:
> Backport for 3.4-stable for the following patch:
> https://lkml.org/lkml/2013/5/10/20

What is the git commit id of this patch in Linus's tree?

thanks,

greg k-h

2013-07-03 14:42:16

by KOBAYASHI Yoshitake

[permalink] [raw]
Subject: Re: [PATCH 3.4-stable] mmc: block: fix a bug of error handling in MMC driver

2013/07/02 3:42, Greg KH wrote:
> On Mon, Jul 01, 2013 at 04:56:25PM +0900, KOBAYASHI Yoshitake wrote:
>> Backport for 3.4-stable for the following patch:
>> https://lkml.org/lkml/2013/5/10/20
>
> What is the git commit id of this patch in Linus's tree?

Sorry, this is my mistake. Please ignore this patch for now.
Thanks for your comment.

--
Yoshi

>
> thanks,
>
> greg k-h
>
>


2013-07-06 22:43:04

by KOBAYASHI Yoshitake

[permalink] [raw]
Subject: [PATCH v2] mmc: block: fix a bug of error handling in MMC driver

From: KOBAYASHI Yoshitake <[email protected]>

This patch fixes a bug of error handling in Linux MMC driver.

Current MMC driver doesn't handle generic error (bit19 of device
status) in write sequence. As a result, write data gets lost when
generic error occurs. For example, a generic error when updating a
filesystem management information causes a loss of write data and
corrupts the filesystem. In the worst case, the system will never
boot.

This patch includes the following functionality:
1. To enable error checking for the response of CMD12 and CMD13
in write command sequence
2. To retry write sequence when a generic error occurs

Messages are added for v2 to show what occurs.

Signed-off-by: KOBAYASHI Yoshitake <[email protected]>
---
drivers/mmc/card/block.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index dd27b07..76a3d3a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -769,7 +769,7 @@ static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
* Otherwise we don't understand what happened, so abort.
*/
static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
- struct mmc_blk_request *brq, int *ecc_err)
+ struct mmc_blk_request *brq, int *ecc_err, int *gen_err)
{
bool prev_cmd_status_valid = true;
u32 status, stop_status = 0;
@@ -807,6 +807,16 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
(brq->cmd.resp[0] & R1_CARD_ECC_FAILED))
*ecc_err = 1;

+ /* Flag General errors */
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if ((status & R1_ERROR) ||
+ (brq->stop.resp[0] & R1_ERROR)) {
+ pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n",
+ req->rq_disk->disk_name, __func__,
+ brq->stop.resp[0], status);
+ *gen_err = 1;
+ }
+
/*
* Check the current card state. If it is in some data transfer
* mode, tell it to stop (and hopefully transition back to TRAN.)
@@ -826,6 +836,13 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
return ERR_ABORT;
if (stop_status & R1_CARD_ECC_FAILED)
*ecc_err = 1;
+ if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+ if (stop_status & R1_ERROR) {
+ pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+ req->rq_disk->disk_name, __func__,
+ stop_status);
+ *gen_err = 1;
+ }
}

/* Check for set block count errors */
@@ -1069,7 +1086,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
mmc_active);
struct mmc_blk_request *brq = &mq_mrq->brq;
struct request *req = mq_mrq->req;
- int ecc_err = 0;
+ int ecc_err = 0, gen_err = 0;

/*
* sbc.error indicates a problem with the set block count
@@ -1083,7 +1100,7 @@ static int mmc_blk_err_check(struct mmc_card *card,
*/
if (brq->sbc.error || brq->cmd.error || brq->stop.error ||
brq->data.error) {
- switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err)) {
+ switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
case ERR_RETRY:
return MMC_BLK_RETRY;
case ERR_ABORT:
@@ -1115,6 +1132,14 @@ static int mmc_blk_err_check(struct mmc_card *card,
u32 status;
unsigned long timeout;

+ /* Check stop command response */
+ if (brq->stop.resp[0] & R1_ERROR) {
+ pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+ req->rq_disk->disk_name, __func__,
+ brq->stop.resp[0]);
+ gen_err = 1;
+ }
+
timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS);
do {
int err = get_card_status(card, &status, 5);
@@ -1124,6 +1149,13 @@ static int mmc_blk_err_check(struct mmc_card *card,
return MMC_BLK_CMD_ERR;
}

+ if (status & R1_ERROR) {
+ pr_err("%s: %s: general error sending status command, card status %#x\n",
+ req->rq_disk->disk_name, __func__,
+ status);
+ gen_err = 1;
+ }
+
/* Timeout if the device never becomes ready for data
* and never leaves the program state.
*/
@@ -1143,6 +1175,13 @@ static int mmc_blk_err_check(struct mmc_card *card,
(R1_CURRENT_STATE(status) == R1_STATE_PRG));
}

+ /* if general error occurs, retry the write operation. */
+ if (gen_err) {
+ pr_warn("%s: retrying write for general error\n",
+ req->rq_disk->disk_name);
+ return MMC_BLK_RETRY;
+ }
+
if (brq->data.error) {
pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, brq->data.error,
--
1.7.0.4