LinuxLists.cc - [PATCH v1 5/5] ufs: core: Add error handling for MCQ mode

2023-03-29 10:15:11

Subject: [PATCH v1 5/5] ufs: core: Add error handling for MCQ mode

Add support for error handling for MCQ mode.

Signed-off-by: Bao D. Nguyen <[email protected]>
---
drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 69 insertions(+), 11 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index fef1907..e947f7f 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
+
+ /* MCQ mode */
+ if (is_mcq_enabled(hba))
+ return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
+
+ /* SDB mode */
if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
@@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
*/
static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
{
+ struct ufshcd_lrb *lrbp;
+ u32 hwq_num, utag;
+ int tag;
+
/* Resetting interrupt aggregation counters first and reading the
* DOOR_BELL afterward allows us to handle all the completed requests.
* In order to prevent other interrupts starvation the DB is read once
@@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
* Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
* do not want polling to trigger spurious interrupt complaints.
*/
- ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+ if (!is_mcq_enabled(hba)) {
+ ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+ goto out;
+ }
+
+ /* MCQ mode */
+ for (tag = 0; tag < hba->nutrs; tag++) {
+ lrbp = &hba->lrb[tag];
+ if (lrbp->cmd) {
+ utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
+ hwq_num = blk_mq_unique_tag_to_hwq(utag);
+ ufshcd_poll(hba->host, hwq_num);
+ }
+ }
+
+out:

return IRQ_HANDLED;
}
@@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
bool needs_reset = false;
int tag, ret;

- /* Clear pending transfer requests */
- for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
- ret = ufshcd_try_to_abort_task(hba, tag);
- dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
- hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
- ret ? "failed" : "succeeded");
- if (ret) {
- needs_reset = true;
- goto out;
+ if (is_mcq_enabled(hba)) {
+ struct ufshcd_lrb *lrbp;
+ int tag;
+
+ for (tag = 0; tag < hba->nutrs; tag++) {
+ lrbp = &hba->lrb[tag];
+ if (lrbp->cmd) {
+ ret = ufshcd_try_to_abort_task(hba, tag);
+ dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+ hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+ ret ? "failed" : "succeeded");
+ }
+ if (ret) {
+ needs_reset = true;
+ goto out;
+ }
+ }
+ } else {
+ /* Clear pending transfer requests */
+ for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+ ret = ufshcd_try_to_abort_task(hba, tag);
+ dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+ hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+ ret ? "failed" : "succeeded");
+ if (ret) {
+ needs_reset = true;
+ goto out;
+ }
}
}
-
/* Clear pending task management requests */
for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
if (ufshcd_clear_tm_cmd(hba, tag)) {
@@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
unsigned long flags, pending_reqs = 0, not_cleared = 0;
struct Scsi_Host *host;
struct ufs_hba *hba;
+ struct ufs_hw_queue *hwq;
+ struct ufshcd_lrb *lrbp;
u32 pos;
int err;
u8 resp = 0xF, lun;
@@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
goto out;
}

+ if (is_mcq_enabled(hba)) {
+ for (pos = 0; pos < hba->nutrs; pos++) {
+ lrbp = &hba->lrb[pos];
+ if (lrbp->cmd && lrbp->lun == lun) {
+ ufshcd_clear_cmds(hba, 1UL << pos);
+ hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+ ufshcd_mcq_poll_cqe_lock(hba, hwq);
+ }
+ }
+ err = 0;
+ goto out;
+ }
+
/* clear the commands that were pending for corresponding LUN */
spin_lock_irqsave(&hba->outstanding_lock, flags);
for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
--
2.7.4

2023-04-27 07:21:19

by Stanley Jhu

[permalink] [raw]

Subject: Re: [PATCH v1 5/5] ufs: core: Add error handling for MCQ mode

Hi Bao,

Bao D. Nguyen <[email protected]> 於 2023年3月29日週三下午6:14寫道：
>
> Add support for error handling for MCQ mode.
>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 69 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index fef1907..e947f7f 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
> err = -ETIMEDOUT;
> dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
> __func__, lrbp->task_tag);
> +
> + /* MCQ mode */
> + if (is_mcq_enabled(hba))
> + return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);

When a timeout occurs during the command-clearing process, it appears
that the MCQ path does not properly clear 'hba->dev_cmd.complete'.
This could result in a NULL pointer dereference if the device command
interrupt arrives at a later time.

Could you please help check this?

Thanks,
Stanley Chu

> +
> + /* SDB mode */
> if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
> /* successfully cleared the command, retry if needed */
> err = -EAGAIN;
> @@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
> */
> static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
> {
> + struct ufshcd_lrb *lrbp;
> + u32 hwq_num, utag;
> + int tag;
> +
> /* Resetting interrupt aggregation counters first and reading the
> * DOOR_BELL afterward allows us to handle all the completed requests.
> * In order to prevent other interrupts starvation the DB is read once
> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
> * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
> * do not want polling to trigger spurious interrupt complaints.
> */
> - ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> + if (!is_mcq_enabled(hba)) {
> + ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> + goto out;
> + }
> +
> + /* MCQ mode */
> + for (tag = 0; tag < hba->nutrs; tag++) {
> + lrbp = &hba->lrb[tag];
> + if (lrbp->cmd) {
> + utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
> + hwq_num = blk_mq_unique_tag_to_hwq(utag);
> + ufshcd_poll(hba->host, hwq_num);
> + }
> + }
> +
> +out:
>
> return IRQ_HANDLED;
> }
> @@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
> bool needs_reset = false;
> int tag, ret;
>
> - /* Clear pending transfer requests */
> - for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
> - ret = ufshcd_try_to_abort_task(hba, tag);
> - dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> - hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> - ret ? "failed" : "succeeded");
> - if (ret) {
> - needs_reset = true;
> - goto out;
> + if (is_mcq_enabled(hba)) {
> + struct ufshcd_lrb *lrbp;
> + int tag;
> +
> + for (tag = 0; tag < hba->nutrs; tag++) {
> + lrbp = &hba->lrb[tag];
> + if (lrbp->cmd) {
> + ret = ufshcd_try_to_abort_task(hba, tag);
> + dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> + ret ? "failed" : "succeeded");
> + }
> + if (ret) {
> + needs_reset = true;
> + goto out;
> + }
> + }
> + } else {
> + /* Clear pending transfer requests */
> + for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
> + ret = ufshcd_try_to_abort_task(hba, tag);
> + dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> + ret ? "failed" : "succeeded");
> + if (ret) {
> + needs_reset = true;
> + goto out;
> + }
> }
> }
> -
> /* Clear pending task management requests */
> for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
> if (ufshcd_clear_tm_cmd(hba, tag)) {
> @@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
> unsigned long flags, pending_reqs = 0, not_cleared = 0;
> struct Scsi_Host *host;
> struct ufs_hba *hba;
> + struct ufs_hw_queue *hwq;
> + struct ufshcd_lrb *lrbp;
> u32 pos;
> int err;
> u8 resp = 0xF, lun;
> @@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
> goto out;
> }
>
> + if (is_mcq_enabled(hba)) {
> + for (pos = 0; pos < hba->nutrs; pos++) {
> + lrbp = &hba->lrb[pos];
> + if (lrbp->cmd && lrbp->lun == lun) {
> + ufshcd_clear_cmds(hba, 1UL << pos);
> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
> + }
> + }
> + err = 0;
> + goto out;
> + }
> +
> /* clear the commands that were pending for corresponding LUN */
> spin_lock_irqsave(&hba->outstanding_lock, flags);
> for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
> --
> 2.7.4
>

2023-05-04 04:23:41

by Bao D. Nguyen

[permalink] [raw]

Subject: Re: [PATCH v1 5/5] ufs: core: Add error handling for MCQ mode

On 4/27/2023 12:17 AM, Stanley Chu wrote:
> Hi Bao,
>
> Bao D. Nguyen <[email protected]> 於 2023年3月29日週三下午6:14寫道：
>>
>> Add support for error handling for MCQ mode.
>>
>> Signed-off-by: Bao D. Nguyen <[email protected]>
>> ---
>> drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 69 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>> index fef1907..e947f7f 100644
>> --- a/drivers/ufs/core/ufshcd.c
>> +++ b/drivers/ufs/core/ufshcd.c
>> @@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
>> err = -ETIMEDOUT;
>> dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
>> __func__, lrbp->task_tag);
>> +
>> + /* MCQ mode */
>> + if (is_mcq_enabled(hba))
>> + return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
>
> When a timeout occurs during the command-clearing process, it appears
> that the MCQ path does not properly clear 'hba->dev_cmd.complete'.
> This could result in a NULL pointer dereference if the device command
> interrupt arrives at a later time.
>
> Could you please help check this?
Thanks Stanley. I will take a look.

>
> Thanks,
> Stanley Chu
>
>> +
>> + /* SDB mode */
>> if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
>> /* successfully cleared the command, retry if needed */
>> err = -EAGAIN;
>> @@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
>> */
>> static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>> {
>> + struct ufshcd_lrb *lrbp;
>> + u32 hwq_num, utag;
>> + int tag;
>> +
>> /* Resetting interrupt aggregation counters first and reading the
>> * DOOR_BELL afterward allows us to handle all the completed requests.
>> * In order to prevent other interrupts starvation the DB is read once
>> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>> * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>> * do not want polling to trigger spurious interrupt complaints.
>> */
>> - ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> + if (!is_mcq_enabled(hba)) {
>> + ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> + goto out;
>> + }
>> +
>> + /* MCQ mode */
>> + for (tag = 0; tag < hba->nutrs; tag++) {
>> + lrbp = &hba->lrb[tag];
>> + if (lrbp->cmd) {
>> + utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
>> + hwq_num = blk_mq_unique_tag_to_hwq(utag);
>> + ufshcd_poll(hba->host, hwq_num);
>> + }
>> + }
>> +
>> +out:
>>
>> return IRQ_HANDLED;
>> }
>> @@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
>> bool needs_reset = false;
>> int tag, ret;
>>
>> - /* Clear pending transfer requests */
>> - for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
>> - ret = ufshcd_try_to_abort_task(hba, tag);
>> - dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> - hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> - ret ? "failed" : "succeeded");
>> - if (ret) {
>> - needs_reset = true;
>> - goto out;
>> + if (is_mcq_enabled(hba)) {
>> + struct ufshcd_lrb *lrbp;
>> + int tag;
>> +
>> + for (tag = 0; tag < hba->nutrs; tag++) {
>> + lrbp = &hba->lrb[tag];
>> + if (lrbp->cmd) {
>> + ret = ufshcd_try_to_abort_task(hba, tag);
>> + dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> + ret ? "failed" : "succeeded");
>> + }
>> + if (ret) {
>> + needs_reset = true;
>> + goto out;
>> + }
>> + }
>> + } else {
>> + /* Clear pending transfer requests */
>> + for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
>> + ret = ufshcd_try_to_abort_task(hba, tag);
>> + dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> + ret ? "failed" : "succeeded");
>> + if (ret) {
>> + needs_reset = true;
>> + goto out;
>> + }
>> }
>> }
>> -
>> /* Clear pending task management requests */
>> for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
>> if (ufshcd_clear_tm_cmd(hba, tag)) {
>> @@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>> unsigned long flags, pending_reqs = 0, not_cleared = 0;
>> struct Scsi_Host *host;
>> struct ufs_hba *hba;
>> + struct ufs_hw_queue *hwq;
>> + struct ufshcd_lrb *lrbp;
>> u32 pos;
>> int err;
>> u8 resp = 0xF, lun;
>> @@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>> goto out;
>> }
>>
>> + if (is_mcq_enabled(hba)) {
>> + for (pos = 0; pos < hba->nutrs; pos++) {
>> + lrbp = &hba->lrb[pos];
>> + if (lrbp->cmd && lrbp->lun == lun) {
>> + ufshcd_clear_cmds(hba, 1UL << pos);
>> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
>> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
>> + }
>> + }
>> + err = 0;
>> + goto out;
>> + }
>> +
>> /* clear the commands that were pending for corresponding LUN */
>> spin_lock_irqsave(&hba->outstanding_lock, flags);
>> for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
>> --
>> 2.7.4
>>