2023-03-08 04:04:50

by Bao D. Nguyen

[permalink] [raw]
Subject: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

Add ufshcd_mcq_abort() to support ufs abort in mcq mode.

Signed-off-by: Bao D. Nguyen <[email protected]>
---
drivers/ufs/core/ufs-mcq.c | 76 ++++++++++++++++++++++++++++++++++++++++++
drivers/ufs/core/ufshcd-priv.h | 5 ++-
drivers/ufs/core/ufshcd.c | 11 ++++--
3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index e27d8eb..6c65cd5 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -646,3 +646,79 @@ static bool ufshcd_mcq_cqe_search(struct ufs_hba *hba,
spin_unlock(&hwq->cq_lock);
return ret;
}
+
+/**
+ * ufshcd_mcq_abort - Abort the command in MCQ.
+ * @cmd - The command to be aborted.
+ *
+ * Returns SUCCESS or FAILED error codes
+ */
+int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
+{
+ struct Scsi_Host *host = cmd->device->host;
+ struct ufs_hba *hba = shost_priv(host);
+ int tag = scsi_cmd_to_rq(cmd)->tag;
+ struct ufshcd_lrb *lrbp = &hba->lrb[tag];
+ struct ufs_hw_queue *hwq;
+ int err = FAILED;
+
+ if (!lrbp->cmd) {
+ dev_err(hba->dev,
+ "%s: skip abort. cmd at tag %d already completed.\n",
+ __func__, tag);
+ goto out;
+ }
+
+ /* Skip task abort in case previous aborts failed and report failure */
+ if (lrbp->req_abort_skip) {
+ dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
+ __func__, tag);
+ goto out;
+ }
+
+ hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
+
+ if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
+ /*
+ * Failure. The command should not be "stuck" in SQ for
+ * a long time which resulted in command being aborted.
+ */
+ dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
+ __func__, hwq->id, tag);
+ /* Set the Command Type to 0xF per the spec */
+ ufshcd_mcq_nullify_cmd(hba, hwq);
+ goto out;
+ }
+
+ if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
+ dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
+ __func__, hwq->id, tag);
+ /*
+ * The command should not be 'stuck' in the CQ for such a long time.
+ * Is interrupt missing? Process the CQEs here. If the interrupt is
+ * invoked at a later time, the CQ will be empty because the CQEs
+ * are already processed here.
+ */
+ ufshcd_mcq_poll_cqe_lock(hba, hwq);
+ err = SUCCESS;
+ goto out;
+ }
+
+ /*
+ * The command is not in the Submission Queue, and it is not
+ * in the Completion Queue either. Query the device to see if
+ * the command is being processed in the device.
+ */
+ if (ufshcd_try_to_abort_task(hba, tag)) {
+ dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
+ lrbp->req_abort_skip = true;
+ goto out;
+ }
+
+ err = SUCCESS;
+ if (lrbp->cmd)
+ ufshcd_release_scsi_cmd(hba, lrbp);
+
+out:
+ return err;
+}
diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index 0527926..d883c03 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -77,7 +77,10 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
struct ufs_hw_queue *hwq);

int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag, int *result);
-
+int ufshcd_mcq_abort(struct scsi_cmnd *cmd);
+int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
+void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
+ struct ufshcd_lrb *lrbp);
#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
#define SD_ASCII_STD true
#define SD_RAW false
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 408c16c..e06399e 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -302,7 +302,6 @@ static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
struct ufs_vreg *vreg);
-static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
static void ufshcd_wb_toggle_buf_flush_during_h8(struct ufs_hba *hba,
bool enable);
static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
@@ -5414,7 +5413,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
}

/* Release the resources allocated for processing a SCSI command. */
-static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
+void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp)
{
struct scsi_cmnd *cmd = lrbp->cmd;
@@ -7340,7 +7339,7 @@ static void ufshcd_set_req_abort_skip(struct ufs_hba *hba, unsigned long bitmap)
*
* Returns zero on success, non-zero on failure
*/
-static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
+int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
{
struct ufshcd_lrb *lrbp = &hba->lrb[tag];
int err = 0;
@@ -7500,6 +7499,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
goto release;
}

+ if (is_mcq_enabled(hba)) {
+ /* MCQ mode. Branch off to handle abort for mcq mode */
+ err = ufshcd_mcq_abort(cmd);
+ goto release;
+ }
+
/* Skip task abort in case previous aborts failed and report failure */
if (lrbp->req_abort_skip) {
dev_err(hba->dev, "%s: skipping abort\n", __func__);
--
2.7.4



2023-03-08 19:03:36

by Bart Van Assche

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/7/23 20:01, Bao D. Nguyen wrote:
> + if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
> + dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
> + __func__, hwq->id, tag);
> + /*
> + * The command should not be 'stuck' in the CQ for such a long time.
> + * Is interrupt missing? Process the CQEs here. If the interrupt is
> + * invoked at a later time, the CQ will be empty because the CQEs
> + * are already processed here.
> + */
> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
> + err = SUCCESS;
> + goto out;
> + }

Please remove the above code and also the definition of the
ufshcd_mcq_cqe_search() function. The SCSI error handler submits an
abort to deal with command processing timeouts. ufshcd_mcq_cqe_search()
can only return true in case of a software bug at the host side.
Addressing such bugs is out of scope for the SCSI error handler.

Thanks,

Bart.

2023-03-08 22:37:31

by Bao D. Nguyen

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/8/2023 11:02 AM, Bart Van Assche wrote:
> On 3/7/23 20:01, Bao D. Nguyen wrote:
>> +    if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
>> +        dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
>> +                __func__, hwq->id, tag);
>> +        /*
>> +         * The command should not be 'stuck' in the CQ for such a
>> long time.
>> +         * Is interrupt missing? Process the CQEs here. If the
>> interrupt is
>> +         * invoked at a later time, the CQ will be empty because the
>> CQEs
>> +         * are already processed here.
>> +         */
>> +        ufshcd_mcq_poll_cqe_lock(hba, hwq);
>> +        err = SUCCESS;
>> +        goto out;
>> +    }
>
> Please remove the above code and also the definition of the
> ufshcd_mcq_cqe_search() function. The SCSI error handler submits an
> abort to deal with command processing timeouts.
> ufshcd_mcq_cqe_search() can only return true in case of a software bug
> at the host side. Addressing such bugs is out of scope for the SCSI
> error handler.

This is an attempt to handle the error case similar to SDB mode where it
prints "%s: cmd was completed, but without a notifying intr, tag = %d"
in the ufshcd_abort() function.

In this case the command has been completed by the hardware, but some
reasons the software has not processed it. We have seen this print
happened during debug sessions, so the error case does happen in SBL mode.

Are you suggesting we should return error in this case without calling
ufshcd_mcq_poll_cqe_lock()?

Thanks.

>
>
> Thanks,
>
> Bart.



2023-03-08 23:25:47

by Bart Van Assche

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/8/23 14:37, Bao D. Nguyen wrote:
> On 3/8/2023 11:02 AM, Bart Van Assche wrote:
>> On 3/7/23 20:01, Bao D. Nguyen wrote:
>>> +    if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
>>> +        dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
>>> +                __func__, hwq->id, tag);
>>> +        /*
>>> +         * The command should not be 'stuck' in the CQ for such a
>>> long time.
>>> +         * Is interrupt missing? Process the CQEs here. If the
>>> interrupt is
>>> +         * invoked at a later time, the CQ will be empty because the
>>> CQEs
>>> +         * are already processed here.
>>> +         */
>>> +        ufshcd_mcq_poll_cqe_lock(hba, hwq);
>>> +        err = SUCCESS;
>>> +        goto out;
>>> +    }
>>
>> Please remove the above code and also the definition of the
>> ufshcd_mcq_cqe_search() function. The SCSI error handler submits an
>> abort to deal with command processing timeouts.
>> ufshcd_mcq_cqe_search() can only return true in case of a software bug
>> at the host side. Addressing such bugs is out of scope for the SCSI
>> error handler.
>
> This is an attempt to handle the error case similar to SDB mode where it
> prints "%s: cmd was completed, but without a notifying intr, tag = %d"
> in the ufshcd_abort() function.
>
> In this case the command has been completed by the hardware, but some
> reasons the software has not processed it. We have seen this print
> happened during debug sessions, so the error case does happen in SBL mode.
>
> Are you suggesting we should return error in this case without calling
> ufshcd_mcq_poll_cqe_lock()?

What I am asking is to remove ufshcd_mcq_poll_cqe_lock() and all code
that depends on that function returning true. Although such code might
be useful for SoC debugging, helping with SoC debugging is out of scope
for Linux kernel drivers.

Thanks,

Bart.


2023-03-09 01:36:19

by Bao D. Nguyen

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/8/2023 3:25 PM, Bart Van Assche wrote:
> On 3/8/23 14:37, Bao D. Nguyen wrote:
>> On 3/8/2023 11:02 AM, Bart Van Assche wrote:
>>> On 3/7/23 20:01, Bao D. Nguyen wrote:
>>>> +    if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
>>>> +        dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
>>>> +                __func__, hwq->id, tag);
>>>> +        /*
>>>> +         * The command should not be 'stuck' in the CQ for such a
>>>> long time.
>>>> +         * Is interrupt missing? Process the CQEs here. If the
>>>> interrupt is
>>>> +         * invoked at a later time, the CQ will be empty because
>>>> the CQEs
>>>> +         * are already processed here.
>>>> +         */
>>>> +        ufshcd_mcq_poll_cqe_lock(hba, hwq);
>>>> +        err = SUCCESS;
>>>> +        goto out;
>>>> +    }
>>>
>>> Please remove the above code and also the definition of the
>>> ufshcd_mcq_cqe_search() function. The SCSI error handler submits an
>>> abort to deal with command processing timeouts.
>>> ufshcd_mcq_cqe_search() can only return true in case of a software
>>> bug at the host side. Addressing such bugs is out of scope for the
>>> SCSI error handler.
>>
>> This is an attempt to handle the error case similar to SDB mode where
>> it prints "%s: cmd was completed, but without a notifying intr, tag =
>> %d" in the ufshcd_abort() function.
>>
>> In this case the command has been completed by the hardware, but some
>> reasons the software has not processed it. We have seen this print
>> happened during debug sessions, so the error case does happen in SBL
>> mode.
>>
>> Are you suggesting we should return error in this case without
>> calling ufshcd_mcq_poll_cqe_lock()?
>
> What I am asking is to remove ufshcd_mcq_poll_cqe_lock() and all code
> that depends on that function returning true. Although such code might
> be useful for SoC debugging, helping with SoC debugging is out of
> scope for Linux kernel drivers.
I will remove it. In that case, we don't need the first patch of this
series, so I will remove the first patch as well. Thanks.
>
> Thanks,
>
> Bart.
>


2023-03-09 03:10:41

by Stanley Jhu

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

Hi Bao,

On Wed, Mar 8, 2023 at 12:10 PM Bao D. Nguyen <[email protected]> wrote:
>
> Add ufshcd_mcq_abort() to support ufs abort in mcq mode.
>
> Signed-off-by: Bao D. Nguyen <[email protected]>
> ---
> drivers/ufs/core/ufs-mcq.c | 76 ++++++++++++++++++++++++++++++++++++++++++
> drivers/ufs/core/ufshcd-priv.h | 5 ++-
> drivers/ufs/core/ufshcd.c | 11 ++++--
> 3 files changed, 88 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
> index e27d8eb..6c65cd5 100644
> --- a/drivers/ufs/core/ufs-mcq.c
> +++ b/drivers/ufs/core/ufs-mcq.c
> @@ -646,3 +646,79 @@ static bool ufshcd_mcq_cqe_search(struct ufs_hba *hba,
> spin_unlock(&hwq->cq_lock);
> return ret;
> }
> +
> +/**
> + * ufshcd_mcq_abort - Abort the command in MCQ.
> + * @cmd - The command to be aborted.
> + *
> + * Returns SUCCESS or FAILED error codes
> + */
> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
> +{
> + struct Scsi_Host *host = cmd->device->host;
> + struct ufs_hba *hba = shost_priv(host);
> + int tag = scsi_cmd_to_rq(cmd)->tag;
> + struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> + struct ufs_hw_queue *hwq;
> + int err = FAILED;
> +
> + if (!lrbp->cmd) {
> + dev_err(hba->dev,
> + "%s: skip abort. cmd at tag %d already completed.\n",
> + __func__, tag);
> + goto out;
> + }
> +
> + /* Skip task abort in case previous aborts failed and report failure */
> + if (lrbp->req_abort_skip) {
> + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
> + __func__, tag);
> + goto out;
> + }
> +
> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
> +
> + if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
> + /*
> + * Failure. The command should not be "stuck" in SQ for
> + * a long time which resulted in command being aborted.
> + */
> + dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
> + __func__, hwq->id, tag);
> + /* Set the Command Type to 0xF per the spec */
> + ufshcd_mcq_nullify_cmd(hba, hwq);
> + goto out;
> + }
> +
> + if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
> + dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
> + __func__, hwq->id, tag);
> + /*
> + * The command should not be 'stuck' in the CQ for such a long time.
> + * Is interrupt missing? Process the CQEs here. If the interrupt is
> + * invoked at a later time, the CQ will be empty because the CQEs
> + * are already processed here.
> + */
> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
> + err = SUCCESS;
> + goto out;
> + }
> +
> + /*
> + * The command is not in the Submission Queue, and it is not
> + * in the Completion Queue either. Query the device to see if
> + * the command is being processed in the device.
> + */
> + if (ufshcd_try_to_abort_task(hba, tag)) {
> + dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
> + lrbp->req_abort_skip = true;
> + goto out;
> + }
> +
> + err = SUCCESS;
> + if (lrbp->cmd)
> + ufshcd_release_scsi_cmd(hba, lrbp);
> +
> +out:
> + return err;
> +}
> diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
> index 0527926..d883c03 100644
> --- a/drivers/ufs/core/ufshcd-priv.h
> +++ b/drivers/ufs/core/ufshcd-priv.h
> @@ -77,7 +77,10 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
> struct ufs_hw_queue *hwq);
>
> int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag, int *result);
> -
> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd);
> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> + struct ufshcd_lrb *lrbp);
> #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
> #define SD_ASCII_STD true
> #define SD_RAW false
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 408c16c..e06399e 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -302,7 +302,6 @@ static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
> static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
> static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
> struct ufs_vreg *vreg);
> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
> static void ufshcd_wb_toggle_buf_flush_during_h8(struct ufs_hba *hba,
> bool enable);
> static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
> @@ -5414,7 +5413,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
> }
>
> /* Release the resources allocated for processing a SCSI command. */
> -static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> struct ufshcd_lrb *lrbp)
> {
> struct scsi_cmnd *cmd = lrbp->cmd;
> @@ -7340,7 +7339,7 @@ static void ufshcd_set_req_abort_skip(struct ufs_hba *hba, unsigned long bitmap)
> *
> * Returns zero on success, non-zero on failure
> */
> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
> {
> struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> int err = 0;
> @@ -7500,6 +7499,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
> goto release;
> }
>
> + if (is_mcq_enabled(hba)) {
> + /* MCQ mode. Branch off to handle abort for mcq mode */
> + err = ufshcd_mcq_abort(cmd);
> + goto release;
> + }
> +
> /* Skip task abort in case previous aborts failed and report failure */
> if (lrbp->req_abort_skip) {
> dev_err(hba->dev, "%s: skipping abort\n", __func__);
> --
> 2.7.4
>

It seems that ufshcd_abort_all() also needs an error handling flow for MCQ.

Thanks,
Stanley

2023-03-09 05:34:18

by Bao D. Nguyen

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/8/2023 7:10 PM, Stanley Chu wrote:
> Hi Bao,
>
> On Wed, Mar 8, 2023 at 12:10 PM Bao D. Nguyen <[email protected]> wrote:
>> Add ufshcd_mcq_abort() to support ufs abort in mcq mode.
>>
>> Signed-off-by: Bao D. Nguyen <[email protected]>
>> ---
>> drivers/ufs/core/ufs-mcq.c | 76 ++++++++++++++++++++++++++++++++++++++++++
>> drivers/ufs/core/ufshcd-priv.h | 5 ++-
>> drivers/ufs/core/ufshcd.c | 11 ++++--
>> 3 files changed, 88 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
>> index e27d8eb..6c65cd5 100644
>> --- a/drivers/ufs/core/ufs-mcq.c
>> +++ b/drivers/ufs/core/ufs-mcq.c
>> @@ -646,3 +646,79 @@ static bool ufshcd_mcq_cqe_search(struct ufs_hba *hba,
>> spin_unlock(&hwq->cq_lock);
>> return ret;
>> }
>> +
>> +/**
>> + * ufshcd_mcq_abort - Abort the command in MCQ.
>> + * @cmd - The command to be aborted.
>> + *
>> + * Returns SUCCESS or FAILED error codes
>> + */
>> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
>> +{
>> + struct Scsi_Host *host = cmd->device->host;
>> + struct ufs_hba *hba = shost_priv(host);
>> + int tag = scsi_cmd_to_rq(cmd)->tag;
>> + struct ufshcd_lrb *lrbp = &hba->lrb[tag];
>> + struct ufs_hw_queue *hwq;
>> + int err = FAILED;
>> +
>> + if (!lrbp->cmd) {
>> + dev_err(hba->dev,
>> + "%s: skip abort. cmd at tag %d already completed.\n",
>> + __func__, tag);
>> + goto out;
>> + }
>> +
>> + /* Skip task abort in case previous aborts failed and report failure */
>> + if (lrbp->req_abort_skip) {
>> + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
>> + __func__, tag);
>> + goto out;
>> + }
>> +
>> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
>> +
>> + if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
>> + /*
>> + * Failure. The command should not be "stuck" in SQ for
>> + * a long time which resulted in command being aborted.
>> + */
>> + dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
>> + __func__, hwq->id, tag);
>> + /* Set the Command Type to 0xF per the spec */
>> + ufshcd_mcq_nullify_cmd(hba, hwq);
>> + goto out;
>> + }
>> +
>> + if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
>> + dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
>> + __func__, hwq->id, tag);
>> + /*
>> + * The command should not be 'stuck' in the CQ for such a long time.
>> + * Is interrupt missing? Process the CQEs here. If the interrupt is
>> + * invoked at a later time, the CQ will be empty because the CQEs
>> + * are already processed here.
>> + */
>> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
>> + err = SUCCESS;
>> + goto out;
>> + }
>> +
>> + /*
>> + * The command is not in the Submission Queue, and it is not
>> + * in the Completion Queue either. Query the device to see if
>> + * the command is being processed in the device.
>> + */
>> + if (ufshcd_try_to_abort_task(hba, tag)) {
>> + dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
>> + lrbp->req_abort_skip = true;
>> + goto out;
>> + }
>> +
>> + err = SUCCESS;
>> + if (lrbp->cmd)
>> + ufshcd_release_scsi_cmd(hba, lrbp);
>> +
>> +out:
>> + return err;
>> +}
>> diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
>> index 0527926..d883c03 100644
>> --- a/drivers/ufs/core/ufshcd-priv.h
>> +++ b/drivers/ufs/core/ufshcd-priv.h
>> @@ -77,7 +77,10 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
>> struct ufs_hw_queue *hwq);
>>
>> int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag, int *result);
>> -
>> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd);
>> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
>> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>> + struct ufshcd_lrb *lrbp);
>> #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
>> #define SD_ASCII_STD true
>> #define SD_RAW false
>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>> index 408c16c..e06399e 100644
>> --- a/drivers/ufs/core/ufshcd.c
>> +++ b/drivers/ufs/core/ufshcd.c
>> @@ -302,7 +302,6 @@ static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
>> static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
>> static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
>> struct ufs_vreg *vreg);
>> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
>> static void ufshcd_wb_toggle_buf_flush_during_h8(struct ufs_hba *hba,
>> bool enable);
>> static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
>> @@ -5414,7 +5413,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
>> }
>>
>> /* Release the resources allocated for processing a SCSI command. */
>> -static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>> struct ufshcd_lrb *lrbp)
>> {
>> struct scsi_cmnd *cmd = lrbp->cmd;
>> @@ -7340,7 +7339,7 @@ static void ufshcd_set_req_abort_skip(struct ufs_hba *hba, unsigned long bitmap)
>> *
>> * Returns zero on success, non-zero on failure
>> */
>> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
>> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
>> {
>> struct ufshcd_lrb *lrbp = &hba->lrb[tag];
>> int err = 0;
>> @@ -7500,6 +7499,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>> goto release;
>> }
>>
>> + if (is_mcq_enabled(hba)) {
>> + /* MCQ mode. Branch off to handle abort for mcq mode */
>> + err = ufshcd_mcq_abort(cmd);
>> + goto release;
>> + }
>> +
>> /* Skip task abort in case previous aborts failed and report failure */
>> if (lrbp->req_abort_skip) {
>> dev_err(hba->dev, "%s: skipping abort\n", __func__);
>> --
>> 2.7.4
>>
> It seems that ufshcd_abort_all() also needs an error handling flow for MCQ.

Hi Stanley, thank you for the reviews.

I am not able to find the function ufshcd_abort_all() in the drivers/ufs
directory. Can you please clarify where this function is located? Thanks.

>
> Thanks,
> Stanley



2023-03-09 06:21:33

by Stanley Jhu

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

Hi Bao,

On Thu, Mar 9, 2023 at 1:31 PM Bao D. Nguyen <[email protected]> wrote:
>
> On 3/8/2023 7:10 PM, Stanley Chu wrote:
> > Hi Bao,
> >
> > On Wed, Mar 8, 2023 at 12:10 PM Bao D. Nguyen <[email protected]> wrote:
> >> Add ufshcd_mcq_abort() to support ufs abort in mcq mode.
> >>
> >> Signed-off-by: Bao D. Nguyen <[email protected]>
> >> ---
> >> drivers/ufs/core/ufs-mcq.c | 76 ++++++++++++++++++++++++++++++++++++++++++
> >> drivers/ufs/core/ufshcd-priv.h | 5 ++-
> >> drivers/ufs/core/ufshcd.c | 11 ++++--
> >> 3 files changed, 88 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
> >> index e27d8eb..6c65cd5 100644
> >> --- a/drivers/ufs/core/ufs-mcq.c
> >> +++ b/drivers/ufs/core/ufs-mcq.c
> >> @@ -646,3 +646,79 @@ static bool ufshcd_mcq_cqe_search(struct ufs_hba *hba,
> >> spin_unlock(&hwq->cq_lock);
> >> return ret;
> >> }
> >> +
> >> +/**
> >> + * ufshcd_mcq_abort - Abort the command in MCQ.
> >> + * @cmd - The command to be aborted.
> >> + *
> >> + * Returns SUCCESS or FAILED error codes
> >> + */
> >> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
> >> +{
> >> + struct Scsi_Host *host = cmd->device->host;
> >> + struct ufs_hba *hba = shost_priv(host);
> >> + int tag = scsi_cmd_to_rq(cmd)->tag;
> >> + struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> >> + struct ufs_hw_queue *hwq;
> >> + int err = FAILED;
> >> +
> >> + if (!lrbp->cmd) {
> >> + dev_err(hba->dev,
> >> + "%s: skip abort. cmd at tag %d already completed.\n",
> >> + __func__, tag);
> >> + goto out;
> >> + }
> >> +
> >> + /* Skip task abort in case previous aborts failed and report failure */
> >> + if (lrbp->req_abort_skip) {
> >> + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
> >> + __func__, tag);
> >> + goto out;
> >> + }
> >> +
> >> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
> >> +
> >> + if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
> >> + /*
> >> + * Failure. The command should not be "stuck" in SQ for
> >> + * a long time which resulted in command being aborted.
> >> + */
> >> + dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
> >> + __func__, hwq->id, tag);
> >> + /* Set the Command Type to 0xF per the spec */
> >> + ufshcd_mcq_nullify_cmd(hba, hwq);
> >> + goto out;
> >> + }
> >> +
> >> + if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
> >> + dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
> >> + __func__, hwq->id, tag);
> >> + /*
> >> + * The command should not be 'stuck' in the CQ for such a long time.
> >> + * Is interrupt missing? Process the CQEs here. If the interrupt is
> >> + * invoked at a later time, the CQ will be empty because the CQEs
> >> + * are already processed here.
> >> + */
> >> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
> >> + err = SUCCESS;
> >> + goto out;
> >> + }
> >> +
> >> + /*
> >> + * The command is not in the Submission Queue, and it is not
> >> + * in the Completion Queue either. Query the device to see if
> >> + * the command is being processed in the device.
> >> + */
> >> + if (ufshcd_try_to_abort_task(hba, tag)) {
> >> + dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
> >> + lrbp->req_abort_skip = true;
> >> + goto out;
> >> + }
> >> +
> >> + err = SUCCESS;
> >> + if (lrbp->cmd)
> >> + ufshcd_release_scsi_cmd(hba, lrbp);
> >> +
> >> +out:
> >> + return err;
> >> +}
> >> diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
> >> index 0527926..d883c03 100644
> >> --- a/drivers/ufs/core/ufshcd-priv.h
> >> +++ b/drivers/ufs/core/ufshcd-priv.h
> >> @@ -77,7 +77,10 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
> >> struct ufs_hw_queue *hwq);
> >>
> >> int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag, int *result);
> >> -
> >> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd);
> >> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
> >> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> >> + struct ufshcd_lrb *lrbp);
> >> #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
> >> #define SD_ASCII_STD true
> >> #define SD_RAW false
> >> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> >> index 408c16c..e06399e 100644
> >> --- a/drivers/ufs/core/ufshcd.c
> >> +++ b/drivers/ufs/core/ufshcd.c
> >> @@ -302,7 +302,6 @@ static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
> >> static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
> >> static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
> >> struct ufs_vreg *vreg);
> >> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
> >> static void ufshcd_wb_toggle_buf_flush_during_h8(struct ufs_hba *hba,
> >> bool enable);
> >> static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
> >> @@ -5414,7 +5413,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
> >> }
> >>
> >> /* Release the resources allocated for processing a SCSI command. */
> >> -static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> >> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
> >> struct ufshcd_lrb *lrbp)
> >> {
> >> struct scsi_cmnd *cmd = lrbp->cmd;
> >> @@ -7340,7 +7339,7 @@ static void ufshcd_set_req_abort_skip(struct ufs_hba *hba, unsigned long bitmap)
> >> *
> >> * Returns zero on success, non-zero on failure
> >> */
> >> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
> >> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
> >> {
> >> struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> >> int err = 0;
> >> @@ -7500,6 +7499,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
> >> goto release;
> >> }
> >>
> >> + if (is_mcq_enabled(hba)) {
> >> + /* MCQ mode. Branch off to handle abort for mcq mode */
> >> + err = ufshcd_mcq_abort(cmd);
> >> + goto release;
> >> + }
> >> +
> >> /* Skip task abort in case previous aborts failed and report failure */
> >> if (lrbp->req_abort_skip) {
> >> dev_err(hba->dev, "%s: skipping abort\n", __func__);
> >> --
> >> 2.7.4
> >>
> > It seems that ufshcd_abort_all() also needs an error handling flow for MCQ.
>
> Hi Stanley, thank you for the reviews.
>
> I am not able to find the function ufshcd_abort_all() in the drivers/ufs
> directory. Can you please clarify where this function is located? Thanks.

The function ufshcd_abort_all() was introduced in the following link:
https://android.googlesource.com/kernel/common/+/b817e6ffbad7a1a0a5ca5bb7d4020823c3f4d9d0

Can you confirm if these patches are based on the latest kernel
6.3-rc1 or the latest linux-next tree?

Thanks,
Stanley

>
> >
> > Thanks,
> > Stanley
>
>

2023-03-09 07:00:56

by Bao D. Nguyen

[permalink] [raw]
Subject: Re: [RFC PATCH v1 4/4] ufs: mcq: Added ufshcd_mcq_abort()

On 3/8/2023 10:21 PM, Stanley Chu wrote:
> Hi Bao,
>
> On Thu, Mar 9, 2023 at 1:31 PM Bao D. Nguyen <[email protected]> wrote:
>> On 3/8/2023 7:10 PM, Stanley Chu wrote:
>>> Hi Bao,
>>>
>>> On Wed, Mar 8, 2023 at 12:10 PM Bao D. Nguyen <[email protected]> wrote:
>>>> Add ufshcd_mcq_abort() to support ufs abort in mcq mode.
>>>>
>>>> Signed-off-by: Bao D. Nguyen <[email protected]>
>>>> ---
>>>> drivers/ufs/core/ufs-mcq.c | 76 ++++++++++++++++++++++++++++++++++++++++++
>>>> drivers/ufs/core/ufshcd-priv.h | 5 ++-
>>>> drivers/ufs/core/ufshcd.c | 11 ++++--
>>>> 3 files changed, 88 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
>>>> index e27d8eb..6c65cd5 100644
>>>> --- a/drivers/ufs/core/ufs-mcq.c
>>>> +++ b/drivers/ufs/core/ufs-mcq.c
>>>> @@ -646,3 +646,79 @@ static bool ufshcd_mcq_cqe_search(struct ufs_hba *hba,
>>>> spin_unlock(&hwq->cq_lock);
>>>> return ret;
>>>> }
>>>> +
>>>> +/**
>>>> + * ufshcd_mcq_abort - Abort the command in MCQ.
>>>> + * @cmd - The command to be aborted.
>>>> + *
>>>> + * Returns SUCCESS or FAILED error codes
>>>> + */
>>>> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
>>>> +{
>>>> + struct Scsi_Host *host = cmd->device->host;
>>>> + struct ufs_hba *hba = shost_priv(host);
>>>> + int tag = scsi_cmd_to_rq(cmd)->tag;
>>>> + struct ufshcd_lrb *lrbp = &hba->lrb[tag];
>>>> + struct ufs_hw_queue *hwq;
>>>> + int err = FAILED;
>>>> +
>>>> + if (!lrbp->cmd) {
>>>> + dev_err(hba->dev,
>>>> + "%s: skip abort. cmd at tag %d already completed.\n",
>>>> + __func__, tag);
>>>> + goto out;
>>>> + }
>>>> +
>>>> + /* Skip task abort in case previous aborts failed and report failure */
>>>> + if (lrbp->req_abort_skip) {
>>>> + dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
>>>> + __func__, tag);
>>>> + goto out;
>>>> + }
>>>> +
>>>> + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
>>>> +
>>>> + if (ufshcd_mcq_sqe_search(hba, hwq, tag)) {
>>>> + /*
>>>> + * Failure. The command should not be "stuck" in SQ for
>>>> + * a long time which resulted in command being aborted.
>>>> + */
>>>> + dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
>>>> + __func__, hwq->id, tag);
>>>> + /* Set the Command Type to 0xF per the spec */
>>>> + ufshcd_mcq_nullify_cmd(hba, hwq);
>>>> + goto out;
>>>> + }
>>>> +
>>>> + if (ufshcd_mcq_cqe_search(hba, hwq, tag)) {
>>>> + dev_err(hba->dev, "%s: cmd found in cq. hwq=%d, tag=%d\n",
>>>> + __func__, hwq->id, tag);
>>>> + /*
>>>> + * The command should not be 'stuck' in the CQ for such a long time.
>>>> + * Is interrupt missing? Process the CQEs here. If the interrupt is
>>>> + * invoked at a later time, the CQ will be empty because the CQEs
>>>> + * are already processed here.
>>>> + */
>>>> + ufshcd_mcq_poll_cqe_lock(hba, hwq);
>>>> + err = SUCCESS;
>>>> + goto out;
>>>> + }
>>>> +
>>>> + /*
>>>> + * The command is not in the Submission Queue, and it is not
>>>> + * in the Completion Queue either. Query the device to see if
>>>> + * the command is being processed in the device.
>>>> + */
>>>> + if (ufshcd_try_to_abort_task(hba, tag)) {
>>>> + dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
>>>> + lrbp->req_abort_skip = true;
>>>> + goto out;
>>>> + }
>>>> +
>>>> + err = SUCCESS;
>>>> + if (lrbp->cmd)
>>>> + ufshcd_release_scsi_cmd(hba, lrbp);
>>>> +
>>>> +out:
>>>> + return err;
>>>> +}
>>>> diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
>>>> index 0527926..d883c03 100644
>>>> --- a/drivers/ufs/core/ufshcd-priv.h
>>>> +++ b/drivers/ufs/core/ufshcd-priv.h
>>>> @@ -77,7 +77,10 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
>>>> struct ufs_hw_queue *hwq);
>>>>
>>>> int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag, int *result);
>>>> -
>>>> +int ufshcd_mcq_abort(struct scsi_cmnd *cmd);
>>>> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
>>>> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>>>> + struct ufshcd_lrb *lrbp);
>>>> #define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
>>>> #define SD_ASCII_STD true
>>>> #define SD_RAW false
>>>> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
>>>> index 408c16c..e06399e 100644
>>>> --- a/drivers/ufs/core/ufshcd.c
>>>> +++ b/drivers/ufs/core/ufshcd.c
>>>> @@ -302,7 +302,6 @@ static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
>>>> static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
>>>> static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
>>>> struct ufs_vreg *vreg);
>>>> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag);
>>>> static void ufshcd_wb_toggle_buf_flush_during_h8(struct ufs_hba *hba,
>>>> bool enable);
>>>> static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
>>>> @@ -5414,7 +5413,7 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
>>>> }
>>>>
>>>> /* Release the resources allocated for processing a SCSI command. */
>>>> -static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>>>> +void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
>>>> struct ufshcd_lrb *lrbp)
>>>> {
>>>> struct scsi_cmnd *cmd = lrbp->cmd;
>>>> @@ -7340,7 +7339,7 @@ static void ufshcd_set_req_abort_skip(struct ufs_hba *hba, unsigned long bitmap)
>>>> *
>>>> * Returns zero on success, non-zero on failure
>>>> */
>>>> -static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
>>>> +int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
>>>> {
>>>> struct ufshcd_lrb *lrbp = &hba->lrb[tag];
>>>> int err = 0;
>>>> @@ -7500,6 +7499,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>>>> goto release;
>>>> }
>>>>
>>>> + if (is_mcq_enabled(hba)) {
>>>> + /* MCQ mode. Branch off to handle abort for mcq mode */
>>>> + err = ufshcd_mcq_abort(cmd);
>>>> + goto release;
>>>> + }
>>>> +
>>>> /* Skip task abort in case previous aborts failed and report failure */
>>>> if (lrbp->req_abort_skip) {
>>>> dev_err(hba->dev, "%s: skipping abort\n", __func__);
>>>> --
>>>> 2.7.4
>>>>
>>> It seems that ufshcd_abort_all() also needs an error handling flow for MCQ.
>> Hi Stanley, thank you for the reviews.
>>
>> I am not able to find the function ufshcd_abort_all() in the drivers/ufs
>> directory. Can you please clarify where this function is located? Thanks.
> The function ufshcd_abort_all() was introduced in the following link:
> https://android.googlesource.com/kernel/common/+/b817e6ffbad7a1a0a5ca5bb7d4020823c3f4d9d0

Ah ok. Thank you for this info. The ufshcd_abort_all() is in
ufshcd_err_handler(), it is unrelated to the ufshcd_abort().

Adding MCQ support to ufshcd_err_handler() will be in a separate patch.

> Can you confirm if these patches are based on the latest kernel
> 6.3-rc1 or the latest linux-next tree?
>
> Thanks,
> Stanley
>
>>> Thanks,
>>> Stanley
>>