2021-05-24 08:50:03

by Can Guo

[permalink] [raw]
Subject: [PATCH v2 6/6] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests

If PM requests fail during runtime suspend/resume, the RPM framework saves
the error to dev->power.runtime_error. Until runtime_error is cleared,
runtime PM on this specific device won't work again, leaving the device
permanently in either the suspended or the active state.

When a task abort happens to a PM request sent during runtime suspend/resume,
the RPM framework saves the (TIMEOUT) error anyway, even if the request is
aborted successfully. We can do better: let error handling recover and clear
the runtime_error. So, let PM requests take the fast abort path in
ufshcd_abort().

Signed-off-by: Can Guo <[email protected]>
---
drivers/scsi/ufs/ufshcd.c | 38 ++++++++++++++++++++++----------------
1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9a3bc04..8312b31 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2731,7 +2731,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
* err handler blocked for too long. So, just fail the scsi cmd
* sent from PM ops, err handler can recover PM error anyways.
*/
- if (hba->wl_pm_op_in_progress) {
+ if (cmd->request->rq_flags & RQF_PM) {
hba->force_reset = true;
set_host_byte(cmd, DID_BAD_TARGET);
cmd->scsi_done(cmd);
@@ -2764,7 +2764,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
(hba->clk_gating.state != CLKS_ON));

if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
- if (hba->wl_pm_op_in_progress)
+ if (cmd->request->rq_flags & RQF_PM)
set_host_byte(cmd, DID_BAD_TARGET);
else
err = SCSI_MLQUEUE_HOST_BUSY;
@@ -6982,11 +6982,14 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
int err = 0;
struct ufshcd_lrb *lrbp;
u32 reg;
+ bool need_eh = false;

host = cmd->device->host;
hba = shost_priv(host);
tag = cmd->request->tag;
lrbp = &hba->lrb[tag];
+
+ dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
if (!ufshcd_valid_tag(hba, tag)) {
dev_err(hba->dev,
"%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p",
@@ -7004,9 +7007,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
goto out;
}

- /* Print Transfer Request of aborted task */
- dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
-
/*
* Print detailed info about aborted request.
* As more than one request might get aborted at the same time,
@@ -7034,21 +7034,21 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
}

/*
- * Task abort to the device W-LUN is illegal. When this command
- * will fail, due to spec violation, scsi err handling next step
- * will be to send LU reset which, again, is a spec violation.
- * To avoid these unnecessary/illegal steps, first we clean up
- * the lrb taken by this cmd and re-set it in outstanding_reqs,
- * then queue the eh_work and bail.
+ * This fast path guarantees the cmd always gets aborted successfully,
+ * meanwhile it invokes the error handler. It allows contexts, which
+ * are blocked by this cmd, to fail fast. It serves multiple purposes:
+ * #1 To avoid unnecessary/illegal abort attempts to the W-LU.
+ * #2 To avoid live lock between eh_work and specific contexts, i.e.,
+ * suspend/resume and eh_work itself.
+ * #3 To let eh_work recover runtime PM error in case abort happens
+ * to cmds sent from runtime suspend/resume ops.
*/
- if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
+ if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN ||
+ (cmd->request->rq_flags & RQF_PM)) {
ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
__ufshcd_transfer_req_compl(hba, (1UL << tag));
set_bit(tag, &hba->outstanding_reqs);
- spin_lock_irqsave(host->host_lock, flags);
- hba->force_reset = true;
- ufshcd_schedule_eh_work(hba);
- spin_unlock_irqrestore(host->host_lock, flags);
+ need_eh = true;
goto out;
}

@@ -7062,6 +7062,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
cleanup:
__ufshcd_transfer_req_compl(hba, (1UL << tag));
out:
+ if (cmd->request->rq_flags & RQF_PM || need_eh) {
+ spin_lock_irqsave(host->host_lock, flags);
+ hba->force_reset = true;
+ ufshcd_schedule_eh_work(hba);
+ spin_unlock_irqrestore(host->host_lock, flags);
+ }
err = SUCCESS;
} else {
dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.