When gate_work/ungate_work gets an error during hibern8_enter or exit,
ufshcd_err_handler()
ufshcd_scsi_block_requests()
ufshcd_reset_and_restore()
ufshcd_clear_ua_wluns() -> stuck
ufshcd_scsi_unblock_requests()
In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery flows
such as suspend/resume, link_recovery, and error_handler.
Fixes: 1918651f2d7e ("scsi: ufs: Clear UAC for RPMB after ufshcd resets")
Signed-off-by: Jaegeuk Kim <[email protected]>
---
drivers/scsi/ufs/ufshcd.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index bedb822a40a3..1678cec08b51 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3996,6 +3996,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
if (ret)
dev_err(hba->dev, "%s: link recovery failed, err %d",
__func__, ret);
+ else
+ ufshcd_clear_ua_wluns(hba);
return ret;
}
@@ -6003,6 +6005,9 @@ static void ufshcd_err_handler(struct work_struct *work)
ufshcd_scsi_unblock_requests(hba);
ufshcd_err_handling_unprepare(hba);
up(&hba->eh_sem);
+
+ if (!err && needs_reset)
+ ufshcd_clear_ua_wluns(hba);
}
/**
@@ -6940,14 +6945,11 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
ufshcd_set_clk_freq(hba, true);
err = ufshcd_hba_enable(hba);
- if (err)
- goto out;
/* Establish the link again and restore the device */
- err = ufshcd_probe_hba(hba, false);
if (!err)
- ufshcd_clear_ua_wluns(hba);
-out:
+ err = ufshcd_probe_hba(hba, false);
+
if (err)
dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err);
@@ -8777,6 +8779,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
ufshcd_resume_clkscaling(hba);
hba->clk_gating.is_suspended = false;
hba->dev_info.b_rpm_dev_flush_capable = false;
+ ufshcd_clear_ua_wluns(hba);
ufshcd_release(hba);
out:
if (hba->dev_info.b_rpm_dev_flush_capable) {
@@ -8887,6 +8890,8 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
cancel_delayed_work(&hba->rpm_dev_flush_recheck_work);
}
+ ufshcd_clear_ua_wluns(hba);
+
/* Schedule clock gating in case of no access to UFS device yet */
ufshcd_release(hba);
--
2.29.2.729.g45daf8777d-goog
From: Jaegeuk Kim <[email protected]>
This fixes a warning caused by wrong reserve tag usage in __ufshcd_issue_tm_cmd.
WARNING: CPU: 7 PID: 7 at block/blk-core.c:630 blk_get_request+0x68/0x70
WARNING: CPU: 4 PID: 157 at block/blk-mq-tag.c:82 blk_mq_get_tag+0x438/0x46c
And, in ufshcd_err_handler(), we can avoid to send tm_cmd before aborting
outstanding commands by waiting a bit for IO completion like this.
__ufshcd_issue_tm_cmd: task management cmd 0x80 timed-out
Signed-off-by: Jaegeuk Kim <[email protected]>
---
drivers/scsi/ufs/ufshcd.c | 36 ++++++++++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1678cec08b51..377da8e98d9b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -44,6 +44,9 @@
/* Query request timeout */
#define QUERY_REQ_TIMEOUT 1500 /* 1.5 seconds */
+/* LINERESET TIME OUT */
+#define LINERESET_IO_TIMEOUT_MS (30000) /* 30 sec */
+
/* Task management command timeout */
#define TM_CMD_TIMEOUT 100 /* msecs */
@@ -5899,6 +5902,8 @@ static void ufshcd_err_handler(struct work_struct *work)
* check if power mode restore is needed.
*/
if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
+ ktime_t start = ktime_get();
+
hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
if (!hba->saved_uic_err)
hba->saved_err &= ~UIC_ERROR;
@@ -5906,6 +5911,20 @@ static void ufshcd_err_handler(struct work_struct *work)
if (ufshcd_is_pwr_mode_restore_needed(hba))
needs_restore = true;
spin_lock_irqsave(hba->host->host_lock, flags);
+ /* Wait for IO completion to avoid aborting IOs */
+ while (hba->outstanding_reqs) {
+ ufshcd_complete_requests(hba);
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+ schedule();
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
+ LINERESET_IO_TIMEOUT_MS) {
+ dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
+ __func__, hba->outstanding_reqs);
+ break;
+ }
+ }
+
if (!hba->saved_err && !needs_restore)
goto skip_err_handling;
}
@@ -6302,9 +6321,13 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba)
intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
}
- if (enabled_intr_status && retval == IRQ_NONE) {
- dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n",
- __func__, intr_status);
+ if (enabled_intr_status && retval == IRQ_NONE &&
+ !ufshcd_eh_in_progress(hba)) {
+ dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n",
+ __func__,
+ intr_status,
+ hba->ufs_stats.last_intr_status,
+ enabled_intr_status);
ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: ");
}
@@ -6348,7 +6371,11 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
* Even though we use wait_event() which sleeps indefinitely,
* the maximum wait time is bounded by %TM_CMD_TIMEOUT.
*/
- req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED);
+ req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED |
+ BLK_MQ_REQ_NOWAIT);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
req->end_io_data = &wait;
free_slot = req->tag;
WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs);
@@ -9355,6 +9382,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
hba->tmf_tag_set = (struct blk_mq_tag_set) {
.nr_hw_queues = 1,
+ .reserved_tags = 1,
.queue_depth = hba->nutmrs,
.ops = &ufshcd_tmf_ops,
.flags = BLK_MQ_F_NO_SCHED,
--
2.29.2.729.g45daf8777d-goog
Hi Jaegeuk,
I love your patch! Perhaps something to improve:
[auto build test WARNING on scsi/for-next]
[also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104]
[cannot apply to linux/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: nds32-randconfig-r012-20210106 (attached as .config)
compiler: nds32le-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nds32
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
In file included from include/linux/device.h:15,
from include/linux/async.h:14,
from drivers/scsi/ufs/ufshcd.c:12:
drivers/scsi/ufs/ufshcd.c: In function 'ufshcd_err_handler':
>> drivers/scsi/ufs/ufshcd.c:5922:23: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'long unsigned int' [-Wformat=]
5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/dev_printk.h:19:22: note: in definition of macro 'dev_fmt'
19 | #define dev_fmt(fmt) fmt
| ^~~
drivers/scsi/ufs/ufshcd.c:5922:5: note: in expansion of macro 'dev_err'
5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
| ^~~~~~~
drivers/scsi/ufs/ufshcd.c:5922:50: note: format string is defined here
5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
| ~^
| |
| unsigned int
| %lx
vim +5922 drivers/scsi/ufs/ufshcd.c
5818
5819 /**
5820 * ufshcd_err_handler - handle UFS errors that require s/w attention
5821 * @work: pointer to work structure
5822 */
5823 static void ufshcd_err_handler(struct work_struct *work)
5824 {
5825 struct ufs_hba *hba;
5826 unsigned long flags;
5827 bool err_xfer = false;
5828 bool err_tm = false;
5829 int err = 0, pmc_err;
5830 int tag;
5831 bool needs_reset = false, needs_restore = false;
5832
5833 hba = container_of(work, struct ufs_hba, eh_work);
5834
5835 down(&hba->eh_sem);
5836 spin_lock_irqsave(hba->host->host_lock, flags);
5837 if (ufshcd_err_handling_should_stop(hba)) {
5838 if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
5839 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
5840 spin_unlock_irqrestore(hba->host->host_lock, flags);
5841 up(&hba->eh_sem);
5842 return;
5843 }
5844 ufshcd_set_eh_in_progress(hba);
5845 spin_unlock_irqrestore(hba->host->host_lock, flags);
5846 ufshcd_err_handling_prepare(hba);
5847 spin_lock_irqsave(hba->host->host_lock, flags);
5848 ufshcd_scsi_block_requests(hba);
5849 hba->ufshcd_state = UFSHCD_STATE_RESET;
5850
5851 /* Complete requests that have door-bell cleared by h/w */
5852 ufshcd_complete_requests(hba);
5853
5854 /*
5855 * A full reset and restore might have happened after preparation
5856 * is finished, double check whether we should stop.
5857 */
5858 if (ufshcd_err_handling_should_stop(hba))
5859 goto skip_err_handling;
5860
5861 if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
5862 bool ret;
5863
5864 spin_unlock_irqrestore(hba->host->host_lock, flags);
5865 /* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
5866 ret = ufshcd_quirk_dl_nac_errors(hba);
5867 spin_lock_irqsave(hba->host->host_lock, flags);
5868 if (!ret && ufshcd_err_handling_should_stop(hba))
5869 goto skip_err_handling;
5870 }
5871
5872 if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) ||
5873 (hba->saved_uic_err &&
5874 (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) {
5875 bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
5876
5877 spin_unlock_irqrestore(hba->host->host_lock, flags);
5878 ufshcd_print_host_state(hba);
5879 ufshcd_print_pwr_info(hba);
5880 ufshcd_print_evt_hist(hba);
5881 ufshcd_print_tmrs(hba, hba->outstanding_tasks);
5882 ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
5883 spin_lock_irqsave(hba->host->host_lock, flags);
5884 }
5885
5886 /*
5887 * if host reset is required then skip clearing the pending
5888 * transfers forcefully because they will get cleared during
5889 * host reset and restore
5890 */
5891 if (hba->force_reset || ufshcd_is_link_broken(hba) ||
5892 ufshcd_is_saved_err_fatal(hba) ||
5893 ((hba->saved_err & UIC_ERROR) &&
5894 (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
5895 UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) {
5896 needs_reset = true;
5897 goto do_reset;
5898 }
5899
5900 /*
5901 * If LINERESET was caught, UFS might have been put to PWM mode,
5902 * check if power mode restore is needed.
5903 */
5904 if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
5905 ktime_t start = ktime_get();
5906
5907 hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
5908 if (!hba->saved_uic_err)
5909 hba->saved_err &= ~UIC_ERROR;
5910 spin_unlock_irqrestore(hba->host->host_lock, flags);
5911 if (ufshcd_is_pwr_mode_restore_needed(hba))
5912 needs_restore = true;
5913 spin_lock_irqsave(hba->host->host_lock, flags);
5914 /* Wait for IO completion to avoid aborting IOs */
5915 while (hba->outstanding_reqs) {
5916 ufshcd_complete_requests(hba);
5917 spin_unlock_irqrestore(hba->host->host_lock, flags);
5918 schedule();
5919 spin_lock_irqsave(hba->host->host_lock, flags);
5920 if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
5921 LINERESET_IO_TIMEOUT_MS) {
> 5922 dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
5923 __func__, hba->outstanding_reqs);
5924 break;
5925 }
5926 }
5927
5928 if (!hba->saved_err && !needs_restore)
5929 goto skip_err_handling;
5930 }
5931
5932 hba->silence_err_logs = true;
5933 /* release lock as clear command might sleep */
5934 spin_unlock_irqrestore(hba->host->host_lock, flags);
5935 /* Clear pending transfer requests */
5936 for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
5937 if (ufshcd_try_to_abort_task(hba, tag)) {
5938 err_xfer = true;
5939 goto lock_skip_pending_xfer_clear;
5940 }
5941 }
5942
5943 /* Clear pending task management requests */
5944 for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
5945 if (ufshcd_clear_tm_cmd(hba, tag)) {
5946 err_tm = true;
5947 goto lock_skip_pending_xfer_clear;
5948 }
5949 }
5950
5951 lock_skip_pending_xfer_clear:
5952 spin_lock_irqsave(hba->host->host_lock, flags);
5953
5954 /* Complete the requests that are cleared by s/w */
5955 ufshcd_complete_requests(hba);
5956 hba->silence_err_logs = false;
5957
5958 if (err_xfer || err_tm) {
5959 needs_reset = true;
5960 goto do_reset;
5961 }
5962
5963 /*
5964 * After all reqs and tasks are cleared from doorbell,
5965 * now it is safe to retore power mode.
5966 */
5967 if (needs_restore) {
5968 spin_unlock_irqrestore(hba->host->host_lock, flags);
5969 /*
5970 * Hold the scaling lock just in case dev cmds
5971 * are sent via bsg and/or sysfs.
5972 */
5973 down_write(&hba->clk_scaling_lock);
5974 hba->force_pmc = true;
5975 pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info));
5976 if (pmc_err) {
5977 needs_reset = true;
5978 dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n",
5979 __func__, pmc_err);
5980 }
5981 hba->force_pmc = false;
5982 ufshcd_print_pwr_info(hba);
5983 up_write(&hba->clk_scaling_lock);
5984 spin_lock_irqsave(hba->host->host_lock, flags);
5985 }
5986
5987 do_reset:
5988 /* Fatal errors need reset */
5989 if (needs_reset) {
5990 unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
5991
5992 /*
5993 * ufshcd_reset_and_restore() does the link reinitialization
5994 * which will need atleast one empty doorbell slot to send the
5995 * device management commands (NOP and query commands).
5996 * If there is no slot empty at this moment then free up last
5997 * slot forcefully.
5998 */
5999 if (hba->outstanding_reqs == max_doorbells)
6000 __ufshcd_transfer_req_compl(hba,
6001 (1UL << (hba->nutrs - 1)));
6002
6003 hba->force_reset = false;
6004 spin_unlock_irqrestore(hba->host->host_lock, flags);
6005 err = ufshcd_reset_and_restore(hba);
6006 if (err)
6007 dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
6008 __func__, err);
6009 else
6010 ufshcd_recover_pm_error(hba);
6011 spin_lock_irqsave(hba->host->host_lock, flags);
6012 }
6013
6014 skip_err_handling:
6015 if (!needs_reset) {
6016 if (hba->ufshcd_state == UFSHCD_STATE_RESET)
6017 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
6018 if (hba->saved_err || hba->saved_uic_err)
6019 dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
6020 __func__, hba->saved_err, hba->saved_uic_err);
6021 }
6022 ufshcd_clear_eh_in_progress(hba);
6023 spin_unlock_irqrestore(hba->host->host_lock, flags);
6024 ufshcd_scsi_unblock_requests(hba);
6025 ufshcd_err_handling_unprepare(hba);
6026 up(&hba->eh_sem);
6027
6028 if (!err && needs_reset)
6029 ufshcd_clear_ua_wluns(hba);
6030 }
6031
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Jaegeuk,
I love your patch! Perhaps something to improve:
[auto build test WARNING on scsi/for-next]
[also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104]
[cannot apply to linux/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: x86_64-randconfig-r016-20210106 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 5c951623bc8965fa1e89660f2f5f4a2944e4981a)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> drivers/scsi/ufs/ufshcd.c:5923:16: warning: format specifies type 'unsigned int' but the argument has type 'unsigned long' [-Wformat]
__func__, hba->outstanding_reqs);
^~~~~~~~~~~~~~~~~~~~~
include/linux/dev_printk.h:112:32: note: expanded from macro 'dev_err'
_dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
1 warning generated.
vim +5923 drivers/scsi/ufs/ufshcd.c
5818
5819 /**
5820 * ufshcd_err_handler - handle UFS errors that require s/w attention
5821 * @work: pointer to work structure
5822 */
5823 static void ufshcd_err_handler(struct work_struct *work)
5824 {
5825 struct ufs_hba *hba;
5826 unsigned long flags;
5827 bool err_xfer = false;
5828 bool err_tm = false;
5829 int err = 0, pmc_err;
5830 int tag;
5831 bool needs_reset = false, needs_restore = false;
5832
5833 hba = container_of(work, struct ufs_hba, eh_work);
5834
5835 down(&hba->eh_sem);
5836 spin_lock_irqsave(hba->host->host_lock, flags);
5837 if (ufshcd_err_handling_should_stop(hba)) {
5838 if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
5839 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
5840 spin_unlock_irqrestore(hba->host->host_lock, flags);
5841 up(&hba->eh_sem);
5842 return;
5843 }
5844 ufshcd_set_eh_in_progress(hba);
5845 spin_unlock_irqrestore(hba->host->host_lock, flags);
5846 ufshcd_err_handling_prepare(hba);
5847 spin_lock_irqsave(hba->host->host_lock, flags);
5848 ufshcd_scsi_block_requests(hba);
5849 hba->ufshcd_state = UFSHCD_STATE_RESET;
5850
5851 /* Complete requests that have door-bell cleared by h/w */
5852 ufshcd_complete_requests(hba);
5853
5854 /*
5855 * A full reset and restore might have happened after preparation
5856 * is finished, double check whether we should stop.
5857 */
5858 if (ufshcd_err_handling_should_stop(hba))
5859 goto skip_err_handling;
5860
5861 if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
5862 bool ret;
5863
5864 spin_unlock_irqrestore(hba->host->host_lock, flags);
5865 /* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
5866 ret = ufshcd_quirk_dl_nac_errors(hba);
5867 spin_lock_irqsave(hba->host->host_lock, flags);
5868 if (!ret && ufshcd_err_handling_should_stop(hba))
5869 goto skip_err_handling;
5870 }
5871
5872 if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) ||
5873 (hba->saved_uic_err &&
5874 (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) {
5875 bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
5876
5877 spin_unlock_irqrestore(hba->host->host_lock, flags);
5878 ufshcd_print_host_state(hba);
5879 ufshcd_print_pwr_info(hba);
5880 ufshcd_print_evt_hist(hba);
5881 ufshcd_print_tmrs(hba, hba->outstanding_tasks);
5882 ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
5883 spin_lock_irqsave(hba->host->host_lock, flags);
5884 }
5885
5886 /*
5887 * if host reset is required then skip clearing the pending
5888 * transfers forcefully because they will get cleared during
5889 * host reset and restore
5890 */
5891 if (hba->force_reset || ufshcd_is_link_broken(hba) ||
5892 ufshcd_is_saved_err_fatal(hba) ||
5893 ((hba->saved_err & UIC_ERROR) &&
5894 (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
5895 UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) {
5896 needs_reset = true;
5897 goto do_reset;
5898 }
5899
5900 /*
5901 * If LINERESET was caught, UFS might have been put to PWM mode,
5902 * check if power mode restore is needed.
5903 */
5904 if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
5905 ktime_t start = ktime_get();
5906
5907 hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
5908 if (!hba->saved_uic_err)
5909 hba->saved_err &= ~UIC_ERROR;
5910 spin_unlock_irqrestore(hba->host->host_lock, flags);
5911 if (ufshcd_is_pwr_mode_restore_needed(hba))
5912 needs_restore = true;
5913 spin_lock_irqsave(hba->host->host_lock, flags);
5914 /* Wait for IO completion to avoid aborting IOs */
5915 while (hba->outstanding_reqs) {
5916 ufshcd_complete_requests(hba);
5917 spin_unlock_irqrestore(hba->host->host_lock, flags);
5918 schedule();
5919 spin_lock_irqsave(hba->host->host_lock, flags);
5920 if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
5921 LINERESET_IO_TIMEOUT_MS) {
5922 dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
> 5923 __func__, hba->outstanding_reqs);
5924 break;
5925 }
5926 }
5927
5928 if (!hba->saved_err && !needs_restore)
5929 goto skip_err_handling;
5930 }
5931
5932 hba->silence_err_logs = true;
5933 /* release lock as clear command might sleep */
5934 spin_unlock_irqrestore(hba->host->host_lock, flags);
5935 /* Clear pending transfer requests */
5936 for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
5937 if (ufshcd_try_to_abort_task(hba, tag)) {
5938 err_xfer = true;
5939 goto lock_skip_pending_xfer_clear;
5940 }
5941 }
5942
5943 /* Clear pending task management requests */
5944 for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
5945 if (ufshcd_clear_tm_cmd(hba, tag)) {
5946 err_tm = true;
5947 goto lock_skip_pending_xfer_clear;
5948 }
5949 }
5950
5951 lock_skip_pending_xfer_clear:
5952 spin_lock_irqsave(hba->host->host_lock, flags);
5953
5954 /* Complete the requests that are cleared by s/w */
5955 ufshcd_complete_requests(hba);
5956 hba->silence_err_logs = false;
5957
5958 if (err_xfer || err_tm) {
5959 needs_reset = true;
5960 goto do_reset;
5961 }
5962
5963 /*
5964 * After all reqs and tasks are cleared from doorbell,
5965 * now it is safe to retore power mode.
5966 */
5967 if (needs_restore) {
5968 spin_unlock_irqrestore(hba->host->host_lock, flags);
5969 /*
5970 * Hold the scaling lock just in case dev cmds
5971 * are sent via bsg and/or sysfs.
5972 */
5973 down_write(&hba->clk_scaling_lock);
5974 hba->force_pmc = true;
5975 pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info));
5976 if (pmc_err) {
5977 needs_reset = true;
5978 dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n",
5979 __func__, pmc_err);
5980 }
5981 hba->force_pmc = false;
5982 ufshcd_print_pwr_info(hba);
5983 up_write(&hba->clk_scaling_lock);
5984 spin_lock_irqsave(hba->host->host_lock, flags);
5985 }
5986
5987 do_reset:
5988 /* Fatal errors need reset */
5989 if (needs_reset) {
5990 unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
5991
5992 /*
5993 * ufshcd_reset_and_restore() does the link reinitialization
5994 * which will need atleast one empty doorbell slot to send the
5995 * device management commands (NOP and query commands).
5996 * If there is no slot empty at this moment then free up last
5997 * slot forcefully.
5998 */
5999 if (hba->outstanding_reqs == max_doorbells)
6000 __ufshcd_transfer_req_compl(hba,
6001 (1UL << (hba->nutrs - 1)));
6002
6003 hba->force_reset = false;
6004 spin_unlock_irqrestore(hba->host->host_lock, flags);
6005 err = ufshcd_reset_and_restore(hba);
6006 if (err)
6007 dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
6008 __func__, err);
6009 else
6010 ufshcd_recover_pm_error(hba);
6011 spin_lock_irqsave(hba->host->host_lock, flags);
6012 }
6013
6014 skip_err_handling:
6015 if (!needs_reset) {
6016 if (hba->ufshcd_state == UFSHCD_STATE_RESET)
6017 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
6018 if (hba->saved_err || hba->saved_uic_err)
6019 dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
6020 __func__, hba->saved_err, hba->saved_uic_err);
6021 }
6022 ufshcd_clear_eh_in_progress(hba);
6023 spin_unlock_irqrestore(hba->host->host_lock, flags);
6024 ufshcd_scsi_unblock_requests(hba);
6025 ufshcd_err_handling_unprepare(hba);
6026 up(&hba->eh_sem);
6027
6028 if (!err && needs_reset)
6029 ufshcd_clear_ua_wluns(hba);
6030 }
6031
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]