From: Bean Huo <[email protected]>
Calling shost_for_each_device() while host->host_lock is held causes a
deadlock, because the iterator acquires host_lock itself; the system then
stalls (see the log below). Fix this issue by narrowing the scope of the lock.
rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
Call trace:
__switch_to+0x120/0x170
0xffff800011643998
Task dump for CPU 5:
task:kworker/u16:2 state:R running task stack: 0 pid: 80 ppid: 2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
0x0
Task dump for CPU 6:
task:kworker/u16:6 state:R running task stack: 0 pid: 164 ppid: 2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
0xffff54e7c4429f80
Task dump for CPU 7:
task:kworker/u16:4 state:R running task stack: 0 pid: 153 ppid: 2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
blk_mq_run_hw_queue+0x34/0x110
blk_mq_sched_insert_request+0xb0/0x120
blk_execute_rq_nowait+0x68/0x88
blk_execute_rq+0x4c/0xd8
__scsi_execute+0xec/0x1d0
scsi_vpd_inquiry+0x84/0xf0
scsi_get_vpd_buf+0x34/0xb8
scsi_attach_vpd+0x34/0x140
scsi_probe_and_add_lun+0xa6c/0xab8
__scsi_scan_target+0x438/0x4f8
scsi_scan_channel+0x6c/0xa8
scsi_scan_host_selected+0xf0/0x150
do_scsi_scan_host+0x88/0x90
scsi_scan_host+0x1b4/0x1d0
ufshcd_async_scan+0x248/0x310
async_run_entry_fn+0x30/0x178
process_one_work+0x1e8/0x368
worker_thread+0x40/0x478
kthread+0x174/0x180
ret_from_fork+0x10/0x20
Fixes: 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code")
Signed-off-by: Bean Huo <[email protected]>
---
drivers/scsi/ufs/ufshcd.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6dd517267f1b..15333a327b93 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1099,19 +1099,21 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
ktime_t start;
ufshcd_hold(hba, false);
- spin_lock_irqsave(hba->host->host_lock, flags);
/*
* Wait for all the outstanding tasks/transfer requests.
* Verify by checking the doorbell registers are clear.
*/
start = ktime_get();
do {
+ spin_lock_irqsave(hba->host->host_lock, flags);
if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
ret = -EBUSY;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
goto out;
}
-
tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+
tr_pending = ufshcd_pending_cmds(hba);
if (!tm_doorbell && !tr_pending) {
timeout = false;
@@ -1120,7 +1122,6 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
break;
}
- spin_unlock_irqrestore(hba->host->host_lock, flags);
schedule();
if (ktime_to_us(ktime_sub(ktime_get(), start)) >
wait_timeout_us) {
@@ -1132,7 +1133,6 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
*/
do_last_check = true;
}
- spin_lock_irqsave(hba->host->host_lock, flags);
} while (tm_doorbell || tr_pending);
if (timeout) {
@@ -1142,7 +1142,6 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
ret = -EBUSY;
}
out:
- spin_unlock_irqrestore(hba->host->host_lock, flags);
ufshcd_release(hba);
return ret;
}
--
2.25.1
On 12/13/21 3:00 PM, Bean Huo wrote:
> Call shost_for_each_device() with host->host_lock is held will cause
> a deadlock situation, which will cause the system to stall (the log
> as follow). Fix this issue by narrowing the scope of the lock.
Hi Bean,
As you probably know I do not have access to a test setup that supports clock
scaling. Has the following patch been considered?
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6d692aae67ce..244eddf0caf8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
struct scsi_device *sdev;
u32 pending = 0;
- shost_for_each_device(sdev, hba->host)
+ lockdep_assert_held(hba->host->host_lock);
+
+ __shost_for_each_device(sdev, hba->host)
pending += sbitmap_weight(&sdev->budget_map);
return pending;
Thanks,
Bart.
On Mon, Dec 13, 2021 at 8:15 PM Bart Van Assche <[email protected]> wrote:
>
> On 12/13/21 3:00 PM, Bean Huo wrote:
> > Call shost_for_each_device() with host->host_lock is held will cause
> > a deadlock situation, which will cause the system to stall (the log
> > as follow). Fix this issue by narrowing the scope of the lock.
>
> Hi Bean,
>
> As you probably know I do not have access to a test setup that supports clock
> scaling. Has the following patch been considered?
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 6d692aae67ce..244eddf0caf8 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
> struct scsi_device *sdev;
> u32 pending = 0;
>
> - shost_for_each_device(sdev, hba->host)
> + lockdep_assert_held(hba->host->host_lock);
> +
> + __shost_for_each_device(sdev, hba->host)
> pending += sbitmap_weight(&sdev->budget_map);
We hit the same issue today as well, and this solution works on db845c.
Reported-by: YongQin Liu <[email protected]>
Reported-by: Amit Pundir <[email protected]>
Tested-by: John Stultz <[email protected]>
thanks
-john