If we remove the scsi disk when running io with fio, oops occured with
the following condition.
[scsi_eh_0] [fio]
scsi_end_request
->blk_update_request
->end_bio(io returned to userspace)
close
->sd_release
->scsi_disk_put
->scsi_disk_release
->disk->private_data = NULL;
->scsi_mq_uninit_cmd
->scsi_uninit_cmd
->scsi_cmd_to_driver
->drv is NULL, Oops
There is a small window between blk_update_request() and
scsi_mq_uninit_cmd() that scsi disk may have been released. This will
cause a oops like below:
Unable to handle kernel NULL pointer dereference at virtual address
0000000000000000
s/sync.c:67, func=xfer, error=In[11347.116050] Mem abort info:
put/output error
[11347.121598] ESR = 0x96000006
[11347.126200] Exception class = DABT (current EL), IL = 32 bits
[11347.132117] SET = 0, FnV = 0
[11347.135170] EA = 0, S1PTW = 0
[11347.138308] Data abort info:
[11347.141186] ISV = 0, ISS = 0x00000006
[11347.145019] CM = 0, WnR = 0
[11347.147977] user pgtable: 4k pages, 48-bit VAs, pgdp =
00000000a67aece2
[11347.154591] [0000000000000000] pgd=0000002f90774003,
pud=0000002fab098003, pmd=0000000000000000
[11347.163304] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[11347.168870] Modules linked in: hisi_sas_v3_hw hisi_sas_main libsas
[11347.175044] CPU: 56 PID: 4294 Comm: scsi_eh_2 Not tainted
4.19.0-g8052059-dirty #2
[11347.182600] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
RC0 - B601 (V6.01) 11/08/2018
[11347.191370] pstate: a0c00009 (NzCv daif +PAN +UAO)
[11347.196155] pc : scsi_uninit_cmd+0x24/0x3c
[11347.200240] lr : scsi_mq_uninit_cmd+0x1c/0x30
[11347.204583] sp : ffff000024dabb60
[11347.207884] x29: ffff000024dabb60 x28: ffff000024dabd38
[11347.213184] x27: ffff000000f5b3a8 x26: ffff7df3b0181600
[11347.218484] x25: 0000000000000000 x24: ffff803bc5d36778
[11347.223783] x23: 000000000000000a x22: 0000000000000000
[11347.229082] x21: ffff803bc7397000 x20: ffff802f9148e530
[11347.234381] x19: ffff802f9148e530 x18: ffff7e0000000000
[11347.239679] x17: 0000000000000000 x16: 0000002f9e37d000
[11347.244979] x15: ffff7e0000000000 x14: 3863206336203839
[11347.250278] x13: 2036302030302038 x12: a46fac3d0d363d00
[11347.255578] x11: ffffffffffffffff x10: a46fac3d0d363d00
[11347.260877] x9 : 0000000040040000 x8 : 000000000000eb4b
[11347.266177] x7 : ffff000009771000 x6 : 0000000000210d00
[11347.271476] x5 : ffff803bc9f50000 x4 : 0000000000000000
[11347.276775] x3 : ffff802fb02b4380 x2 : ffff802f9148e400
[11347.282075] x1 : 0000000000000000 x0 : ffff802f9148e530
[11347.287375] Process scsi_eh_2 (pid: 4294, stack limit =
0x000000007d2257f8)
[11347.294323] Call trace:
Jobs: 6 (f=6): [R[RRR1XXX1XRR3] 47.296758] scsi_uninit_cmd+0x24/0x3c
[22.7% done] [1516MB/0KB/0KB /s] [754/0/0 iops] [eta 08m:39s]
[11347.308390] scsi_mq_uninit_cmd+0x1c/0x30
[11347.312387] scsi_end_request+0x7c/0x1b8
[11347.316297] scsi_io_completion+0x464/0x668
[11347.320467] scsi_finish_command+0xbc/0x160
[11347.324636] scsi_eh_flush_done_q+0x10c/0x170
[11347.328990] sas_scsi_recover_host+0x84c/0xa98 [libsas]
[11347.334202] scsi_error_handler+0x140/0x5b0
[11347.338374] kthread+0x100/0x12c
[11347.341590] ret_from_fork+0x10/0x18
[11347.345153] Code: 71000c3f 540000e9 f9404c41 f941f421 (f9400021)
[11347.351234] ---[ end trace f496aacdaa1dcc51 ]---
To fix this, move the bio_endio() action from blk_update_request() to
__blk_mq_end_request().
Signed-off-by: Jason Yan <[email protected]>
---
block/blk-core.c | 6 ++++--
block/blk-mq.c | 7 +++++++
include/linux/blkdev.h | 1 +
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 4673ebe42255..f39ea78c0535 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -192,8 +192,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);
/* don't actually finish bio if it's part of flush sequence */
- if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
- bio_endio(bio);
+ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) {
+ bio->bi_next = rq->bio_to_release;
+ rq->bio_to_release = bio;
+ }
}
void blk_dump_rq_flags(struct request *rq, char *msg)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9c181603cbd..5ad595ebc198 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -529,8 +529,15 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
+ struct bio *bio;
u64 now = 0;
+ while (rq->bio_to_release) {
+ bio = rq->bio_to_release->bi_next;
+ bio_endio(rq->bio_to_release);
+ rq->bio_to_release = bio;
+ }
+
if (blk_mq_need_time_stamp(rq))
now = ktime_get_ns();
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0de92b29f589..74fe561d5a49 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -143,6 +143,7 @@ struct request {
struct bio *bio;
struct bio *biotail;
+ struct bio *bio_to_release;
struct list_head queuelist;
--
2.14.5
On Sat, Mar 16, 2019 at 10:11 AM Jason Yan <[email protected]> wrote:
>
> If we remove the scsi disk when running io with fio, oops occured with
> the following condition.
>
> [scsi_eh_0] [fio]
> scsi_end_request
> ->blk_update_request
> ->end_bio(io returned to userspace)
> close
> ->sd_release
> ->scsi_disk_put
> ->scsi_disk_release
> ->disk->private_data = NULL;
>
> ->scsi_mq_uninit_cmd
> ->scsi_uninit_cmd
> ->scsi_cmd_to_driver
> ->drv is NULL, Oops
>
> There is a small window between blk_update_request() and
> scsi_mq_uninit_cmd() that scsi disk may have been released. This will
> cause a oops like below:
>
> Unable to handle kernel NULL pointer dereference at virtual address
> 0000000000000000
> s/sync.c:67, func=xfer, error=In[11347.116050] Mem abort info:
> put/output error
> [11347.121598] ESR = 0x96000006
> [11347.126200] Exception class = DABT (current EL), IL = 32 bits
> [11347.132117] SET = 0, FnV = 0
> [11347.135170] EA = 0, S1PTW = 0
> [11347.138308] Data abort info:
> [11347.141186] ISV = 0, ISS = 0x00000006
> [11347.145019] CM = 0, WnR = 0
> [11347.147977] user pgtable: 4k pages, 48-bit VAs, pgdp =
> 00000000a67aece2
> [11347.154591] [0000000000000000] pgd=0000002f90774003,
> pud=0000002fab098003, pmd=0000000000000000
> [11347.163304] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [11347.168870] Modules linked in: hisi_sas_v3_hw hisi_sas_main libsas
> [11347.175044] CPU: 56 PID: 4294 Comm: scsi_eh_2 Not tainted
> 4.19.0-g8052059-dirty #2
> [11347.182600] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
> RC0 - B601 (V6.01) 11/08/2018
> [11347.191370] pstate: a0c00009 (NzCv daif +PAN +UAO)
> [11347.196155] pc : scsi_uninit_cmd+0x24/0x3c
> [11347.200240] lr : scsi_mq_uninit_cmd+0x1c/0x30
> [11347.204583] sp : ffff000024dabb60
> [11347.207884] x29: ffff000024dabb60 x28: ffff000024dabd38
> [11347.213184] x27: ffff000000f5b3a8 x26: ffff7df3b0181600
> [11347.218484] x25: 0000000000000000 x24: ffff803bc5d36778
> [11347.223783] x23: 000000000000000a x22: 0000000000000000
> [11347.229082] x21: ffff803bc7397000 x20: ffff802f9148e530
> [11347.234381] x19: ffff802f9148e530 x18: ffff7e0000000000
> [11347.239679] x17: 0000000000000000 x16: 0000002f9e37d000
> [11347.244979] x15: ffff7e0000000000 x14: 3863206336203839
> [11347.250278] x13: 2036302030302038 x12: a46fac3d0d363d00
> [11347.255578] x11: ffffffffffffffff x10: a46fac3d0d363d00
> [11347.260877] x9 : 0000000040040000 x8 : 000000000000eb4b
> [11347.266177] x7 : ffff000009771000 x6 : 0000000000210d00
> [11347.271476] x5 : ffff803bc9f50000 x4 : 0000000000000000
> [11347.276775] x3 : ffff802fb02b4380 x2 : ffff802f9148e400
> [11347.282075] x1 : 0000000000000000 x0 : ffff802f9148e530
> [11347.287375] Process scsi_eh_2 (pid: 4294, stack limit =
> 0x000000007d2257f8)
> [11347.294323] Call trace:
> Jobs: 6 (f=6): [R[RRR1XXX1XRR3] 47.296758] scsi_uninit_cmd+0x24/0x3c
> [22.7% done] [1516MB/0KB/0KB /s] [754/0/0 iops] [eta 08m:39s]
> [11347.308390] scsi_mq_uninit_cmd+0x1c/0x30
> [11347.312387] scsi_end_request+0x7c/0x1b8
> [11347.316297] scsi_io_completion+0x464/0x668
> [11347.320467] scsi_finish_command+0xbc/0x160
> [11347.324636] scsi_eh_flush_done_q+0x10c/0x170
> [11347.328990] sas_scsi_recover_host+0x84c/0xa98 [libsas]
> [11347.334202] scsi_error_handler+0x140/0x5b0
> [11347.338374] kthread+0x100/0x12c
> [11347.341590] ret_from_fork+0x10/0x18
> [11347.345153] Code: 71000c3f 540000e9 f9404c41 f941f421 (f9400021)
> [11347.351234] ---[ end trace f496aacdaa1dcc51 ]---
>
> To fix this, move the bio_endio() action from blk_update_request() to
> __blk_mq_end_request().
>
> Signed-off-by: Jason Yan <[email protected]>
> ---
> block/blk-core.c | 6 ++++--
> block/blk-mq.c | 7 +++++++
> include/linux/blkdev.h | 1 +
> 3 files changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 4673ebe42255..f39ea78c0535 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -192,8 +192,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
> bio_advance(bio, nbytes);
>
> /* don't actually finish bio if it's part of flush sequence */
> - if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
> - bio_endio(bio);
> + if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) {
> + bio->bi_next = rq->bio_to_release;
> + rq->bio_to_release = bio;
> + }
> }
In case of partial completion, the completed bios should have been
done immediately, instead that their .end_io is called after the whole
request is completed.
Also rq->bio may be reused to hold the bios to be ended.
Jason, sorry for not Cc list in previous reply.
Thanks,
Ming Lei
On Sat, 2019-03-16 at 10:09 +-0800, Jason Yan wrote:
+AD4 If we remove the scsi disk when running io with fio, oops occured with
+AD4 the following condition.
+AD4
+AD4 +AFs-scsi+AF8-eh+AF8-0+AF0 +AFs-fio+AF0
+AD4 scsi+AF8-end+AF8-request
+AD4 -+AD4-blk+AF8-update+AF8-request
+AD4 -+AD4-end+AF8-bio(io returned to userspace)
+AD4 close
+AD4 -+AD4-sd+AF8-release
+AD4 -+AD4-scsi+AF8-disk+AF8-put
+AD4 -+AD4-scsi+AF8-disk+AF8-release
+AD4 -+AD4-disk-+AD4-private+AF8-data +AD0 NULL+ADs
+AD4
+AD4 -+AD4-scsi+AF8-mq+AF8-uninit+AF8-cmd
+AD4 -+AD4-scsi+AF8-uninit+AF8-cmd
+AD4 -+AD4-scsi+AF8-cmd+AF8-to+AF8-driver
+AD4 -+AD4-drv is NULL, Oops
+AD4
+AD4 There is a small window between blk+AF8-update+AF8-request() and
+AD4 scsi+AF8-mq+AF8-uninit+AF8-cmd() that scsi disk may have been released. This will
+AD4 cause a oops like below:
+AD4
+AD4 Unable to handle kernel NULL pointer dereference at virtual address
+AD4 0000000000000000
+AD4 s/sync.c:67, func+AD0-xfer, error+AD0-In+AFs-11347.116050+AF0 Mem abort info:
+AD4 put/output error
+AD4 +AFs-11347.121598+AF0 ESR +AD0 0x96000006
+AD4 +AFs-11347.126200+AF0 Exception class +AD0 DABT (current EL), IL +AD0 32 bits
+AD4 +AFs-11347.132117+AF0 SET +AD0 0, FnV +AD0 0
+AD4 +AFs-11347.135170+AF0 EA +AD0 0, S1PTW +AD0 0
+AD4 +AFs-11347.138308+AF0 Data abort info:
+AD4 +AFs-11347.141186+AF0 ISV +AD0 0, ISS +AD0 0x00000006
+AD4 +AFs-11347.145019+AF0 CM +AD0 0, WnR +AD0 0
+AD4 +AFs-11347.147977+AF0 user pgtable: 4k pages, 48-bit VAs, pgdp +AD0
+AD4 00000000a67aece2
+AD4 +AFs-11347.154591+AF0 +AFs-0000000000000000+AF0 pgd+AD0-0000002f90774003,
+AD4 pud+AD0-0000002fab098003, pmd+AD0-0000000000000000
+AD4 +AFs-11347.163304+AF0 Internal error: Oops: 96000006 +AFsAIw-1+AF0 PREEMPT SMP
+AD4 +AFs-11347.168870+AF0 Modules linked in: hisi+AF8-sas+AF8-v3+AF8-hw hisi+AF8-sas+AF8-main libsas
+AD4 +AFs-11347.175044+AF0 CPU: 56 PID: 4294 Comm: scsi+AF8-eh+AF8-2 Not tainted
+AD4 4.19.0-g8052059-dirty +ACM-2
+AD4 +AFs-11347.182600+AF0 Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
+AD4 RC0 - B601 (V6.01) 11/08/2018
+AD4 +AFs-11347.191370+AF0 pstate: a0c00009 (NzCv daif +PAN+UAO113471w
Please verify whether the following patch is a valid alternative for your patch:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ed34bfbc3844..745ffdda1bc1 100644
--- a/drivers/scsi/sd.c
+-+-+- b/drivers/scsi/sd.c
+AEAAQA -1408,6 +-1408,7 +AEAAQA static void sd+AF8-release(struct gendisk +ACo-disk, fmode+AF8-t mode)
+AHs
struct scsi+AF8-disk +ACo-sdkp +AD0 scsi+AF8-disk(disk)+ADs
struct scsi+AF8-device +ACo-sdev +AD0 sdkp-+AD4-device+ADs
+- struct request+AF8-queue +ACo-q +AD0 sdkp-+AD4-disk-+AD4-queue+ADs
SCSI+AF8-LOG+AF8-HLQUEUE(3, sd+AF8-printk(KERN+AF8-INFO, sdkp, +ACI-sd+AF8-release+AFw-n+ACI))+ADs
+AEAAQA -1417,9 +-1418,12 +AEAAQA static void sd+AF8-release(struct gendisk +ACo-disk, fmode+AF8-t mode)
+AH0
/+ACo
- +ACo XXX and what if there are packets in flight and this close()
- +ACo XXX is followed by a +ACI-rmmod sd+AF8-mod+ACI?
+- +ACo Wait until any requests that are in progress have completed.
+- +ACo This is necessary to avoid that e.g. scsi+AF8-end+AF8-request() crashes
+- +ACo due to scsi+AF8-disk+AF8-relase() clearing the disk-+AD4-private+AF8-data pointer.
+ACo-/
+- blk+AF8-mq+AF8-freeze+AF8-queue(q)+ADs
+- blk+AF8-mq+AF8-unfreeze+AF8-queue(q)+ADs
scsi+AF8-disk+AF8-put(sdkp)+ADs
+AH0
Thanks,
Bart.
On 2019/3/22 2:39, Bart Van Assche wrote:
> On Sat, 2019-03-16 at 10:09 +0800, Jason Yan wrote:
>> If we remove the scsi disk when running io with fio, oops occured with
>> the following condition.
>>
>> [scsi_eh_0] [fio]
>> scsi_end_request
>> ->blk_update_request
>> ->end_bio(io returned to userspace)
>> close
>> ->sd_release
>> ->scsi_disk_put
>> ->scsi_disk_release
>> ->disk->private_data = NULL;
>>
>> ->scsi_mq_uninit_cmd
>> ->scsi_uninit_cmd
>> ->scsi_cmd_to_driver
>> ->drv is NULL, Oops
>>
>> There is a small window between blk_update_request() and
>> scsi_mq_uninit_cmd() that scsi disk may have been released. This will
>> cause a oops like below:
>>
>> Unable to handle kernel NULL pointer dereference at virtual address
>> 0000000000000000
>> s/sync.c:67, func=xfer, error=In[11347.116050] Mem abort info:
>> put/output error
>> [11347.121598] ESR = 0x96000006
>> [11347.126200] Exception class = DABT (current EL), IL = 32 bits
>> [11347.132117] SET = 0, FnV = 0
>> [11347.135170] EA = 0, S1PTW = 0
>> [11347.138308] Data abort info:
>> [11347.141186] ISV = 0, ISS = 0x00000006
>> [11347.145019] CM = 0, WnR = 0
>> [11347.147977] user pgtable: 4k pages, 48-bit VAs, pgdp =
>> 00000000a67aece2
>> [11347.154591] [0000000000000000] pgd=0000002f90774003,
>> pud=0000002fab098003, pmd=0000000000000000
>> [11347.163304] Internal error: Oops: 96000006 [#1] PREEMPT SMP
>> [11347.168870] Modules linked in: hisi_sas_v3_hw hisi_sas_main libsas
>> [11347.175044] CPU: 56 PID: 4294 Comm: scsi_eh_2 Not tainted
>> 4.19.0-g8052059-dirty #2
>> [11347.182600] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
>> RC0 - B601 (V6.01) 11/08/2018
>> [11347.191370] pstate: a0c00009 (NzCv daif 㰃繐ε흾㯗
>
> Please verify whether the following patch is a valid alternative for your patch:
>
Thanks Bart, I will verify it later.
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index ed34bfbc3844..745ffdda1bc1 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1408,6 +1408,7 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> {
> struct scsi_disk *sdkp = scsi_disk(disk);
> struct scsi_device *sdev = sdkp->device;
> + struct request_queue *q = sdkp->disk->queue;
>
> SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
>
> @@ -1417,9 +1418,12 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> }
>
> /*
> - * XXX and what if there are packets in flight and this close()
> - * XXX is followed by a "rmmod sd_mod"?
> + * Wait until any requests that are in progress have completed.
> + * This is necessary to avoid that e.g. scsi_end_request() crashes
> + * due to scsi_disk_relase() clearing the disk->private_data pointer.
> */
> + blk_mq_freeze_queue(q);
> + blk_mq_unfreeze_queue(q);
>
> scsi_disk_put(sdkp);
> }
>
> Thanks,
>
> Bart.
>
> .
>
On Fri, Mar 22, 2019 at 2:39 AM Bart Van Assche <[email protected]> wrote:
>
> On Sat, 2019-03-16 at 10:09 +0800, Jason Yan wrote:
> > If we remove the scsi disk when running io with fio, oops occured with
> > the following condition.
> >
> > [scsi_eh_0] [fio]
> > scsi_end_request
> > ->blk_update_request
> > ->end_bio(io returned to userspace)
> > close
> > ->sd_release
> > ->scsi_disk_put
> > ->scsi_disk_release
> > ->disk->private_data = NULL;
> >
> > ->scsi_mq_uninit_cmd
> > ->scsi_uninit_cmd
> > ->scsi_cmd_to_driver
> > ->drv is NULL, Oops
> >
> > There is a small window between blk_update_request() and
> > scsi_mq_uninit_cmd() that scsi disk may have been released. This will
> > cause a oops like below:
> >
> > Unable to handle kernel NULL pointer dereference at virtual address
> > 0000000000000000
> > s/sync.c:67, func=xfer, error=In[11347.116050] Mem abort info:
> > put/output error
> > [11347.121598] ESR = 0x96000006
> > [11347.126200] Exception class = DABT (current EL), IL = 32 bits
> > [11347.132117] SET = 0, FnV = 0
> > [11347.135170] EA = 0, S1PTW = 0
> > [11347.138308] Data abort info:
> > [11347.141186] ISV = 0, ISS = 0x00000006
> > [11347.145019] CM = 0, WnR = 0
> > [11347.147977] user pgtable: 4k pages, 48-bit VAs, pgdp =
> > 00000000a67aece2
> > [11347.154591] [0000000000000000] pgd=0000002f90774003,
> > pud=0000002fab098003, pmd=0000000000000000
> > [11347.163304] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> > [11347.168870] Modules linked in: hisi_sas_v3_hw hisi_sas_main libsas
> > [11347.175044] CPU: 56 PID: 4294 Comm: scsi_eh_2 Not tainted
> > 4.19.0-g8052059-dirty #2
> > [11347.182600] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
> > RC0 - B601 (V6.01) 11/08/2018
> > [11347.191370] pstate: a0c00009 (NzCv daif 㰃繐ε흾㯗
>
> Please verify whether the following patch is a valid alternative for your patch:
>
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index ed34bfbc3844..745ffdda1bc1 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1408,6 +1408,7 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> {
> struct scsi_disk *sdkp = scsi_disk(disk);
> struct scsi_device *sdev = sdkp->device;
> + struct request_queue *q = sdkp->disk->queue;
>
> SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
>
> @@ -1417,9 +1418,12 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> }
>
> /*
> - * XXX and what if there are packets in flight and this close()
> - * XXX is followed by a "rmmod sd_mod"?
> + * Wait until any requests that are in progress have completed.
> + * This is necessary to avoid that e.g. scsi_end_request() crashes
> + * due to scsi_disk_relase() clearing the disk->private_data pointer.
> */
> + blk_mq_freeze_queue(q);
> + blk_mq_unfreeze_queue(q);
It is over-kill to drain any requests here, what we want is to just
drain any in-flight
IO requests.
Thanks,
Ming Lei
On Fri, Mar 22, 2019 at 9:36 AM Ming Lei <[email protected]> wrote:
>
> On Fri, Mar 22, 2019 at 2:39 AM Bart Van Assche <[email protected]> wrote:
> >
> > On Sat, 2019-03-16 at 10:09 +0800, Jason Yan wrote:
> > > If we remove the scsi disk when running io with fio, oops occured with
> > > the following condition.
> > >
> > > [scsi_eh_0] [fio]
> > > scsi_end_request
> > > ->blk_update_request
> > > ->end_bio(io returned to userspace)
> > > close
> > > ->sd_release
> > > ->scsi_disk_put
> > > ->scsi_disk_release
> > > ->disk->private_data = NULL;
> > >
> > > ->scsi_mq_uninit_cmd
> > > ->scsi_uninit_cmd
> > > ->scsi_cmd_to_driver
> > > ->drv is NULL, Oops
> > >
> > > There is a small window between blk_update_request() and
> > > scsi_mq_uninit_cmd() that scsi disk may have been released. This will
> > > cause a oops like below:
> > >
> > > Unable to handle kernel NULL pointer dereference at virtual address
> > > 0000000000000000
> > > s/sync.c:67, func=xfer, error=In[11347.116050] Mem abort info:
> > > put/output error
> > > [11347.121598] ESR = 0x96000006
> > > [11347.126200] Exception class = DABT (current EL), IL = 32 bits
> > > [11347.132117] SET = 0, FnV = 0
> > > [11347.135170] EA = 0, S1PTW = 0
> > > [11347.138308] Data abort info:
> > > [11347.141186] ISV = 0, ISS = 0x00000006
> > > [11347.145019] CM = 0, WnR = 0
> > > [11347.147977] user pgtable: 4k pages, 48-bit VAs, pgdp =
> > > 00000000a67aece2
> > > [11347.154591] [0000000000000000] pgd=0000002f90774003,
> > > pud=0000002fab098003, pmd=0000000000000000
> > > [11347.163304] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> > > [11347.168870] Modules linked in: hisi_sas_v3_hw hisi_sas_main libsas
> > > [11347.175044] CPU: 56 PID: 4294 Comm: scsi_eh_2 Not tainted
> > > 4.19.0-g8052059-dirty #2
> > > [11347.182600] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI
> > > RC0 - B601 (V6.01) 11/08/2018
> > > [11347.191370] pstate: a0c00009 (NzCv daif 㰃繐ε흾㯗
> >
> > Please verify whether the following patch is a valid alternative for your patch:
> >
> > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> > index ed34bfbc3844..745ffdda1bc1 100644
> > --- a/drivers/scsi/sd.c
> > +++ b/drivers/scsi/sd.c
> > @@ -1408,6 +1408,7 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> > {
> > struct scsi_disk *sdkp = scsi_disk(disk);
> > struct scsi_device *sdev = sdkp->device;
> > + struct request_queue *q = sdkp->disk->queue;
> >
> > SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
> >
> > @@ -1417,9 +1418,12 @@ static void sd_release(struct gendisk *disk, fmode_t mode)
> > }
> >
> > /*
> > - * XXX and what if there are packets in flight and this close()
> > - * XXX is followed by a "rmmod sd_mod"?
> > + * Wait until any requests that are in progress have completed.
> > + * This is necessary to avoid that e.g. scsi_end_request() crashes
> > + * due to scsi_disk_relase() clearing the disk->private_data pointer.
> > */
> > + blk_mq_freeze_queue(q);
> > + blk_mq_unfreeze_queue(q);
>
> It is over-kill to drain any requests here, what we want is to just
> drain any in-flight
> IO requests.
Not only over-kill, actually it can cause big performance issue, since any
block/scsi utility may trigger the freeze/unfreeze.
Thanks,
Ming Lei