hctx->driver_data is set to vq in virtblk_init_hctx(). However, vq is
freed on suspend and reallocated on resume, so hctx->driver_data is a
dangling pointer after resume. Accessing it is a use-after-free, which
crashes the kernel with a trace like the one below:
[ 22.428391] Call Trace:
[ 22.428899] <TASK>
[ 22.429339] virtqueue_add_split+0x3eb/0x620
[ 22.430035] ? __blk_mq_alloc_requests+0x17f/0x2d0
[ 22.430789] ? kvm_clock_get_cycles+0x14/0x30
[ 22.431496] virtqueue_add_sgs+0xad/0xd0
[ 22.432108] virtblk_add_req+0xe8/0x150
[ 22.432692] virtio_queue_rqs+0xeb/0x210
[ 22.433330] blk_mq_flush_plug_list+0x1b8/0x280
[ 22.434059] __blk_flush_plug+0xe1/0x140
[ 22.434853] blk_finish_plug+0x20/0x40
[ 22.435512] read_pages+0x20a/0x2e0
[ 22.436063] ? folio_add_lru+0x62/0xa0
[ 22.436652] page_cache_ra_unbounded+0x112/0x160
[ 22.437365] filemap_get_pages+0xe1/0x5b0
[ 22.437964] ? context_to_sid+0x70/0x100
[ 22.438580] ? sidtab_context_to_sid+0x32/0x400
[ 22.439979] filemap_read+0xcd/0x3d0
[ 22.440917] xfs_file_buffered_read+0x4a/0xc0
[ 22.441984] xfs_file_read_iter+0x65/0xd0
[ 22.442970] __kernel_read+0x160/0x2e0
[ 22.443921] bprm_execve+0x21b/0x640
[ 22.444809] do_execveat_common.isra.0+0x1a8/0x220
[ 22.446008] __x64_sys_execve+0x2d/0x40
[ 22.446920] do_syscall_64+0x37/0x90
[ 22.447773] entry_SYSCALL_64_after_hwframe+0x63/0xcd
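This patch fixes the issue by looking up vq from vblk->vqs (indexed by
hctx->queue_num) on each use instead of reading the pointer cached in
hctx->driver_data, and removes the now-unneeded virtblk_init_hctx().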
Fixes: 4e0400525691 ("virtio-blk: support polling I/O")
Cc: "Suwan Kim" <[email protected]>
Signed-off-by: Shigeru Yoshida <[email protected]>
---
drivers/block/virtio_blk.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6fc7850c2b0a..d756423e0059 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
}
}
+static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
+{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+
+ return vq;
+}
+
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
struct scatterlist hdr, status, *sgs[3];
@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
struct request *requeue_list = NULL;
rq_list_for_each_safe(rqlist, req, next) {
- struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
+ struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
bool kick;
if (!virtblk_prep_rq_batch(req)) {
@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
- struct virtio_blk_vq *vq = hctx->driver_data;
+ struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
return found;
}
-static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int hctx_idx)
-{
- struct virtio_blk *vblk = data;
- struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
-
- WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
- hctx->driver_data = vq;
- return 0;
-}
-
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.queue_rqs = virtio_queue_rqs,
.commit_rqs = virtio_commit_rqs,
- .init_hctx = virtblk_init_hctx,
.complete = virtblk_request_done,
.map_queues = virtblk_map_queues,
.poll = virtblk_poll,
--
2.37.1
On Thu, Aug 11, 2022 at 1:10 AM Shigeru Yoshida <[email protected]> wrote:
>
> hctx->driver_data is set to vq in virtblk_init_hctx(). However, vq is
> freed on suspend and reallocated on resume, so hctx->driver_data is a
> dangling pointer after resume. Accessing it is a use-after-free, which
> crashes the kernel with a trace like the one below:
>
> [ 22.428391] Call Trace:
> [ 22.428899] <TASK>
> [ 22.429339] virtqueue_add_split+0x3eb/0x620
> [ 22.430035] ? __blk_mq_alloc_requests+0x17f/0x2d0
> [ 22.430789] ? kvm_clock_get_cycles+0x14/0x30
> [ 22.431496] virtqueue_add_sgs+0xad/0xd0
> [ 22.432108] virtblk_add_req+0xe8/0x150
> [ 22.432692] virtio_queue_rqs+0xeb/0x210
> [ 22.433330] blk_mq_flush_plug_list+0x1b8/0x280
> [ 22.434059] __blk_flush_plug+0xe1/0x140
> [ 22.434853] blk_finish_plug+0x20/0x40
> [ 22.435512] read_pages+0x20a/0x2e0
> [ 22.436063] ? folio_add_lru+0x62/0xa0
> [ 22.436652] page_cache_ra_unbounded+0x112/0x160
> [ 22.437365] filemap_get_pages+0xe1/0x5b0
> [ 22.437964] ? context_to_sid+0x70/0x100
> [ 22.438580] ? sidtab_context_to_sid+0x32/0x400
> [ 22.439979] filemap_read+0xcd/0x3d0
> [ 22.440917] xfs_file_buffered_read+0x4a/0xc0
> [ 22.441984] xfs_file_read_iter+0x65/0xd0
> [ 22.442970] __kernel_read+0x160/0x2e0
> [ 22.443921] bprm_execve+0x21b/0x640
> [ 22.444809] do_execveat_common.isra.0+0x1a8/0x220
> [ 22.446008] __x64_sys_execve+0x2d/0x40
> [ 22.446920] do_syscall_64+0x37/0x90
> [ 22.447773] entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> This patch fixes this issue by getting vq from vblk, and removes
> virtblk_init_hctx().
>
> Fixes: 4e0400525691 ("virtio-blk: support polling I/O")
> Cc: "Suwan Kim" <[email protected]>
> Signed-off-by: Shigeru Yoshida <[email protected]>
> ---
> drivers/block/virtio_blk.c | 24 ++++++++++--------------
> 1 file changed, 10 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 6fc7850c2b0a..d756423e0059 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
> }
> }
>
> +static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> +{
> + struct virtio_blk *vblk = hctx->queue->queuedata;
> + struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> +
> + return vq;
> +}
> +
> static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
> {
> struct scatterlist hdr, status, *sgs[3];
> @@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
> struct request *requeue_list = NULL;
>
> rq_list_for_each_safe(rqlist, req, next) {
> - struct virtio_blk_vq *vq = req->mq_hctx->driver_data;
> + struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
> bool kick;
>
> if (!virtblk_prep_rq_batch(req)) {
> @@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
> static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> {
> struct virtio_blk *vblk = hctx->queue->queuedata;
> - struct virtio_blk_vq *vq = hctx->driver_data;
> + struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
> struct virtblk_req *vbr;
> unsigned long flags;
> unsigned int len;
> @@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> return found;
> }
>
> -static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> - unsigned int hctx_idx)
> -{
> - struct virtio_blk *vblk = data;
> - struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
> -
> - WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
> - hctx->driver_data = vq;
> - return 0;
> -}
> -
> static const struct blk_mq_ops virtio_mq_ops = {
> .queue_rq = virtio_queue_rq,
> .queue_rqs = virtio_queue_rqs,
> .commit_rqs = virtio_commit_rqs,
> - .init_hctx = virtblk_init_hctx,
> .complete = virtblk_request_done,
> .map_queues = virtblk_map_queues,
> .poll = virtblk_poll,
> --
> 2.37.1
>
Hi Shigeru,
It works well on my machine.
I missed the reallocation of vqs on freeze/restore situations.
Thanks for debugging this issue. Michael already committed it.
Regards,
Suwan Kim
On Thu, Aug 11, 2022 at 01:09:48AM +0900, Shigeru Yoshida wrote:
> hctx->driver_data is set to vq in virtblk_init_hctx(). However, vq is
> freed on suspend and reallocated on resume, so hctx->driver_data is a
> dangling pointer after resume. Accessing it is a use-after-free, which
> crashes the kernel with a trace like the one below:
>
> [ 22.428391] Call Trace:
> [ 22.428899] <TASK>
> [ 22.429339] virtqueue_add_split+0x3eb/0x620
> [ 22.430035] ? __blk_mq_alloc_requests+0x17f/0x2d0
> [ 22.430789] ? kvm_clock_get_cycles+0x14/0x30
> [ 22.431496] virtqueue_add_sgs+0xad/0xd0
> [ 22.432108] virtblk_add_req+0xe8/0x150
> [ 22.432692] virtio_queue_rqs+0xeb/0x210
> [ 22.433330] blk_mq_flush_plug_list+0x1b8/0x280
> [ 22.434059] __blk_flush_plug+0xe1/0x140
> [ 22.434853] blk_finish_plug+0x20/0x40
> [ 22.435512] read_pages+0x20a/0x2e0
> [ 22.436063] ? folio_add_lru+0x62/0xa0
> [ 22.436652] page_cache_ra_unbounded+0x112/0x160
> [ 22.437365] filemap_get_pages+0xe1/0x5b0
> [ 22.437964] ? context_to_sid+0x70/0x100
> [ 22.438580] ? sidtab_context_to_sid+0x32/0x400
> [ 22.439979] filemap_read+0xcd/0x3d0
> [ 22.440917] xfs_file_buffered_read+0x4a/0xc0
> [ 22.441984] xfs_file_read_iter+0x65/0xd0
> [ 22.442970] __kernel_read+0x160/0x2e0
> [ 22.443921] bprm_execve+0x21b/0x640
> [ 22.444809] do_execveat_common.isra.0+0x1a8/0x220
> [ 22.446008] __x64_sys_execve+0x2d/0x40
> [ 22.446920] do_syscall_64+0x37/0x90
> [ 22.447773] entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> This patch fixes this issue by getting vq from vblk, and removes
> virtblk_init_hctx().
>
> Fixes: 4e0400525691 ("virtio-blk: support polling I/O")
> Cc: "Suwan Kim" <[email protected]>
> Signed-off-by: Shigeru Yoshida <[email protected]>
> ---
> drivers/block/virtio_blk.c | 24 ++++++++++--------------
> 1 file changed, 10 insertions(+), 14 deletions(-)
Reviewed-by: Stefan Hajnoczi <[email protected]>
On 8/10/22 10:09 AM, Shigeru Yoshida wrote:
> +static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
> +{
> + struct virtio_blk *vblk = hctx->queue->queuedata;
> + struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
> +
> + return vq;
> +}
Would probably be cleaner as:
static struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
return &vblk->vqs[hctx->queue_num];
}
dropping the 'vq' variable, and also dropping the inline as the compiler
will work that out anyway.
Apart from that minor nit, looks good to me.
--
Jens Axboe