2013-10-25 13:07:42

by Christoph Hellwig

Subject: [PATCH 2/2] virtio_blk: blk-mq support

Switch virtio-blk from the dual support for old-style requests and bios
over to the blk-mq infrastructure. For now pretend to have 4 issue queues,
as Jens pulled that number out of his hair and it worked.

Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/block/virtio_blk.c | 312 +++++++++-----------------------------------
1 file changed, 65 insertions(+), 247 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5cdf88b..87f099d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>

#define PART_BITS 4

-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
static int major;
static DEFINE_IDA(vd_index_ida);

@@ -26,13 +25,11 @@ struct virtio_blk
{
struct virtio_device *vdev;
struct virtqueue *vq;
- wait_queue_head_t queue_wait;
+ spinlock_t vq_lock;

/* The disk structure for the kernel. */
struct gendisk *disk;

- mempool_t *pool;
-
/* Process context for config space updates */
struct work_struct config_work;

@@ -47,15 +44,11 @@ struct virtio_blk

/* Ida index - used to track minor number allocations. */
int index;
-
- /* Scatterlist: can be too big for stack. */
- struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
struct request *req;
- struct bio *bio;
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
struct work_struct work;
@@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
}
}

-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
- gfp_t gfp_mask)
-{
- struct virtblk_req *vbr;
-
- vbr = mempool_alloc(vblk->pool, gfp_mask);
- if (!vbr)
- return NULL;
-
- vbr->vblk = vblk;
- if (use_bio)
- sg_init_table(vbr->sg, vblk->sg_elems);
-
- return vbr;
-}
-
static int __virtblk_add_req(struct virtqueue *vq,
struct virtblk_req *vbr,
struct scatterlist *data_sg,
@@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
- struct virtio_blk *vblk = vbr->vblk;
- DEFINE_WAIT(wait);
- int ret;
-
- spin_lock_irq(vblk->disk->queue->queue_lock);
- while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
- have_data)) < 0)) {
- prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- io_schedule();
- spin_lock_irq(vblk->disk->queue->queue_lock);
-
- finish_wait(&vblk->queue_wait, &wait);
- }
-
- virtqueue_kick(vblk->vq);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
- vbr->flags |= VBLK_IS_FLUSH;
- vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
- vbr->out_hdr.sector = 0;
- vbr->out_hdr.ioprio = 0;
-
- virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
- struct bio *bio = vbr->bio;
- bool have_data;
-
- vbr->flags &= ~VBLK_IS_FLUSH;
- vbr->out_hdr.type = 0;
- vbr->out_hdr.sector = bio->bi_sector;
- vbr->out_hdr.ioprio = bio_prio(bio);
-
- if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
- have_data = true;
- if (bio->bi_rw & REQ_WRITE)
- vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
- else
- vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
- } else
- have_data = false;
-
- virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
- struct virtblk_req *vbr;
-
- vbr = container_of(work, struct virtblk_req, work);
-
- virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
- struct virtblk_req *vbr;
-
- vbr = container_of(work, struct virtblk_req, work);
-
- virtblk_bio_send_flush(vbr);
-}
-
static inline void virtblk_request_done(struct virtblk_req *vbr)
{
- struct virtio_blk *vblk = vbr->vblk;
struct request *req = vbr->req;
int error = virtblk_result(vbr);

@@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
req->errors = (error != 0);
}

- __blk_end_request_all(req, error);
- mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
-
- if (vbr->flags & VBLK_REQ_DATA) {
- /* Send out the actual write data */
- INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
- queue_work(virtblk_wq, &vbr->work);
- } else {
- bio_endio(vbr->bio, virtblk_result(vbr));
- mempool_free(vbr, vblk->pool);
- }
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
- struct virtio_blk *vblk = vbr->vblk;
-
- if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
- /* Send out a flush before end the bio */
- vbr->flags &= ~VBLK_REQ_DATA;
- INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
- queue_work(virtblk_wq, &vbr->work);
- } else {
- bio_endio(vbr->bio, virtblk_result(vbr));
- mempool_free(vbr, vblk->pool);
- }
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
- if (unlikely(vbr->flags & VBLK_IS_FLUSH))
- virtblk_bio_flush_done(vbr);
- else
- virtblk_bio_data_done(vbr);
+ blk_mq_end_io(req, error);
}

static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
- bool bio_done = false, req_done = false;
+ bool req_done = false;
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;

- spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+ spin_lock_irqsave(&vblk->vq_lock, flags);
do {
virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
- if (vbr->bio) {
- virtblk_bio_done(vbr);
- bio_done = true;
- } else {
- virtblk_request_done(vbr);
- req_done = true;
- }
+ virtblk_request_done(vbr);
+ req_done = true;
}
} while (!virtqueue_enable_cb(vq));
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
/* In case queue is stopped waiting for more buffers. */
if (req_done)
- blk_start_queue(vblk->disk->queue);
- spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
- if (bio_done)
- wake_up(&vblk->queue_wait);
+ blk_mq_start_stopped_hw_queues(vblk->disk->queue);
}

-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
- struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtblk_req *vbr = req->special;
+ unsigned long flags;
unsigned int num;
- struct virtblk_req *vbr;
+ const bool last = (req->cmd_flags & REQ_END) != 0;

- vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
- if (!vbr)
- /* When another request finishes we'll try again. */
- return false;
+ BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

vbr->req = req;
- vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
@@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
}
}

- num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+ num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
if (num) {
if (rq_data_dir(vbr->req) == WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
}

- if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
- mempool_free(vbr, vblk->pool);
- return false;
- }
-
- return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
- struct virtio_blk *vblk = q->queuedata;
- struct request *req;
- unsigned int issued = 0;
-
- while ((req = blk_peek_request(q)) != NULL) {
- BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
- /* If this request fails, stop queue and wait for something to
- finish to restart it. */
- if (!do_req(q, vblk, req)) {
- blk_stop_queue(q);
- break;
- }
- blk_start_request(req);
- issued++;
- }
-
- if (issued)
+ spin_lock_irqsave(&vblk->vq_lock, flags);
+ if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);
+ blk_mq_stop_hw_queue(hctx);
virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
- struct virtio_blk *vblk = q->queuedata;
- struct virtblk_req *vbr;
-
- BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
- vbr = virtblk_alloc_req(vblk, GFP_NOIO);
- if (!vbr) {
- bio_endio(bio, -ENOMEM);
- return;
+ return BLK_MQ_RQ_QUEUE_BUSY;
}
+ spin_unlock_irqrestore(&vblk->vq_lock, flags);

- vbr->bio = bio;
- vbr->flags = 0;
- if (bio->bi_rw & REQ_FLUSH)
- vbr->flags |= VBLK_REQ_FLUSH;
- if (bio->bi_rw & REQ_FUA)
- vbr->flags |= VBLK_REQ_FUA;
- if (bio->bi_size)
- vbr->flags |= VBLK_REQ_DATA;
-
- if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
- virtblk_bio_send_flush(vbr);
- else
- virtblk_bio_send_data(vbr);
+ if (last)
+ virtqueue_kick(vblk->vq);
+ return BLK_MQ_RQ_QUEUE_OK;
}

/* return id (s/n) string for *disk to *id_str
@@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR,
virtblk_cache_type_show, virtblk_cache_type_store);

+static struct blk_mq_ops virtio_mq_ops = {
+ .queue_rq = virtio_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .alloc_hctx = blk_mq_alloc_single_hw_queue,
+ .free_hctx = blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+ .ops = &virtio_mq_ops,
+ .nr_hw_queues = 4,
+ .queue_depth = 64,
+ .numa_node = NUMA_NO_NODE,
+ .flags = BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+ struct request *rq, unsigned int nr)
+{
+ struct virtio_blk *vblk = data;
+ struct virtblk_req *vbr = rq->special;
+
+ sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
static int virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
struct request_queue *q;
int err, index;
- int pool_size;

u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
@@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev)

/* We need an extra sg elements at head and tail. */
sg_elems += 2;
- vdev->priv = vblk = kmalloc(sizeof(*vblk) +
- sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+ vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
if (!vblk) {
err = -ENOMEM;
goto out_free_index;
}

- init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
- sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock);

INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk);
if (err)
goto out_free_vblk;
-
- pool_size = sizeof(struct virtblk_req);
- if (use_bio)
- pool_size += sizeof(struct scatterlist) * sg_elems;
- vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
- if (!vblk->pool) {
- err = -ENOMEM;
- goto out_free_vq;
- }
+ spin_lock_init(&vblk->vq_lock);

/* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS);
if (!vblk->disk) {
err = -ENOMEM;
- goto out_mempool;
+ goto out_free_vq;
}

- q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+ virtio_mq_reg.cmd_size =
+ sizeof(struct virtblk_req) +
+ sizeof(struct scatterlist) * sg_elems;
+
+ q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
}

- if (use_bio)
- blk_queue_make_request(q, virtblk_make_request);
+ blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
q->queuedata = vblk;

virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -857,8 +683,6 @@ out_del_disk:
blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
put_disk(vblk->disk);
-out_mempool:
- mempool_destroy(vblk->pool);
out_free_vq:
vdev->config->del_vqs(vdev);
out_free_vblk:
@@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev)

refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk);
- mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev);
kfree(vblk);

@@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev)

flush_work(&vblk->config_work);

- spin_lock_irq(vblk->disk->queue->queue_lock);
- blk_stop_queue(vblk->disk->queue);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- blk_sync_queue(vblk->disk->queue);
+ blk_mq_stop_hw_queues(vblk->disk->queue);

vdev->config->del_vqs(vdev);
return 0;
@@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev)

vblk->config_enable = true;
ret = init_vq(vdev->priv);
- if (!ret) {
- spin_lock_irq(vblk->disk->queue->queue_lock);
- blk_start_queue(vblk->disk->queue);
- spin_unlock_irq(vblk->disk->queue->queue_lock);
- }
+ if (!ret)
+ blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
return ret;
}
#endif
--
1.7.10.4


2013-10-25 13:51:58

by Jens Axboe

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

On Fri, Oct 25 2013, Christoph Hellwig wrote:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to the blk-mq infrastructure. For now pretend to have 4 issue queues,
> as Jens pulled that number out of his hair and it worked.

Updated the blk-mq/drivers branch to carry this version.

--
Jens Axboe

2013-10-25 16:06:57

by Asias He

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

On Fri, Oct 25, 2013 at 06:05:01AM -0700, Christoph Hellwig wrote:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to the blk-mq infrastructure. For now pretend to have 4 issue queues,
> as Jens pulled that number out of his hair and it worked.
>
> [...]
>
> struct virtblk_req
> {
> struct request *req;
> - struct bio *bio;
> struct virtio_blk_outhdr out_hdr;
> struct virtio_scsi_inhdr in_hdr;
> struct work_struct work;

We can drop more things:

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 87f099d..c5ca368 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -51,20 +51,10 @@ struct virtblk_req
struct request *req;
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
- struct work_struct work;
- struct virtio_blk *vblk;
- int flags;
u8 status;
struct scatterlist sg[];
};

-enum {
- VBLK_IS_FLUSH = 1,
- VBLK_REQ_FLUSH = 2,
- VBLK_REQ_DATA = 4,
- VBLK_REQ_FUA = 8,
-};
-
static inline int virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {

> [...]

--
Asias

2013-10-28 03:36:02

by Rusty Russell

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

Christoph Hellwig <[email protected]> writes:
> Switch virtio-blk from the dual support for old-style requests and bios
> over to the blk-mq infrastructure. For now pretend to have 4 issue queues,
> as Jens pulled that number out of his hair and it worked.

Let's pretend I'm stupid.

We don't actually have multiple queues through to the host, but we're
pretending to, because it makes the block layer go faster?

Do I want to know *why* it's faster? Or should I look the other way?

Thanks,
Rusty.

> [...]

2013-10-28 08:52:09

by Christoph Hellwig

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
> Let's pretend I'm stupid.
>
> We don't actually have multiple queues through to the host, but we're
> pretending to, because it makes the block layer go faster?
>
> Do I want to know *why* it's faster? Or should I look the other way?

You shouldn't. As to how multiple queues benefit here I'd like to defer
to Jens; given the single workqueue I don't really know where to look here.

The real benefit that unfortunately wasn't obvious from the description
is that even with just a single queue the blk-multiqueue infrastructure
will be a lot faster, because it is designed in a much more streamlined
fashion and avoids lots of lock roundtrips, both during submission itself
and between submission and completion. Back when I tried to get virtio-blk
to perform well on high-end flash (the work that Asias took over later),
the queue_lock contention was the major issue in virtio-blk, and this
patch gets rid of that even with a single queue.
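
To make that concrete, here is a rough sketch of a minimal single-queue
registration against the blk-mq API used in the patch above (illustrative
only, not compile-tested; everything named example_* is made up):

	static int example_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
	{
		/* map the request and hand it to the device here,
		   without ever taking queue_lock */
		return BLK_MQ_RQ_QUEUE_OK;
	}

	static struct blk_mq_ops example_mq_ops = {
		.queue_rq	= example_queue_rq,
		.map_queue	= blk_mq_map_queue,
		.alloc_hctx	= blk_mq_alloc_single_hw_queue,
		.free_hctx	= blk_mq_free_single_hw_queue,
	};

	static struct blk_mq_reg example_mq_reg = {
		.ops		= &example_mq_ops,
		.nr_hw_queues	= 1,	/* one hw queue already avoids queue_lock */
		.queue_depth	= 64,
		.numa_node	= NUMA_NO_NODE,
		.flags		= BLK_MQ_F_SHOULD_MERGE,
	};

	/* per-request driver data is preallocated via cmd_size, so the
	   hot path needs no mempool and no GFP_ATOMIC allocations */
	example_mq_reg.cmd_size = sizeof(struct example_req);
	q = blk_mq_init_queue(&example_mq_reg, driver_data);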

A good example is the set of patches from Nick that move scsi drivers
which only support a single queue over to the infrastructure. Even that
gave over a 10-fold improvement over the old code.

Unfortunately I do not have access to this kind of hardware at the
moment, but I'd love to see if Asias or anyone at Red Hat could redo
those old numbers.

2013-10-29 21:34:10

by Jens Axboe

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

On 10/28/2013 02:52 AM, Christoph Hellwig wrote:
> On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
>> Let's pretend I'm stupid.
>>
>> We don't actually have multiple queues through to the host, but we're
>> pretending to, because it makes the block layer go faster?
>>
>> Do I want to know *why* it's faster? Or should I look the other way?
>
> You shouldn't. As to how multiple queues benefit here I'd like to defer
> to Jens; given the single workqueue I don't really know where to look here.

The 4 was chosen to "have some number of multiple queues" and to be able
to exercise that part; no real performance testing was done by me after
the implementation to verify whether it was faster at 1, 2, 4, or some
other count. But it was useful for that! For merging, we can easily just
make it 1, since that's the most logical transformation. I can set some
time aside to play with multiple queues and see if we gain anything, but
that can be done post merge.

> The real benefit that unfortunately wasn't obvious from the description
> is that even with just a single queue the blk-multiqueue infrastructure
> will be a lot faster, because it is designed in a much more streamlined
> fashion and avoids lots of lock roundtrips, both during submission itself
> and between submission and completion. Back when I tried to get virtio-blk
> to perform well on high-end flash (the work that Asias took over later),
> the queue_lock contention was the major issue in virtio-blk, and this
> patch gets rid of that even with a single queue.
>
> A good example is the set of patches from Nick that move scsi drivers
> which only support a single queue over to the infrastructure. Even that
> gave over a 10-fold improvement over the old code.
>
> Unfortunately I do not have access to this kind of hardware at the
> moment, but I'd love to see if Asias or anyone at Red Hat could redo
> those old numbers.

I've got a variety of fast devices, so should be able to run that.
Asias, let me know what your position is, it'd be great to have
independent results.

--
Jens Axboe

2013-10-30 08:52:00

by Asias He

Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

Hello Jens,

On Tue, Oct 29, 2013 at 03:34:01PM -0600, Jens Axboe wrote:
> On 10/28/2013 02:52 AM, Christoph Hellwig wrote:
> > On Mon, Oct 28, 2013 at 01:17:54PM +1030, Rusty Russell wrote:
> >> Let's pretend I'm stupid.
> >>
> >> We don't actually have multiple queues through to the host, but we're
> >> pretending to, because it makes the block layer go faster?
> >>
> >> Do I want to know *why* it's faster? Or should I look the other way?
> >
> > You shouldn't. As to how multiple queues benefit here I'd like to defer
> > to Jens; given the single workqueue I don't really know where to look here.
>
> The 4 was chosen to "have some number of multiple queues" and to be able
> to exercise that part; no real performance testing was done by me after
> the implementation to verify whether it was faster at 1, 2, 4, or some
> other count. But it was useful for that! For merging, we can easily just
> make it 1, since that's the most logical transformation. I can set some
> time aside to play with multiple queues and see if we gain anything, but
> that can be done post merge.
>
> > [...]
> >
> > Unfortunately I do not have access to this kind of hardware at the
> > moment, but I'd love to see if Asias or anyone at Red Hat could redo
> > those old numbers.
>
> I've got a variety of fast devices, so should be able to run that.
> Asias, let me know what your position is, it'd be great to have
> independent results.

I'd love to run the test, but I do not have fast devices around. It
would be nice if Nick could give it a try ;-)

1) Set .nr_hw_queues to 1 instead of 4 for now, as sketched below.
2) Drop some more unused code, per the other mail I sent.
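
For 1), the registration template would then look like this (a sketch,
untested, with only the one field changed from this patch):

	static struct blk_mq_reg virtio_mq_reg = {
		.ops = &virtio_mq_ops,
		.nr_hw_queues = 1,	/* only one virtqueue to the host anyway */
		.queue_depth = 64,
		.numa_node = NUMA_NO_NODE,
		.flags = BLK_MQ_F_SHOULD_MERGE,
	};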

Otherwise, feel free to add:

Acked-by: Asias He <[email protected]>


--
Asias