Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753658Ab3JYQG5 (ORCPT ); Fri, 25 Oct 2013 12:06:57 -0400 Received: from mx1.redhat.com ([209.132.183.28]:31455 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751459Ab3JYQG4 (ORCPT ); Fri, 25 Oct 2013 12:06:56 -0400 Date: Sat, 26 Oct 2013 00:06:13 +0800 From: Asias He To: Christoph Hellwig Cc: Jens Axboe , Rusty Russell , linux-kernel@vger.kernel.org Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support Message-ID: <20131025160613.GA14191@hj.localdomain> References: <20131025130459.730903511@bombadil.infradead.org> <20131025130735.637242058@bombadil.infradead.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20131025130735.637242058@bombadil.infradead.org> User-Agent: Mutt/1.5.21 (2010-09-15) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 15235 Lines: 537 On Fri, Oct 25, 2013 at 06:05:01AM -0700, Christoph Hellwig wrote: > Switch virtio-blk from the dual support for old-style requests and bios > to use the block-multiqueue. For now pretend to have 4 issue queues > as Jens pulled that out of his hair and it worked. 
> > Signed-off-by: Jens Axboe > Signed-off-by: Christoph Hellwig > --- > drivers/block/virtio_blk.c | 312 +++++++++----------------------------------- > 1 file changed, 65 insertions(+), 247 deletions(-) > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c > index 5cdf88b..87f099d 100644 > --- a/drivers/block/virtio_blk.c > +++ b/drivers/block/virtio_blk.c > @@ -11,12 +11,11 @@ > #include > #include > #include > +#include > +#include > > #define PART_BITS 4 > > -static bool use_bio; > -module_param(use_bio, bool, S_IRUGO); > - > static int major; > static DEFINE_IDA(vd_index_ida); > > @@ -26,13 +25,11 @@ struct virtio_blk > { > struct virtio_device *vdev; > struct virtqueue *vq; > - wait_queue_head_t queue_wait; > + spinlock_t vq_lock; > > /* The disk structure for the kernel. */ > struct gendisk *disk; > > - mempool_t *pool; > - > /* Process context for config space updates */ > struct work_struct config_work; > > @@ -47,15 +44,11 @@ struct virtio_blk > > /* Ida index - used to track minor number allocations. */ > int index; > - > - /* Scatterlist: can be too big for stack. 
*/ > - struct scatterlist sg[/*sg_elems*/]; > }; > > struct virtblk_req > { > struct request *req; > - struct bio *bio; > struct virtio_blk_outhdr out_hdr; > struct virtio_scsi_inhdr in_hdr; > struct work_struct work; We can drop more things: diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 87f099d..c5ca368 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -51,20 +51,10 @@ struct virtblk_req struct request *req; struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; - struct work_struct work; - struct virtio_blk *vblk; - int flags; u8 status; struct scatterlist sg[]; }; -enum { - VBLK_IS_FLUSH = 1, - VBLK_REQ_FLUSH = 2, - VBLK_REQ_DATA = 4, - VBLK_REQ_FUA = 8, -}; - static inline int virtblk_result(struct virtblk_req *vbr) { switch (vbr->status) { > @@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr) > } > } > > -static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk, > - gfp_t gfp_mask) > -{ > - struct virtblk_req *vbr; > - > - vbr = mempool_alloc(vblk->pool, gfp_mask); > - if (!vbr) > - return NULL; > - > - vbr->vblk = vblk; > - if (use_bio) > - sg_init_table(vbr->sg, vblk->sg_elems); > - > - return vbr; > -} > - > static int __virtblk_add_req(struct virtqueue *vq, > struct virtblk_req *vbr, > struct scatterlist *data_sg, > @@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq, > return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); > } > > -static void virtblk_add_req(struct virtblk_req *vbr, bool have_data) > -{ > - struct virtio_blk *vblk = vbr->vblk; > - DEFINE_WAIT(wait); > - int ret; > - > - spin_lock_irq(vblk->disk->queue->queue_lock); > - while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg, > - have_data)) < 0)) { > - prepare_to_wait_exclusive(&vblk->queue_wait, &wait, > - TASK_UNINTERRUPTIBLE); > - > - spin_unlock_irq(vblk->disk->queue->queue_lock); > - io_schedule(); > - 
spin_lock_irq(vblk->disk->queue->queue_lock); > - > - finish_wait(&vblk->queue_wait, &wait); > - } > - > - virtqueue_kick(vblk->vq); > - spin_unlock_irq(vblk->disk->queue->queue_lock); > -} > - > -static void virtblk_bio_send_flush(struct virtblk_req *vbr) > -{ > - vbr->flags |= VBLK_IS_FLUSH; > - vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; > - vbr->out_hdr.sector = 0; > - vbr->out_hdr.ioprio = 0; > - > - virtblk_add_req(vbr, false); > -} > - > -static void virtblk_bio_send_data(struct virtblk_req *vbr) > -{ > - struct virtio_blk *vblk = vbr->vblk; > - struct bio *bio = vbr->bio; > - bool have_data; > - > - vbr->flags &= ~VBLK_IS_FLUSH; > - vbr->out_hdr.type = 0; > - vbr->out_hdr.sector = bio->bi_sector; > - vbr->out_hdr.ioprio = bio_prio(bio); > - > - if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) { > - have_data = true; > - if (bio->bi_rw & REQ_WRITE) > - vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; > - else > - vbr->out_hdr.type |= VIRTIO_BLK_T_IN; > - } else > - have_data = false; > - > - virtblk_add_req(vbr, have_data); > -} > - > -static void virtblk_bio_send_data_work(struct work_struct *work) > -{ > - struct virtblk_req *vbr; > - > - vbr = container_of(work, struct virtblk_req, work); > - > - virtblk_bio_send_data(vbr); > -} > - > -static void virtblk_bio_send_flush_work(struct work_struct *work) > -{ > - struct virtblk_req *vbr; > - > - vbr = container_of(work, struct virtblk_req, work); > - > - virtblk_bio_send_flush(vbr); > -} > - > static inline void virtblk_request_done(struct virtblk_req *vbr) > { > - struct virtio_blk *vblk = vbr->vblk; > struct request *req = vbr->req; > int error = virtblk_result(vbr); > > @@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr) > req->errors = (error != 0); > } > > - __blk_end_request_all(req, error); > - mempool_free(vbr, vblk->pool); > -} > - > -static inline void virtblk_bio_flush_done(struct virtblk_req *vbr) > -{ > - struct virtio_blk *vblk = vbr->vblk; > - > - if (vbr->flags & 
VBLK_REQ_DATA) { > - /* Send out the actual write data */ > - INIT_WORK(&vbr->work, virtblk_bio_send_data_work); > - queue_work(virtblk_wq, &vbr->work); > - } else { > - bio_endio(vbr->bio, virtblk_result(vbr)); > - mempool_free(vbr, vblk->pool); > - } > -} > - > -static inline void virtblk_bio_data_done(struct virtblk_req *vbr) > -{ > - struct virtio_blk *vblk = vbr->vblk; > - > - if (unlikely(vbr->flags & VBLK_REQ_FUA)) { > - /* Send out a flush before end the bio */ > - vbr->flags &= ~VBLK_REQ_DATA; > - INIT_WORK(&vbr->work, virtblk_bio_send_flush_work); > - queue_work(virtblk_wq, &vbr->work); > - } else { > - bio_endio(vbr->bio, virtblk_result(vbr)); > - mempool_free(vbr, vblk->pool); > - } > -} > - > -static inline void virtblk_bio_done(struct virtblk_req *vbr) > -{ > - if (unlikely(vbr->flags & VBLK_IS_FLUSH)) > - virtblk_bio_flush_done(vbr); > - else > - virtblk_bio_data_done(vbr); > + blk_mq_end_io(req, error); > } > > static void virtblk_done(struct virtqueue *vq) > { > struct virtio_blk *vblk = vq->vdev->priv; > - bool bio_done = false, req_done = false; > + bool req_done = false; > struct virtblk_req *vbr; > unsigned long flags; > unsigned int len; > > - spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); > + spin_lock_irqsave(&vblk->vq_lock, flags); > do { > virtqueue_disable_cb(vq); > while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { > - if (vbr->bio) { > - virtblk_bio_done(vbr); > - bio_done = true; > - } else { > - virtblk_request_done(vbr); > - req_done = true; > - } > + virtblk_request_done(vbr); > + req_done = true; > } > } while (!virtqueue_enable_cb(vq)); > + spin_unlock_irqrestore(&vblk->vq_lock, flags); > + > /* In case queue is stopped waiting for more buffers. 
*/ > if (req_done) > - blk_start_queue(vblk->disk->queue); > - spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); > - > - if (bio_done) > - wake_up(&vblk->queue_wait); > + blk_mq_start_stopped_hw_queues(vblk->disk->queue); > } > > -static bool do_req(struct request_queue *q, struct virtio_blk *vblk, > - struct request *req) > +static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) > { > + struct virtio_blk *vblk = hctx->queue->queuedata; > + struct virtblk_req *vbr = req->special; > + unsigned long flags; > unsigned int num; > - struct virtblk_req *vbr; > + const bool last = (req->cmd_flags & REQ_END) != 0; > > - vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); > - if (!vbr) > - /* When another request finishes we'll try again. */ > - return false; > + BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); > > vbr->req = req; > - vbr->bio = NULL; > if (req->cmd_flags & REQ_FLUSH) { > vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; > vbr->out_hdr.sector = 0; > @@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, > } > } > > - num = blk_rq_map_sg(q, vbr->req, vblk->sg); > + num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); > if (num) { > if (rq_data_dir(vbr->req) == WRITE) > vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; > @@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, > vbr->out_hdr.type |= VIRTIO_BLK_T_IN; > } > > - if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) { > - mempool_free(vbr, vblk->pool); > - return false; > - } > - > - return true; > -} > - > -static void virtblk_request(struct request_queue *q) > -{ > - struct virtio_blk *vblk = q->queuedata; > - struct request *req; > - unsigned int issued = 0; > - > - while ((req = blk_peek_request(q)) != NULL) { > - BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); > - > - /* If this request fails, stop queue and wait for something to > - finish to restart it. 
*/ > - if (!do_req(q, vblk, req)) { > - blk_stop_queue(q); > - break; > - } > - blk_start_request(req); > - issued++; > - } > - > - if (issued) > + spin_lock_irqsave(&vblk->vq_lock, flags); > + if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) { > + spin_unlock_irqrestore(&vblk->vq_lock, flags); > + blk_mq_stop_hw_queue(hctx); > virtqueue_kick(vblk->vq); > -} > - > -static void virtblk_make_request(struct request_queue *q, struct bio *bio) > -{ > - struct virtio_blk *vblk = q->queuedata; > - struct virtblk_req *vbr; > - > - BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems); > - > - vbr = virtblk_alloc_req(vblk, GFP_NOIO); > - if (!vbr) { > - bio_endio(bio, -ENOMEM); > - return; > + return BLK_MQ_RQ_QUEUE_BUSY; > } > + spin_unlock_irqrestore(&vblk->vq_lock, flags); > > - vbr->bio = bio; > - vbr->flags = 0; > - if (bio->bi_rw & REQ_FLUSH) > - vbr->flags |= VBLK_REQ_FLUSH; > - if (bio->bi_rw & REQ_FUA) > - vbr->flags |= VBLK_REQ_FUA; > - if (bio->bi_size) > - vbr->flags |= VBLK_REQ_DATA; > - > - if (unlikely(vbr->flags & VBLK_REQ_FLUSH)) > - virtblk_bio_send_flush(vbr); > - else > - virtblk_bio_send_data(vbr); > + if (last) > + virtqueue_kick(vblk->vq); > + return BLK_MQ_RQ_QUEUE_OK; > } > > /* return id (s/n) string for *disk to *id_str > @@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw = > __ATTR(cache_type, S_IRUGO|S_IWUSR, > virtblk_cache_type_show, virtblk_cache_type_store); > > +static struct blk_mq_ops virtio_mq_ops = { > + .queue_rq = virtio_queue_rq, > + .map_queue = blk_mq_map_queue, > + .alloc_hctx = blk_mq_alloc_single_hw_queue, > + .free_hctx = blk_mq_free_single_hw_queue, > +}; > + > +static struct blk_mq_reg virtio_mq_reg = { > + .ops = &virtio_mq_ops, > + .nr_hw_queues = 4, > + .queue_depth = 64, > + .numa_node = NUMA_NO_NODE, > + .flags = BLK_MQ_F_SHOULD_MERGE, > +}; > + > +static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, > + struct request *rq, unsigned int nr) > +{ > + struct virtio_blk 
*vblk = data; > + struct virtblk_req *vbr = rq->special; > + > + sg_init_table(vbr->sg, vblk->sg_elems); > +} > + > static int virtblk_probe(struct virtio_device *vdev) > { > struct virtio_blk *vblk; > struct request_queue *q; > int err, index; > - int pool_size; > > u64 cap; > u32 v, blk_size, sg_elems, opt_io_size; > @@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev) > > /* We need an extra sg elements at head and tail. */ > sg_elems += 2; > - vdev->priv = vblk = kmalloc(sizeof(*vblk) + > - sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); > + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); > if (!vblk) { > err = -ENOMEM; > goto out_free_index; > } > > - init_waitqueue_head(&vblk->queue_wait); > vblk->vdev = vdev; > vblk->sg_elems = sg_elems; > - sg_init_table(vblk->sg, vblk->sg_elems); > mutex_init(&vblk->config_lock); > > INIT_WORK(&vblk->config_work, virtblk_config_changed_work); > @@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev) > err = init_vq(vblk); > if (err) > goto out_free_vblk; > - > - pool_size = sizeof(struct virtblk_req); > - if (use_bio) > - pool_size += sizeof(struct scatterlist) * sg_elems; > - vblk->pool = mempool_create_kmalloc_pool(1, pool_size); > - if (!vblk->pool) { > - err = -ENOMEM; > - goto out_free_vq; > - } > + spin_lock_init(&vblk->vq_lock); > > /* FIXME: How many partitions? How long is a piece of string? 
*/ > vblk->disk = alloc_disk(1 << PART_BITS); > if (!vblk->disk) { > err = -ENOMEM; > - goto out_mempool; > + goto out_free_vq; > } > > - q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); > + virtio_mq_reg.cmd_size = > + sizeof(struct virtblk_req) + > + sizeof(struct scatterlist) * sg_elems; > + > + q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk); > if (!q) { > err = -ENOMEM; > goto out_put_disk; > } > > - if (use_bio) > - blk_queue_make_request(q, virtblk_make_request); > + blk_mq_init_commands(q, virtblk_init_vbr, vblk); > + > q->queuedata = vblk; > > virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); > @@ -857,8 +683,6 @@ out_del_disk: > blk_cleanup_queue(vblk->disk->queue); > out_put_disk: > put_disk(vblk->disk); > -out_mempool: > - mempool_destroy(vblk->pool); > out_free_vq: > vdev->config->del_vqs(vdev); > out_free_vblk: > @@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev) > > refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); > put_disk(vblk->disk); > - mempool_destroy(vblk->pool); > vdev->config->del_vqs(vdev); > kfree(vblk); > > @@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev) > > flush_work(&vblk->config_work); > > - spin_lock_irq(vblk->disk->queue->queue_lock); > - blk_stop_queue(vblk->disk->queue); > - spin_unlock_irq(vblk->disk->queue->queue_lock); > - blk_sync_queue(vblk->disk->queue); > + blk_mq_stop_hw_queues(vblk->disk->queue); > > vdev->config->del_vqs(vdev); > return 0; > @@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev) > > vblk->config_enable = true; > ret = init_vq(vdev->priv); > - if (!ret) { > - spin_lock_irq(vblk->disk->queue->queue_lock); > - blk_start_queue(vblk->disk->queue); > - spin_unlock_irq(vblk->disk->queue->queue_lock); > - } > + if (!ret) > + blk_mq_start_stopped_hw_queues(vblk->disk->queue); > + > return ret; > } > #endif > -- > 1.7.10.4 > > -- Asias -- To unsubscribe from this list: send 
the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/