From: Bob Liu <bob.liu@oracle.com>
To: linux-kernel@vger.kernel.org
Cc: axboe@fb.com, hch@infradead.org, xen-devel@lists.xenproject.org,
    avanzini.arianna@gmail.com, david.vrabel@citrix.com, konrad.wilk@oracle.com,
    marcus.granado@citrix.com, roger.pau@citrix.com, Bob Liu <bob.liu@oracle.com>
Subject: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs
Date: Mon, 6 Jul 2015 17:56:48 +0800
Message-Id: <1436176608-18237-1-git-send-email-bob.liu@oracle.com>

From: Arianna Avanzini <avanzini.arianna@gmail.com>

This patch converts the xen-blkfront driver to the block multiqueue
(blk-mq) API. Only one hardware queue is used for now, so there is no
performance change.

The legacy non-mq code path is deleted completely, as was done for other
drivers such as virtio-blk, mtip32xx and nvme. The unnecessary holding of
info->io_lock when calling into the blk-mq APIs is also dropped.

Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
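Reviewer note (kept below the cut line so it stays out of the commit
message): the conversion follows the usual blk-mq driver pattern of this
kernel generation. For reference, here is a minimal self-contained sketch
of that pattern. The sketch_* names and the ring_full flag are invented
for illustration only; blkfront itself tests RING_FULL() and additionally
serializes ring access with info->io_lock, as the hunks below show.

	/*
	 * Illustrative sketch only -- not part of the patch. It shows the
	 * minimal blk-mq setup/submission/completion pattern this
	 * conversion follows. All sketch_* names are invented.
	 */
	#include <linux/blkdev.h>
	#include <linux/blk-mq.h>
	#include <linux/err.h>
	#include <linux/string.h>
	#include <linux/numa.h>

	struct sketch_dev {
		struct blk_mq_tag_set tag_set;
		struct request_queue *rq;
		bool ring_full;		/* stands in for RING_FULL() */
	};

	/*
	 * ->queue_rq() replaces the old request_fn: blk-mq hands over one
	 * prepared request at a time. Stopping the hw queue and returning
	 * BUSY makes the core hold requests until the driver restarts it.
	 */
	static int sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *qd)
	{
		struct sketch_dev *dev = hctx->queue->queuedata;

		blk_mq_start_request(qd->rq);

		if (dev->ring_full) {
			blk_mq_stop_hw_queue(hctx);
			return BLK_MQ_RQ_QUEUE_BUSY;
		}

		/* ... hand qd->rq to the hardware/backend here ... */
		return BLK_MQ_RQ_QUEUE_OK;
	}

	static struct blk_mq_ops sketch_mq_ops = {
		.queue_rq	= sketch_queue_rq,
		.map_queue	= blk_mq_map_queue,	/* still mandatory here */
	};

	/* Completion side, e.g. called from the interrupt handler. */
	static void sketch_complete(struct sketch_dev *dev,
				    struct request *req, int error)
	{
		req->errors = error;
		blk_mq_complete_request(req);	/* ends req with req->errors */

		/* Re-enable the hw queue if queue_rq() stopped it. */
		blk_mq_start_stopped_hw_queues(dev->rq, true);
	}

	static int sketch_init_queue(struct sketch_dev *dev, unsigned int depth)
	{
		int err;

		memset(&dev->tag_set, 0, sizeof(dev->tag_set));
		dev->tag_set.ops = &sketch_mq_ops;
		dev->tag_set.nr_hw_queues = 1;		/* single hw queue */
		dev->tag_set.queue_depth = depth;	/* e.g. the ring size */
		dev->tag_set.numa_node = NUMA_NO_NODE;
		dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
		dev->tag_set.driver_data = dev;

		err = blk_mq_alloc_tag_set(&dev->tag_set);
		if (err)
			return err;

		dev->rq = blk_mq_init_queue(&dev->tag_set);
		if (IS_ERR(dev->rq)) {
			blk_mq_free_tag_set(&dev->tag_set);
			return PTR_ERR(dev->rq);
		}
		dev->rq->queuedata = dev;
		return 0;
	}

The point to take away is the replacement of the old blk_stop_queue()/
blk_start_queue() flow control: when the backend is busy, ->queue_rq()
stops the hardware queue and returns BLK_MQ_RQ_QUEUE_BUSY so blk-mq holds
requests, and the completion path restarts the queue with
blk_mq_start_stopped_hw_queues().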
 drivers/block/xen-blkfront.c | 173 ++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 100 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6d89ed3..831a577 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -37,6 +37,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/hdreg.h>
 #include <linux/cdrom.h>
 #include <linux/module.h>
@@ -148,6 +149,7 @@ struct blkfront_info
 	unsigned int feature_persistent:1;
 	unsigned int max_indirect_segments;
 	int is_ready;
+	struct blk_mq_tag_set tag_set;
 };
 
 static unsigned int nr_minors;
@@ -616,54 +618,45 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 					!(info->feature_flush & REQ_FUA)));
 }
 
-/*
- * do_blkif_request
- *  read a block; request is in a request queue
- */
-static void do_blkif_request(struct request_queue *rq)
+static int blk_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+			   const struct blk_mq_queue_data *qd)
 {
-	struct blkfront_info *info = NULL;
-	struct request *req;
-	int queued;
-
-	pr_debug("Entered do_blkif_request\n");
-
-	queued = 0;
-
-	while ((req = blk_peek_request(rq)) != NULL) {
-		info = req->rq_disk->private_data;
-
-		if (RING_FULL(&info->ring))
-			goto wait;
-
-		blk_start_request(req);
+	struct blkfront_info *info = qd->rq->rq_disk->private_data;
+	int ret = BLK_MQ_RQ_QUEUE_OK;
 
-		if (blkif_request_flush_invalid(req, info)) {
-			__blk_end_request_all(req, -EOPNOTSUPP);
-			continue;
-		}
+	blk_mq_start_request(qd->rq);
+	spin_lock_irq(&info->io_lock);
+	if (RING_FULL(&info->ring)) {
+		spin_unlock_irq(&info->io_lock);
+		blk_mq_stop_hw_queue(hctx);
+		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		goto out;
+	}
 
-		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%u) [%s]\n",
-			 req, req->cmd, (unsigned long)blk_rq_pos(req),
-			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
-			 rq_data_dir(req) ? "write" : "read");
-
-		if (blkif_queue_request(req)) {
-			blk_requeue_request(rq, req);
-wait:
-			/* Avoid pointless unplugs. */
-			blk_stop_queue(rq);
-			break;
-		}
+	if (blkif_request_flush_invalid(qd->rq, info)) {
+		spin_unlock_irq(&info->io_lock);
+		ret = BLK_MQ_RQ_QUEUE_ERROR;
+		goto out;
+	}
 
-		queued++;
+	if (blkif_queue_request(qd->rq)) {
+		spin_unlock_irq(&info->io_lock);
+		blk_mq_stop_hw_queue(hctx);
+		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		goto out;
 	}
 
-	if (queued != 0)
-		flush_requests(info);
+	flush_requests(info);
+	spin_unlock_irq(&info->io_lock);
+out:
+	return ret;
 }
 
+static struct blk_mq_ops blkfront_mq_ops = {
+	.queue_rq = blk_mq_queue_rq,
+	.map_queue = blk_mq_map_queue,
+};
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 				unsigned int physical_sector_size,
 				unsigned int segments)
@@ -671,9 +664,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
 
-	rq = blk_init_queue(do_blkif_request, &info->io_lock);
-	if (rq == NULL)
+	memset(&info->tag_set, 0, sizeof(info->tag_set));
+	info->tag_set.ops = &blkfront_mq_ops;
+	info->tag_set.nr_hw_queues = 1;
+	info->tag_set.queue_depth = BLK_RING_SIZE(info);
+	info->tag_set.numa_node = NUMA_NO_NODE;
+	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	info->tag_set.cmd_size = 0;
+	info->tag_set.driver_data = info;
+
+	if (blk_mq_alloc_tag_set(&info->tag_set))
 		return -1;
+	rq = blk_mq_init_queue(&info->tag_set);
+	if (IS_ERR(rq)) {
+		blk_mq_free_tag_set(&info->tag_set);
+		return -1;
+	}
 
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
@@ -901,19 +907,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 static void xlvbd_release_gendisk(struct blkfront_info *info)
 {
 	unsigned int minor, nr_minors;
-	unsigned long flags;
 
 	if (info->rq == NULL)
 		return;
 
-	spin_lock_irqsave(&info->io_lock, flags);
-
 	/* No more blkif_request(). */
-	blk_stop_queue(info->rq);
+	blk_mq_stop_hw_queues(info->rq);
 
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
-	spin_unlock_irqrestore(&info->io_lock, flags);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
 	flush_work(&info->work);
@@ -925,6 +927,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	xlbd_release_minors(minor, nr_minors);
 
 	blk_cleanup_queue(info->rq);
+	blk_mq_free_tag_set(&info->tag_set);
 	info->rq = NULL;
 
 	put_disk(info->gd);
@@ -933,22 +936,23 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->io_lock, flags);
 	if (!RING_FULL(&info->ring)) {
-		/* Re-enable calldowns. */
-		blk_start_queue(info->rq);
-		/* Kick things off immediately. */
-		do_blkif_request(info->rq);
+		spin_unlock_irqrestore(&info->io_lock, flags);
+		blk_mq_start_stopped_hw_queues(info->rq, true);
+		return;
 	}
+	spin_unlock_irqrestore(&info->io_lock, flags);
 }
 
 static void blkif_restart_queue(struct work_struct *work)
 {
 	struct blkfront_info *info = container_of(work, struct blkfront_info, work);
 
-	spin_lock_irq(&info->io_lock);
 	if (info->connected == BLKIF_STATE_CONNECTED)
 		kick_pending_request_queues(info);
-	spin_unlock_irq(&info->io_lock);
 }
 
 static void blkif_free(struct blkfront_info *info, int suspend)
@@ -958,13 +962,13 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	int i, j, segs;
 
 	/* Prevent new requests being issued until we fix things up. */
-	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
 		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
 	/* No more blkif_request(). */
 	if (info->rq)
-		blk_stop_queue(info->rq);
+		blk_mq_stop_hw_queues(info->rq);
 
+	spin_lock_irq(&info->io_lock);
 	/* Remove all persistent grants */
 	if (!list_empty(&info->grants)) {
 		list_for_each_entry_safe(persistent_gnt, n,
@@ -1144,15 +1148,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_info *info = (struct blkfront_info *)dev_id;
-	int error;
-
-	spin_lock_irqsave(&info->io_lock, flags);
-
-	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
-		spin_unlock_irqrestore(&info->io_lock, flags);
+	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return IRQ_HANDLED;
-	}
 
+	spin_lock_irqsave(&info->io_lock, flags);
 again:
 	rp = info->ring.sring->rsp_prod;
 	rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -1185,37 +1185,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
 				queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
 			}
-			__blk_end_request_all(req, error);
+			blk_mq_complete_request(req);
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 			}
 			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
 				     info->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				error = -EOPNOTSUPP;
+				req->errors = -EOPNOTSUPP;
 			}
-			if (unlikely(error)) {
-				if (error == -EOPNOTSUPP)
-					error = 0;
+			if (unlikely(req->errors)) {
+				if (req->errors == -EOPNOTSUPP)
+					req->errors = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
 			}
@@ -1226,7 +1226,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 				"request: %x\n", bret->status);
 
-			__blk_end_request_all(req, error);
+			blk_mq_complete_request(req);
 			break;
 		default:
 			BUG();
@@ -1243,9 +1243,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	} else
 		info->ring.sring->rsp_event = i + 1;
 
-	kick_pending_request_queues(info);
-
 	spin_unlock_irqrestore(&info->io_lock, flags);
+	kick_pending_request_queues(info);
 
 	return IRQ_HANDLED;
 }
@@ -1555,32 +1554,8 @@ static int blkif_recover(struct blkfront_info *info)
 
 	kfree(copy);
 
-	/*
-	 * Empty the queue, this is important because we might have
-	 * requests in the queue with more segments than what we
-	 * can handle now.
-	 */
-	spin_lock_irq(&info->io_lock);
-	while ((req = blk_fetch_request(info->rq)) != NULL) {
-		if (req->cmd_flags &
-		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
-			list_add(&req->queuelist, &requests);
-			continue;
-		}
-		merge_bio.head = req->bio;
-		merge_bio.tail = req->biotail;
-		bio_list_merge(&bio_list, &merge_bio);
-		req->bio = NULL;
-		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
-			pr_alert("diskcache flush request found!\n");
-		__blk_end_request_all(req, 0);
-	}
-	spin_unlock_irq(&info->io_lock);
-
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
-	spin_lock_irq(&info->io_lock);
-
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
@@ -1591,9 +1566,9 @@ static int blkif_recover(struct blkfront_info *info)
 		/* Requeue pending requests (flush or discard) */
 		list_del_init(&req->queuelist);
 		BUG_ON(req->nr_phys_segments > segs);
-		blk_requeue_request(info->rq, req);
+		blk_mq_requeue_request(req);
 	}
-	spin_unlock_irq(&info->io_lock);
+	blk_mq_kick_requeue_list(info->rq);
 
 	while ((bio = bio_list_pop(&bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
@@ -1923,10 +1898,8 @@ static void blkfront_connect(struct blkfront_info *info)
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	/* Kick pending requests. */
-	spin_lock_irq(&info->io_lock);
 	info->connected = BLKIF_STATE_CONNECTED;
 	kick_pending_request_queues(info);
-	spin_unlock_irq(&info->io_lock);
 
 	add_disk(info->gd);
-- 
1.7.10.4