2015-07-06 09:57:37

by Bob Liu

[permalink] [raw]
Subject: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs

From: Arianna Avanzini <[email protected]>

This patch converts the xen-blkfront driver to use the block multiqueue APIs.
Only one hardware queue is used for now, so there is no performance change.

The legacy non-mq code was deleted completely, matching what other drivers
such as virtio, mtip, and nvme have done.

Also dropped unnecessary holding of info->io_lock when calling into blk-mq APIs.

Signed-off-by: Arianna Avanzini <[email protected]>
Signed-off-by: Bob Liu <[email protected]>
---
drivers/block/xen-blkfront.c | 173 ++++++++++++++++++------------------------
1 file changed, 73 insertions(+), 100 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6d89ed3..831a577 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -37,6 +37,7 @@

#include <linux/interrupt.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
#include <linux/module.h>
@@ -148,6 +149,7 @@ struct blkfront_info
unsigned int feature_persistent:1;
unsigned int max_indirect_segments;
int is_ready;
+ struct blk_mq_tag_set tag_set;
};

static unsigned int nr_minors;
@@ -616,54 +618,45 @@ static inline bool blkif_request_flush_invalid(struct request *req,
!(info->feature_flush & REQ_FUA)));
}

-/*
- * do_blkif_request
- * read a block; request is in a request queue
- */
-static void do_blkif_request(struct request_queue *rq)
+static int blk_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *qd)
{
- struct blkfront_info *info = NULL;
- struct request *req;
- int queued;
-
- pr_debug("Entered do_blkif_request\n");
-
- queued = 0;
-
- while ((req = blk_peek_request(rq)) != NULL) {
- info = req->rq_disk->private_data;
-
- if (RING_FULL(&info->ring))
- goto wait;
-
- blk_start_request(req);
+ struct blkfront_info *info = qd->rq->rq_disk->private_data;
+ int ret = BLK_MQ_RQ_QUEUE_OK;

- if (blkif_request_flush_invalid(req, info)) {
- __blk_end_request_all(req, -EOPNOTSUPP);
- continue;
- }
+ blk_mq_start_request(qd->rq);
+ spin_lock_irq(&info->io_lock);
+ if (RING_FULL(&info->ring)) {
+ spin_unlock_irq(&info->io_lock);
+ blk_mq_stop_hw_queue(hctx);
+ ret = BLK_MQ_RQ_QUEUE_BUSY;
+ goto out;
+ }

- pr_debug("do_blk_req %p: cmd %p, sec %lx, "
- "(%u/%u) [%s]\n",
- req, req->cmd, (unsigned long)blk_rq_pos(req),
- blk_rq_cur_sectors(req), blk_rq_sectors(req),
- rq_data_dir(req) ? "write" : "read");
-
- if (blkif_queue_request(req)) {
- blk_requeue_request(rq, req);
-wait:
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- break;
- }
+ if (blkif_request_flush_invalid(qd->rq, info)) {
+ spin_unlock_irq(&info->io_lock);
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
+ goto out;
+ }

- queued++;
+ if (blkif_queue_request(qd->rq)) {
+ spin_unlock_irq(&info->io_lock);
+ blk_mq_stop_hw_queue(hctx);
+ ret = BLK_MQ_RQ_QUEUE_BUSY;
+ goto out;
}

- if (queued != 0)
- flush_requests(info);
+ flush_requests(info);
+ spin_unlock_irq(&info->io_lock);
+out:
+ return ret;
}

+static struct blk_mq_ops blkfront_mq_ops = {
+ .queue_rq = blk_mq_queue_rq,
+ .map_queue = blk_mq_map_queue,
+};
+
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
unsigned int physical_sector_size,
unsigned int segments)
@@ -671,9 +664,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
struct request_queue *rq;
struct blkfront_info *info = gd->private_data;

- rq = blk_init_queue(do_blkif_request, &info->io_lock);
- if (rq == NULL)
+ memset(&info->tag_set, 0, sizeof(info->tag_set));
+ info->tag_set.ops = &blkfront_mq_ops;
+ info->tag_set.nr_hw_queues = 1;
+ info->tag_set.queue_depth = BLK_RING_SIZE(info);
+ info->tag_set.numa_node = NUMA_NO_NODE;
+ info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ info->tag_set.cmd_size = 0;
+ info->tag_set.driver_data = info;
+
+ if (blk_mq_alloc_tag_set(&info->tag_set))
+ return -1;
+ rq = blk_mq_init_queue(&info->tag_set);
+ if (IS_ERR(rq)) {
+ blk_mq_free_tag_set(&info->tag_set);
return -1;
+ }

queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);

@@ -901,19 +907,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
unsigned int minor, nr_minors;
- unsigned long flags;

if (info->rq == NULL)
return;

- spin_lock_irqsave(&info->io_lock, flags);
-
/* No more blkif_request(). */
- blk_stop_queue(info->rq);
+ blk_mq_stop_hw_queues(info->rq);

/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irqrestore(&info->io_lock, flags);

/* Flush gnttab callback work. Must be done with no locks held. */
flush_work(&info->work);
@@ -925,6 +927,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
xlbd_release_minors(minor, nr_minors);

blk_cleanup_queue(info->rq);
+ blk_mq_free_tag_set(&info->tag_set);
info->rq = NULL;

put_disk(info->gd);
@@ -933,22 +936,23 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)

static void kick_pending_request_queues(struct blkfront_info *info)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->io_lock, flags);
if (!RING_FULL(&info->ring)) {
- /* Re-enable calldowns. */
- blk_start_queue(info->rq);
- /* Kick things off immediately. */
- do_blkif_request(info->rq);
+ spin_unlock_irqrestore(&info->io_lock, flags);
+ blk_mq_start_stopped_hw_queues(info->rq, true);
+ return;
}
+ spin_unlock_irqrestore(&info->io_lock, flags);
}

static void blkif_restart_queue(struct work_struct *work)
{
struct blkfront_info *info = container_of(work, struct blkfront_info, work);

- spin_lock_irq(&info->io_lock);
if (info->connected == BLKIF_STATE_CONNECTED)
kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);
}

static void blkif_free(struct blkfront_info *info, int suspend)
@@ -958,13 +962,13 @@ static void blkif_free(struct blkfront_info *info, int suspend)
int i, j, segs;

/* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&info->io_lock);
info->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
/* No more blkif_request(). */
if (info->rq)
- blk_stop_queue(info->rq);
+ blk_mq_stop_hw_queues(info->rq);

+ spin_lock_irq(&info->io_lock);
/* Remove all persistent grants */
if (!list_empty(&info->grants)) {
list_for_each_entry_safe(persistent_gnt, n,
@@ -1144,15 +1148,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
RING_IDX i, rp;
unsigned long flags;
struct blkfront_info *info = (struct blkfront_info *)dev_id;
- int error;
-
- spin_lock_irqsave(&info->io_lock, flags);

- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
- spin_unlock_irqrestore(&info->io_lock, flags);
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return IRQ_HANDLED;
- }

+ spin_lock_irqsave(&info->io_lock, flags);
again:
rp = info->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
@@ -1185,37 +1185,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
continue;
}

- error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+ req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- error = -EOPNOTSUPP;
+ req->errors = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
}
- __blk_end_request_all(req, error);
+ blk_mq_complete_request(req);
break;
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- error = -EOPNOTSUPP;
+ req->errors = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- error = -EOPNOTSUPP;
+ req->errors = -EOPNOTSUPP;
}
- if (unlikely(error)) {
- if (error == -EOPNOTSUPP)
- error = 0;
+ if (unlikely(req->errors)) {
+ if (req->errors == -EOPNOTSUPP)
+ req->errors = 0;
info->feature_flush = 0;
xlvbd_flush(info);
}
@@ -1226,7 +1226,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
"request: %x\n", bret->status);

- __blk_end_request_all(req, error);
+ blk_mq_complete_request(req);
break;
default:
BUG();
@@ -1243,9 +1243,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
} else
info->ring.sring->rsp_event = i + 1;

- kick_pending_request_queues(info);
-
spin_unlock_irqrestore(&info->io_lock, flags);
+ kick_pending_request_queues(info);

return IRQ_HANDLED;
}
@@ -1555,32 +1554,8 @@ static int blkif_recover(struct blkfront_info *info)

kfree(copy);

- /*
- * Empty the queue, this is important because we might have
- * requests in the queue with more segments than what we
- * can handle now.
- */
- spin_lock_irq(&info->io_lock);
- while ((req = blk_fetch_request(info->rq)) != NULL) {
- if (req->cmd_flags &
- (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
- list_add(&req->queuelist, &requests);
- continue;
- }
- merge_bio.head = req->bio;
- merge_bio.tail = req->biotail;
- bio_list_merge(&bio_list, &merge_bio);
- req->bio = NULL;
- if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
- pr_alert("diskcache flush request found!\n");
- __blk_end_request_all(req, 0);
- }
- spin_unlock_irq(&info->io_lock);
-
xenbus_switch_state(info->xbdev, XenbusStateConnected);

- spin_lock_irq(&info->io_lock);
-
/* Now safe for us to use the shared ring */
info->connected = BLKIF_STATE_CONNECTED;

@@ -1591,9 +1566,9 @@ static int blkif_recover(struct blkfront_info *info)
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
BUG_ON(req->nr_phys_segments > segs);
- blk_requeue_request(info->rq, req);
+ blk_mq_requeue_request(req);
}
- spin_unlock_irq(&info->io_lock);
+ blk_mq_kick_requeue_list(info->rq);

while ((bio = bio_list_pop(&bio_list)) != NULL) {
/* Traverse the list of pending bios and re-queue them */
@@ -1923,10 +1898,8 @@ static void blkfront_connect(struct blkfront_info *info)
xenbus_switch_state(info->xbdev, XenbusStateConnected);

/* Kick pending requests. */
- spin_lock_irq(&info->io_lock);
info->connected = BLKIF_STATE_CONNECTED;
kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);

add_disk(info->gd);

--
1.7.10.4


2015-07-10 19:57:53

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs

On Mon, Jul 06, 2015 at 05:56:48PM +0800, Bob Liu wrote:
> From: Arianna Avanzini <[email protected]>
>
> This patch converts xen-blkfront driver to use the block multiqueue APIs.
> Only one hardware queue is used now, so there is no performance change.
>
> The legacy non-mq code was deleted completely which is the same as other drivers
> like virtio, mtip, and nvme.
>
> Also dropped unnecessary holding of info->io_lock when calling into blk-mq APIs.

Yeey!

Two points:

- The io_lock is now used to guard against concurrent access to the ring.
We should rename it to 'ring_lock'.

- The kick_pending_request_queues should have an extra argument - 'bool locked'.
This is so that you don't drop and immediately grab the lock from the blkif_interrupt.

See:

> @@ -1243,9 +1243,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> } else
> info->ring.sring->rsp_event = i + 1;
>
> - kick_pending_request_queues(info);
> -
> spin_unlock_irqrestore(&info->io_lock, flags);
> + kick_pending_request_queues(info);
>
> return IRQ_HANDLED;
> }

Otherwise Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>

2015-07-11 08:19:30

by Bob Liu

[permalink] [raw]
Subject: Re: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs


On 07/11/2015 03:57 AM, Konrad Rzeszutek Wilk wrote:
> On Mon, Jul 06, 2015 at 05:56:48PM +0800, Bob Liu wrote:
>> From: Arianna Avanzini <[email protected]>
>>
>> This patch converts xen-blkfront driver to use the block multiqueue APIs.
>> Only one hardware queue is used now, so there is no performance change.
>>
>> The legacy non-mq code was deleted completely which is the same as other drivers
>> like virtio, mtip, and nvme.
>>
>> Also dropped unnecessary holding of info->io_lock when calling into blk-mq APIs.
>
> Yeey!
>
> Two points:
>
> - The io_lock is now used to guard against concurrent access to the ring.
> We should rename it to 'ring_lock'.
>

Sure.

> - The kick_pending_request_queues should have an extra argument - 'bool locked'.
> This is so that you don't drop and immediately grab the lock from the blkif_interrupt.
>

Then where to drop the lock?

In kick_pending_request_queues(), the lock has to be dropped before calling blk_mq_start_stopped_hw_queues().

static void kick_pending_request_queues(struct blkfront_info *info)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->io_lock, flags);
if (!RING_FULL(&info->ring)) {
- /* Re-enable calldowns. */
- blk_start_queue(info->rq);
- /* Kick things off immediately. */
- do_blkif_request(info->rq);
+ spin_unlock_irqrestore(&info->io_lock, flags);
+ blk_mq_start_stopped_hw_queues(info->rq, true);
+ return;
}

> See:
>
>> @@ -1243,9 +1243,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>> } else
>> info->ring.sring->rsp_event = i + 1;
>>
>> - kick_pending_request_queues(info);
>> -
>> spin_unlock_irqrestore(&info->io_lock, flags);
>> + kick_pending_request_queues(info);
>>
>> return IRQ_HANDLED;
>> }
>
> Otherwise Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
>

--
Regards,
-Bob

2015-07-11 11:17:56

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs

On July 11, 2015 4:18:42 AM EDT, Bob Liu <[email protected]> wrote:
>
>On 07/11/2015 03:57 AM, Konrad Rzeszutek Wilk wrote:
>> On Mon, Jul 06, 2015 at 05:56:48PM +0800, Bob Liu wrote:
>>> From: Arianna Avanzini <[email protected]>
>>>
>>> This patch converts xen-blkfront driver to use the block multiqueue
>APIs.
>>> Only one hardware queue is used now, so there is no performance
>change.
>>>
>>> The legacy non-mq code was deleted completely which is the same as
>other drivers
>>> like virtio, mtip, and nvme.
>>>
>>> Also dropped unnecessary holding of info->io_lock when calling into
>blk-mq APIs.
>>
>> Yeey!
>>
>> Two points:
>>
>> - The io_lock is now used to guard against concurrent access to the
>ring.
>> We should rename it to 'ring_lock'.
>>
>
>Sure.
>
>> - The kick_pending_request_queues should have an extra argument -
>'bool locked'.
>> This is so that you don't drop and immediately grab the lock from
>the blkif_interrupt.
>>
>
>Then where to drop the lock?

The 'locked' parameter can be used to tell the function to not take the lock.

But it would drop the lock in both cases.
>
>In kick_pending_request_queues(), the lock have to be dropped before
>calling blk_mq_start_stopped_hw_queues().
>
> static void kick_pending_request_queues(struct blkfront_info *info)
> {
>+ unsigned long flags;
>+
>+ spin_lock_irqsave(&info->io_lock, flags);
> if (!RING_FULL(&info->ring)) {
>- /* Re-enable calldowns. */
>- blk_start_queue(info->rq);
>- /* Kick things off immediately. */
>- do_blkif_request(info->rq);
>+ spin_unlock_irqrestore(&info->io_lock, flags);
>+ blk_mq_start_stopped_hw_queues(info->rq, true);
>+ return;
> }
>
>> See:
>>
>>> @@ -1243,9 +1243,8 @@ static irqreturn_t blkif_interrupt(int irq,
>void *dev_id)
>>> } else
>>> info->ring.sring->rsp_event = i + 1;
>>>
>>> - kick_pending_request_queues(info);
>>> -
>>> spin_unlock_irqrestore(&info->io_lock, flags);
>>> + kick_pending_request_queues(info);
>>>
>>> return IRQ_HANDLED;
>>> }
>>
>> Otherwise Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
>>

2015-07-11 11:23:41

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs

On Sat, Jul 11, 2015 at 07:17:18AM -0400, Konrad Rzeszutek Wilk wrote:
> The 'locked' parameter can be used to tell the function to not take the lock.
>
> But it would drop the lock in both cases.

Konrad,

no conditional locking please. Split the functionality up in multiple
functions if you may call with or without a lock held.

2015-07-11 11:55:05

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [RESEND PATCH] xen/blkfront: convert to blk-mq APIs

On July 11, 2015 7:23:38 AM EDT, Christoph Hellwig <[email protected]> wrote:
>On Sat, Jul 11, 2015 at 07:17:18AM -0400, Konrad Rzeszutek Wilk wrote:
>> The 'locked' parameter can be used to tell the function to not take
>the lock.
>>
>> But it would drop the lock in both cases.
>
>Konrad,
>
>no conditional locking please. Split the functionality up in multiple
>functions if you may call with or without a lock held.

That would work nicely too. Thanks!