2019-01-24 10:28:36

by jianchao.wang

[permalink] [raw]
Subject: [PATCH 0/2] small optimization for accessing queue map

Hi Jens

These two patches are small optimization for accessing the queue mapping
in hot path. It saves the queue mapping results into blk_mq_ctx directly,
then we needn't do the complicated bounce on queue_hw_ctx[] map[] and
mq_map[].

Jianchao Wang (2)
blk-mq: save queue mapping result into ctx directly
blk-mq: save default hctx into ctx->hctxs

block/blk-mq-sched.c | 2 +-
block/blk-mq-tag.c | 2 +-
block/blk-mq.c | 13 ++++++++++---
block/blk-mq.h | 20 +++++++++-----------
block/blk.h | 2 +-
5 files changed, 22 insertions(+), 17 deletions(-)

Thanks
Jianchao


2019-01-24 10:28:44

by jianchao.wang

[permalink] [raw]
Subject: [PATCH 2/2] blk-mq: save default hctx into ctx->hctxs for not-supported type

Currently, we check whether the hctx type is supported every time
in hot path. Actually, this is not necessary, we could save the
default hctx into ctx->hctxs if the type is not supported when
map swqueues and use it directly with ctx->hctxs[type].

We also needn't check whether the poll is enabled or not, because
the caller would clear the REQ_HIPRI in that case.

Signed-off-by: Jianchao Wang <[email protected]>
---
block/blk-mq.c | 9 ++++++++-
block/blk-mq.h | 13 +++++--------
2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 445d0a2..8a825ae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2431,8 +2431,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)

ctx = per_cpu_ptr(q->queue_ctx, i);
for (j = 0; j < set->nr_maps; j++) {
- if (!set->map[j].nr_queues)
+ if (!set->map[j].nr_queues) {
+ ctx->hctxs[j] = blk_mq_map_queue_type(q,
+ HCTX_TYPE_DEFAULT, i);
continue;
+ }

hctx = blk_mq_map_queue_type(q, j, i);
ctx->hctxs[j] = hctx;
@@ -2455,6 +2458,10 @@ static void blk_mq_map_swqueue(struct request_queue *q)
*/
BUG_ON(!hctx->nr_ctx);
}
+
+ for (; j < HCTX_MAX_TYPES; j++)
+ ctx->hctxs[j] = blk_mq_map_queue_type(q,
+ HCTX_TYPE_DEFAULT, i);
}

mutex_unlock(&q->sysfs_lock);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9fb0626..14b7efb9 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -106,15 +106,12 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
{
enum hctx_type type = HCTX_TYPE_DEFAULT;

- if ((flags & REQ_HIPRI) &&
- q->tag_set->nr_maps > HCTX_TYPE_POLL &&
- q->tag_set->map[HCTX_TYPE_POLL].nr_queues &&
- test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+ /*
+ * The caller ensure that if REQ_HIPRI, poll must be enabled.
+ */
+ if (flags & REQ_HIPRI)
type = HCTX_TYPE_POLL;
-
- else if (((flags & REQ_OP_MASK) == REQ_OP_READ) &&
- q->tag_set->nr_maps > HCTX_TYPE_READ &&
- q->tag_set->map[HCTX_TYPE_READ].nr_queues)
+ else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
type = HCTX_TYPE_READ;

return ctx->hctxs[type];
--
2.7.4


2019-01-24 10:30:08

by jianchao.wang

[permalink] [raw]
Subject: [PATCH 1/2] blk-mq: save queue mapping result into ctx directly

Currelty, the queue mapping result is saved in a two-dimensional
array. In hot path, to get a hctx, we need do following,

q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]

This looks not efficient. Actually, we could save the queue mapping
result into ctx directly with different hctx type, like,

ctx->hctxs[type]

Signed-off-by: Jianchao Wang <[email protected]>
---
block/blk-mq-sched.c | 2 +-
block/blk-mq-tag.c | 2 +-
block/blk-mq.c | 4 ++--
block/blk-mq.h | 7 ++++---
block/blk.h | 2 +-
5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 140933e..4090553 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -321,7 +321,7 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
bool ret = false;
enum hctx_type type;

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 2089c6c..a4931fc 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -170,7 +170,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)

data->ctx = blk_mq_get_ctx(data->q);
data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
- data->ctx->cpu);
+ data->ctx);
tags = blk_mq_tags_from_data(data);
if (data->flags & BLK_MQ_REQ_RESERVED)
bt = &tags->breserved_tags;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8f5b533..445d0a2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -364,7 +364,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
}
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->cmd_flags,
- data->ctx->cpu);
+ data->ctx);
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;

@@ -2435,7 +2435,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
continue;

hctx = blk_mq_map_queue_type(q, j, i);
-
+ ctx->hctxs[j] = hctx;
/*
* If the CPU is already set in the mask, then we've
* mapped this one already. This can happen if
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d943d46..9fb0626 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -23,6 +23,7 @@ struct blk_mq_ctx {

unsigned int cpu;
unsigned short index_hw[HCTX_MAX_TYPES];
+ struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES];

/* incremented at dispatch time */
unsigned long rq_dispatched[2];
@@ -97,11 +98,11 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
* blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
* @q: request queue
* @flags: request command flags
- * @cpu: CPU
+ * @cpu: cpu ctx
*/
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
unsigned int flags,
- unsigned int cpu)
+ struct blk_mq_ctx *ctx)
{
enum hctx_type type = HCTX_TYPE_DEFAULT;

@@ -116,7 +117,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
q->tag_set->map[HCTX_TYPE_READ].nr_queues)
type = HCTX_TYPE_READ;

- return blk_mq_map_queue_type(q, type, cpu);
+ return ctx->hctxs[type];
}

/*
diff --git a/block/blk.h b/block/blk.h
index 848278c..5d636ee 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -38,7 +38,7 @@ extern struct ida blk_queue_ida;
static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
- return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx->cpu)->fq;
+ return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}

static inline void __blk_get_queue(struct request_queue *q)
--
2.7.4


2019-01-24 18:10:23

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/2] small optimization for accessing queue map

On 1/24/19 3:25 AM, Jianchao Wang wrote:
> Hi Jens
>
> These two patches are small optimization for accessing the queue mapping
> in hot path. It saves the queue mapping results into blk_mq_ctx directly,
> then we needn't do the complicated bounce on queue_hw_ctx[] map[] and
> mq_map[].

I like this a lot, the current double indirect does suck, and it does show
up in profiles as well. I'll run some testing with this, thanks!

--
Jens Axboe


2019-02-01 15:45:11

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 0/2] small optimization for accessing queue map

On 1/24/19 3:25 AM, Jianchao Wang wrote:
> Hi Jens
>
> These two patches are small optimization for accessing the queue mapping
> in hot path. It saves the queue mapping results into blk_mq_ctx directly,
> then we needn't do the complicated bounce on queue_hw_ctx[] map[] and
> mq_map[].

Doing some targeted testing, the cycles wasted on the double indirect
are reclaimed with this. I've applied both for 5.1, thanks.

--
Jens Axboe