Hi Jens,
This series aims to avoid the effects of an odd queue depth, i.e.
when fewer tags than reserved_tags are requested or an insufficient
number of requests could be allocated.

In case the number of normal (non-reserved) tags requested is less
than 4 (the current minimal cache size limit), there are two ways to
resolve this: adjust the cache size to the depth, or reject queue
depths smaller than the minimal cache size. I favored the latter,
but will repost if you prefer otherwise.
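For illustration, the check favored here boils down to the following
constraint at queue initialization time (a minimal sketch only, using
the names introduced by the patches below, not the actual diff):

	if (reg->queue_depth < reg->reserved_tags + BLK_MQ_TAG_MIN)
		return ERR_PTR(-EINVAL);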
Alexander Gordeev (3):
blk-mq: Sanity check reserved tags
blk-mq: Check queue depth is valid
blk-mq: Do not allocate more cache entries than used
block/blk-mq-tag.c | 31 ++++++++++++++++++-------------
block/blk-mq-tag.h | 6 ++++++
block/blk-mq.c | 12 +++++++-----
3 files changed, 31 insertions(+), 18 deletions(-)
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
Signed-off-by: Alexander Gordeev <[email protected]>
---
block/blk-mq-tag.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index dcbc2a4..6718007 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -335,6 +335,8 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
{
unsigned long flags;
+ BUG_ON(tag >= tags->reserved_tags);
+
spin_lock_irqsave(&tags->lock, flags);
tags->reservelist[tags->nr_reserved] = tag;
tags->nr_reserved++;
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
Ensure that the depth of a queue is large enough to hold at least
the reserved tags plus a minimal number of normal tags. The minimal
number is chosen to be consistent with the minimal length of the
tags cache.
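For example (hypothetical numbers, assuming the current minimal
cache size of 4): a queue registered with reserved_tags = 2 must
have queue_depth >= 6, otherwise blk_mq_init_queue() returns
ERR_PTR(-EINVAL).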
Signed-off-by: Alexander Gordeev <[email protected]>
---
block/blk-mq-tag.c | 8 ++++----
block/blk-mq-tag.h | 6 ++++++
block/blk-mq.c | 12 +++++++-----
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 6718007..fe4acb1 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -447,10 +447,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
tags->nr_tags = nr_tags;
tags->reserved_tags = reserved_tags;
tags->max_cache = nr_tags / num_possible_cpus();
- if (tags->max_cache < 4)
- tags->max_cache = 4;
- else if (tags->max_cache > 64)
- tags->max_cache = 64;
+ if (tags->max_cache < BLK_MQ_TAG_CACHE_MIN)
+ tags->max_cache = BLK_MQ_TAG_CACHE_MIN;
+ else if (tags->max_cache > BLK_MQ_TAG_CACHE_MAX)
+ tags->max_cache = BLK_MQ_TAG_CACHE_MAX;
tags->batch_move = tags->max_cache / 2;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index ce4d5b2..716ea79 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -14,7 +14,13 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
enum {
+ BLK_MQ_TAG_CACHE_MIN = 4,
+ BLK_MQ_TAG_CACHE_MAX = 64,
+};
+
+enum {
BLK_MQ_TAG_FAIL = -1U,
+ BLK_MQ_TAG_MIN = BLK_MQ_TAG_CACHE_MIN,
BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1,
};
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6fc1df3..a8b4c79 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1151,8 +1151,8 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
}
}
- if (!i)
- return -ENOMEM;
+ if (i < (reserved_tags + BLK_MQ_TAG_MIN))
+ goto err_rq_map;
else if (i != hctx->queue_depth) {
hctx->queue_depth = i;
pr_warn("%s: queue depth set to %u because of low memory\n",
@@ -1161,6 +1161,7 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node);
if (!hctx->tags) {
+err_rq_map:
blk_mq_free_rq_map(hctx);
return -ENOMEM;
}
@@ -1305,9 +1306,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
struct request_queue *q;
int i;
- if (!reg->nr_hw_queues || !reg->ops->queue_rq ||
- !reg->ops->map_queue || !reg->ops->alloc_hctx ||
- !reg->ops->free_hctx)
+ if (!reg->nr_hw_queues ||
+ !reg->ops->queue_rq || !reg->ops->map_queue ||
+ !reg->ops->alloc_hctx || !reg->ops->free_hctx ||
+ (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)))
return ERR_PTR(-EINVAL);
if (!reg->queue_depth)
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
Signed-off-by: Alexander Gordeev <[email protected]>
---
block/blk-mq-tag.c | 29 ++++++++++++++++-------------
1 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index fe4acb1..e74e18e 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -411,13 +411,14 @@ static void blk_mq_tag_notify(void *data, unsigned long action,
}
}
-struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
+struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
unsigned int reserved_tags, int node)
{
+ unsigned int nr_tags, nr_cache;
struct blk_mq_tags *tags;
size_t map_size;
- if (nr_tags > BLK_MQ_TAG_MAX) {
+ if (total_tags > BLK_MQ_TAG_MAX) {
pr_err("blk-mq: tag depth too large\n");
return NULL;
}
@@ -426,7 +427,15 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
if (!tags)
return NULL;
- map_size = sizeof(struct blk_mq_tag_map) + nr_tags * sizeof(int);
+ nr_tags = total_tags - reserved_tags;
+ nr_cache = nr_tags / num_possible_cpus();
+
+ if (nr_cache < BLK_MQ_TAG_CACHE_MIN)
+ nr_cache = BLK_MQ_TAG_CACHE_MIN;
+ else if (nr_cache > BLK_MQ_TAG_CACHE_MAX)
+ nr_cache = BLK_MQ_TAG_CACHE_MAX;
+
+ map_size = sizeof(struct blk_mq_tag_map) + nr_cache * sizeof(int);
tags->free_maps = __alloc_percpu(map_size, sizeof(void *));
if (!tags->free_maps)
goto err_free_maps;
@@ -444,15 +453,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
spin_lock_init(&tags->lock);
INIT_LIST_HEAD(&tags->wait);
- tags->nr_tags = nr_tags;
+ tags->nr_tags = total_tags;
tags->reserved_tags = reserved_tags;
- tags->max_cache = nr_tags / num_possible_cpus();
- if (tags->max_cache < BLK_MQ_TAG_CACHE_MIN)
- tags->max_cache = BLK_MQ_TAG_CACHE_MIN;
- else if (tags->max_cache > BLK_MQ_TAG_CACHE_MAX)
- tags->max_cache = BLK_MQ_TAG_CACHE_MAX;
-
- tags->batch_move = tags->max_cache / 2;
+ tags->max_cache = nr_cache;
+ tags->batch_move = nr_cache / 2;
/*
* Reserved tags are first
@@ -470,10 +474,9 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
* Rest of the tags start at the queue list
*/
tags->nr_free = 0;
- while (nr_tags - tags->nr_reserved) {
+ while (nr_tags--) {
tags->freelist[tags->nr_free] = tags->nr_free +
tags->nr_reserved;
- nr_tags--;
tags->nr_free++;
}
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
A zero value of blk_mq_reg::queue_depth defaults to
BLK_MQ_MAX_DEPTH. Commit 1ffd49b ("blk-mq: Check queue depth is
valid") broke this default by validating the depth before the
default was applied. This fix restores the previous behaviour by
performing the check only after the default depth has been set.
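In other words, the intent is the following ordering (a sketch of
the resulting code flow, not the diff itself):

	/* Apply the default depth first ... */
	if (!reg->queue_depth)
		reg->queue_depth = BLK_MQ_MAX_DEPTH;

	/* ... and only then validate it against the reserved tags. */
	if (reg->queue_depth < reg->reserved_tags + BLK_MQ_TAG_MIN)
		return ERR_PTR(-EINVAL);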
Signed-off-by: Alexander Gordeev <[email protected]>
---
block/blk-mq.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a8b4c79..6c2533a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1308,8 +1308,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
if (!reg->nr_hw_queues ||
!reg->ops->queue_rq || !reg->ops->map_queue ||
- !reg->ops->alloc_hctx || !reg->ops->free_hctx ||
- (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)))
+ !reg->ops->alloc_hctx || !reg->ops->free_hctx)
return ERR_PTR(-EINVAL);
if (!reg->queue_depth)
@@ -1319,6 +1318,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
reg->queue_depth = BLK_MQ_MAX_DEPTH;
}
+ if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
+ return ERR_PTR(-EINVAL);
+
ctx = alloc_percpu(struct blk_mq_ctx);
if (!ctx)
return ERR_PTR(-ENOMEM);
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
There is no reason to enforce a minimum queue depth; indeed, some
ATA devices ask for a depth of 1. Lower the minimal cache size to 1
and clamp batch_move so that it does not drop to zero.
Signed-off-by: Alexander Gordeev <[email protected]>
---
block/blk-mq-tag.c | 2 +-
block/blk-mq-tag.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e74e18e..a115862 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -456,7 +456,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags;
tags->reserved_tags = reserved_tags;
tags->max_cache = nr_cache;
- tags->batch_move = nr_cache / 2;
+ tags->batch_move = max(1u, nr_cache / 2);
/*
* Reserved tags are first
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 716ea79..947ba2c 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -14,7 +14,7 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
enum {
- BLK_MQ_TAG_CACHE_MIN = 4,
+ BLK_MQ_TAG_CACHE_MIN = 1,
BLK_MQ_TAG_CACHE_MAX = 64,
};
--
1.7.7.6
--
Regards,
Alexander Gordeev
[email protected]
Hi Jens,
Could you consider patches 4 and 5, please?
Thanks!
--
Regards,
Alexander Gordeev
[email protected]
On 09/12/2013 01:42 AM, Alexander Gordeev wrote:
> Hi Jens,
>
> Could you consider patches 4 and 5, please?
Added. They really should be folded into the previous patches, since
those were broken, but I'll do that at some point when I go over
parts of the series again.
--
Jens Axboe