Hi Jens,
We already know that batching requests can increase the storage
performance of some devices, but after testing I found that it reduces
the performance of some low- and medium-speed devices when a scheduler
(especially mq-deadline) is used, such as rotational disks behind a
RAID controller.
To balance the performance difference between devices, a more
sophisticated mechanism may be needed to control the batch size
(sometimes a batch of 1 is better), but that is obviously not easy.
At the same time, I noticed that when the mq-deadline scheduler selects
a request at a higher sector outside the current batch,
deadline_fifo_request is used to pick the request from the FIFO and
restart the batch.
Selecting a request from the FIFO favors that request's deadline, but
the request at the head of the FIFO is not always urgent.
So, when no request has expired, could the strategy for selecting the
first request of a batch be changed? On devices with rotational
characteristics, a proper sector access order is beneficial to
performance.
This patch series includes the following two parts:
- Add a temporary sysfs interface, nr_sched_batch, to control the
  number of batched requests.
- Add a deadline_head_request function to select the request from the
  head of the red-black tree instead of from the FIFO when appropriate
  (a minimal sketch follows below).
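For reference, here is a rough sketch of the deadline_head_request
idea, modeled on the signature of the existing deadline_fifo_request
helper. Treat it as an outline only: the actual patch also has to
handle zoned writes the way deadline_fifo_request does.

static struct request *
deadline_head_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
                      enum dd_data_dir data_dir)
{
        /* Lowest-sector request in the per-direction sort tree. */
        struct rb_node *node = rb_first(&per_prio->sort_list[data_dir]);

        if (!node)
                return NULL;

        return rb_entry_rq(node);
}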
Thanks,
Wang.
Wang You (2):
block: Introduce nr_sched_batch sys interface
block/mq-deadline: Prioritize first request
block/blk-mq-sched.c | 4 +++-
block/blk-sysfs.c | 34 ++++++++++++++++++++++++++++++++++
block/mq-deadline.c | 42 +++++++++++++++++++++++++++++++++++++++---
include/linux/blkdev.h | 1 +
4 files changed, 77 insertions(+), 4 deletions(-)
--
2.27.0
This patch adds an nr_sched_batch interface under
/sys/block/sdx/queue/ that can be used to set the number of batched
requests. The default value is nr_requests, and nr_sched_batch keeps
following nr_requests until it is changed explicitly.
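As a usage sketch (the device name and the value written below are
illustrative, not part of this patch), the attribute can be exercised
from user space like any other queue attribute:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *path = "/sys/block/sda/queue/nr_sched_batch";
        FILE *f = fopen(path, "r+");
        unsigned long nr;

        if (!f) {
                perror(path);
                return EXIT_FAILURE;
        }

        /* Read the current value; it defaults to nr_requests. */
        if (fscanf(f, "%lu", &nr) == 1)
                printf("nr_sched_batch = %lu\n", nr);

        /* Limit dispatch batches to a single request. */
        rewind(f);
        fprintf(f, "1\n");
        fclose(f);
        return EXIT_SUCCESS;
}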
Signed-off-by: Wang You <[email protected]>
---
block/blk-mq-sched.c | 4 +++-
block/blk-sysfs.c | 34 ++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 1 +
3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a4f7c101b53b..92798a0c03bd 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -100,7 +100,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
if (hctx->dispatch_busy)
max_dispatch = 1;
else
- max_dispatch = hctx->queue->nr_requests;
+ max_dispatch = q->nr_sched_batch;
do {
struct request *rq;
@@ -567,6 +567,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
+ q->nr_sched_batch = q->nr_requests;
return 0;
}
@@ -577,6 +578,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
*/
q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
BLKDEV_DEFAULT_RQ);
+ q->nr_sched_batch = q->nr_requests;
if (blk_mq_is_shared_tags(flags)) {
ret = blk_mq_init_sched_shared_tags(q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9b905e9443e4..8f299a3cf66c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -70,6 +70,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
{
unsigned long nr;
int ret, err;
+ unsigned long prev_nr_request = q->nr_requests;
if (!queue_is_mq(q))
return -EINVAL;
@@ -85,6 +86,37 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (err)
return err;
+ if (q->nr_sched_batch == prev_nr_request ||
+ q->nr_sched_batch > nr)
+ q->nr_sched_batch = nr;
+
+ return ret;
+}
+
+static ssize_t
+elv_nr_batch_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(q->nr_sched_batch, page);
+}
+
+static ssize_t
+elv_nr_batch_store(struct request_queue *q, const char *page, size_t count)
+{
+ unsigned long nr;
+ int ret;
+
+ if (!queue_is_mq(q))
+ return -EINVAL;
+
+ ret = queue_var_store(&nr, page, count);
+ if (ret < 0)
+ return ret;
+
+ if (nr > q->nr_requests || nr < 1)
+ return -EINVAL;
+
+ q->nr_sched_batch = nr;
+
return ret;
}
@@ -573,6 +605,7 @@ QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+QUEUE_RW_ENTRY(elv_nr_batch, "nr_sched_batch");
QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
@@ -632,6 +665,7 @@ static struct attribute *queue_attrs[] = {
&queue_max_integrity_segments_entry.attr,
&queue_max_segment_size_entry.attr,
&elv_iosched_entry.attr,
+ &elv_nr_batch_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f7b43444c5f..13b050c0756b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -422,6 +422,7 @@ struct request_queue {
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
+ unsigned long nr_sched_batch;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
--
2.27.0
On 7/20/22 02:30, Wang You wrote:
>
The code in these patches has not been formatted according to the kernel
coding style guidelines. Please try to run git clang-format HEAD^ on
both patches and review the changes made by clang-format.
Thanks,
Bart.
On 7/20/22 02:30, Wang You wrote:
> The function of this patch is to add an nr_sched_batch interface under
> /sys/block/sdx/queue/, which can be used to set the number of batching
> requests. Of course, the default value is nr_requests and will follow
> nr_request when it has not been changed.
How can reducing the number of batched requests increase performance?
Please provide performance numbers.
Thanks,
Bart.
> How can reducing the number of batched requests increase performance?
> Please provide performance numbers.
The test data for the original patch is in [PATCH 2/2], including the
nr_sched_batch = 1 case. I will organize the data for different
hardware in PATCH v2.
Thanks,
Wang.
> The code in these patches has not been formatted according to the kernel
> coding style guidelines. Please try to run git clang-format HEAD^ on
> both patches and review the changes made by clang-format.
> Thanks,
> Bart.
I apologize for the code formatting issues. I have used
git-clang-format to reformat both patches automatically, and I
appreciate your comments. PATCH v2 will be sent later.
Thanks,
Wang.