changes in v3:
- instead of check elevator name, add a flag in elevator_queue, as
suggested by Christoph.
- add patch 3 and patch 5 to this patchset.
changes in v2:
- define new api if wbt config is not enabled in patch 1.
Yu Kuai (5):
wbt: don't show valid wbt_lat_usec in sysfs while wbt is disabled
elevator: add new field flags in struct elevator_queue
block, bfq: don't disable wbt if CONFIG_BFQ_GROUP_IOSCHED is disabled
blk-wbt: don't enable throttling if default elevator is bfq
elevator: remove redundant code in elv_unregister_queue()
block/bfq-iosched.c | 6 +++++-
block/blk-sysfs.c | 9 ++++++++-
block/blk-wbt.c | 15 ++++++++++++++-
block/blk-wbt.h | 5 +++++
block/elevator.c | 8 ++------
block/elevator.h | 5 ++++-
6 files changed, 38 insertions(+), 10 deletions(-)
--
2.31.1
Currently, if wbt is initialized and then disabled by
wbt_disable_default(), sysfs will still show valid wbt_lat_usec, which
will confuse users that wbt is still enabled.
This patch shows wbt_lat_usec as zero and forbids setting it while wbt
is disabled.
Signed-off-by: Yu Kuai <[email protected]>
Reported-and-tested-by: Holger Hoffstätte <[email protected]>
---
block/blk-sysfs.c | 9 ++++++++-
block/blk-wbt.c | 7 +++++++
block/blk-wbt.h | 5 +++++
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e1f009aba6fd..1955bb6a284d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -467,10 +467,14 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
+ u64 lat;
+
if (!wbt_rq_qos(q))
return -EINVAL;
- return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+ lat = wbt_disabled(q) ? 0 : div_u64(wbt_get_min_lat(q), 1000);
+
+ return sprintf(page, "%llu\n", lat);
}
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
@@ -493,6 +497,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
return ret;
}
+ if (wbt_disabled(q))
+ return -EINVAL;
+
if (val == -1)
val = wbt_default_latency_nsec(q);
else if (val >= 0)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index a9982000b667..68851c2c02d2 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -422,6 +422,13 @@ static void wbt_update_limits(struct rq_wb *rwb)
rwb_wake_all(rwb);
}
+bool wbt_disabled(struct request_queue *q)
+{
+ struct rq_qos *rqos = wbt_rq_qos(q);
+
+ return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT;
+}
+
u64 wbt_get_min_lat(struct request_queue *q)
{
struct rq_qos *rqos = wbt_rq_qos(q);
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 7e44eccc676d..e42465ddcbb6 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -94,6 +94,7 @@ void wbt_enable_default(struct request_queue *);
u64 wbt_get_min_lat(struct request_queue *q);
void wbt_set_min_lat(struct request_queue *q, u64 val);
+bool wbt_disabled(struct request_queue *);
void wbt_set_write_cache(struct request_queue *, bool);
@@ -125,6 +126,10 @@ static inline u64 wbt_default_latency_nsec(struct request_queue *q)
{
return 0;
}
+static inline bool wbt_disabled(struct request_queue *q)
+{
+ return true;
+}
#endif /* CONFIG_BLK_WBT */
--
2.31.1
wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
Signed-off-by: Yu Kuai <[email protected]>
---
block/bfq-iosched.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7ea427817f7f..fec52968fe07 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7037,6 +7037,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
+ wbt_enable_default(bfqd->queue);
#else
spin_lock_irq(&bfqd->lock);
bfq_put_async_queues(bfqd, bfqd->root_group);
@@ -7045,7 +7046,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
#endif
blk_stat_disable_accounting(bfqd->queue);
- wbt_enable_default(bfqd->queue);
kfree(bfqd);
}
@@ -7190,7 +7190,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
/* We dispatch from request queue wide instead of hw queue */
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
wbt_disable_default(q);
+#endif
blk_stat_enable_accounting(q);
return 0;
--
2.31.1
Commit b5dc5d4d1f4f ("block,bfq: Disable writeback throttling") tries to
disable wbt for bfq, it's done by calling wbt_disable_default() in
bfq_init_queue(). However, wbt is still enabled if default elevator is
bfq:
device_add_disk
elevator_init_mq
bfq_init_queue
wbt_disable_default -> done nothing
blk_register_queue
wbt_enable_default -> wbt is enabled
Fix the problem by adding a new flag ELEVATOR_FLAG_DISABLE_WBT, bfq
will set the flag in bfq_init_queue, and following wbt_enable_default()
won't enable wbt while the flag is set.
Signed-off-by: Yu Kuai <[email protected]>
---
block/bfq-iosched.c | 2 ++
block/blk-wbt.c | 8 +++++++-
block/elevator.h | 3 ++-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fec52968fe07..f67ed271baa9 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7037,6 +7037,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq);
+ clear_bit(ELEVATOR_FLAG_DISABLE_WBT, &e->flags);
wbt_enable_default(bfqd->queue);
#else
spin_lock_irq(&bfqd->lock);
@@ -7191,6 +7192,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ set_bit(ELEVATOR_FLAG_DISABLE_WBT, &eq->flags);
wbt_disable_default(q);
#endif
blk_stat_enable_accounting(q);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 68851c2c02d2..279f8dc1e952 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -27,6 +27,7 @@
#include "blk-wbt.h"
#include "blk-rq-qos.h"
+#include "elevator.h"
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
@@ -645,9 +646,14 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
*/
void wbt_enable_default(struct request_queue *q)
{
- struct rq_qos *rqos = wbt_rq_qos(q);
+ struct rq_qos *rqos;
+
+ if (q->elevator &&
+ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ return;
/* Throttling already enabled? */
+ rqos = wbt_rq_qos(q);
if (rqos) {
if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
diff --git a/block/elevator.h b/block/elevator.h
index ed574bf3e629..75382471222d 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -104,7 +104,8 @@ struct elevator_queue
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
-#define ELEVATOR_FLAG_REGISTERED 0
+#define ELEVATOR_FLAG_REGISTERED 0
+#define ELEVATOR_FLAG_DISABLE_WBT 1
/*
* block elevator interface
--
2.31.1
There is currently only one flag to indicate that the elevator is registered,
prepare to add a flag to disable wbt if default elevator is bfq.
Signed-off-by: Yu Kuai <[email protected]>
---
block/elevator.c | 6 ++----
block/elevator.h | 4 +++-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index c319765892bb..7cb61820cfa0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -512,7 +512,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
if (uevent)
kobject_uevent(&e->kobj, KOBJ_ADD);
- e->registered = 1;
+ set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
}
return error;
}
@@ -523,13 +523,11 @@ void elv_unregister_queue(struct request_queue *q)
lockdep_assert_held(&q->sysfs_lock);
- if (e && e->registered) {
+ if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
struct elevator_queue *e = q->elevator;
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
-
- e->registered = 0;
}
}
diff --git a/block/elevator.h b/block/elevator.h
index 3f0593b3bf9d..ed574bf3e629 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -100,10 +100,12 @@ struct elevator_queue
void *elevator_data;
struct kobject kobj;
struct mutex sysfs_lock;
- unsigned int registered:1;
+ unsigned long flags;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
};
+#define ELEVATOR_FLAG_REGISTERED 0
+
/*
* block elevator interface
*/
--
2.31.1
"elevator_queue *e" is already declared and initialized in the beginning
of elv_unregister_queue().
Signed-off-by: Yu Kuai <[email protected]>
---
block/elevator.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 7cb61820cfa0..0a72d6fbbdcc 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -524,8 +524,6 @@ void elv_unregister_queue(struct request_queue *q)
lockdep_assert_held(&q->sysfs_lock);
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
- struct elevator_queue *e = q->elevator;
-
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
}
--
2.31.1
On Thu, Sep 22, 2022 at 07:35:58PM +0800, Yu Kuai wrote:
> "elevator_queue *e" is already declared and initialized in the beginning
> of elv_unregister_queue().
>
> Signed-off-by: Yu Kuai <[email protected]>
> ---
> block/elevator.c | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/block/elevator.c b/block/elevator.c
> index 7cb61820cfa0..0a72d6fbbdcc 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -524,8 +524,6 @@ void elv_unregister_queue(struct request_queue *q)
> lockdep_assert_held(&q->sysfs_lock);
>
> if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
> - struct elevator_queue *e = q->elevator;
> -
> kobject_uevent(&e->kobj, KOBJ_REMOVE);
> kobject_del(&e->kobj);
> }
> --
Reviewed-by: Eric Biggers <[email protected]>
- Eric
On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
Umm, wouldn't this be something decided at runtime, that is not
if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
if the hierarchical cgroup based scheduling is actually used for a
given device?
Looks good:
Reviewed-by: Christoph Hellwig <[email protected]>
But this really should go first in the series.
On Fri 23-09-22 17:50:49, Yu Kuai wrote:
> Hi, Christoph
>
> 在 2022/09/23 16:56, Christoph Hellwig 写道:
> > On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
> > > wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
> >
> > Umm, wouldn't this be something decided at runtime, that is not
> > if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
> > if the hierarchical cgroup based scheduling is actually used for a
> > given device?
> > .
> >
>
> That's a good point,
>
> Before this patch wbt is simply disabled if elevator is bfq.
>
> With this patch, if elevator is bfq while bfq doesn't throttle
> any IO yet, wbt still is disabled unnecessarily.
It is not really disabled unnecessarily. Have you actually tested the
performance of the combination? I did once and the results were just
horrible (which is why I made BFQ just disable wbt by default). The problem is
that blk-wbt assumes certain model of underlying storage stack and hardware
behavior and BFQ just does not fit in that model. For example BFQ wants to
see as many requests as possible so that it can heavily reorder them,
estimate think times of applications, etc. On the other hand blk-wbt
assumes that if request latency gets higher, it means there is too much IO
going on and we need to allow less of "lower priority" IO types to be
submitted. These two go directly against one another and I was easily
observing blk-wbt spiraling down to allowing only very small number of
requests submitted while BFQ was idling waiting for more IO from the
process that was currently scheduled.
So I'm kind of wondering why you'd like to use blk-wbt and BFQ together...
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
Hi, Christoph
在 2022/09/23 16:56, Christoph Hellwig 写道:
> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>
> Umm, wouldn't this be something decided at runtime, that is not
> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
> if the hierarchical cgroup based scheduling is actually used for a
> given device?
> .
>
That's a good point,
Before this patch wbt is simply disabled if elevator is bfq.
With this patch, if elevator is bfq while bfq doesn't throttle
any IO yet, wbt still is disabled unnecessarily.
I have an idea to enable/disable wbt while tracking how many bfq_groups
are activated, which will rely on my another patchset, which is not
applied yet...
support concurrent sync io for bfq on a special occasion.
I think currently this patch do make sense, perhaps I can do more work
after the above patchset finally applied?
Thanks,
Kuai
Hi Kuai!
On Fri 23-09-22 18:23:03, Yu Kuai wrote:
> 在 2022/09/23 18:06, Jan Kara 写道:
> > On Fri 23-09-22 17:50:49, Yu Kuai wrote:
> > > Hi, Christoph
> > >
> > > 在 2022/09/23 16:56, Christoph Hellwig 写道:
> > > > On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
> > > > > wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
> > > >
> > > > Umm, wouldn't this be something decided at runtime, that is not
> > > > if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
> > > > if the hierarchical cgroup based scheduling is actually used for a
> > > > given device?
> > > > .
> > > >
> > >
> > > That's a good point,
> > >
> > > Before this patch wbt is simply disabled if elevator is bfq.
> > >
> > > With this patch, if elevator is bfq while bfq doesn't throttle
> > > any IO yet, wbt still is disabled unnecessarily.
> >
> > It is not really disabled unnecessarily. Have you actually tested the
> > performance of the combination? I did once and the results were just
> > horrible (which is I made BFQ just disable wbt by default). The problem is
> > that blk-wbt assumes certain model of underlying storage stack and hardware
> > behavior and BFQ just does not fit in that model. For example BFQ wants to
> > see as many requests as possible so that it can heavily reorder them,
> > estimate think times of applications, etc. On the other hand blk-wbt
> > assumes that if request latency gets higher, it means there is too much IO
> > going on and we need to allow less of "lower priority" IO types to be
> > submitted. These two go directly against one another and I was easily
> > observing blk-wbt spiraling down to allowing only very small number of
> > requests submitted while BFQ was idling waiting for more IO from the
> > process that was currently scheduled.
> >
>
> Thanks for your explanation, I understand that bfq and wbt should not
> work together.
>
> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
> guarantee is not needed, does the above phenomenon still exist? I find
> it hard to understand... Perhaps I need to do some test.
Well, BFQ implements for example idling on sync IO queues which is one of
the features that upsets blk-wbt. That does not depend on
CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
assigns storage *time slots* to different processes and IO from other
processes is just queued at those times increases IO completion
latency (for IOs of processes that are not currently scheduled) and this
tends to confuse blk-wbt.
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
Hi, Jan
在 2022/09/23 18:06, Jan Kara 写道:
> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>> Hi, Christoph
>>
>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>
>>> Umm, wouldn't this be something decided at runtime, that is not
>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>> if the hierarchical cgroup based scheduling is actually used for a
>>> given device?
>>> .
>>>
>>
>> That's a good point,
>>
>> Before this patch wbt is simply disabled if elevator is bfq.
>>
>> With this patch, if elevator is bfq while bfq doesn't throttle
>> any IO yet, wbt still is disabled unnecessarily.
>
> It is not really disabled unnecessarily. Have you actually tested the
> performance of the combination? I did once and the results were just
> horrible (which is I made BFQ just disable wbt by default). The problem is
> that blk-wbt assumes certain model of underlying storage stack and hardware
> behavior and BFQ just does not fit in that model. For example BFQ wants to
> see as many requests as possible so that it can heavily reorder them,
> estimate think times of applications, etc. On the other hand blk-wbt
> assumes that if request latency gets higher, it means there is too much IO
> going on and we need to allow less of "lower priority" IO types to be
> submitted. These two go directly against one another and I was easily
> observing blk-wbt spiraling down to allowing only very small number of
> requests submitted while BFQ was idling waiting for more IO from the
> process that was currently scheduled.
>
Thanks for your explanation, I understand that bfq and wbt should not
work together.
However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
guarantee is not needed, does the above phenomenon still exist? I find
it hard to understand... Perhaps I need to do some test.
Thanks,
Kuai
> So I'm kind of wondering why you'd like to use blk-wbt and BFQ together...
>
> Honza
>
Hi, Jan!
在 2022/09/23 19:03, Jan Kara 写道:
> Hi Kuai!
>
> On Fri 23-09-22 18:23:03, Yu Kuai wrote:
>> 在 2022/09/23 18:06, Jan Kara 写道:
>>> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>>>> Hi, Christoph
>>>>
>>>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>>>
>>>>> Umm, wouldn't this be something decided at runtime, that is not
>>>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>>>> if the hierarchical cgroup based scheduling is actually used for a
>>>>> given device?
>>>>> .
>>>>>
>>>>
>>>> That's a good point,
>>>>
>>>> Before this patch wbt is simply disabled if elevator is bfq.
>>>>
>>>> With this patch, if elevator is bfq while bfq doesn't throttle
>>>> any IO yet, wbt still is disabled unnecessarily.
>>>
>>> It is not really disabled unnecessarily. Have you actually tested the
>>> performance of the combination? I did once and the results were just
>>> horrible (which is I made BFQ just disable wbt by default). The problem is
>>> that blk-wbt assumes certain model of underlying storage stack and hardware
>>> behavior and BFQ just does not fit in that model. For example BFQ wants to
>>> see as many requests as possible so that it can heavily reorder them,
>>> estimate think times of applications, etc. On the other hand blk-wbt
>>> assumes that if request latency gets higher, it means there is too much IO
>>> going on and we need to allow less of "lower priority" IO types to be
>>> submitted. These two go directly against one another and I was easily
>>> observing blk-wbt spiraling down to allowing only very small number of
>>> requests submitted while BFQ was idling waiting for more IO from the
>>> process that was currently scheduled.
>>>
>>
>> Thanks for your explanation, I understand that bfq and wbt should not
>> work together.
>>
>> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
>> guarantee is not needed, does the above phenomenon still exist? I find
>> it hard to understand... Perhaps I need to do some test.
>
> Well, BFQ implements for example idling on sync IO queues which is one of
> the features that upsets blk-wbt. That does not depend on
> CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
> assigns storage *time slots* to different processes and IO from other
> processes is just queued at those times increases IO completion
> latency (for IOs of processes that are not currently scheduled) and this
> tends to confuse blk-wbt.
>
I see it now, thanks a lot for your explanations, that really helps a lot.
I misunderstand about the how the bfq works. I'll remove this patch in
next version.
Thanks,
Kuai
> Honza
>
On Thu 22-09-22 19:35:54, Yu Kuai wrote:
> Currently, if wbt is initialized and then disabled by
> wbt_disable_default(), sysfs will still show valid wbt_lat_usec, which
> will confuse users that wbt is still enabled.
>
> This patch shows wbt_lat_usec as zero and forbid to set it while wbt
> is disabled.
So I agree we should show 0 in wbt_lat_usec if wbt is disabled by
wbt_disable_default(). But why do you forbid setting of wbt_lat_usec?
IMHO if wbt_lat_usec is set, admin wants to turn on wbt so we should just
update rwb->enable_state to WBT_STATE_ON_MANUAL.
Honza
>
> Signed-off-by: Yu Kuai <[email protected]>
> Reported-and-tested-by: Holger Hoffstätte <[email protected]>
> ---
> block/blk-sysfs.c | 9 ++++++++-
> block/blk-wbt.c | 7 +++++++
> block/blk-wbt.h | 5 +++++
> 3 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index e1f009aba6fd..1955bb6a284d 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -467,10 +467,14 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
>
> static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
> {
> + u64 lat;
> +
> if (!wbt_rq_qos(q))
> return -EINVAL;
>
> - return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
> + lat = wbt_disabled(q) ? 0 : div_u64(wbt_get_min_lat(q), 1000);
> +
> + return sprintf(page, "%llu\n", lat);
> }
>
> static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
> @@ -493,6 +497,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
> return ret;
> }
>
> + if (wbt_disabled(q))
> + return -EINVAL;
> +
> if (val == -1)
> val = wbt_default_latency_nsec(q);
> else if (val >= 0)
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index a9982000b667..68851c2c02d2 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -422,6 +422,13 @@ static void wbt_update_limits(struct rq_wb *rwb)
> rwb_wake_all(rwb);
> }
>
> +bool wbt_disabled(struct request_queue *q)
> +{
> + struct rq_qos *rqos = wbt_rq_qos(q);
> +
> + return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT;
> +}
> +
> u64 wbt_get_min_lat(struct request_queue *q)
> {
> struct rq_qos *rqos = wbt_rq_qos(q);
> diff --git a/block/blk-wbt.h b/block/blk-wbt.h
> index 7e44eccc676d..e42465ddcbb6 100644
> --- a/block/blk-wbt.h
> +++ b/block/blk-wbt.h
> @@ -94,6 +94,7 @@ void wbt_enable_default(struct request_queue *);
>
> u64 wbt_get_min_lat(struct request_queue *q);
> void wbt_set_min_lat(struct request_queue *q, u64 val);
> +bool wbt_disabled(struct request_queue *);
>
> void wbt_set_write_cache(struct request_queue *, bool);
>
> @@ -125,6 +126,10 @@ static inline u64 wbt_default_latency_nsec(struct request_queue *q)
> {
> return 0;
> }
> +static inline bool wbt_disabled(struct request_queue *q)
> +{
> + return true;
> +}
>
> #endif /* CONFIG_BLK_WBT */
>
> --
> 2.31.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR
Hi, Jan
在 2022/09/26 17:44, Jan Kara 写道:
> On Thu 22-09-22 19:35:54, Yu Kuai wrote:
>> Currently, if wbt is initialized and then disabled by
>> wbt_disable_default(), sysfs will still show valid wbt_lat_usec, which
>> will confuse users that wbt is still enabled.
>>
>> This patch shows wbt_lat_usec as zero and forbid to set it while wbt
>> is disabled.
>
> So I agree we should show 0 in wbt_lat_usec if wbt is disabled by
> wbt_disable_default(). But why do you forbid setting of wbt_lat_usec?
> IMHO if wbt_lat_usec is set, admin wants to turn on wbt so we should just
> update rwb->enable_state to WBT_STATE_ON_MANUAL.
I was thinking that don't enable wbt if elevator is bfq. Since we know
that performance is bad, thus it doesn't make sense to me to do that,
and users might not be aware of the problem.
However, perhaps admin should be responsible if wbt is turned on while
elevator is bfq.
Thanks,
Kuai
>
> Honza
>
>>
>> Signed-off-by: Yu Kuai <[email protected]>
>> Reported-and-tested-by: Holger Hoffstätte <[email protected]>
>> ---
>> block/blk-sysfs.c | 9 ++++++++-
>> block/blk-wbt.c | 7 +++++++
>> block/blk-wbt.h | 5 +++++
>> 3 files changed, 20 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index e1f009aba6fd..1955bb6a284d 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -467,10 +467,14 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
>>
>> static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
>> {
>> + u64 lat;
>> +
>> if (!wbt_rq_qos(q))
>> return -EINVAL;
>>
>> - return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
>> + lat = wbt_disabled(q) ? 0 : div_u64(wbt_get_min_lat(q), 1000);
>> +
>> + return sprintf(page, "%llu\n", lat);
>> }
>>
>> static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
>> @@ -493,6 +497,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
>> return ret;
>> }
>>
>> + if (wbt_disabled(q))
>> + return -EINVAL;
>> +
>> if (val == -1)
>> val = wbt_default_latency_nsec(q);
>> else if (val >= 0)
>> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
>> index a9982000b667..68851c2c02d2 100644
>> --- a/block/blk-wbt.c
>> +++ b/block/blk-wbt.c
>> @@ -422,6 +422,13 @@ static void wbt_update_limits(struct rq_wb *rwb)
>> rwb_wake_all(rwb);
>> }
>>
>> +bool wbt_disabled(struct request_queue *q)
>> +{
>> + struct rq_qos *rqos = wbt_rq_qos(q);
>> +
>> + return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT;
>> +}
>> +
>> u64 wbt_get_min_lat(struct request_queue *q)
>> {
>> struct rq_qos *rqos = wbt_rq_qos(q);
>> diff --git a/block/blk-wbt.h b/block/blk-wbt.h
>> index 7e44eccc676d..e42465ddcbb6 100644
>> --- a/block/blk-wbt.h
>> +++ b/block/blk-wbt.h
>> @@ -94,6 +94,7 @@ void wbt_enable_default(struct request_queue *);
>>
>> u64 wbt_get_min_lat(struct request_queue *q);
>> void wbt_set_min_lat(struct request_queue *q, u64 val);
>> +bool wbt_disabled(struct request_queue *);
>>
>> void wbt_set_write_cache(struct request_queue *, bool);
>>
>> @@ -125,6 +126,10 @@ static inline u64 wbt_default_latency_nsec(struct request_queue *q)
>> {
>> return 0;
>> }
>> +static inline bool wbt_disabled(struct request_queue *q)
>> +{
>> + return true;
>> +}
>>
>> #endif /* CONFIG_BLK_WBT */
>>
>> --
>> 2.31.1
>>
On Mon 26-09-22 18:25:18, Yu Kuai wrote:
> Hi, Jan
>
> 在 2022/09/26 17:44, Jan Kara 写道:
> > On Thu 22-09-22 19:35:54, Yu Kuai wrote:
> > > Currently, if wbt is initialized and then disabled by
> > > wbt_disable_default(), sysfs will still show valid wbt_lat_usec, which
> > > will confuse users that wbt is still enabled.
> > >
> > > This patch shows wbt_lat_usec as zero and forbid to set it while wbt
> > > is disabled.
> >
> > So I agree we should show 0 in wbt_lat_usec if wbt is disabled by
> > wbt_disable_default(). But why do you forbid setting of wbt_lat_usec?
> > IMHO if wbt_lat_usec is set, admin wants to turn on wbt so we should just
> > update rwb->enable_state to WBT_STATE_ON_MANUAL.
>
> I was thinking that don't enable wbt if elevator is bfq. Since we know
> that performance is bad, thus it doesn't make sense to me to do that,
> and user might doesn't aware of the problem.
Yeah, I don't think it is a good idea (that is the reason why it is
disabled by default) but in principle I don't see a reason why we should
block admin from enabling it.
Honza
> > >
> > > Signed-off-by: Yu Kuai <[email protected]>
> > > Reported-and-tested-by: Holger Hoffstätte <[email protected]>
> > > ---
> > > block/blk-sysfs.c | 9 ++++++++-
> > > block/blk-wbt.c | 7 +++++++
> > > block/blk-wbt.h | 5 +++++
> > > 3 files changed, 20 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > > index e1f009aba6fd..1955bb6a284d 100644
> > > --- a/block/blk-sysfs.c
> > > +++ b/block/blk-sysfs.c
> > > @@ -467,10 +467,14 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
> > > static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
> > > {
> > > + u64 lat;
> > > +
> > > if (!wbt_rq_qos(q))
> > > return -EINVAL;
> > > - return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
> > > + lat = wbt_disabled(q) ? 0 : div_u64(wbt_get_min_lat(q), 1000);
> > > +
> > > + return sprintf(page, "%llu\n", lat);
> > > }
> > > static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
> > > @@ -493,6 +497,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
> > > return ret;
> > > }
> > > + if (wbt_disabled(q))
> > > + return -EINVAL;
> > > +
> > > if (val == -1)
> > > val = wbt_default_latency_nsec(q);
> > > else if (val >= 0)
> > > diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> > > index a9982000b667..68851c2c02d2 100644
> > > --- a/block/blk-wbt.c
> > > +++ b/block/blk-wbt.c
> > > @@ -422,6 +422,13 @@ static void wbt_update_limits(struct rq_wb *rwb)
> > > rwb_wake_all(rwb);
> > > }
> > > +bool wbt_disabled(struct request_queue *q)
> > > +{
> > > + struct rq_qos *rqos = wbt_rq_qos(q);
> > > +
> > > + return !rqos || RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT;
> > > +}
> > > +
> > > u64 wbt_get_min_lat(struct request_queue *q)
> > > {
> > > struct rq_qos *rqos = wbt_rq_qos(q);
> > > diff --git a/block/blk-wbt.h b/block/blk-wbt.h
> > > index 7e44eccc676d..e42465ddcbb6 100644
> > > --- a/block/blk-wbt.h
> > > +++ b/block/blk-wbt.h
> > > @@ -94,6 +94,7 @@ void wbt_enable_default(struct request_queue *);
> > > u64 wbt_get_min_lat(struct request_queue *q);
> > > void wbt_set_min_lat(struct request_queue *q, u64 val);
> > > +bool wbt_disabled(struct request_queue *);
> > > void wbt_set_write_cache(struct request_queue *, bool);
> > > @@ -125,6 +126,10 @@ static inline u64 wbt_default_latency_nsec(struct request_queue *q)
> > > {
> > > return 0;
> > > }
> > > +static inline bool wbt_disabled(struct request_queue *q)
> > > +{
> > > + return true;
> > > +}
> > > #endif /* CONFIG_BLK_WBT */
> > > --
> > > 2.31.1
> > >
>
--
Jan Kara <[email protected]>
SUSE Labs, CR
Hi, Jan
在 2022/09/23 19:03, Jan Kara 写道:
> Hi Kuai!
>
> On Fri 23-09-22 18:23:03, Yu Kuai wrote:
>> 在 2022/09/23 18:06, Jan Kara 写道:
>>> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>>>> Hi, Christoph
>>>>
>>>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>>>
>>>>> Umm, wouldn't this be something decided at runtime, that is not
>>>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>>>> if the hierarchical cgroup based scheduling is actually used for a
>>>>> given device?
>>>>> .
>>>>>
>>>>
>>>> That's a good point,
>>>>
>>>> Before this patch wbt is simply disabled if elevator is bfq.
>>>>
>>>> With this patch, if elevator is bfq while bfq doesn't throttle
>>>> any IO yet, wbt still is disabled unnecessarily.
>>>
>>> It is not really disabled unnecessarily. Have you actually tested the
>>> performance of the combination? I did once and the results were just
>>> horrible (which is I made BFQ just disable wbt by default). The problem is
>>> that blk-wbt assumes certain model of underlying storage stack and hardware
>>> behavior and BFQ just does not fit in that model. For example BFQ wants to
>>> see as many requests as possible so that it can heavily reorder them,
>>> estimate think times of applications, etc. On the other hand blk-wbt
>>> assumes that if request latency gets higher, it means there is too much IO
>>> going on and we need to allow less of "lower priority" IO types to be
>>> submitted. These two go directly against one another and I was easily
>>> observing blk-wbt spiraling down to allowing only very small number of
>>> requests submitted while BFQ was idling waiting for more IO from the
>>> process that was currently scheduled.
>>>
>>
>> Thanks for your explanation, I understand that bfq and wbt should not
>> work together.
>>
>> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
>> guarantee is not needed, does the above phenomenon still exist? I find
>> it hard to understand... Perhaps I need to do some test.
>
> Well, BFQ implements for example idling on sync IO queues which is one of
> the features that upsets blk-wbt. That does not depend on
> CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
> assigns storage *time slots* to different processes and IO from other
> processes is just queued at those times increases IO completion
> latency (for IOs of processes that are not currently scheduled) and this
> tends to confuse blk-wbt.
>
Hi, Jan
Just to be curious, have you ever thought about or tested wbt with
io-cost? And even more, how does bfq work with io-cost?
I haven't tested yet, but it seems to me that some of them can work well
together.
Thanks,
Kuai
> Honza
>
Hi, Jan
在 2022/09/26 19:47, Jan Kara 写道:
> On Mon 26-09-22 18:25:18, Yu Kuai wrote:
>> Hi, Jan
>>
>> 在 2022/09/26 17:44, Jan Kara 写道:
>>> On Thu 22-09-22 19:35:54, Yu Kuai wrote:
>>>> Currently, if wbt is initialized and then disabled by
>>>> wbt_disable_default(), sysfs will still show valid wbt_lat_usec, which
>>>> will confuse users that wbt is still enabled.
>>>>
>>>> This patch shows wbt_lat_usec as zero and forbid to set it while wbt
>>>> is disabled.
>>>
>>> So I agree we should show 0 in wbt_lat_usec if wbt is disabled by
>>> wbt_disable_default(). But why do you forbid setting of wbt_lat_usec?
>>> IMHO if wbt_lat_usec is set, admin wants to turn on wbt so we should just
>>> update rwb->enable_state to WBT_STATE_ON_MANUAL.
>>
>> I was thinking that don't enable wbt if elevator is bfq. Since we know
>> that performance is bad, thus it doesn't make sense to me to do that,
>> and user might doesn't aware of the problem.
>
> Yeah, I don't think it is a good idea (that is the reason why it is
> disabled by default) but in priciple I don't see a reason why we should
> block admin from enabling it.
I'll enable it in the next version.
Thanks,
Kuai
Hi Kuai!
On Mon 26-09-22 21:00:48, Yu Kuai wrote:
> 在 2022/09/23 19:03, Jan Kara 写道:
> > Hi Kuai!
> >
> > On Fri 23-09-22 18:23:03, Yu Kuai wrote:
> > > 在 2022/09/23 18:06, Jan Kara 写道:
> > > > On Fri 23-09-22 17:50:49, Yu Kuai wrote:
> > > > > Hi, Christoph
> > > > >
> > > > > 在 2022/09/23 16:56, Christoph Hellwig 写道:
> > > > > > On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
> > > > > > > wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
> > > > > >
> > > > > > Umm, wouldn't this be something decided at runtime, that is not
> > > > > > if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
> > > > > > if the hierarchical cgroup based scheduling is actually used for a
> > > > > > given device?
> > > > > > .
> > > > > >
> > > > >
> > > > > That's a good point,
> > > > >
> > > > > Before this patch wbt is simply disabled if elevator is bfq.
> > > > >
> > > > > With this patch, if elevator is bfq while bfq doesn't throttle
> > > > > any IO yet, wbt still is disabled unnecessarily.
> > > >
> > > > It is not really disabled unnecessarily. Have you actually tested the
> > > > performance of the combination? I did once and the results were just
> > > > horrible (which is I made BFQ just disable wbt by default). The problem is
> > > > that blk-wbt assumes certain model of underlying storage stack and hardware
> > > > behavior and BFQ just does not fit in that model. For example BFQ wants to
> > > > see as many requests as possible so that it can heavily reorder them,
> > > > estimate think times of applications, etc. On the other hand blk-wbt
> > > > assumes that if request latency gets higher, it means there is too much IO
> > > > going on and we need to allow less of "lower priority" IO types to be
> > > > submitted. These two go directly against one another and I was easily
> > > > observing blk-wbt spiraling down to allowing only very small number of
> > > > requests submitted while BFQ was idling waiting for more IO from the
> > > > process that was currently scheduled.
> > > >
> > >
> > > Thanks for your explanation, I understand that bfq and wbt should not
> > > work together.
> > >
> > > However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
> > > guarantee is not needed, does the above phenomenon still exist? I find
> > > it hard to understand... Perhaps I need to do some test.
> >
> > Well, BFQ implements for example idling on sync IO queues which is one of
> > the features that upsets blk-wbt. That does not depend on
> > CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
> > assigns storage *time slots* to different processes and IO from other
> > processes is just queued at those times increases IO completion
> > latency (for IOs of processes that are not currently scheduled) and this
> > tends to confuse blk-wbt.
> >
> Hi, Jan
>
> Just to be curious, have you ever think about or tested wbt with
> io-cost? And even more, how bfq work with io-cost?
>
> I haven't tested yet, but it seems to me some of them can work well
> together.
No, I didn't test these combinations. I actually expect there would be
troubles in both cases under high IO load but you can try :)
Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
Hi, Jan
在 2022/09/26 22:22, Jan Kara 写道:
> Hi Kuai!
>
> On Mon 26-09-22 21:00:48, Yu Kuai wrote:
>> 在 2022/09/23 19:03, Jan Kara 写道:
>>> Hi Kuai!
>>>
>>> On Fri 23-09-22 18:23:03, Yu Kuai wrote:
>>>> 在 2022/09/23 18:06, Jan Kara 写道:
>>>>> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>>>>>> Hi, Christoph
>>>>>>
>>>>>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>>>>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>>>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>>>>>
>>>>>>> Umm, wouldn't this be something decided at runtime, that is not
>>>>>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>>>>>> if the hierarchical cgroup based scheduling is actually used for a
>>>>>>> given device?
>>>>>>> .
>>>>>>>
>>>>>>
>>>>>> That's a good point,
>>>>>>
>>>>>> Before this patch wbt is simply disabled if elevator is bfq.
>>>>>>
>>>>>> With this patch, if elevator is bfq while bfq doesn't throttle
>>>>>> any IO yet, wbt still is disabled unnecessarily.
>>>>>
>>>>> It is not really disabled unnecessarily. Have you actually tested the
>>>>> performance of the combination? I did once and the results were just
>>>>> horrible (which is I made BFQ just disable wbt by default). The problem is
>>>>> that blk-wbt assumes certain model of underlying storage stack and hardware
>>>>> behavior and BFQ just does not fit in that model. For example BFQ wants to
>>>>> see as many requests as possible so that it can heavily reorder them,
>>>>> estimate think times of applications, etc. On the other hand blk-wbt
>>>>> assumes that if request latency gets higher, it means there is too much IO
>>>>> going on and we need to allow less of "lower priority" IO types to be
>>>>> submitted. These two go directly against one another and I was easily
>>>>> observing blk-wbt spiraling down to allowing only very small number of
>>>>> requests submitted while BFQ was idling waiting for more IO from the
>>>>> process that was currently scheduled.
>>>>>
>>>>
>>>> Thanks for your explanation, I understand that bfq and wbt should not
>>>> work together.
>>>>
>>>> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
>>>> guarantee is not needed, does the above phenomenon still exist? I find
>>>> it hard to understand... Perhaps I need to do some test.
>>>
>>> Well, BFQ implements for example idling on sync IO queues which is one of
>>> the features that upsets blk-wbt. That does not depend on
>>> CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
>>> assigns storage *time slots* to different processes and IO from other
>>> processes is just queued at those times increases IO completion
>>> latency (for IOs of processes that are not currently scheduled) and this
>>> tends to confuse blk-wbt.
>>>
>> Hi, Jan
>>
>> Just to be curious, have you ever think about or tested wbt with
>> io-cost? And even more, how bfq work with io-cost?
>>
>> I haven't tested yet, but it seems to me some of them can work well
>> together.
>
> No, I didn't test these combinations. I actually expect there would be
> troubles in both cases under high IO load but you can try :)
Just realized I made a clerical error; I actually wanted to say that they
*can't* work well together.
I'll try to test the combinations.
Thanks,
Kuai
>
> Honza
>
> Il giorno 27 set 2022, alle ore 03:02, Yu Kuai <[email protected]> ha scritto:
>
> Hi, Jan
>
> 在 2022/09/26 22:22, Jan Kara 写道:
>> Hi Kuai!
>> On Mon 26-09-22 21:00:48, Yu Kuai wrote:
>>> 在 2022/09/23 19:03, Jan Kara 写道:
>>>> Hi Kuai!
>>>>
>>>> On Fri 23-09-22 18:23:03, Yu Kuai wrote:
>>>>> 在 2022/09/23 18:06, Jan Kara 写道:
>>>>>> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>>>>>>> Hi, Christoph
>>>>>>>
>>>>>>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>>>>>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>>>>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>>>>>>
>>>>>>>> Umm, wouldn't this be something decided at runtime, that is not
>>>>>>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>>>>>>> if the hierarchical cgroup based scheduling is actually used for a
>>>>>>>> given device?
>>>>>>>> .
>>>>>>>>
>>>>>>>
>>>>>>> That's a good point,
>>>>>>>
>>>>>>> Before this patch wbt is simply disabled if elevator is bfq.
>>>>>>>
>>>>>>> With this patch, if elevator is bfq while bfq doesn't throttle
>>>>>>> any IO yet, wbt still is disabled unnecessarily.
>>>>>>
>>>>>> It is not really disabled unnecessarily. Have you actually tested the
>>>>>> performance of the combination? I did once and the results were just
>>>>>> horrible (which is I made BFQ just disable wbt by default). The problem is
>>>>>> that blk-wbt assumes certain model of underlying storage stack and hardware
>>>>>> behavior and BFQ just does not fit in that model. For example BFQ wants to
>>>>>> see as many requests as possible so that it can heavily reorder them,
>>>>>> estimate think times of applications, etc. On the other hand blk-wbt
>>>>>> assumes that if request latency gets higher, it means there is too much IO
>>>>>> going on and we need to allow less of "lower priority" IO types to be
>>>>>> submitted. These two go directly against one another and I was easily
>>>>>> observing blk-wbt spiraling down to allowing only very small number of
>>>>>> requests submitted while BFQ was idling waiting for more IO from the
>>>>>> process that was currently scheduled.
>>>>>>
>>>>>
>>>>> Thanks for your explanation, I understand that bfq and wbt should not
>>>>> work together.
>>>>>
>>>>> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
>>>>> guarantee is not needed, does the above phenomenon still exist? I find
>>>>> it hard to understand... Perhaps I need to do some test.
>>>>
>>>> Well, BFQ implements for example idling on sync IO queues which is one of
>>>> the features that upsets blk-wbt. That does not depend on
>>>> CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
>>>> assigns storage *time slots* to different processes and IO from other
>>>> processes is just queued at those times increases IO completion
>>>> latency (for IOs of processes that are not currently scheduled) and this
>>>> tends to confuse blk-wbt.
>>>>
>>> Hi, Jan
>>>
>>> Just to be curious, have you ever think about or tested wbt with
>>> io-cost? And even more, how bfq work with io-cost?
>>>
>>> I haven't tested yet, but it seems to me some of them can work well
>>> together.
>> No, I didn't test these combinations. I actually expect there would be
>> troubles in both cases under high IO load but you can try :)
>
> Just realize I made a clerical error, I actually want to saied that
> *can't* work well together.
>
You are right, they can't work together, conceptually. Their logics would simply keep conflicting, and neither of the two would manage to control IO as desired.
Thanks,
Paolo
> I'll try to have a test the combinations.
>
> Thanks,
> Kuai
>> Honza
Hi,
在 2022/09/28 0:14, Paolo Valente 写道:
>
>
>> Il giorno 27 set 2022, alle ore 03:02, Yu Kuai <[email protected]> ha scritto:
>>
>> Hi, Jan
>>
>> 在 2022/09/26 22:22, Jan Kara 写道:
>>> Hi Kuai!
>>> On Mon 26-09-22 21:00:48, Yu Kuai wrote:
>>>> 在 2022/09/23 19:03, Jan Kara 写道:
>>>>> Hi Kuai!
>>>>>
>>>>> On Fri 23-09-22 18:23:03, Yu Kuai wrote:
>>>>>> 在 2022/09/23 18:06, Jan Kara 写道:
>>>>>>> On Fri 23-09-22 17:50:49, Yu Kuai wrote:
>>>>>>>> Hi, Christoph
>>>>>>>>
>>>>>>>> 在 2022/09/23 16:56, Christoph Hellwig 写道:
>>>>>>>>> On Thu, Sep 22, 2022 at 07:35:56PM +0800, Yu Kuai wrote:
>>>>>>>>>> wbt and bfq should work just fine if CONFIG_BFQ_GROUP_IOSCHED is disabled.
>>>>>>>>>
>>>>>>>>> Umm, wouldn't this be something decided at runtime, that is not
>>>>>>>>> if CONFIG_BFQ_GROUP_IOSCHED is enable/disable in the kernel build
>>>>>>>>> if the hierarchical cgroup based scheduling is actually used for a
>>>>>>>>> given device?
>>>>>>>>> .
>>>>>>>>>
>>>>>>>>
>>>>>>>> That's a good point,
>>>>>>>>
>>>>>>>> Before this patch wbt is simply disabled if elevator is bfq.
>>>>>>>>
>>>>>>>> With this patch, if elevator is bfq while bfq doesn't throttle
>>>>>>>> any IO yet, wbt still is disabled unnecessarily.
>>>>>>>
>>>>>>> It is not really disabled unnecessarily. Have you actually tested the
>>>>>>> performance of the combination? I did once and the results were just
>>>>>>> horrible (which is I made BFQ just disable wbt by default). The problem is
>>>>>>> that blk-wbt assumes certain model of underlying storage stack and hardware
>>>>>>> behavior and BFQ just does not fit in that model. For example BFQ wants to
>>>>>>> see as many requests as possible so that it can heavily reorder them,
>>>>>>> estimate think times of applications, etc. On the other hand blk-wbt
>>>>>>> assumes that if request latency gets higher, it means there is too much IO
>>>>>>> going on and we need to allow less of "lower priority" IO types to be
>>>>>>> submitted. These two go directly against one another and I was easily
>>>>>>> observing blk-wbt spiraling down to allowing only very small number of
>>>>>>> requests submitted while BFQ was idling waiting for more IO from the
>>>>>>> process that was currently scheduled.
>>>>>>>
>>>>>>
>>>>>> Thanks for your explanation, I understand that bfq and wbt should not
>>>>>> work together.
>>>>>>
>>>>>> However, I wonder if CONFIG_BFQ_GROUP_IOSCHED is disabled, or service
>>>>>> guarantee is not needed, does the above phenomenon still exist? I find
>>>>>> it hard to understand... Perhaps I need to do some test.
>>>>>
>>>>> Well, BFQ implements for example idling on sync IO queues which is one of
>>>>> the features that upsets blk-wbt. That does not depend on
>>>>> CONFIG_BFQ_GROUP_IOSCHED in any way. Also generally the idea that BFQ
>>>>> assigns storage *time slots* to different processes and IO from other
>>>>> processes is just queued at those times increases IO completion
>>>>> latency (for IOs of processes that are not currently scheduled) and this
>>>>> tends to confuse blk-wbt.
>>>>>
>>>> Hi, Jan
>>>>
>>>> Just to be curious, have you ever think about or tested wbt with
>>>> io-cost? And even more, how bfq work with io-cost?
>>>>
>>>> I haven't tested yet, but it seems to me some of them can work well
>>>> together.
>>> No, I didn't test these combinations. I actually expect there would be
>>> troubles in both cases under high IO load but you can try :)
>>
>> Just realize I made a clerical error, I actually want to saied that
>> *can't* work well together.
>>
>
> You are right, they can't work together, conceptually. Their logics would simply keep conflicting, and none of the two would make ti to control IO as desired.
Yes, I just ran some simple tests, and the test results are very bad...
Perhaps we can do something like bfq does to disable wbt.
Thanks,
Kuai
>
> Thanks,
> Paolo
>
>> I'll try to have a test the combinations.
>>
>> Thanks,
>> Kuai
>>> Honza
>
> .
>