From: Yu Kuai <[email protected]>
handle_read_error() will resubmit r10_bio via raid10_read_request(), which
will call bio_start_io_acct() again, while bio_end_io_acct() will only
be called once, so the inflight count is incremented twice for one io.
Fix the problem by not accounting the io again from handle_read_error().
Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting")
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/raid10.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6c66357f92f5..4f8edb6ea3e2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1173,7 +1173,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
}
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
- struct r10bio *r10_bio)
+ struct r10bio *r10_bio, bool handle_error)
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
@@ -1244,7 +1244,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
slot = r10_bio->read_slot;
- if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ if (!handle_error && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
@@ -1578,7 +1578,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
conf->geo.raid_disks);
if (bio_data_dir(bio) == READ)
- raid10_read_request(mddev, bio, r10_bio);
+ raid10_read_request(mddev, bio, r10_bio, false);
else
raid10_write_request(mddev, bio, r10_bio);
}
@@ -2980,7 +2980,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
rdev_dec_pending(rdev, mddev);
allow_barrier(conf);
r10_bio->state = 0;
- raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
+ raid10_read_request(mddev, r10_bio->master_bio, r10_bio, true);
}
static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
--
2.31.1
Hi,
What do you mean by 'leak' here?
On 3/4/23 15:01, Yu Kuai wrote:
> From: Yu Kuai <[email protected]>
>
> handle_read_error() will resumit r10_bio by raid10_read_request(), which
> will call bio_start_io_acct() again, while bio_end_io_acct() will only
> be called once.
>
> Fix the problem by don't account io again from handle_read_error().
My understanding is that it causes inaccurate io stats for a bio which had
a read error.
> Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting")
> Signed-off-by: Yu Kuai <[email protected]>
> ---
> drivers/md/raid10.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 6c66357f92f5..4f8edb6ea3e2 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1173,7 +1173,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
> }
>
> static void raid10_read_request(struct mddev *mddev, struct bio *bio,
> - struct r10bio *r10_bio)
> + struct r10bio *r10_bio, bool handle_error)
> {
> struct r10conf *conf = mddev->private;
> struct bio *read_bio;
> @@ -1244,7 +1244,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
> }
> slot = r10_bio->read_slot;
>
> - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
> + if (!handle_error && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
> r10_bio->start_time = bio_start_io_acct(bio);
I think a simpler way is to just check R10BIO_ReadError here.
Thanks,
Guoqing
Hi,
在 2023/03/09 14:36, Guoqing Jiang 写道:
> Hi,
>
> What do you mean 'leak' here?
I mean that the inflight count is leaked, because it's incremented
twice for one io.
>
> On 3/4/23 15:01, Yu Kuai wrote:
>> From: Yu Kuai <[email protected]>
>>
>> handle_read_error() will resumit r10_bio by raid10_read_request(), which
>> will call bio_start_io_acct() again, while bio_end_io_acct() will only
>> be called once.
>>
>> Fix the problem by don't account io again from handle_read_error().
>
> My understanding is it caused inaccurate io stats for bio which had a read
> error.
>
>> Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting")
>> Signed-off-by: Yu Kuai <[email protected]>
>> ---
>> drivers/md/raid10.c | 8 ++++----
>> 1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 6c66357f92f5..4f8edb6ea3e2 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -1173,7 +1173,7 @@ static bool regular_request_wait(struct mddev
>> *mddev, struct r10conf *conf,
>> }
>> static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>> - struct r10bio *r10_bio)
>> + struct r10bio *r10_bio, bool handle_error)
>> {
>> struct r10conf *conf = mddev->private;
>> struct bio *read_bio;
>> @@ -1244,7 +1244,7 @@ static void raid10_read_request(struct mddev
>> *mddev, struct bio *bio,
>> }
>> slot = r10_bio->read_slot;
>> - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>> + if (!handle_error &&
>> blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>> r10_bio->start_time = bio_start_io_acct(bio);
>
> I think a simpler way is just check R10BIO_ReadError here.
No, I'm afraid this is incorrect because handle_read_error() clears the
state before resubmitting the r10bio.
Thanks,
Kuai
>
> Thanks,
> Guoqing
> .
>
On 3/9/23 14:56, Yu Kuai wrote:
> Hi,
>
> 在 2023/03/09 14:36, Guoqing Jiang 写道:
>> Hi,
>>
>> What do you mean 'leak' here?
>
> I try to mean that inflight counting is leaked, because it's increased
> twice for one io.
How about changing the subject to something like this?
'md/raid10: Don't call bio_start_io_acct twice for bio which experienced
read error'
>
>>
>> On 3/4/23 15:01, Yu Kuai wrote:
>>> From: Yu Kuai <[email protected]>
>>>
>>> handle_read_error() will resumit r10_bio by raid10_read_request(),
>>> which
>>> will call bio_start_io_acct() again, while bio_end_io_acct() will only
>>> be called once.
>>>
>>> Fix the problem by don't account io again from handle_read_error().
>>
>> My understanding is it caused inaccurate io stats for bio which had a
>> read
>> error.
>>
>>> Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting")
>>> Signed-off-by: Yu Kuai <[email protected]>
>>> ---
>>> drivers/md/raid10.c | 8 ++++----
>>> 1 file changed, 4 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index 6c66357f92f5..4f8edb6ea3e2 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -1173,7 +1173,7 @@ static bool regular_request_wait(struct mddev
>>> *mddev, struct r10conf *conf,
>>> }
>>> static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>>> - struct r10bio *r10_bio)
>>> + struct r10bio *r10_bio, bool handle_error)
>>> {
>>> struct r10conf *conf = mddev->private;
>>> struct bio *read_bio;
>>> @@ -1244,7 +1244,7 @@ static void raid10_read_request(struct mddev
>>> *mddev, struct bio *bio,
>>> }
>>> slot = r10_bio->read_slot;
>>> - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>>> + if (!handle_error &&
>>> blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>>> r10_bio->start_time = bio_start_io_acct(bio);
>>
>> I think a simpler way is just check R10BIO_ReadError here.
>
> No, I'm afraid this is incorrect because handle_read_error clears the
> state before resubmiting the r10bio.
Right,
Acked-by: Guoqing Jiang <[email protected]>
Thanks,
Guoqing
Hi,
在 2023/03/09 15:27, Guoqing Jiang 写道:
>
>
> On 3/9/23 14:56, Yu Kuai wrote:
>> Hi,
>>
>> 在 2023/03/09 14:36, Guoqing Jiang 写道:
>>> Hi,
>>>
>>> What do you mean 'leak' here?
>>
>> I try to mean that inflight counting is leaked, because it's increased
>> twice for one io.
>
> How about change the subject to something like?
>
> 'md/raid10: Don't call bio_start_io_acct twice for bio which experienced
> read error'
>
Of course, I'll change that in v2.
>>
>>>
>>> On 3/4/23 15:01, Yu Kuai wrote:
>>>> From: Yu Kuai <[email protected]>
>>>>
>>>> handle_read_error() will resumit r10_bio by raid10_read_request(),
>>>> which
>>>> will call bio_start_io_acct() again, while bio_end_io_acct() will only
>>>> be called once.
>>>>
>>>> Fix the problem by don't account io again from handle_read_error().
>>>
>>> My understanding is it caused inaccurate io stats for bio which had a
>>> read
>>> error.
>>>
>>>> Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting")
>>>> Signed-off-by: Yu Kuai <[email protected]>
>>>> ---
>>>> drivers/md/raid10.c | 8 ++++----
>>>> 1 file changed, 4 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>>> index 6c66357f92f5..4f8edb6ea3e2 100644
>>>> --- a/drivers/md/raid10.c
>>>> +++ b/drivers/md/raid10.c
>>>> @@ -1173,7 +1173,7 @@ static bool regular_request_wait(struct mddev
>>>> *mddev, struct r10conf *conf,
>>>> }
>>>> static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>>>> - struct r10bio *r10_bio)
>>>> + struct r10bio *r10_bio, bool handle_error)
>>>> {
>>>> struct r10conf *conf = mddev->private;
>>>> struct bio *read_bio;
>>>> @@ -1244,7 +1244,7 @@ static void raid10_read_request(struct mddev
>>>> *mddev, struct bio *bio,
>>>> }
>>>> slot = r10_bio->read_slot;
>>>> - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>>>> + if (!handle_error &&
>>>> blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
>>>> r10_bio->start_time = bio_start_io_acct(bio);
>>>
>>> I think a simpler way is just check R10BIO_ReadError here.
>>
>> No, I'm afraid this is incorrect because handle_read_error clears the
>> state before resubmiting the r10bio.
>
> Right,
>
> Acked-by: Guoqing Jiang <[email protected]>
Thanks for the review.
Kuai
>
> Thanks,
> Guoqing
> .
>