LinuxLists.cc - [PATCH 1/2] md: factor out a helper exceed_read_errors() to check read_errors

2023-12-04 07:04:41

Subject: [PATCH 1/2] md: factor out a helper exceed_read_errors() to check read_errors

From: Li Nan <[email protected]>

Move check_decay_read_errors() to raid1-10.c and factor out a helper
exceed_read_errors() to check if read_errors exceeds the limit, so that
raid1 can also use it. There are no functional changes.

Signed-off-by: Li Nan <[email protected]>
---
drivers/md/raid1-10.c | 54 +++++++++++++++++++++++++++++++++++++++++++
drivers/md/raid10.c | 47 +------------------------------------
2 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 3f22edec70e7..9e17eab915f5 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
else
md_bitmap_unplug(bitmap);
}
+
+/*
+ * Used by fix_read_error() to decay the per rdev read_errors.
+ * We halve the read error count for every hour that has elapsed
+ * since the last recorded read error.
+ */
+static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+ long cur_time_mon;
+ unsigned long hours_since_last;
+ unsigned int read_errors = atomic_read(&rdev->read_errors);
+
+ cur_time_mon = ktime_get_seconds();
+
+ if (rdev->last_read_error == 0) {
+ /* first time we've seen a read error */
+ rdev->last_read_error = cur_time_mon;
+ return;
+ }
+
+ hours_since_last = (long)(cur_time_mon -
+ rdev->last_read_error) / 3600;
+
+ rdev->last_read_error = cur_time_mon;
+
+ /*
+ * if hours_since_last is > the number of bits in read_errors
+ * just set read errors to 0. We do this to avoid
+ * overflowing the shift of read_errors by hours_since_last.
+ */
+ if (hours_since_last >= 8 * sizeof(read_errors))
+ atomic_set(&rdev->read_errors, 0);
+ else
+ atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+}
+
+static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+ int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+ int read_errors;
+
+ check_decay_read_errors(mddev, rdev);
+ read_errors = atomic_inc_return(&rdev->read_errors);
+ if (read_errors > max_read_errors) {
+ pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
+ mdname(mddev), rdev->bdev, read_errors, max_read_errors);
+ pr_notice("md:%s: %pg: Failing raid device\n",
+ mdname(mddev), rdev->bdev);
+ md_error(mddev, rdev);
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 375c11d6159f..8ea4974fb91c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2592,42 +2592,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
}
}

-/*
- * Used by fix_read_error() to decay the per rdev read_errors.
- * We halve the read error count for every hour that has elapsed
- * since the last recorded read error.
- *
- */
-static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
-{
- long cur_time_mon;
- unsigned long hours_since_last;
- unsigned int read_errors = atomic_read(&rdev->read_errors);
-
- cur_time_mon = ktime_get_seconds();
-
- if (rdev->last_read_error == 0) {
- /* first time we've seen a read error */
- rdev->last_read_error = cur_time_mon;
- return;
- }
-
- hours_since_last = (long)(cur_time_mon -
- rdev->last_read_error) / 3600;
-
- rdev->last_read_error = cur_time_mon;
-
- /*
- * if hours_since_last is > the number of bits in read_errors
- * just set read errors to 0. We do this to avoid
- * overflowing the shift of read_errors by hours_since_last.
- */
- if (hours_since_last >= 8 * sizeof(read_errors))
- atomic_set(&rdev->read_errors, 0);
- else
- atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
-}
-
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
int sectors, struct page *page, enum req_op op)
{
@@ -2665,7 +2629,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
struct md_rdev *rdev;
- int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[slot].devnum;

/* still own a reference to this rdev, so it cannot
@@ -2678,15 +2641,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
more fix_read_error() attempts */
return;

- check_decay_read_errors(mddev, rdev);
- atomic_inc(&rdev->read_errors);
- if (atomic_read(&rdev->read_errors) > max_read_errors) {
- pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
- mdname(mddev), rdev->bdev,
- atomic_read(&rdev->read_errors), max_read_errors);
- pr_notice("md/raid10:%s: %pg: Failing raid device\n",
- mdname(mddev), rdev->bdev);
- md_error(mddev, rdev);
+ if (exceed_read_errors(mddev, rdev)) {
r10_bio->devs[slot].bio = IO_BLOCKED;
return;
}
--
2.39.2

2023-12-08 18:38:58

by Song Liu

[permalink] [raw]

Subject: Re: [PATCH 1/2] md: factor out a helper exceed_read_errors() to check read_errors

On Sun, Dec 3, 2023 at 11:04 PM <[email protected]> wrote:
>
> From: Li Nan <[email protected]>
>
> Move check_decay_read_errors() to raid1-10.c and factor out a helper
> exceed_read_errors() to check if read_errors exceeds the limit, so that
> raid1 can also use it. There are no functional changes.
>
> Signed-off-by: Li Nan <[email protected]>
[...]
> +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +{
> + int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
> + int read_errors;
> +
> + check_decay_read_errors(mddev, rdev);
> + read_errors = atomic_inc_return(&rdev->read_errors);
> + if (read_errors > max_read_errors) {
> + pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> + mdname(mddev), rdev->bdev, read_errors, max_read_errors);
> + pr_notice("md:%s: %pg: Failing raid device\n",
> + mdname(mddev), rdev->bdev);

This changed the print message from "md/raid10:" to "md:". We should try
to avoid such changes. How about we do something like the following?

Thanks,
Song

diff --git i/drivers/md/raid1-10.c w/drivers/md/raid1-10.c
index 3f22edec70e7..6c0ef0fe6ba7 100644
--- i/drivers/md/raid1-10.c
+++ w/drivers/md/raid1-10.c
@@ -173,3 +173,10 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
else
md_bitmap_unplug(bitmap);
}
+
+static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+ pr_notice("md/" RAID_1_10_NAME ":%s: %pg: Raid device ...\n",
+ ...);
+ ...
+}
diff --git i/drivers/md/raid1.c w/drivers/md/raid1.c
index 9348f1709512..412e98d02a05 100644
--- i/drivers/md/raid1.c
+++ w/drivers/md/raid1.c
@@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf,
sector_t sector_nr);
#define raid1_log(md, fmt, args...) \
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)

+#define RAID_1_10_NAME "raid1"
#include "raid1-10.c"

#define START(node) ((node)->start)
diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
index 375c11d6159f..a1531b5f15e3 100644
--- i/drivers/md/raid10.c
+++ w/drivers/md/raid10.c
@@ -77,6 +77,8 @@ static void end_reshape(struct r10conf *conf);
#define raid10_log(md, fmt, args...) \
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)

+#define RAID_1_10_NAME "raid10"
+
#include "raid1-10.c"

#define NULL_CMD

[...]

2023-12-11 07:59:57

by Li Nan

[permalink] [raw]

Subject: Re: [PATCH 1/2] md: factor out a helper exceed_read_errors() to check read_errors

在 2023/12/9 2:38, Song Liu 写道:
> On Sun, Dec 3, 2023 at 11:04 PM <[email protected]> wrote:
>>
>> From: Li Nan <[email protected]>
>>
>> Move check_decay_read_errors() to raid1-10.c and factor out a helper
>> exceed_read_errors() to check if read_errors exceeds the limit, so that
>> raid1 can also use it. There are no functional changes.
>>
>> Signed-off-by: Li Nan <[email protected]>
> [...]
>> +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
>> +{
>> + int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
>> + int read_errors;
>> +
>> + check_decay_read_errors(mddev, rdev);
>> + read_errors = atomic_inc_return(&rdev->read_errors);
>> + if (read_errors > max_read_errors) {
>> + pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
>> + mdname(mddev), rdev->bdev, read_errors, max_read_errors);
>> + pr_notice("md:%s: %pg: Failing raid device\n",
>> + mdname(mddev), rdev->bdev);
>
> This changed the print message from "md/raid10:" to "md:". We should try
> to avoid such changes. How about we do something like the following?
>
> Thanks,
> Song
>
> diff --git i/drivers/md/raid1-10.c w/drivers/md/raid1-10.c
> index 3f22edec70e7..6c0ef0fe6ba7 100644
> --- i/drivers/md/raid1-10.c
> +++ w/drivers/md/raid1-10.c
> @@ -173,3 +173,10 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
> else
> md_bitmap_unplug(bitmap);
> }
> +
> +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +{
> + pr_notice("md/" RAID_1_10_NAME ":%s: %pg: Raid device ...\n",
> + ...);
> + ...
> +}
> diff --git i/drivers/md/raid1.c w/drivers/md/raid1.c
> index 9348f1709512..412e98d02a05 100644
> --- i/drivers/md/raid1.c
> +++ w/drivers/md/raid1.c
> @@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf,
> sector_t sector_nr);
> #define raid1_log(md, fmt, args...) \
> do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
>
> +#define RAID_1_10_NAME "raid1"
> #include "raid1-10.c"
>
> #define START(node) ((node)->start)
> diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
> index 375c11d6159f..a1531b5f15e3 100644
> --- i/drivers/md/raid10.c
> +++ w/drivers/md/raid10.c
> @@ -77,6 +77,8 @@ static void end_reshape(struct r10conf *conf);
> #define raid10_log(md, fmt, args...) \
> do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
>
> +#define RAID_1_10_NAME "raid10"
> +
> #include "raid1-10.c"
>
> #define NULL_CMD
>
> [...]
>
> .

I agree. Let me improve this in v2.

--
Thanks,
Nan