2023-05-22 07:35:25

by Li Nan

[permalink] [raw]
Subject: [PATCH v4 0/3] md: bugfix of writing raid sysfs

From: Li Nan <[email protected]>

This patch series fixes bugs in writing to raid sysfs attributes.

Changes in v4:
- in patch 1, check overflow in safe_delay_store() directly.
- in patch 2, check input number instead of using unsigned int.

Li Nan (3):
md/raid10: fix overflow of md/safe_mode_delay
md/raid10: fix wrong setting of max_corr_read_errors
md/raid10: optimize check_decay_read_errors()

drivers/md/md.c | 9 ++++++---
drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
2 files changed, 30 insertions(+), 20 deletions(-)

--
2.31.1



2023-05-22 07:39:18

by Li Nan

[permalink] [raw]
Subject: [PATCH v4 3/3] md/raid10: optimize check_decay_read_errors()

From: Li Nan <[email protected]>

check_decay_read_errors() is used to handle rdev->read_errors. But
read_errors is incremented and read after check_decay_read_errors() is
invoked in fix_read_error().

Move all operations on read_errors into check_decay_read_errors() and
clean up the unnecessary atomic_read of read_errors.

Suggested-by: Yu Kuai <[email protected]>
Signed-off-by: Li Nan <[email protected]>
---
drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 4fcfcb350d2b..d31eed17f186 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2655,23 +2655,24 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
}

/*
- * Used by fix_read_error() to decay the per rdev read_errors.
+ * Used by fix_read_error() to decay the per rdev read_errors and check if
+ * read_error > max_read_errors.
* We halve the read error count for every hour that has elapsed
* since the last recorded read error.
*
*/
-static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
- long cur_time_mon;
+ time64_t cur_time_mon = ktime_get_seconds();
unsigned long hours_since_last;
- unsigned int read_errors = atomic_read(&rdev->read_errors);
-
- cur_time_mon = ktime_get_seconds();
+ unsigned int read_errors;
+ unsigned int max_read_errors =
+ atomic_read(&mddev->max_corr_read_errors);

if (rdev->last_read_error == 0) {
/* first time we've seen a read error */
rdev->last_read_error = cur_time_mon;
- return;
+ goto increase;
}

hours_since_last = (long)(cur_time_mon -
@@ -2684,10 +2685,25 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
* just set read errors to 0. We do this to avoid
* overflowing the shift of read_errors by hours_since_last.
*/
+ read_errors = atomic_read(&rdev->read_errors);
if (hours_since_last >= 8 * sizeof(read_errors))
atomic_set(&rdev->read_errors, 0);
else
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+
+increase:
+ read_errors = atomic_inc_return(&rdev->read_errors);
+ if (read_errors > max_read_errors) {
+ pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
+ mdname(mddev), rdev->bdev,
+ read_errors, max_read_errors);
+ pr_notice("md/raid10:%s: %pg: Failing raid device\n",
+ mdname(mddev), rdev->bdev);
+ md_error(mddev, rdev);
+ return false;
+ }
+
+ return true;
}

static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2727,7 +2743,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
struct md_rdev *rdev;
- int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;

/* still own a reference to this rdev, so it cannot
@@ -2740,15 +2755,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
more fix_read_error() attempts */
return;

- check_decay_read_errors(mddev, rdev);
- atomic_inc(&rdev->read_errors);
- if (atomic_read(&rdev->read_errors) > max_read_errors) {
- pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
- mdname(mddev), rdev->bdev,
- atomic_read(&rdev->read_errors), max_read_errors);
- pr_notice("md/raid10:%s: %pg: Failing raid device\n",
- mdname(mddev), rdev->bdev);
- md_error(mddev, rdev);
+ if (!check_decay_read_errors(mddev, rdev)) {
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
--
2.31.1


2023-05-22 07:46:16

by Li Nan

[permalink] [raw]
Subject: [PATCH v4 1/3] md/raid10: fix overflow of md/safe_mode_delay

From: Li Nan <[email protected]>

There is no input check when writing to md/safe_mode_delay in
safe_delay_store(), and msec might also overflow when HZ < 1000 in
safe_delay_show(). Fix this by checking for overflow in safe_delay_store()
and using an unsigned long conversion in safe_delay_show().

Fixes: 72e02075a33f ("md: factor out parsing of fixed-point numbers")
Signed-off-by: Li Nan <[email protected]>
---
drivers/md/md.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8e344b4b3444..b2d69260b5b1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3794,8 +3794,9 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
static ssize_t
safe_delay_show(struct mddev *mddev, char *page)
{
- int msec = (mddev->safemode_delay*1000)/HZ;
- return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
+ unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
+
+ return sprintf(page, "%u.%03u\n", msec/1000, msec%1000);
}
static ssize_t
safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
@@ -3807,7 +3808,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
return -EINVAL;
}

- if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
+ if (strict_strtoul_scaled(cbuf, &msec, 3) < 0 || msec > UINT_MAX / HZ)
return -EINVAL;
if (msec == 0)
mddev->safemode_delay = 0;
--
2.31.1


2023-05-22 17:47:31

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] md/raid10: optimize check_decay_read_errors()

On Mon, May 22, 2023 at 12:27 AM <[email protected]> wrote:
>
> From: Li Nan <[email protected]>
>
> check_decay_read_errors() is used to handle rdev->read_errors. But
> read_errors is inc and read after check_decay_read_errors() is invoked
> in fix_read_error().
>
> Put all operations of read_errors into check_decay_read_errors() and
> clean up unnecessary atomic_read of read_errors.

If I understand correctly, this patch doesn't change the behavior of the
code. If this is the case, I guess we don't really need it. The original code
looks reasonable to me.

Thanks,
Song

>
> Suggested-by: Yu Kuai <[email protected]>
> Signed-off-by: Li Nan <[email protected]>
> ---
> drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
> 1 file changed, 24 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 4fcfcb350d2b..d31eed17f186 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2655,23 +2655,24 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
> }
>
> /*
> - * Used by fix_read_error() to decay the per rdev read_errors.
> + * Used by fix_read_error() to decay the per rdev read_errors and check if
> + * read_error > max_read_errors.
> * We halve the read error count for every hour that has elapsed
> * since the last recorded read error.
> *
> */
> -static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> {
> - long cur_time_mon;
> + time64_t cur_time_mon = ktime_get_seconds();
> unsigned long hours_since_last;
> - unsigned int read_errors = atomic_read(&rdev->read_errors);
> -
> - cur_time_mon = ktime_get_seconds();
> + unsigned int read_errors;
> + unsigned int max_read_errors =
> + atomic_read(&mddev->max_corr_read_errors);
>
> if (rdev->last_read_error == 0) {
> /* first time we've seen a read error */
> rdev->last_read_error = cur_time_mon;
> - return;
> + goto increase;
> }
>
> hours_since_last = (long)(cur_time_mon -
> @@ -2684,10 +2685,25 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> * just set read errors to 0. We do this to avoid
> * overflowing the shift of read_errors by hours_since_last.
> */
> + read_errors = atomic_read(&rdev->read_errors);
> if (hours_since_last >= 8 * sizeof(read_errors))
> atomic_set(&rdev->read_errors, 0);
> else
> atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
> +
> +increase:
> + read_errors = atomic_inc_return(&rdev->read_errors);
> + if (read_errors > max_read_errors) {
> + pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> + mdname(mddev), rdev->bdev,
> + read_errors, max_read_errors);
> + pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> + mdname(mddev), rdev->bdev);
> + md_error(mddev, rdev);
> + return false;
> + }
> +
> + return true;
> }
>
> static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
> @@ -2727,7 +2743,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
> int sect = 0; /* Offset from r10_bio->sector */
> int sectors = r10_bio->sectors;
> struct md_rdev *rdev;
> - int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
> int d = r10_bio->devs[r10_bio->read_slot].devnum;
>
> /* still own a reference to this rdev, so it cannot
> @@ -2740,15 +2755,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
> more fix_read_error() attempts */
> return;
>
> - check_decay_read_errors(mddev, rdev);
> - atomic_inc(&rdev->read_errors);
> - if (atomic_read(&rdev->read_errors) > max_read_errors) {
> - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> - mdname(mddev), rdev->bdev,
> - atomic_read(&rdev->read_errors), max_read_errors);
> - pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> - mdname(mddev), rdev->bdev);
> - md_error(mddev, rdev);
> + if (!check_decay_read_errors(mddev, rdev)) {
> r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
> return;
> }
> --
> 2.31.1
>

2023-05-22 17:52:28

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH v4 0/3] md: bugfix of writing raid sysfs

On Mon, May 22, 2023 at 12:27 AM <[email protected]> wrote:
>
> From: Li Nan <[email protected]>
>
> The patch series fix the bug of writing raid sysfs.
>
> Changes in v4:
> - in patch 1, check overflow in safe_delay_store() directly.
> - in patch 2, check input number instead of using unsigned int.
>
> Li Nan (3):
> md/raid10: fix overflow of md/safe_mode_delay
> md/raid10: fix wrong setting of max_corr_read_errors
> md/raid10: optimize check_decay_read_errors()

Applied 1/3 and 2/3 to md-next.

Thanks,
Song

>
> drivers/md/md.c | 9 ++++++---
> drivers/md/raid10.c | 41 ++++++++++++++++++++++++-----------------
> 2 files changed, 30 insertions(+), 20 deletions(-)
>
> --
> 2.31.1
>