Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20;
Subject: Re: [PATCH v2 4/4] md/raid10: optimize check_decay_read_errors()
To:     linan666@huaweicloud.com, song@kernel.org, neilb@suse.de,
        Rob.Becker@riverbed.com
Cc:     linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org,
        linan122@huawei.com, yi.zhang@huawei.com, houtao1@huawei.com,
        yangerkun@huawei.com, "yukuai (C)" <yukuai3@huawei.com>
References: <20230506012315.3370489-1-linan666@huaweicloud.com>
 <20230506012315.3370489-5-linan666@huaweicloud.com>
From:   Yu Kuai <yukuai1@huaweicloud.com>
Message-ID: <aaece788-8892-1412-793e-b381b33d951b@huaweicloud.com>
Date:   Sat, 6 May 2023 10:14:32 +0800
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101
 Thunderbird/60.8.0
MIME-Version: 1.0
In-Reply-To: <20230506012315.3370489-5-linan666@huaweicloud.com>
Content-Type: text/plain; charset=gbk; format=flowed
Content-Transfer-Encoding: 8bit
Precedence: bulk

Hi,

在 2023/05/06 9:23, linan666@huaweicloud.com 写道:
> From: Li Nan <linan122@huawei.com>
> 
> check_decay_read_errors() is used to handle rdev->read_errors. But
> read_errors is inc and read after check_decay_read_errors() is invoked
> in fix_read_error().
> 
> Put all operations of read_errors into check_decay_read_errors() and
> clean up unnecessary atomic_read of read_errors.
> 
> Suggested-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>   drivers/md/raid10.c | 66 ++++++++++++++++++++++++---------------------
>   1 file changed, 35 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 4d615fcc6a50..79f94882227d 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2655,39 +2655,53 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>   }
>   
>   /*
> - * Used by fix_read_error() to decay the per rdev read_errors.
> + * Used by fix_read_error() to decay the per rdev read_errors and check if
> + * read_error > max_read_errors.
>    * We halve the read error count for every hour that has elapsed
>    * since the last recorded read error.
>    *
>    */
> -static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
>   {
> -	long cur_time_mon;
> +	time64_t cur_time_mon = ktime_get_seconds();
>   	unsigned long hours_since_last;
> -	unsigned int read_errors = atomic_read(&rdev->read_errors);
> -
> -	cur_time_mon = ktime_get_seconds();
> +	unsigned int read_errors;
> +	unsigned int max_read_errors =
> +			atomic_read(&mddev->max_corr_read_errors);
>   
>   	if (rdev->last_read_error == 0) {
>   		/* first time we've seen a read error */
>   		rdev->last_read_error = cur_time_mon;
> -		return;
> -	}

I prefer to use a goto label here, so that the following code doesn't
need to be changed. Other than that, this patch looks good to me.

Thanks,
Kuai
> +	} else {
> +		hours_since_last = (long)(cur_time_mon -
> +				    rdev->last_read_error) / 3600;
>   
> -	hours_since_last = (long)(cur_time_mon -
> -			    rdev->last_read_error) / 3600;
> +		rdev->last_read_error = cur_time_mon;
>   
> -	rdev->last_read_error = cur_time_mon;
> +		/*
> +		 * if hours_since_last is > the number of bits in read_errors
> +		 * just set read errors to 0. We do this to avoid
> +		 * overflowing the shift of read_errors by hours_since_last.
> +		 */
> +		read_errors = atomic_read(&rdev->read_errors);
> +		if (hours_since_last >= 8 * sizeof(read_errors))
> +			atomic_set(&rdev->read_errors, 0);
> +		else
> +			atomic_set(&rdev->read_errors,
> +				   read_errors >> hours_since_last);
> +	}
>   
> -	/*
> -	 * if hours_since_last is > the number of bits in read_errors
> -	 * just set read errors to 0. We do this to avoid
> -	 * overflowing the shift of read_errors by hours_since_last.
> -	 */
> -	if (hours_since_last >= 8 * sizeof(read_errors))
> -		atomic_set(&rdev->read_errors, 0);
> -	else
> -		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
> +	read_errors = atomic_inc_return(&rdev->read_errors);
> +	if (read_errors > max_read_errors) {
> +		pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n",
> +			  mdname(mddev), rdev->bdev, read_errors, max_read_errors);
> +		pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> +			  mdname(mddev), rdev->bdev);
> +		md_error(mddev, rdev);
> +		return false;
> +	}
> +
> +	return true;
>   }
>   
>   static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
> @@ -2727,8 +2741,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
>   	int sect = 0; /* Offset from r10_bio->sector */
>   	int sectors = r10_bio->sectors;
>   	struct md_rdev *rdev;
> -	unsigned int max_read_errors =
> -			atomic_read(&mddev->max_corr_read_errors);
>   	int d = r10_bio->devs[r10_bio->read_slot].devnum;
>   
>   	/* still own a reference to this rdev, so it cannot
> @@ -2741,15 +2753,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
>   		   more fix_read_error() attempts */
>   		return;
>   
> -	check_decay_read_errors(mddev, rdev);
> -	atomic_inc(&rdev->read_errors);
> -	if (atomic_read(&rdev->read_errors) > max_read_errors) {
> -		pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n",
> -			  mdname(mddev), rdev->bdev,
> -			  atomic_read(&rdev->read_errors), max_read_errors);
> -		pr_notice("md/raid10:%s: %pg: Failing raid device\n",
> -			  mdname(mddev), rdev->bdev);
> -		md_error(mddev, rdev);
> +	if (check_decay_read_errors(mddev, rdev)) {
>   		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
>   		return;
>   	}
>