On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <[email protected]> wrote:
>
> From: Yu Kuai <[email protected]>
>
> stop_sync_thread(), md_set_readonly() and do_md_stop() all try to stop
> sync_thread in the same way, so factor out a helper to make the code
> cleaner, and to prepare for using the new helper to fix problems later.
>
> Signed-off-by: Yu Kuai <[email protected]>
> Signed-off-by: Yu Kuai <[email protected]>
> ---
> drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
> 1 file changed, 69 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c0f2bdafe46a..7252fae0c989 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
> return sprintf(page, "%s\n", type);
> }
>
> -static int stop_sync_thread(struct mddev *mddev)
> +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
> {
> - int ret = 0;
> + if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
> + return true;
>
> - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> - return 0;
> + return (!mddev->sync_thread &&
> + !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> +}
>
> - ret = mddev_lock(mddev);
> - if (ret)
> - return ret;
> +/*
> + * stop_sync_thread() - stop running sync_thread.
> + * @mddev: the array that sync_thread belongs to.
> + * @freeze: set true to prevent a new sync_thread from starting.
> + * @interruptible: if true, the user can interrupt while waiting for
> + * sync_thread to be done.
> + *
> + * Note that this function must be called with 'reconfig_mutex' grabbed, and
> + * after this function returns, 'reconfig_mutex' will be released.
> + */
> +static int stop_sync_thread(struct mddev *mddev, bool freeze,
> + bool interruptible)
> + __releases(&mddev->reconfig_mutex)
> +{
> + int *seq_ptr = NULL;
> + int sync_seq;
> + int ret = 0;
> +
> + if (freeze) {
> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> + } else {
> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> + sync_seq = atomic_read(&mddev->sync_seq);
> + seq_ptr = &sync_seq;
> + }
>
> - /*
> - * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> - * held.
> - */
> if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> mddev_unlock(mddev);
> return 0;
> }
Hi Kuai,
The unlock is done inside this function. For me, that's not good:
the caller takes the lock, so the caller should do the unlock too.
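Something like the following is what I mean, i.e. keep the lock/unlock pairing
in the caller (just a rough sketch with a hypothetical
__stop_sync_thread_locked() helper, not tested):

        ret = mddev_lock(mddev);
        if (ret)
                return ret;

        /* hypothetical helper: interrupts sync_thread, never unlocks */
        __stop_sync_thread_locked(mddev);

        mddev_unlock(mddev);    /* the caller that locked also unlocks */

        wait_event(resync_wait,
                   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));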
>
> - if (work_pending(&mddev->sync_work))
> - flush_workqueue(md_misc_wq);
> -
> set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> /*
> * Thread might be blocked waiting for metadata update which will now
> @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
> md_wakeup_thread_directly(mddev->sync_thread);
>
> mddev_unlock(mddev);
Same as the point above.
> - return 0;
> + if (work_pending(&mddev->sync_work))
> + flush_work(&mddev->sync_work);
> +
> + if (interruptible)
> + ret = wait_event_interruptible(resync_wait,
> + sync_thread_stopped(mddev, seq_ptr));
> + else
> + wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> +
It looks like the three paths (md_set_readonly, do_md_stop and
stop_sync_thread) need to wait for different events. We could move this
wait out of the helper function to make the helper more generic.
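For example (a rough, untested sketch with a hypothetical two-argument
stop_sync_thread() that only signals the thread and does no waiting inside),
the do_md_stop() side could look like:

        /* hypothetical helper: just interrupt sync_thread, no waiting inside */
        stop_sync_thread(mddev, true);

        /* each caller then waits for the condition it actually cares about */
        wait_event(resync_wait, mddev->sync_thread == NULL &&
                   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));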
Best Regards
Xiao
> + return ret;
> }
>
> static int idle_sync_thread(struct mddev *mddev)
> {
> int ret;
> - int sync_seq = atomic_read(&mddev->sync_seq);
> bool flag;
>
> ret = mutex_lock_interruptible(&mddev->sync_mutex);
> if (ret)
> return ret;
>
> - flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> - ret = stop_sync_thread(mddev);
> + flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> + ret = mddev_lock(mddev);
> if (ret)
> - goto out;
> + goto unlock;
>
> - ret = wait_event_interruptible(resync_wait,
> - sync_seq != atomic_read(&mddev->sync_seq) ||
> - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> + ret = stop_sync_thread(mddev, false, true);
> if (ret && flag)
> set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
> mutex_unlock(&mddev->sync_mutex);
> return ret;
> }
>
> static int frozen_sync_thread(struct mddev *mddev)
> {
> - int ret = mutex_lock_interruptible(&mddev->sync_mutex);
> + int ret;
> bool flag;
>
> + ret = mutex_lock_interruptible(&mddev->sync_mutex);
> if (ret)
> return ret;
>
> - flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> - ret = stop_sync_thread(mddev);
> + flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> + ret = mddev_lock(mddev);
> if (ret)
> - goto out;
> + goto unlock;
>
> - ret = wait_event_interruptible(resync_wait,
> - mddev->sync_thread == NULL &&
> - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> -out:
> + ret = stop_sync_thread(mddev, true, true);
> if (ret && !flag)
> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +unlock:
> mutex_unlock(&mddev->sync_mutex);
> return ret;
> }
> @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
> return -EBUSY;
>
> - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> did_freeze = 1;
> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> - }
> - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> - set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>
> - /*
> - * Thread might be blocked waiting for metadata update which will now
> - * never happen
> - */
> - md_wakeup_thread_directly(mddev->sync_thread);
> -
> - mddev_unlock(mddev);
> - wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> - &mddev->recovery));
> + stop_sync_thread(mddev, true, false);
> wait_event(mddev->sb_wait,
> !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
> mddev_lock_nointr(mddev);
> @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> mddev->sync_thread ||
> test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> + /*
> + * This could happen if user change array state through
> + * ioctl/sysfs while reconfig_mutex is released.
> + */
> pr_warn("md: %s still in use.\n",mdname(mddev));
> err = -EBUSY;
> goto out;
> @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
> struct md_rdev *rdev;
> int did_freeze = 0;
>
> - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> did_freeze = 1;
> +
> + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> + stop_sync_thread(mddev, true, false);
> + mddev_lock_nointr(mddev);
> + } else {
> set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> }
> - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> - set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> -
> - /*
> - * Thread might be blocked waiting for metadata update which will now
> - * never happen
> - */
> - md_wakeup_thread_directly(mddev->sync_thread);
> -
> - mddev_unlock(mddev);
> - wait_event(resync_wait, (mddev->sync_thread == NULL &&
> - !test_bit(MD_RECOVERY_RUNNING,
> - &mddev->recovery)));
> - mddev_lock_nointr(mddev);
>
> mutex_lock(&mddev->open_mutex);
> if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> mddev->sysfs_active ||
> mddev->sync_thread ||
> test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> + /*
> + * This could happen if user change array state through
> + * ioctl/sysfs while reconfig_mutex is released.
> + */
> pr_warn("md: %s still in use.\n",mdname(mddev));
> mutex_unlock(&mddev->open_mutex);
> if (did_freeze) {
> --
> 2.39.2
>
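As a side note, the calling convention this version ends up with in
md_set_readonly() is roughly the following (condensed sketch of the hunk
above, not a literal excerpt):

        /* entered with reconfig_mutex held */
        stop_sync_thread(mddev, true, false);   /* returns with the mutex released */
        wait_event(mddev->sb_wait,
                   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
        mddev_lock_nointr(mddev);               /* caller re-takes reconfig_mutex */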
On Tue, Nov 21, 2023 at 2:02 PM Xiao Ni <[email protected]> wrote:
>
> On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <[email protected]> wrote:
> > + if (interruptible)
> > + ret = wait_event_interruptible(resync_wait,
> > + sync_thread_stopped(mddev, seq_ptr));
> > + else
> > + wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> > +
>
> It looks like the three paths (md_set_readonly, do_md_stop and
> stop_sync_thread) need to wait for different events. We could move this
> wait out of the helper function to make the helper more generic.
Or take the lock again before returning from this function, and leave the wait here?
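i.e. keep the wait in the helper but re-take the lock before returning,
something like this at the end of the helper (rough sketch, untested):

        md_wakeup_thread_directly(mddev->sync_thread);
        mddev_unlock(mddev);

        wait_event(resync_wait,
                   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));

        /* re-take reconfig_mutex so the caller sees balanced locking */
        mddev_lock_nointr(mddev);
        return 0;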
Regards
Xiao
Hi,
On 2023/11/21 14:34, Xiao Ni wrote:
> On Tue, Nov 21, 2023 at 2:02 PM Xiao Ni <[email protected]> wrote:
>>
>> It looks like the three paths (md_set_readonly, do_md_stop and
>> stop_sync_thread) need to wait for different events. We could move this
>> wait out of the helper function to make the helper more generic.
>
> Or take the lock again before returning from this function, and leave the wait here?
>
I tried, but I made the code complex.
I guess I'll need to drop this version and restart...
Thanks,
Kuai
Hi,
On 2023/11/21 21:01, Yu Kuai wrote:
>>>
>>> It looks like the three paths (md_set_readonly, do_md_stop and
>>> stop_sync_thread) need to wait for different events. We could move this
>>> wait out of the helper function to make the helper more generic.
>>
>> Or take the lock again before returning from this function, and leave the wait here?
How about the following patch?
drivers/md/md.c | 89 +++++++++++++++++++++++++++++++++++------------------------------------------------------
1 file changed, 35 insertions(+), 54 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78f32a2e434c..fe46b67f6e87 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4868,35 +4868,18 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", action_names[get_sync_action(mddev)]);
}
-static int stop_sync_thread(struct mddev *mddev)
+static void prepare_to_stop_sync_thread(struct mddev *mddev)
+ __releases(&mddev->reconfig_mutex)
{
- int err = mddev_lock(mddev);
-
- if (err)
- return err;
-
- /*
- * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
- * held.
- */
- if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- mddev_unlock(mddev);
- return 0;
- }
-
- if (work_pending(&mddev->sync_work))
- flush_workqueue(md_misc_wq);
-
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
+ * Thread might be blocked waiting for metadata update which
+ * will now never happen.
*/
md_wakeup_thread_directly(mddev->sync_thread);
-
mddev_unlock(mddev);
-
- return 0;
+ if (work_pending(&mddev->sync_work))
+ flush_work(&mddev->sync_work);
}
static int idle_sync_thread(struct mddev *mddev)
@@ -4905,13 +4888,17 @@ static int idle_sync_thread(struct mddev *mddev)
int err;
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- err = stop_sync_thread(mddev);
+ err = mddev_lock(mddev);
if (err)
return err;
- err = wait_event_interruptible(resync_wait,
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ err = wait_event_interruptible(resync_wait,
sync_seq != atomic_read(&mddev->sync_seq) ||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ }
+
return err;
}
@@ -4920,12 +4907,15 @@ static int frozen_sync_thread(struct mddev *mddev)
int err;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- err = stop_sync_thread(mddev);
+ err = mddev_lock(mddev);
if (err)
return err;
- err = wait_event_interruptible(resync_wait,
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ err = wait_event_interruptible(resync_wait,
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ }
return err;
}
@@ -6350,11 +6340,11 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (work_pending(&mddev->sync_work))
- flush_workqueue(md_misc_wq);
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ mddev_lock_nointr(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -6447,18 +6437,15 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ } else {
+ mddev_unlock(mddev);
+ }
- mddev_unlock(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery));
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@@ -6509,19 +6496,13 @@ static int do_md_stop(struct mddev *mddev, int mode,
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
- mddev_unlock(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery));
- mddev_lock_nointr(mddev);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ mddev_lock_nointr(mddev);
+ }
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||