From: Yu Kuai <[email protected]>
Changes in v2:
- add patch 2;
- split some patches from v1 that will be sent separately;
- rework some commit message;
- rework patch 5;
Yu Kuai (6):
md: fix missing flush of sync_work
md: remove redundant check of 'mddev->sync_thread'
md: remove redundant md_wakeup_thread()
md: don't leave 'MD_RECOVERY_FROZEN' in error path of
md_set_readonly()
md: fix stopping sync thread
dm-raid: delay flushing event_work() after reconfig_mutex is released
drivers/md/dm-raid.c | 3 +
drivers/md/md.c | 149 ++++++++++++++++++++-----------------------
drivers/md/raid5.c | 6 +-
3 files changed, 75 insertions(+), 83 deletions(-)
--
2.39.2
From: Yu Kuai <[email protected]>
In md_set_readonly() and do_md_stop(), md_wakeup_thread() will be called
while 'reconfig_mutex' is held, however, follow up mddev_unlock() will call
md_wakeup_thread() again. Hence remove the redundant md_wakeup_thread().
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1d33a39d4f13..b4e21db71454 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6363,7 +6363,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -6390,7 +6389,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
err = -EBUSY;
goto out;
@@ -6405,7 +6403,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
err = 0;
}
@@ -6428,7 +6425,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -6453,7 +6449,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
}
return -EBUSY;
}
--
2.39.2
From: Yu Kuai <[email protected]>
The lifetime of sync_thread:
1) Set MD_RECOVERY_NEEDED and wake up daemon thread (by ioctl/sysfs or
other events);
2) Daemon thread woke up, md_check_recovery() found that
MD_RECOVERY_NEEDED is set:
a) try to grab reconfig_mutex;
b) set MD_RECOVERY_RUNNING;
c) clear MD_RECOVERY_NEEDED, and then queue sync_work;
3) md_start_sync() choose sync_action, then register sync_thread;
4) md_do_sync() is done, set MD_RECOVERY_DONE and wake up daemon thread;
5) Daemon thread woke up, md_check_recovery() found that
MD_RECOVERY_DONE is set:
a) try to grab reconfig_mutex;
b) unregister sync_thread;
c) clear MD_RECOVERY_RUNNING and MD_RECOVERY_DONE;
Hence if MD_RECOVERY_RUNNING is not set, mddev->sync_thread must be
NULL, there is no need to judge "mddev->sync_thread == NULL" if
MD_RECOVERY_RUNNING is not set.
Noted that unregister sync_thread(md_reap_sync_thread()) from other
context is wrong and will cause deadlock, for example commit
130443d60b1b ("md: refactor idle/frozen_sync_thread() to fix deadlock").
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 22 +++++++---------------
drivers/md/raid5.c | 6 ++----
2 files changed, 9 insertions(+), 19 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1701e2fb219f..1d33a39d4f13 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3302,8 +3302,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
if (kstrtoull(buf, 10, &new_offset) < 0)
return -EINVAL;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
if (new_offset == rdev->data_offset)
/* reset is always permitted */
@@ -3987,8 +3986,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
*/
rv = -EBUSY;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
mddev->reshape_position != MaxSector ||
mddev->sysfs_active)
goto out_unlock;
@@ -4898,8 +4896,7 @@ static void frozen_sync_thread(struct mddev *mddev)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
stop_sync_thread(mddev);
- wait_event(resync_wait, mddev->sync_thread == NULL &&
- !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
mutex_unlock(&mddev->sync_mutex);
}
@@ -6388,7 +6385,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
- mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
if (did_freeze) {
@@ -6444,15 +6440,13 @@ static int do_md_stop(struct mddev *mddev, int mode,
md_wakeup_thread_directly(mddev->sync_thread);
mddev_unlock(mddev);
- wait_event(resync_wait, (mddev->sync_thread == NULL &&
- !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery)));
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
mddev->sysfs_active ||
- mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
mutex_unlock(&mddev->open_mutex);
@@ -7298,8 +7292,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
* of each device. If num_sectors is zero, we find the largest size
* that fits.
*/
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- mddev->sync_thread)
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
if (!md_is_rdwr(mddev))
return -EROFS;
@@ -7336,8 +7329,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks))
return -EINVAL;
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
mddev->reshape_position != MaxSector)
return -EBUSY;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4207e945e8c8..ec6cb8185207 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7018,10 +7018,8 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
pr_debug("md/raid: change stripe_size from %lu to %lu\n",
conf->stripe_size, new);
- if (mddev->sync_thread ||
- test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- mddev->reshape_position != MaxSector ||
- mddev->sysfs_active) {
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ mddev->reshape_position != MaxSector || mddev->sysfs_active) {
err = -EBUSY;
goto out_unlock;
}
--
2.39.2
From: Yu Kuai <[email protected]>
If md_set_readonly() failed, the array could still be read-write, however
'MD_RECOVERY_FROZEN' could still be set, which leave the array in an
abnormal state that sync or recovery can't continue anymore.
Hence make sure the flag is cleared after md_set_readonly() returns.
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b4e21db71454..5af67748e53e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6360,6 +6360,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
int err = 0;
int did_freeze = 0;
+ if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ return -EBUSY;
+
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -6373,8 +6376,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
*/
md_wakeup_thread_directly(mddev->sync_thread);
- if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
- return -EBUSY;
mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
@@ -6386,27 +6387,29 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
- if (did_freeze) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- }
err = -EBUSY;
goto out;
}
+
if (mddev->pers) {
__md_stop_writes(mddev);
- err = -ENXIO;
- if (mddev->ro == MD_RDONLY)
+ if (mddev->ro == MD_RDONLY) {
+ err = -ENXIO;
goto out;
+ }
+
mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
+ }
+
+out:
+ if ((mddev->pers && !err) || did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_state);
- err = 0;
}
-out:
+
mutex_unlock(&mddev->open_mutex);
return err;
}
--
2.39.2
From: Yu Kuai <[email protected]>
Currently sync thread is stopped from multiple contex:
- idle_sync_thread
- frozen_sync_thread
- __md_stop_writes
- md_set_readonly
- do_md_stop
And there are some problems:
1) sync_work is flushed while reconfig_mutex is grabbed, this can
deadlock because the work function will grab reconfig_mutex as well.
2) md_reap_sync_thread() can't be called directly while md_do_sync() is
not finished yet, for example, commit 130443d60b1b ("md: refactor
idle/frozen_sync_thread() to fix deadlock").
3) If MD_RECOVERY_RUNNING is not set, there is no need to stop
sync_thread at all because sync_thread must not be registered.
Factor out a helper prepare_to_stop_sync_thread(), so that above contex
will behave the same. Fix 1) by flushing sync_work after reconfig_mutex
is released, before waiting for sync_thread to be done; Fix 2) bt
letting daemon thread to unregister sync_thread; Fix 3) by always
checking MD_RECOVERY_RUNNING first.
Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()")
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 96 ++++++++++++++++++++++++-------------------------
1 file changed, 46 insertions(+), 50 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5af67748e53e..fb8a7d1eebee 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4846,26 +4846,9 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", type);
}
-static void stop_sync_thread(struct mddev *mddev)
+static void prepare_to_stop_sync_thread(struct mddev *mddev)
+ __releases(&mddev->reconfig_mutex)
{
- if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- return;
-
- if (mddev_lock(mddev))
- return;
-
- /*
- * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
- * held.
- */
- if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- mddev_unlock(mddev);
- return;
- }
-
- if (work_pending(&mddev->sync_work))
- flush_workqueue(md_misc_wq);
-
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
* Thread might be blocked waiting for metadata update which will now
@@ -4874,6 +4857,8 @@ static void stop_sync_thread(struct mddev *mddev)
md_wakeup_thread_directly(mddev->sync_thread);
mddev_unlock(mddev);
+ if (work_pending(&mddev->sync_work))
+ flush_work(&mddev->sync_work);
}
static void idle_sync_thread(struct mddev *mddev)
@@ -4882,10 +4867,20 @@ static void idle_sync_thread(struct mddev *mddev)
mutex_lock(&mddev->sync_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev);
- wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
+ if (mddev_lock(mddev)) {
+ mutex_unlock(&mddev->sync_mutex);
+ return;
+ }
+
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait,
+ sync_seq != atomic_read(&mddev->sync_seq) ||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ } else {
+ mddev_unlock(mddev);
+ }
mutex_unlock(&mddev->sync_mutex);
}
@@ -4894,9 +4889,19 @@ static void frozen_sync_thread(struct mddev *mddev)
{
mutex_lock(&mddev->sync_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ if (mddev_lock(mddev)) {
+ mutex_unlock(&mddev->sync_mutex);
+ return;
+ }
+
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait,
+ !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ } else {
+ mddev_unlock(mddev);
+ }
mutex_unlock(&mddev->sync_mutex);
}
@@ -6270,11 +6275,11 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (work_pending(&mddev->sync_work))
- flush_workqueue(md_misc_wq);
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ mddev_lock_nointr(mddev);
}
del_timer_sync(&mddev->safemode_timer);
@@ -6367,18 +6372,15 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ } else {
+ mddev_unlock(mddev);
+ }
- mddev_unlock(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery));
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@@ -6429,19 +6431,13 @@ static int do_md_stop(struct mddev *mddev, int mode,
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
- mddev_unlock(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery));
- mddev_lock_nointr(mddev);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ prepare_to_stop_sync_thread(mddev);
+ wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
+ &mddev->recovery));
+ mddev_lock_nointr(mddev);
+ }
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
--
2.39.2
On Fri, Nov 24, 2023 at 4:00 PM Yu Kuai <[email protected]> wrote:
>
> From: Yu Kuai <[email protected]>
>
> Changes in v2:
> - add patch 2;
> - split some patches from v1 that will be sent separately;
> - rework some commit message;
> - rework patch 5;
>
> Yu Kuai (6):
> md: fix missing flush of sync_work
> md: remove redundant check of 'mddev->sync_thread'
> md: remove redundant md_wakeup_thread()
> md: don't leave 'MD_RECOVERY_FROZEN' in error path of
> md_set_readonly()
> md: fix stopping sync thread
> dm-raid: delay flushing event_work() after reconfig_mutex is released
>
> drivers/md/dm-raid.c | 3 +
> drivers/md/md.c | 149 ++++++++++++++++++++-----------------------
> drivers/md/raid5.c | 6 +-
> 3 files changed, 75 insertions(+), 83 deletions(-)
>
> --
> 2.39.2
>
For the series
Acked-by: Xiao Ni <[email protected]>