From: Yu Kuai <[email protected]>
Commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") introduce a
new lock 'delete_mutex', and trigger a new deadlock:
t1: remove rdev t2: sysfs writer
rdev_attr_store rdev_attr_store
mddev_lock
state_store
md_kick_rdev_from_array
lock delete_mutex
list_add mddev->deleting
unlock delete_mutex
mddev_unlock
mddev_lock
...
lock delete_mutex
kobject_del
// wait for sysfs writers to be done
mddev_unlock
lock delete_mutex
// wait for delete_mutex, deadlock
'delete_mutex' is used to protect the list 'mddev->deleting', turns out
that this list can be protected by 'reconfig_mutex' directly, and this
lock can be removed.
Fix this problem by removing the lock, and use 'reconfig_mutex' to
protect the list. mddev_unlock() will move this list to a local list to
be handled after 'reconfig_mutex' is dropped.
Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev")
Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 28 +++++++++-------------------
drivers/md/md.h | 4 +---
2 files changed, 10 insertions(+), 22 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1086d7282ee7..089f7d7a9052 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
{
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
- mutex_init(&mddev->delete_mutex);
mutex_init(&mddev->sync_mutex);
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
@@ -751,26 +750,15 @@ static void mddev_free(struct mddev *mddev)
static const struct attribute_group md_redundancy_group;
-static void md_free_rdev(struct mddev *mddev)
+void mddev_unlock(struct mddev *mddev)
{
struct md_rdev *rdev;
struct md_rdev *tmp;
+ LIST_HEAD(delete);
- mutex_lock(&mddev->delete_mutex);
- if (list_empty(&mddev->deleting))
- goto out;
+ if (!list_empty(&mddev->deleting))
+ list_splice_init(&mddev->deleting, &delete);
- list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
- list_del_init(&rdev->same_set);
- kobject_del(&rdev->kobj);
- export_rdev(rdev, mddev);
- }
-out:
- mutex_unlock(&mddev->delete_mutex);
-}
-
-void mddev_unlock(struct mddev *mddev)
-{
if (mddev->to_remove) {
/* These cannot be removed under reconfig_mutex as
* an access to the files will try to take reconfig_mutex
@@ -810,7 +798,11 @@ void mddev_unlock(struct mddev *mddev)
} else
mutex_unlock(&mddev->reconfig_mutex);
- md_free_rdev(mddev);
+ list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
+ list_del_init(&rdev->same_set);
+ kobject_del(&rdev->kobj);
+ export_rdev(rdev, mddev);
+ }
md_wakeup_thread(mddev->thread);
wake_up(&mddev->sb_wait);
@@ -2490,9 +2482,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
* reconfig_mutex is held, hence it can't be called under
* reconfig_mutex and it's delayed to mddev_unlock().
*/
- mutex_lock(&mddev->delete_mutex);
list_add(&rdev->same_set, &mddev->deleting);
- mutex_unlock(&mddev->delete_mutex);
}
static void export_array(struct mddev *mddev)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 892a598a5029..8ae957480976 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -531,11 +531,9 @@ struct mddev {
/*
* Temporarily store rdev that will be finally removed when
- * reconfig_mutex is unlocked.
+ * reconfig_mutex is unlocked, protected by reconfig_mutex.
*/
struct list_head deleting;
- /* Protect the deleting list */
- struct mutex delete_mutex;
/* Used to synchronize idle and frozen for action_store() */
struct mutex sync_mutex;
--
2.39.2
On Tue, Jun 20, 2023 at 11:31 PM Yu Kuai <[email protected]> wrote:
>
> From: Yu Kuai <[email protected]>
>
> Commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") introduce a
> new lock 'delete_mutex', and trigger a new deadlock:
>
> t1: remove rdev t2: sysfs writer
>
> rdev_attr_store rdev_attr_store
> mddev_lock
> state_store
> md_kick_rdev_from_array
> lock delete_mutex
> list_add mddev->deleting
> unlock delete_mutex
> mddev_unlock
> mddev_lock
> ...
> lock delete_mutex
> kobject_del
> // wait for sysfs writers to be done
> mddev_unlock
> lock delete_mutex
> // wait for delete_mutex, deadlock
>
> 'delete_mutex' is used to protect the list 'mddev->deleting', turns out
> that this list can be protected by 'reconfig_mutex' directly, and this
> lock can be removed.
>
> Fix this problem by removing the lock, and use 'reconfig_mutex' to
> protect the list. mddev_unlock() will move this list to a local list to
> be handled after 'reconfig_mutex' is dropped.
>
> Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev")
> Signed-off-by: Yu Kuai <[email protected]>
Applied to md-next. Thanks for the quick fix!
Song
> ---
> drivers/md/md.c | 28 +++++++++-------------------
> drivers/md/md.h | 4 +---
> 2 files changed, 10 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 1086d7282ee7..089f7d7a9052 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
> {
> mutex_init(&mddev->open_mutex);
> mutex_init(&mddev->reconfig_mutex);
> - mutex_init(&mddev->delete_mutex);
> mutex_init(&mddev->sync_mutex);
> mutex_init(&mddev->bitmap_info.mutex);
> INIT_LIST_HEAD(&mddev->disks);
> @@ -751,26 +750,15 @@ static void mddev_free(struct mddev *mddev)
>
> static const struct attribute_group md_redundancy_group;
>
> -static void md_free_rdev(struct mddev *mddev)
> +void mddev_unlock(struct mddev *mddev)
> {
> struct md_rdev *rdev;
> struct md_rdev *tmp;
> + LIST_HEAD(delete);
>
> - mutex_lock(&mddev->delete_mutex);
> - if (list_empty(&mddev->deleting))
> - goto out;
> + if (!list_empty(&mddev->deleting))
> + list_splice_init(&mddev->deleting, &delete);
>
> - list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
> - list_del_init(&rdev->same_set);
> - kobject_del(&rdev->kobj);
> - export_rdev(rdev, mddev);
> - }
> -out:
> - mutex_unlock(&mddev->delete_mutex);
> -}
> -
> -void mddev_unlock(struct mddev *mddev)
> -{
> if (mddev->to_remove) {
> /* These cannot be removed under reconfig_mutex as
> * an access to the files will try to take reconfig_mutex
> @@ -810,7 +798,11 @@ void mddev_unlock(struct mddev *mddev)
> } else
> mutex_unlock(&mddev->reconfig_mutex);
>
> - md_free_rdev(mddev);
> + list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
> + list_del_init(&rdev->same_set);
> + kobject_del(&rdev->kobj);
> + export_rdev(rdev, mddev);
> + }
>
> md_wakeup_thread(mddev->thread);
> wake_up(&mddev->sb_wait);
> @@ -2490,9 +2482,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
> * reconfig_mutex is held, hence it can't be called under
> * reconfig_mutex and it's delayed to mddev_unlock().
> */
> - mutex_lock(&mddev->delete_mutex);
> list_add(&rdev->same_set, &mddev->deleting);
> - mutex_unlock(&mddev->delete_mutex);
> }
>
> static void export_array(struct mddev *mddev)
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 892a598a5029..8ae957480976 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -531,11 +531,9 @@ struct mddev {
>
> /*
> * Temporarily store rdev that will be finally removed when
> - * reconfig_mutex is unlocked.
> + * reconfig_mutex is unlocked, protected by reconfig_mutex.
> */
> struct list_head deleting;
> - /* Protect the deleting list */
> - struct mutex delete_mutex;
>
> /* Used to synchronize idle and frozen for action_store() */
> struct mutex sync_mutex;
> --
> 2.39.2
>