2023-03-11 09:32:39

by Yu Kuai

Subject: [PATCH -next 0/5] md: fix uaf for sync_thread

From: Yu Kuai <[email protected]>

Our test reports a UAF (use-after-free) for 'mddev->sync_thread':

T1                              T2
md_start_sync
 md_register_thread
                                raid1d
                                 md_check_recovery
                                  md_reap_sync_thread
                                   md_unregister_thread
                                    kfree

md_wakeup_thread
 wake_up
 ->sync_thread was freed

Currently, the global spinlock 'pers_lock' is borrowed to protect
'mddev->thread'. This problem could be fixed in the same way; however,
there might be similar problems for other md_thread instances, and I
really don't like the idea of borrowing a global lock.

This patchset does some refactoring and then uses a disk-level spinlock
to protect md_thread in the relevant APIs.
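
The resulting wakeup path looks roughly like this (simplified from
patch 5, with the pr_debug dropped):

void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev)
{
        struct md_thread *thread;

        spin_lock(&mddev->thread_lock);
        thread = *threadp;
        if (thread) {
                set_bit(THREAD_WAKEUP, &thread->flags);
                wake_up(&thread->wqueue);
        }
        spin_unlock(&mddev->thread_lock);
}

The pointer is only dereferenced while the lock is held, so the thread
cannot be freed between the NULL check and the wakeup.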

Yu Kuai (5):
md: pass an md_thread pointer to md_register_thread()
md: refactor md_wakeup_thread()
md: use md_thread api to wake up sync_thread
md: pass an mddev to md_unregister_thread()
md: protect md_thread with a new disk level spin lock

drivers/md/dm-raid.c | 6 +-
drivers/md/md-bitmap.c | 6 +-
drivers/md/md-cluster.c | 39 +++++-----
drivers/md/md-multipath.c | 8 +-
drivers/md/md.c | 157 ++++++++++++++++++++------------------
drivers/md/md.h | 15 ++--
drivers/md/raid1.c | 19 +++--
drivers/md/raid10.c | 31 ++++----
drivers/md/raid5-cache.c | 19 +++--
drivers/md/raid5-ppl.c | 2 +-
drivers/md/raid5.c | 48 ++++++------
11 files changed, 175 insertions(+), 175 deletions(-)

--
2.31.1



2023-03-11 09:32:45

by Yu Kuai

Subject: [PATCH -next 3/5] md: use md_thread api to wake up sync_thread

From: Yu Kuai <[email protected]>

Instead of calling wake_up_process() directly, convert to using
md_wakeup_thread().

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 97e87df4ee43..4ecfd0508afb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6267,10 +6267,12 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- if (mddev->sync_thread)
- /* Thread might be blocked waiting for metadata update
- * which will now never happen */
- wake_up_process(mddev->sync_thread->tsk);
+
+ /*
+ * Thread might be blocked waiting for metadata update
+ * which will now never happen
+ */
+ md_wakeup_thread(&mddev->sync_thread, mddev);

if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
return -EBUSY;
@@ -6331,10 +6333,12 @@ static int do_md_stop(struct mddev *mddev, int mode,
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- if (mddev->sync_thread)
- /* Thread might be blocked waiting for metadata update
- * which will now never happen */
- wake_up_process(mddev->sync_thread->tsk);
+
+ /*
+ * Thread might be blocked waiting for metadata update
+ * which will now never happen
+ */
+ md_wakeup_thread(&mddev->sync_thread, mddev);

mddev_unlock(mddev);
wait_event(resync_wait, (mddev->sync_thread == NULL &&
--
2.31.1


2023-03-11 09:32:48

by Yu Kuai

Subject: [PATCH -next 1/5] md: pass an md_thread pointer to md_register_thread()

From: Yu Kuai <[email protected]>

Prepare to use a disk-level spinlock to protect md_thread; there are no
functional changes.
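
For callers, the pattern changes like this (an illustrative sketch
distilled from the hunks below):

- thread = md_register_thread(run, mddev, name);
- if (!thread)
-         goto fail;
+ err = md_register_thread(&thread, run, mddev, name);
+ if (err)
+         goto fail;

This also lets callers propagate the real error code from kthread_run()
instead of a hard-coded -ENOMEM.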

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md-cluster.c | 11 +++++------
drivers/md/md-multipath.c | 6 +++---
drivers/md/md.c | 27 ++++++++++++++-------------
drivers/md/md.h | 7 +++----
drivers/md/raid1.c | 5 ++---
drivers/md/raid10.c | 15 ++++++---------
drivers/md/raid5-cache.c | 5 ++---
drivers/md/raid5.c | 15 ++++++---------
8 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 10e0c5381d01..c19e29cb73bf 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -362,9 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)

set_bit(slot, &cinfo->recovery_map);
if (!cinfo->recovery_thread) {
- cinfo->recovery_thread = md_register_thread(recover_bitmaps,
- mddev, "recover");
- if (!cinfo->recovery_thread) {
+ if (md_register_thread(&cinfo->recovery_thread, recover_bitmaps,
+ mddev, "recover")) {
pr_warn("md-cluster: Could not create recovery thread\n");
return;
}
@@ -888,9 +887,9 @@ static int join(struct mddev *mddev, int nodes)
goto err;
}
/* Initiate the communication resources */
- ret = -ENOMEM;
- cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
- if (!cinfo->recv_thread) {
+ ret = md_register_thread(&cinfo->recv_thread, recv_daemon, mddev,
+ "cluster_recv");
+ if (ret) {
pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
goto err;
}
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 66edf5e72bd6..ceec9e4b2a60 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -400,9 +400,9 @@ static int multipath_run (struct mddev *mddev)
if (ret)
goto out_free_conf;

- mddev->thread = md_register_thread(multipathd, mddev,
- "multipath");
- if (!mddev->thread)
+ ret = md_register_thread(&mddev->thread, multipathd, mddev,
+ "multipath");
+ if (ret)
goto out_free_conf;

pr_info("multipath: array %s active with %d out of %d IO paths\n",
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 98970bbe32bf..0bbdde29a41f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7896,29 +7896,32 @@ void md_wakeup_thread(struct md_thread *thread)
}
EXPORT_SYMBOL(md_wakeup_thread);

-struct md_thread *md_register_thread(void (*run) (struct md_thread *),
- struct mddev *mddev, const char *name)
+int md_register_thread(struct md_thread **threadp,
+ void (*run)(struct md_thread *),
+ struct mddev *mddev, const char *name)
{
struct md_thread *thread;

thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
if (!thread)
- return NULL;
+ return -ENOMEM;

init_waitqueue_head(&thread->wqueue);

thread->run = run;
thread->mddev = mddev;
thread->timeout = MAX_SCHEDULE_TIMEOUT;
- thread->tsk = kthread_run(md_thread, thread,
- "%s_%s",
- mdname(thread->mddev),
- name);
+ thread->tsk = kthread_run(md_thread, thread, "%s_%s",
+ mdname(thread->mddev), name);
if (IS_ERR(thread->tsk)) {
+ int err = PTR_ERR(thread->tsk);
+
kfree(thread);
- return NULL;
+ return err;
}
- return thread;
+
+ *threadp = thread;
+ return 0;
}
EXPORT_SYMBOL(md_register_thread);

@@ -9199,10 +9202,8 @@ static void md_start_sync(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);

- mddev->sync_thread = md_register_thread(md_do_sync,
- mddev,
- "resync");
- if (!mddev->sync_thread) {
+ if (md_register_thread(&mddev->sync_thread, md_do_sync, mddev,
+ "resync")) {
pr_warn("%s: could not start resync thread...\n",
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index e148e3c83b0d..344e055e4d0f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -730,10 +730,9 @@ extern int register_md_cluster_operations(struct md_cluster_operations *ops,
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
-extern struct md_thread *md_register_thread(
- void (*run)(struct md_thread *thread),
- struct mddev *mddev,
- const char *name);
+int md_register_thread(struct md_thread **threadp,
+ void (*run)(struct md_thread *thread),
+ struct mddev *mddev, const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 68a9e2d9985b..1217c1db0a40 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3083,9 +3083,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
}
}

- err = -ENOMEM;
- conf->thread = md_register_thread(raid1d, mddev, "raid1");
- if (!conf->thread)
+ err = md_register_thread(&conf->thread, raid1d, mddev, "raid1");
+ if (err)
goto abort;

return conf;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2f1522bba80d..f1e54c62f930 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4096,9 +4096,8 @@ static struct r10conf *setup_conf(struct mddev *mddev)
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);

- err = -ENOMEM;
- conf->thread = md_register_thread(raid10d, mddev, "raid10");
- if (!conf->thread)
+ err = md_register_thread(&conf->thread, raid10d, mddev, "raid10");
+ if (err)
goto out;

conf->mddev = mddev;
@@ -4286,9 +4285,8 @@ static int raid10_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "reshape");
- if (!mddev->sync_thread)
+ if (md_register_thread(&mddev->sync_thread, md_do_sync, mddev,
+ "reshape"))
goto out_free_conf;
}

@@ -4688,9 +4686,8 @@ static int raid10_start_reshape(struct mddev *mddev)
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);

- mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "reshape");
- if (!mddev->sync_thread) {
+ if (md_register_thread(&mddev->sync_thread, md_do_sync, mddev,
+ "reshape")) {
ret = -EAGAIN;
goto abort;
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 46182b955aef..0464d4d551fc 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3121,9 +3121,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->tree_lock);
INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);

- log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
- log->rdev->mddev, "reclaim");
- if (!log->reclaim_thread)
+ if (md_register_thread(&log->reclaim_thread, r5l_reclaim_thread,
+ log->rdev->mddev, "reclaim"))
goto reclaim_thread;
log->reclaim_thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7b820b81d8c2..04b1093195d0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7665,11 +7665,10 @@ static struct r5conf *setup_conf(struct mddev *mddev)
}

sprintf(pers_name, "raid%d", mddev->new_level);
- conf->thread = md_register_thread(raid5d, mddev, pers_name);
- if (!conf->thread) {
+ ret = md_register_thread(&conf->thread, raid5d, mddev, pers_name);
+ if (ret) {
pr_warn("md/raid:%s: couldn't allocate thread.\n",
mdname(mddev));
- ret = -ENOMEM;
goto abort;
}

@@ -7989,9 +7988,8 @@ static int raid5_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "reshape");
- if (!mddev->sync_thread)
+ if (md_register_thread(&mddev->sync_thread, md_do_sync, mddev,
+ "reshape"))
goto abort;
}

@@ -8567,9 +8565,8 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- mddev->sync_thread = md_register_thread(md_do_sync, mddev,
- "reshape");
- if (!mddev->sync_thread) {
+ if (md_register_thread(&mddev->sync_thread, md_do_sync, mddev,
+ "reshape")) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
write_seqcount_begin(&conf->gen_lock);
--
2.31.1


2023-03-11 09:32:52

by Yu Kuai

Subject: [PATCH -next 4/5] md: pass an mddev to md_unregister_thread()

From: Yu Kuai <[email protected]>

Prepare to use a disk-level spinlock to protect md_thread; there are no
functional changes.

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/dm-raid.c | 2 +-
drivers/md/md-cluster.c | 8 ++++----
drivers/md/md.c | 13 +++++++------
drivers/md/md.h | 3 ++-
drivers/md/raid1.c | 4 ++--
drivers/md/raid10.c | 2 +-
drivers/md/raid5-cache.c | 2 +-
drivers/md/raid5.c | 2 +-
8 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 257c9c9f2b4d..1393c80b083b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3729,7 +3729,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(&mddev->sync_thread, mddev);
md_reap_sync_thread(mddev);
}
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 92b0e49b4e53..4d538c9ab7b3 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -946,8 +946,8 @@ static int join(struct mddev *mddev, int nodes)
return 0;
err:
set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
- md_unregister_thread(&cinfo->recovery_thread);
- md_unregister_thread(&cinfo->recv_thread);
+ md_unregister_thread(&cinfo->recovery_thread, mddev);
+ md_unregister_thread(&cinfo->recv_thread, mddev);
lockres_free(cinfo->message_lockres);
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
@@ -1009,8 +1009,8 @@ static int leave(struct mddev *mddev)
resync_bitmap(mddev);

set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
- md_unregister_thread(&cinfo->recovery_thread);
- md_unregister_thread(&cinfo->recv_thread);
+ md_unregister_thread(&cinfo->recovery_thread, mddev);
+ md_unregister_thread(&cinfo->recv_thread, mddev);
lockres_free(cinfo->message_lockres);
lockres_free(cinfo->token_lockres);
lockres_free(cinfo->ack_lockres);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4ecfd0508afb..ab9299187cfe 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4775,7 +4775,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)

mddev_unlock(mddev);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(&mddev->sync_thread,
+ mddev);
mddev_lock_nointr(mddev);
/*
* set RECOVERY_INTR again and restore reshape
@@ -6175,7 +6176,7 @@ static void __md_stop_writes(struct mddev *mddev)
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(&mddev->sync_thread, mddev);
md_reap_sync_thread(mddev);
}

@@ -6215,7 +6216,7 @@ static void mddev_detach(struct mddev *mddev)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(&mddev->thread, mddev);
if (mddev->queue)
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
}
@@ -7933,7 +7934,7 @@ int md_register_thread(struct md_thread **threadp,
}
EXPORT_SYMBOL(md_register_thread);

-void md_unregister_thread(struct md_thread **threadp)
+void md_unregister_thread(struct md_thread **threadp, struct mddev *mddev)
{
struct md_thread *thread;

@@ -9324,7 +9325,7 @@ void md_check_recovery(struct mddev *mddev)
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(&mddev->sync_thread, mddev);
md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9360,7 +9361,7 @@ void md_check_recovery(struct mddev *mddev)
goto unlock;
}
if (mddev->sync_thread) {
- md_unregister_thread(&mddev->sync_thread);
+ md_unregister_thread(&mddev->sync_thread, mddev);
md_reap_sync_thread(mddev);
goto unlock;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index aeb2fc6b65c7..8f4137ad2dde 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -733,7 +733,8 @@ extern void md_cluster_stop(struct mddev *mddev);
int md_register_thread(struct md_thread **threadp,
void (*run)(struct md_thread *thread),
struct mddev *mddev, const char *name);
-extern void md_unregister_thread(struct md_thread **threadp);
+extern void md_unregister_thread(struct md_thread **threadp,
+ struct mddev *mddev);
extern void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 391ff239c711..8329a1ba9d12 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3158,7 +3158,7 @@ static int raid1_run(struct mddev *mddev)
* RAID1 needs at least one disk in active
*/
if (conf->raid_disks - mddev->degraded < 1) {
- md_unregister_thread(&conf->thread);
+ md_unregister_thread(&conf->thread, mddev);
ret = -EINVAL;
goto abort;
}
@@ -3185,7 +3185,7 @@ static int raid1_run(struct mddev *mddev)

ret = md_integrity_register(mddev);
if (ret) {
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(&mddev->thread, mddev);
goto abort;
}
return 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 920e5722040f..47d18d56000e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4293,7 +4293,7 @@ static int raid10_run(struct mddev *mddev)
return 0;

out_free_conf:
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(&mddev->thread, mddev);
raid10_free_conf(conf);
mddev->private = NULL;
out:
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index d6ee6a7a83b7..588c3d1f7467 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3166,7 +3166,7 @@ void r5l_exit_log(struct r5conf *conf)
/* Ensure disable_writeback_work wakes up and exits */
wake_up(&conf->mddev->sb_wait);
flush_work(&log->disable_writeback_work);
- md_unregister_thread(&log->reclaim_thread);
+ md_unregister_thread(&log->reclaim_thread, conf->mddev);

conf->log = NULL;

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2c0695d41436..b9f2688b141f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8070,7 +8070,7 @@ static int raid5_run(struct mddev *mddev)

return 0;
abort:
- md_unregister_thread(&mddev->thread);
+ md_unregister_thread(&mddev->thread, mddev);
print_raid5_conf(conf);
free_conf(conf);
mddev->private = NULL;
--
2.31.1


2023-03-11 09:32:55

by Yu Kuai

Subject: [PATCH -next 5/5] md: protect md_thread with a new disk level spin lock

From: Yu Kuai <[email protected]>

Our test reports a UAF (use-after-free) for 'mddev->sync_thread':

T1                              T2
md_start_sync
 md_register_thread
                                raid1d
                                 md_check_recovery
                                  md_reap_sync_thread
                                   md_unregister_thread
                                    kfree

md_wakeup_thread
 wake_up
 ->sync_thread was freed

Currently, the global spinlock 'pers_lock' is borrowed to protect
'mddev->thread'. This problem could be fixed in the same way; however,
there might be similar problems for other md_thread instances, and I
really don't like the idea of borrowing a global lock.

This patch uses a disk-level spinlock to protect md_thread in the
relevant APIs.

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/md.c | 23 ++++++++++-------------
drivers/md/md.h | 1 +
2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index ab9299187cfe..a952978884a5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -663,6 +663,7 @@ void mddev_init(struct mddev *mddev)
atomic_set(&mddev->active, 1);
atomic_set(&mddev->openers, 0);
spin_lock_init(&mddev->lock);
+ spin_lock_init(&mddev->thread_lock);
atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
@@ -801,13 +802,8 @@ void mddev_unlock(struct mddev *mddev)
} else
mutex_unlock(&mddev->reconfig_mutex);

- /* As we've dropped the mutex we need a spinlock to
- * make sure the thread doesn't disappear
- */
- spin_lock(&pers_lock);
md_wakeup_thread(&mddev->thread, mddev);
wake_up(&mddev->sb_wait);
- spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

@@ -7895,13 +7891,16 @@ static int md_thread(void *arg)

void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev)
{
- struct md_thread *thread = *threadp;
+ struct md_thread *thread;

+ spin_lock(&mddev->thread_lock);
+ thread = *threadp;
if (thread) {
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
set_bit(THREAD_WAKEUP, &thread->flags);
wake_up(&thread->wqueue);
}
+ spin_unlock(&mddev->thread_lock);
}
EXPORT_SYMBOL(md_wakeup_thread);

@@ -7929,7 +7928,9 @@ int md_register_thread(struct md_thread **threadp,
return err;
}

+ spin_lock(&mddev->thread_lock);
*threadp = thread;
+ spin_unlock(&mddev->thread_lock);
return 0;
}
EXPORT_SYMBOL(md_register_thread);
@@ -7938,18 +7939,14 @@ void md_unregister_thread(struct md_thread **threadp, struct mddev *mddev)
{
struct md_thread *thread;

- /*
- * Locking ensures that mddev_unlock does not wake_up a
- * non-existent thread
- */
- spin_lock(&pers_lock);
+ spin_lock(&mddev->thread_lock);
thread = *threadp;
if (!thread) {
- spin_unlock(&pers_lock);
+ spin_unlock(&mddev->thread_lock);
return;
}
*threadp = NULL;
- spin_unlock(&pers_lock);
+ spin_unlock(&mddev->thread_lock);

pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
kthread_stop(thread->tsk);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8f4137ad2dde..ca182d21dd8d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -367,6 +367,7 @@ struct mddev {
int new_chunk_sectors;
int reshape_backwards;

+ spinlock_t thread_lock;
struct md_thread *thread; /* management thread */
struct md_thread *sync_thread; /* doing resync or reconstruct */

--
2.31.1


2023-03-11 09:32:58

by Yu Kuai

Subject: [PATCH -next 2/5] md: refactor md_wakeup_thread()

From: Yu Kuai <[email protected]>

Pass an md_thread pointer and an mddev to md_wakeup_thread() to prepare
for using a disk-level spinlock to protect md_thread; there are no
functional changes.
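
For callers this is a mechanical substitution, for example:

- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);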

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/dm-raid.c | 4 +--
drivers/md/md-bitmap.c | 6 ++--
drivers/md/md-cluster.c | 20 +++++------
drivers/md/md-multipath.c | 2 +-
drivers/md/md.c | 76 ++++++++++++++++++++-------------------
drivers/md/md.h | 4 +--
drivers/md/raid1.c | 10 +++---
drivers/md/raid10.c | 14 ++++----
drivers/md/raid5-cache.c | 12 +++----
drivers/md/raid5-ppl.c | 2 +-
drivers/md/raid5.c | 31 ++++++++--------
11 files changed, 93 insertions(+), 88 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 60632b409b80..257c9c9f2b4d 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3755,11 +3755,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
*/
mddev->ro = 0;
if (!mddev->suspended && mddev->sync_thread)
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (!mddev->suspended && mddev->thread)
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);

return 0;
}
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index e7cc6ba1b657..9489510405f7 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -1942,7 +1942,7 @@ int md_bitmap_load(struct mddev *mddev)
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);

mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);

md_bitmap_update_sb(bitmap);

@@ -2363,7 +2363,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
* metadata promptly.
*/
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
rv = 0;
out:
@@ -2454,7 +2454,7 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
*/
if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
mddev->thread->timeout = timeout;
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
}
return len;
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index c19e29cb73bf..92b0e49b4e53 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -325,7 +325,7 @@ static void recover_bitmaps(struct md_thread *thread)
if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->reshape_position != MaxSector)
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);

if (hi > 0) {
if (lo < mddev->recovery_cp)
@@ -340,7 +340,7 @@ static void recover_bitmaps(struct md_thread *thread)
clear_bit(MD_RESYNCING_REMOTE,
&mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
}
clear_bit:
@@ -368,7 +368,7 @@ static void __recover_slot(struct mddev *mddev, int slot)
return;
}
}
- md_wakeup_thread(cinfo->recovery_thread);
+ md_wakeup_thread(&cinfo->recovery_thread, mddev);
}

static void recover_slot(void *arg, struct dlm_slot *slot)
@@ -422,7 +422,7 @@ static void ack_bast(void *arg, int mode)

if (mode == DLM_LOCK_EX) {
if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
- md_wakeup_thread(cinfo->recv_thread);
+ md_wakeup_thread(&cinfo->recv_thread, mddev);
else
set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
}
@@ -454,7 +454,7 @@ static void process_suspend_info(struct mddev *mddev,
clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
remove_suspend_info(mddev, slot);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
return;
}

@@ -546,7 +546,7 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
if (rdev) {
set_bit(ClusterRemove, &rdev->flags);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
else
pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
@@ -696,7 +696,7 @@ static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked)
rv = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
&cinfo->state);
WARN_ON_ONCE(rv);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
set_bit = 1;
}

@@ -971,7 +971,7 @@ static void load_bitmaps(struct mddev *mddev, int total_slots)
set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
/* wake up recv thread in case something need to be handled */
if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
- md_wakeup_thread(cinfo->recv_thread);
+ md_wakeup_thread(&cinfo->recv_thread, mddev);
}

static void resync_bitmap(struct mddev *mddev)
@@ -1052,7 +1052,7 @@ static int metadata_update_start(struct mddev *mddev)
ret = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
&cinfo->state);
WARN_ON_ONCE(ret);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);

wait_event(cinfo->wait,
!test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state) ||
@@ -1430,7 +1430,7 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
/* Since MD_CHANGE_DEVS will be set in add_bound_rdev which
* will run soon after add_new_disk, the below path will be
* invoked:
- * md_wakeup_thread(mddev->thread)
+ * md_wakeup_thread(&mddev->thread)
* -> conf->thread (raid1d)
* -> md_check_recovery -> md_update_sb
* -> metadata_update_start/finish
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index ceec9e4b2a60..482536ec8850 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -57,7 +57,7 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&mp_bh->retry_list, &conf->retry_list);
spin_unlock_irqrestore(&conf->device_lock, flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}

/*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0bbdde29a41f..97e87df4ee43 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -487,8 +487,9 @@ void mddev_resume(struct mddev *mddev)
mddev->pers->quiesce(mddev, 0);

set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
+ md_wakeup_thread(&mddev->thread, mddev);
+ /* possibly kick off a reshape */
+ md_wakeup_thread(&mddev->sync_thread, mddev);
}
EXPORT_SYMBOL_GPL(mddev_resume);

@@ -804,7 +805,7 @@ void mddev_unlock(struct mddev *mddev)
* make sure the thread doesn't disappear
*/
spin_lock(&pers_lock);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
wake_up(&mddev->sb_wait);
spin_unlock(&pers_lock);
}
@@ -2814,7 +2815,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_new_event();
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
return 0;
}

@@ -2931,7 +2932,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
md_kick_rdev_from_array(rdev);
if (mddev->pers) {
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
md_new_event();
}
@@ -2962,7 +2963,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
+ md_wakeup_thread(&rdev->mddev->thread, rdev->mddev);

err = 0;
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
@@ -3000,7 +3001,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
!test_bit(Replacement, &rdev->flags))
set_bit(WantReplacement, &rdev->flags);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
+ md_wakeup_thread(&rdev->mddev->thread, rdev->mddev);
err = 0;
} else if (cmd_match(buf, "-want_replacement")) {
/* Clearing 'want_replacement' is always allowed.
@@ -3127,7 +3128,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
if (rdev->raid_disk >= 0)
return -EBUSY;
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
+ md_wakeup_thread(&rdev->mddev->thread, rdev->mddev);
} else if (rdev->mddev->pers) {
/* Activating a spare .. or possibly reactivating
* if we ever get bitmaps working here.
@@ -4359,7 +4360,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
if (st == active) {
restart_array(mddev);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
wake_up(&mddev->sb_wait);
} else /* st == clean */ {
restart_array(mddev);
@@ -4826,10 +4827,10 @@ action_store(struct mddev *mddev, const char *page, size_t len)
* canceling read-auto mode
*/
mddev->ro = MD_RDWR;
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
sysfs_notify_dirent_safe(mddev->sysfs_action);
return len;
}
@@ -5733,7 +5734,7 @@ static void md_safemode_timeout(struct timer_list *t)
if (mddev->external)
sysfs_notify_dirent_safe(mddev->sysfs_state);

- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}

static int start_dirty_degraded;
@@ -6045,8 +6046,9 @@ int do_md_run(struct mddev *mddev)
/* run start up tasks that require md_thread */
md_start(mddev);

- md_wakeup_thread(mddev->thread);
- md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
+ md_wakeup_thread(&mddev->thread, mddev);
+ /* possibly kick off a reshape */
+ md_wakeup_thread(&mddev->sync_thread, mddev);

set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
clear_bit(MD_NOT_READY, &mddev->flags);
@@ -6066,10 +6068,10 @@ int md_start(struct mddev *mddev)

if (mddev->pers->start) {
set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
ret = mddev->pers->start(mddev);
clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
}
return ret;
}
@@ -6111,8 +6113,8 @@ static int restart_array(struct mddev *mddev)
pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->thread, mddev);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
sysfs_notify_dirent_safe(mddev->sysfs_state);
return 0;
}
@@ -6261,7 +6263,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -6287,7 +6289,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
err = -EBUSY;
goto out;
@@ -6302,7 +6304,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
sysfs_notify_dirent_safe(mddev->sysfs_state);
err = 0;
}
@@ -6325,7 +6327,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -6350,7 +6352,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
return -EBUSY;
}
@@ -6893,7 +6895,7 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (mddev->thread)
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
else
md_update_sb(mddev, 1);
md_new_event();
@@ -6976,7 +6978,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
* array immediately.
*/
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
md_new_event();
return 0;

@@ -7886,8 +7888,10 @@ static int md_thread(void *arg)
return 0;
}

-void md_wakeup_thread(struct md_thread *thread)
+void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev)
{
+ struct md_thread *thread = *threadp;
+
if (thread) {
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
set_bit(THREAD_WAKEUP, &thread->flags);
@@ -7963,7 +7967,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
if (!test_bit(MD_BROKEN, &mddev->flags)) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
@@ -8474,7 +8478,7 @@ void md_done_sync(struct mddev *mddev, int blocks, int ok)
if (!ok) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
// stop recovery, signal do_sync ....
}
}
@@ -8499,8 +8503,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
/* need to switch to read/write */
mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->thread, mddev);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
did_change = 1;
}
rcu_read_lock();
@@ -8515,7 +8519,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
mddev->in_sync = 0;
set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
did_change = 1;
}
spin_unlock(&mddev->lock);
@@ -8558,7 +8562,7 @@ void md_write_end(struct mddev *mddev)
percpu_ref_put(&mddev->writes_pending);

if (mddev->safemode == 2)
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
else if (mddev->safemode_delay)
/* The roundup() ensures this only performs locking once
* every ->safemode_delay jiffies
@@ -9100,7 +9104,7 @@ void md_do_sync(struct md_thread *thread)
spin_unlock(&mddev->lock);

wake_up(&resync_wait);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
return;
}
EXPORT_SYMBOL_GPL(md_do_sync);
@@ -9218,7 +9222,7 @@ static void md_start_sync(struct work_struct *ws)
if (mddev->sysfs_action)
sysfs_notify_dirent_safe(mddev->sysfs_action);
} else
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event();
}
@@ -9534,7 +9538,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
- md_wakeup_thread(rdev->mddev->thread);
+ md_wakeup_thread(&rdev->mddev->thread, mddev);
return 1;
} else
return 0;
@@ -9699,7 +9703,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
/* wakeup mddev->thread here, so array could
* perform resync with the new activated disk */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
/* device faulty
* We just want to do the minimum to mark the disk
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 344e055e4d0f..aeb2fc6b65c7 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -734,7 +734,7 @@ int md_register_thread(struct md_thread **threadp,
void (*run)(struct md_thread *thread),
struct mddev *mddev, const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
-extern void md_wakeup_thread(struct md_thread *thread);
+extern void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
@@ -805,7 +805,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
int faulty = test_bit(Faulty, &rdev->flags);
if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
}

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1217c1db0a40..391ff239c711 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -289,7 +289,7 @@ static void reschedule_retry(struct r1bio *r1_bio)
spin_unlock_irqrestore(&conf->device_lock, flags);

wake_up(&conf->wait_barrier);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}

/*
@@ -1180,7 +1180,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
kfree(plug);
return;
}
@@ -1585,7 +1585,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
}

@@ -2501,7 +2501,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
* get_unqueued_pending() == extra to be true.
*/
wake_up(&conf->wait_barrier);
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
} else {
if (test_bit(R1BIO_WriteError, &r1_bio->state))
close_write(r1_bio);
@@ -3344,7 +3344,7 @@ static int raid1_reshape(struct mddev *mddev)

set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);

mempool_exit(&oldpool);
return 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f1e54c62f930..920e5722040f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -309,7 +309,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
/* wake up frozen array... */
wake_up(&conf->wait_barrier);

- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}

/*
@@ -1114,7 +1114,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
kfree(plug);
return;
}
@@ -1329,7 +1329,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
}

@@ -1441,7 +1441,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
mddev->reshape_position = conf->reshape_progress;
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
if (bio->bi_opf & REQ_NOWAIT) {
allow_barrier(conf);
bio_wouldblock_error(bio);
@@ -3079,7 +3079,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
* nr_pending == nr_queued + extra to be true.
*/
wake_up(&conf->wait_barrier);
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
} else {
if (test_bit(R10BIO_WriteError,
&r10_bio->state))
@@ -4692,7 +4692,7 @@ static int raid10_start_reshape(struct mddev *mddev)
goto abort;
}
conf->reshape_checkpoint = jiffies;
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
md_new_event();
return 0;

@@ -4874,7 +4874,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
mddev->curr_resync_completed = conf->reshape_progress;
conf->reshape_checkpoint = jiffies;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0464d4d551fc..d6ee6a7a83b7 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -600,7 +600,7 @@ static void r5l_log_endio(struct bio *bio)
spin_unlock_irqrestore(&log->io_list_lock, flags);

if (log->need_cache_flush)
- md_wakeup_thread(log->rdev->mddev->thread);
+ md_wakeup_thread(&log->rdev->mddev->thread, log->rdev->mddev);

/* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */
if (has_null_flush) {
@@ -1491,7 +1491,7 @@ static void r5c_do_reclaim(struct r5conf *conf)
if (!test_bit(R5C_LOG_CRITICAL, &conf->cache_state))
r5l_run_no_space_stripes(log);

- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}

static void r5l_do_reclaim(struct r5l_log *log)
@@ -1519,7 +1519,7 @@ static void r5l_do_reclaim(struct r5l_log *log)
list_empty(&log->finished_ios)))
break;

- md_wakeup_thread(log->rdev->mddev->thread);
+ md_wakeup_thread(&log->rdev->mddev->thread, log->rdev->mddev);
wait_event_lock_irq(log->iounit_wait,
r5l_reclaimable_space(log) > reclaimable,
log->io_list_lock);
@@ -1571,7 +1571,7 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
if (new < target)
return;
} while (!try_cmpxchg(&log->reclaim_target, &target, new));
- md_wakeup_thread(log->reclaim_thread);
+ md_wakeup_thread(&log->reclaim_thread, log->rdev->mddev);
}

void r5l_quiesce(struct r5l_log *log, int quiesce)
@@ -2776,7 +2776,7 @@ void r5c_release_extra_page(struct stripe_head *sh)

if (using_disk_info_extra_page) {
clear_bit(R5C_EXTRA_PAGE_IN_USE, &conf->cache_state);
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}
}

@@ -2832,7 +2832,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,

if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes))
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);

if (do_wakeup)
wake_up(&conf->wait_for_overlap);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index e495939bb3e0..47cf1e85c48d 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -601,7 +601,7 @@ static void ppl_flush_endio(struct bio *bio)

if (atomic_dec_and_test(&io->pending_flushes)) {
ppl_io_unit_finished(io);
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}
}

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 04b1093195d0..2c0695d41436 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -195,7 +195,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
}

if (conf->worker_cnt_per_group == 0) {
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
return;
}

@@ -268,13 +268,14 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
return;
}
}
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
} else {
BUG_ON(stripe_operations_active(sh));
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
if (atomic_dec_return(&conf->preread_active_stripes)
< IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread,
+ conf->mddev);
atomic_dec(&conf->active_stripes);
if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
if (!r5c_is_writeback(conf->log))
@@ -356,7 +357,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
if (atomic_read(&conf->active_stripes) == 0)
wake_up(&conf->wait_for_quiescent);
if (conf->retry_read_aligned)
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}
}

@@ -407,7 +408,7 @@ void raid5_release_stripe(struct stripe_head *sh)
goto slow_path;
wakeup = llist_add(&sh->release_list, &conf->released_stripes);
if (wakeup)
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
return;
slow_path:
/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
@@ -981,7 +982,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf,
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
if (atomic_dec_return(&conf->preread_active_stripes)
< IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);

if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
int seq = sh->bm_seq;
@@ -3759,7 +3760,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,

if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes))
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}

static void
@@ -4156,7 +4157,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,

if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes))
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);

if (head_sh->batch_head && do_endio)
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
@@ -5369,7 +5370,7 @@ static void handle_stripe(struct stripe_head *sh)
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) <
IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}

clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
@@ -5436,7 +5437,7 @@ static void add_bio_to_retry(struct bio *bi,struct r5conf *conf)
conf->retry_read_aligned_list = bi;

spin_unlock_irqrestore(&conf->device_lock, flags);
- md_wakeup_thread(conf->mddev->thread);
+ md_wakeup_thread(&conf->mddev->thread, conf->mddev);
}

static struct bio *remove_bio_from_retry(struct r5conf *conf,
@@ -6045,7 +6046,7 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
* Stripe is busy expanding or add failed due to
* overlap. Flush everything and wait a while.
*/
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out_release;
}
@@ -6345,7 +6346,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk

conf->reshape_checkpoint = jiffies;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
@@ -6453,7 +6454,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
rdev->recovery_offset = sector_nr;
conf->reshape_checkpoint = jiffies;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
@@ -8585,7 +8586,7 @@ static int raid5_start_reshape(struct mddev *mddev)
return -EAGAIN;
}
conf->reshape_checkpoint = jiffies;
- md_wakeup_thread(mddev->sync_thread);
+ md_wakeup_thread(&mddev->sync_thread, mddev);
md_new_event();
return 0;
}
@@ -8815,7 +8816,7 @@ static int raid5_check_reshape(struct mddev *mddev)
mddev->chunk_sectors = new_chunk;
}
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
- md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(&mddev->thread, mddev);
}
return check_reshape(mddev);
}
--
2.31.1


2023-03-14 00:44:07

by Song Liu

Subject: Re: [PATCH -next 0/5] md: fix uaf for sync_thread

On Sat, Mar 11, 2023 at 1:32 AM Yu Kuai <[email protected]> wrote:
>
> From: Yu Kuai <[email protected]>
>
> Our test reports a UAF (use-after-free) for 'mddev->sync_thread':
>
> T1                              T2
> md_start_sync
>  md_register_thread
>                                 raid1d
>                                  md_check_recovery
>                                   md_reap_sync_thread
>                                    md_unregister_thread
>                                     kfree
>
> md_wakeup_thread
>  wake_up
>  ->sync_thread was freed
>
> Currently, the global spinlock 'pers_lock' is borrowed to protect
> 'mddev->thread'. This problem could be fixed in the same way; however,
> there might be similar problems for other md_thread instances, and I
> really don't like the idea of borrowing a global lock.
>
> This patchset does some refactoring and then uses a disk-level spinlock
> to protect md_thread in the relevant APIs.
>
> Yu Kuai (5):
> md: pass an md_thread pointer to md_register_thread()
> md: refactor md_wakeup_thread()
> md: use md_thread api to wake up sync_thread
> md: pass an mddev to md_unregister_thread()
> md: protect md_thread with a new disk level spin lock

Applied to md-next.

Thanks,
Song

2023-03-14 10:55:10

by Yu Kuai

Subject: Re: [PATCH -next 5/5] md: protect md_thread with a new disk level spin lock

Hi, Song!

On 2023/03/11 17:31, Yu Kuai wrote:
> From: Yu Kuai <[email protected]>
>
> [...]
>
> @@ -7895,13 +7891,16 @@ static int md_thread(void *arg)
>
> void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev)
> {
> - struct md_thread *thread = *threadp;
> + struct md_thread *thread;
>
> + spin_lock(&mddev->thread_lock);
> + thread = *threadp;
> if (thread) {
> pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
> set_bit(THREAD_WAKEUP, &thread->flags);
> wake_up(&thread->wqueue);
> }
> + spin_unlock(&mddev->thread_lock);

I just found that md_wakeup_thread() can be called from IRQ context:

md_safemode_timeout
 md_wakeup_thread

So I need to use the IRQ-safe spinlock APIs here.
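
Something like the following should work (an untested sketch using the
irqsave spinlock variants):

void md_wakeup_thread(struct md_thread **threadp, struct mddev *mddev)
{
        struct md_thread *thread;
        unsigned long flags;

        /* irqsave variant, so this is also safe from the timer path */
        spin_lock_irqsave(&mddev->thread_lock, flags);
        thread = *threadp;
        if (thread) {
                pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
                set_bit(THREAD_WAKEUP, &thread->flags);
                wake_up(&thread->wqueue);
        }
        spin_unlock_irqrestore(&mddev->thread_lock, flags);
}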

Can you drop this version from md-next? I'll send a new version after
I've verified that there are no new regressions, at least for the
mdadm tests.

Thanks,
Kuai

2023-03-14 16:59:10

by Song Liu

Subject: Re: [PATCH -next 5/5] md: protect md_thread with a new disk level spin lock

On Tue, Mar 14, 2023 at 3:54 AM Yu Kuai <[email protected]> wrote:
>
> Hi, Song!
>
> On 2023/03/11 17:31, Yu Kuai wrote:
> > [...]
>
> I just found that md_wakeup_thread() can be called from IRQ context:
>
> md_safemode_timeout
>  md_wakeup_thread
>
> So I need to use the IRQ-safe spinlock APIs here.
>
> Can you drop this version from md-next? I'll send a new version after
> I've verified that there are no new regressions, at least for the
> mdadm tests.

I will drop it from md-next. Please send a new version.

Thanks,
Song

2023-03-14 20:08:43

by Song Liu

Subject: Re: [PATCH -next 5/5] md: protect md_thread with a new disk level spin lock

On Tue, Mar 14, 2023 at 9:58 AM Song Liu <[email protected]> wrote:
>
> On Tue, Mar 14, 2023 at 3:54 AM Yu Kuai <[email protected]> wrote:
> >
> > Hi, Song!
> >
> > On 2023/03/11 17:31, Yu Kuai wrote:
> > > [...]
> >
> > I just found that md_wakeup_thread() can be called from IRQ context:
> >
> > md_safemode_timeout
> >  md_wakeup_thread
> >
> > So I need to use the IRQ-safe spinlock APIs here.
> >
> > Can you drop this version from md-next? I'll send a new version after
> > I've verified that there are no new regressions, at least for the
> > mdadm tests.
>
> I will drop it from md-next. Please send a new version.

To clarify: I dropped the whole set from md-next. Please resend the set
after fixing the issue.

Thanks,
Song