2023-10-21 02:26:02

by Yu Kuai

[permalink] [raw]
Subject: [PATCH -next v2 1/6] md: remove useless debug code to print configuration

From: Yu Kuai <[email protected]>

One the one hand, print_conf() can be called without grabbing
'reconfig_mtuex' and current rcu protection to access rdev through 'conf'
is not safe. Fortunately, there is a separate rcu protection to access
rdev from 'mddev->disks', and rdev is always removed from 'conf' before
'mddev->disks'.

On the other hand, print_conf() is just used for debug,
and user can always grab such information(/proc/mdstat and mdadm).

There is no need to always enable this debug and try to fix misuse rcu
protection for accessing rdev from 'conf', hence remove print_conf().

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/raid1.c | 28 ----------------------------
drivers/md/raid10.c | 29 -----------------------------
drivers/md/raid5.c | 34 ----------------------------------
3 files changed, 91 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 35d12948e0a9..c13088eae401 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1679,30 +1679,6 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
mdname(mddev), conf->raid_disks - mddev->degraded);
}

-static void print_conf(struct r1conf *conf)
-{
- int i;
-
- pr_debug("RAID1 conf printout:\n");
- if (!conf) {
- pr_debug("(!conf)\n");
- return;
- }
- pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
- conf->raid_disks);
-
- rcu_read_lock();
- for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
- if (rdev)
- pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
- i, !test_bit(In_sync, &rdev->flags),
- !test_bit(Faulty, &rdev->flags),
- rdev->bdev);
- }
- rcu_read_unlock();
-}
-
static void close_sync(struct r1conf *conf)
{
int idx;
@@ -1763,7 +1739,6 @@ static int raid1_spare_active(struct mddev *mddev)
mddev->degraded -= count;
spin_unlock_irqrestore(&conf->device_lock, flags);

- print_conf(conf);
return count;
}

@@ -1829,7 +1804,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
}

- print_conf(conf);
return err;
}

@@ -1846,7 +1820,6 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev != p->rdev)
p = conf->mirrors + conf->raid_disks + number;

- print_conf(conf);
if (rdev == p->rdev) {
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
@@ -1902,7 +1875,6 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
}
abort:

- print_conf(conf);
return err;
}

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a5927e98dc67..4b5f34f320c8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2059,31 +2059,6 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
mdname(mddev), conf->geo.raid_disks - mddev->degraded);
}

-static void print_conf(struct r10conf *conf)
-{
- int i;
- struct md_rdev *rdev;
-
- pr_debug("RAID10 conf printout:\n");
- if (!conf) {
- pr_debug("(!conf)\n");
- return;
- }
- pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
- conf->geo.raid_disks);
-
- /* This is only called with ->reconfix_mutex held, so
- * rcu protection of rdev is not needed */
- for (i = 0; i < conf->geo.raid_disks; i++) {
- rdev = conf->mirrors[i].rdev;
- if (rdev)
- pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
- i, !test_bit(In_sync, &rdev->flags),
- !test_bit(Faulty, &rdev->flags),
- rdev->bdev);
- }
-}
-
static void close_sync(struct r10conf *conf)
{
wait_barrier(conf, false);
@@ -2136,7 +2111,6 @@ static int raid10_spare_active(struct mddev *mddev)
mddev->degraded -= count;
spin_unlock_irqrestore(&conf->device_lock, flags);

- print_conf(conf);
return count;
}

@@ -2207,7 +2181,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rcu_assign_pointer(p->replacement, rdev);
}

- print_conf(conf);
return err;
}

@@ -2219,7 +2192,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct md_rdev **rdevp;
struct raid10_info *p;

- print_conf(conf);
if (unlikely(number >= mddev->raid_disks))
return 0;
p = conf->mirrors + number;
@@ -2271,7 +2243,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)

abort:

- print_conf(conf);
return err;
}

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4207e945e8c8..27a4dce51c92 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -156,8 +156,6 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
return slot;
}

-static void print_raid5_conf (struct r5conf *conf);
-
static int stripe_operations_active(struct stripe_head *sh)
{
return sh->check_state || sh->reconstruct_state ||
@@ -7983,8 +7981,6 @@ static int raid5_run(struct mddev *mddev)
mddev->raid_disks-mddev->degraded, mddev->raid_disks,
mddev->new_layout);

- print_raid5_conf(conf);
-
if (conf->reshape_progress != MaxSector) {
conf->reshape_safe = conf->reshape_progress;
atomic_set(&conf->reshape_stripes, 0);
@@ -8075,7 +8071,6 @@ static int raid5_run(struct mddev *mddev)
return 0;
abort:
md_unregister_thread(mddev, &mddev->thread);
- print_raid5_conf(conf);
free_conf(conf);
mddev->private = NULL;
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
@@ -8107,31 +8102,6 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}

-static void print_raid5_conf (struct r5conf *conf)
-{
- struct md_rdev *rdev;
- int i;
-
- pr_debug("RAID conf printout:\n");
- if (!conf) {
- pr_debug("(conf==NULL)\n");
- return;
- }
- pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level,
- conf->raid_disks,
- conf->raid_disks - conf->mddev->degraded);
-
- rcu_read_lock();
- for (i = 0; i < conf->raid_disks; i++) {
- rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev)
- pr_debug(" disk %d, o:%d, dev:%pg\n",
- i, !test_bit(Faulty, &rdev->flags),
- rdev->bdev);
- }
- rcu_read_unlock();
-}
-
static int raid5_spare_active(struct mddev *mddev)
{
int i;
@@ -8173,7 +8143,6 @@ static int raid5_spare_active(struct mddev *mddev)
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
- print_raid5_conf(conf);
return count;
}

@@ -8186,7 +8155,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct disk_info *p;
struct md_rdev *tmp;

- print_raid5_conf(conf);
if (test_bit(Journal, &rdev->flags) && conf->log) {
/*
* we can't wait pending write here, as this is called in
@@ -8266,7 +8234,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
clear_bit(WantReplacement, &rdev->flags);
abort:

- print_raid5_conf(conf);
return err;
}

@@ -8348,7 +8315,6 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
}
out:
- print_raid5_conf(conf);
return err;
}

--
2.39.2


2023-11-24 08:17:59

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH -next v2 1/6] md: remove useless debug code to print configuration

On Fri, Oct 20, 2023 at 7:25 PM Yu Kuai <[email protected]> wrote:
>
> From: Yu Kuai <[email protected]>
>
> One the one hand, print_conf() can be called without grabbing
> 'reconfig_mtuex' and current rcu protection to access rdev through 'conf'
> is not safe. Fortunately, there is a separate rcu protection to access
> rdev from 'mddev->disks', and rdev is always removed from 'conf' before
> 'mddev->disks'.
>
> On the other hand, print_conf() is just used for debug,
> and user can always grab such information(/proc/mdstat and mdadm).
>
> There is no need to always enable this debug and try to fix misuse rcu
> protection for accessing rdev from 'conf', hence remove print_conf().
>
> Signed-off-by: Yu Kuai <[email protected]>

I wouldn't call these debug functions useless. There is probably some
users who use them for debugging (or even in some automations).
How hard is it to keep these functions? Can we just add some lockdep
to these functions to make sure they are called from safe places?

Thanks,
Song

[...]

2023-11-24 09:12:57

by Yu Kuai

[permalink] [raw]
Subject: Re: [PATCH -next v2 1/6] md: remove useless debug code to print configuration

Hi,

在 2023/11/24 16:17, Song Liu 写道:
> On Fri, Oct 20, 2023 at 7:25 PM Yu Kuai <[email protected]> wrote:
>>
>> From: Yu Kuai <[email protected]>
>>
>> One the one hand, print_conf() can be called without grabbing
>> 'reconfig_mtuex' and current rcu protection to access rdev through 'conf'
>> is not safe. Fortunately, there is a separate rcu protection to access
>> rdev from 'mddev->disks', and rdev is always removed from 'conf' before
>> 'mddev->disks'.
>>
>> On the other hand, print_conf() is just used for debug,
>> and user can always grab such information(/proc/mdstat and mdadm).
>>
>> There is no need to always enable this debug and try to fix misuse rcu
>> protection for accessing rdev from 'conf', hence remove print_conf().
>>
>> Signed-off-by: Yu Kuai <[email protected]>
>
> I wouldn't call these debug functions useless. There is probably some
> users who use them for debugging (or even in some automations).
> How hard is it to keep these functions? Can we just add some lockdep
> to these functions to make sure they are called from safe places?

Okay, I can keep these debug code, and since these code are
dereferencing rdev from conf, and they need new syncronization:

1) dereference rdev from mddev->disks instead of conf, and use
rdev->raid_disk >= 0 to judge if this rdev is in conf. There might
be a race window that rdev can be removed from conf, however, I think
this dones't matter. Or:

2) grab 'active_io' before print_conf(), to make sure rdev won't be
removed from conf.

Thanks,
Kuai

>
> Thanks,
> Song
>
> [...]
> .
>

2023-11-24 16:49:32

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH -next v2 1/6] md: remove useless debug code to print configuration

On Fri, Nov 24, 2023 at 1:12 AM Yu Kuai <[email protected]> wrote:
>
> Hi,
>
> 在 2023/11/24 16:17, Song Liu 写道:
> > On Fri, Oct 20, 2023 at 7:25 PM Yu Kuai <[email protected]> wrote:
> >>
> >> From: Yu Kuai <[email protected]>
> >>
> >> One the one hand, print_conf() can be called without grabbing
> >> 'reconfig_mtuex' and current rcu protection to access rdev through 'conf'
> >> is not safe. Fortunately, there is a separate rcu protection to access
> >> rdev from 'mddev->disks', and rdev is always removed from 'conf' before
> >> 'mddev->disks'.
> >>
> >> On the other hand, print_conf() is just used for debug,
> >> and user can always grab such information(/proc/mdstat and mdadm).
> >>
> >> There is no need to always enable this debug and try to fix misuse rcu
> >> protection for accessing rdev from 'conf', hence remove print_conf().
> >>
> >> Signed-off-by: Yu Kuai <[email protected]>
> >
> > I wouldn't call these debug functions useless. There is probably some
> > users who use them for debugging (or even in some automations).
> > How hard is it to keep these functions? Can we just add some lockdep
> > to these functions to make sure they are called from safe places?
>
> Okay, I can keep these debug code, and since these code are
> dereferencing rdev from conf, and they need new syncronization:
>
> 1) dereference rdev from mddev->disks instead of conf, and use
> rdev->raid_disk >= 0 to judge if this rdev is in conf. There might
> be a race window that rdev can be removed from conf, however, I think
> this dones't matter. Or:
>
> 2) grab 'active_io' before print_conf(), to make sure rdev won't be
> removed from conf.

I think for most of these cases, we can already do print_conf()
safely (holding mutex/lock), no? We can grab active_io when it is
really necessary.

Thanks,
Song

2023-11-25 06:52:36

by Yu Kuai

[permalink] [raw]
Subject: Re: [PATCH -next v2 1/6] md: remove useless debug code to print configuration

Hi,

在 2023/11/25 0:49, Song Liu 写道:
> On Fri, Nov 24, 2023 at 1:12 AM Yu Kuai <[email protected]> wrote:
>>
>> Hi,
>>
>> 在 2023/11/24 16:17, Song Liu 写道:
>>> On Fri, Oct 20, 2023 at 7:25 PM Yu Kuai <[email protected]> wrote:
>>>>
>>>> From: Yu Kuai <[email protected]>
>>>>
>>>> One the one hand, print_conf() can be called without grabbing
>>>> 'reconfig_mtuex' and current rcu protection to access rdev through 'conf'
>>>> is not safe. Fortunately, there is a separate rcu protection to access
>>>> rdev from 'mddev->disks', and rdev is always removed from 'conf' before
>>>> 'mddev->disks'.
>>>>
>>>> On the other hand, print_conf() is just used for debug,
>>>> and user can always grab such information(/proc/mdstat and mdadm).
>>>>
>>>> There is no need to always enable this debug and try to fix misuse rcu
>>>> protection for accessing rdev from 'conf', hence remove print_conf().
>>>>
>>>> Signed-off-by: Yu Kuai <[email protected]>
>>>
>>> I wouldn't call these debug functions useless. There is probably some
>>> users who use them for debugging (or even in some automations).
>>> How hard is it to keep these functions? Can we just add some lockdep
>>> to these functions to make sure they are called from safe places?
>>
>> Okay, I can keep these debug code, and since these code are
>> dereferencing rdev from conf, and they need new syncronization:
>>
>> 1) dereference rdev from mddev->disks instead of conf, and use
>> rdev->raid_disk >= 0 to judge if this rdev is in conf. There might
>> be a race window that rdev can be removed from conf, however, I think
>> this dones't matter. Or:
>>
>> 2) grab 'active_io' before print_conf(), to make sure rdev won't be
>> removed from conf.
>
> I think for most of these cases, we can already do print_conf()
> safely (holding mutex/lock), no? We can grab active_io when it is
> really necessary.

Ok, I'll look into this, and send a v3 if there are no other corner
cases.

Thanks,
Kuai

>
> Thanks,
> Song
>
> .
>