2022-09-16 11:29:15

by Yu Kuai

[permalink] [raw]
Subject: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock

From: Yu Kuai <[email protected]>

Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
and io can't be dispatched until 'barrier' is dropped.

Since the 'barrier' is rarely raised, convert 'resync_lock' to a
seqlock so that taking the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai <[email protected]>
---
drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
drivers/md/raid10.h | 2 +-
2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9a28abd19709..2daa7d57034c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);

#include "raid1-10.c"

+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+ do { \
+ write_sequnlock_irq(&(conf)->resync_lock); \
+ cmd; \
+ } while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+ wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+ cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+ wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
/*
* for resync bio, r10bio pointer can be retrieved from the per-bio
* 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)

static void raise_barrier(struct r10conf *conf, int force)
{
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
BUG_ON(force && !conf->barrier);

/* Wait until no block IO is waiting (unless 'force') */
- wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock);
+ wait_event_barrier(conf, force || !conf->nr_waiting);

/* block any new IO from starting */
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);

/* Now wait for all pending IO to complete */
- wait_event_lock_irq(conf->wait_barrier,
- !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock);
+ wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+ conf->barrier < RESYNC_DEPTH);

- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static void lower_barrier(struct r10conf *conf)
{
unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->barrier--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+ write_seqlock_irqsave(&conf->resync_lock, flags);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
+ write_sequnlock_irqrestore(&conf->resync_lock, flags);
wake_up(&conf->wait_barrier);
}

@@ -990,11 +1004,31 @@ static bool stop_waiting_barrier(struct r10conf *conf)
return false;
}

+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+ unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+ if (READ_ONCE(conf->barrier))
+ return false;
+
+ atomic_inc(&conf->nr_pending);
+ if (!read_seqretry(&conf->resync_lock, seq))
+ return true;
+
+ if (atomic_dec_and_test(&conf->nr_pending))
+ wake_up_barrier(conf);
+
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;

- spin_lock_irq(&conf->resync_lock);
+ if (wait_barrier_nolock(conf))
+ return true;
+
+ write_seqlock_irq(&conf->resync_lock);
if (conf->barrier) {
/* Return false when nowait flag is set */
if (nowait) {
@@ -1002,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
} else {
conf->nr_waiting++;
raid10_log(conf->mddev, "wait barrier");
- wait_event_lock_irq(conf->wait_barrier,
- stop_waiting_barrier(conf),
- conf->resync_lock);
+ wait_event_barrier(conf, stop_waiting_barrier(conf));
conf->nr_waiting--;
}
if (!conf->nr_waiting)
@@ -1013,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
/* Only increment nr_pending when we wait */
if (ret)
atomic_inc(&conf->nr_pending);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
return ret;
}

@@ -1038,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
conf->array_freeze_pending++;
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
conf->nr_waiting++;
- wait_event_lock_irq_cmd(conf->wait_barrier,
- atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
- conf->resync_lock,
- flush_pending_writes(conf));
-
+ wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+ conf->nr_queued + extra, flush_pending_writes(conf));
conf->array_freeze_pending--;
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static void unfreeze_array(struct r10conf *conf)
{
/* reverse the effect of the freeze */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
+ write_seqlock_irq(&conf->resync_lock);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
conf->nr_waiting--;
wake_up(&conf->wait_barrier);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}

static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4044,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);

- spin_lock_init(&conf->resync_lock);
+ seqlock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);

@@ -4363,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
- conf->barrier = 1;
+ WRITE_ONCE(conf->barrier, 1);
}

return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;

- spinlock_t resync_lock;
+ seqlock_t resync_lock;
atomic_t nr_pending;
int nr_waiting;
int nr_queued;
--
2.31.1


2022-09-16 19:16:05

by Logan Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock



On 2022-09-16 05:34, Yu Kuai wrote:
> From: Yu Kuai <[email protected]>
>
> Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
> and io can't be dispatched until 'barrier' is dropped.
>
> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> seqlock so that holding lock can be avoided in fast path.
>
> Signed-off-by: Yu Kuai <[email protected]>

Reviewed-and-Tested-by: Logan Gunthorpe <[email protected]>

So far, I've run this series 3 times through the mdadm test suite and
haven't detected any issues.

Thanks,

Logan

2022-09-18 11:40:34

by Guoqing Jiang

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock



On 9/16/22 7:34 PM, Yu Kuai wrote:
> From: Yu Kuai <[email protected]>
>
> Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
> and io can't be dispatched until 'barrier' is dropped.
>
> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> seqlock so that holding lock can be avoided in fast path.
>
> Signed-off-by: Yu Kuai <[email protected]>
> ---
> drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
> drivers/md/raid10.h | 2 +-
> 2 files changed, 59 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 9a28abd19709..2daa7d57034c 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>
> #include "raid1-10.c"
>
> +#define NULL_CMD
> +#define cmd_before(conf, cmd) \
> + do { \
> + write_sequnlock_irq(&(conf)->resync_lock); \
> + cmd; \
> + } while (0)
> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)

The two are not paired well, given that only cmd_before needs the 'cmd'.

> +
> +#define wait_event_barrier_cmd(conf, cond, cmd) \
> + wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
> + cmd_after(conf))
> +
> +#define wait_event_barrier(conf, cond) \
> + wait_event_barrier_cmd(conf, cond, NULL_CMD)

What is the issue without defining NULL_CMD?

Thanks,
Guoqing

2022-09-19 01:39:23

by Yu Kuai

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock

Hi,

在 2022/09/18 19:36, Guoqing Jiang 写道:
>
>
> On 9/16/22 7:34 PM, Yu Kuai wrote:
>> From: Yu Kuai <[email protected]>
>>
>> Currently, wait_barrier() will hold 'resync_lock' to read
>> 'conf->barrier',
>> and io can't be dispatched until 'barrier' is dropped.
>>
>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
>> seqlock so that holding lock can be avoided in fast path.
>>
>> Signed-off-by: Yu Kuai <[email protected]>
>> ---
>>   drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
>>   drivers/md/raid10.h |  2 +-
>>   2 files changed, 59 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 9a28abd19709..2daa7d57034c 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>>   #include "raid1-10.c"
>> +#define NULL_CMD
>> +#define cmd_before(conf, cmd) \
>> +    do { \
>> +        write_sequnlock_irq(&(conf)->resync_lock); \
>> +        cmd; \
>> +    } while (0)
>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
>
> The two is not paired well given only cmd_before needs the 'cmd'.

Maybe I should just remove cmd_after?
>
>> +
>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
>> +    wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
>> +               cmd_after(conf))
>> +
>> +#define wait_event_barrier(conf, cond) \
>> +    wait_event_barrier_cmd(conf, cond, NULL_CMD)
>
> What is the issue without define NULL_CMD?
>

Checkpatch will complain about this:

ERROR: space prohibited before that close parenthesis ')'
#38: FILE: drivers/md/raid10.c:94:
+ wait_event_barrier_cmd(conf, cond, )

total: 1 errors, 0 warnings, 169 lines checked

Thanks,
Kuai
> Thanks,
> Guoqing
> .
>

2022-09-19 11:27:34

by Guoqing Jiang

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock



On 9/19/22 9:08 AM, Yu Kuai wrote:
> Hi,
>
> 在 2022/09/18 19:36, Guoqing Jiang 写道:
>>
>>
>> On 9/16/22 7:34 PM, Yu Kuai wrote:
>>> From: Yu Kuai <[email protected]>
>>>
>>> Currently, wait_barrier() will hold 'resync_lock' to read
>>> 'conf->barrier',
>>> and io can't be dispatched until 'barrier' is dropped.
>>>
>>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
>>> seqlock so that holding lock can be avoided in fast path.
>>>
>>> Signed-off-by: Yu Kuai <[email protected]>
>>> ---
>>>   drivers/md/raid10.c | 87
>>> ++++++++++++++++++++++++++++++---------------
>>>   drivers/md/raid10.h |  2 +-
>>>   2 files changed, 59 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index 9a28abd19709..2daa7d57034c 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>>>   #include "raid1-10.c"
>>> +#define NULL_CMD
>>> +#define cmd_before(conf, cmd) \
>>> +    do { \
>>> +        write_sequnlock_irq(&(conf)->resync_lock); \
>>> +        cmd; \
>>> +    } while (0)
>>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
>>
>> The two is not paired well given only cmd_before needs the 'cmd'.
>
> May be should I just remove cmd_after?

I'd remove it, but that is just my personal preference.

>>
>>> +
>>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
>>> +    wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf,
>>> cmd), \
>>> +               cmd_after(conf))
>>> +
>>> +#define wait_event_barrier(conf, cond) \
>>> +    wait_event_barrier_cmd(conf, cond, NULL_CMD)
>>
>> What is the issue without define NULL_CMD?
>>
>
> Checkpatch will complain this:
>
> ERROR: space prohibited before that close parenthesis ')'
> #38: FILE: drivers/md/raid10.c:94:
> +       wait_event_barrier_cmd(conf, cond, )
>
> total: 1 errors, 0 warnings, 169 lines checked

Hmm, freeze_array uses it differently, so there are two cmds before sleep
and one cmd after sleep; perhaps this is the best way for now.

Thanks,
Guoqing

2022-09-19 19:21:38

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock

On Mon, Sep 19, 2022 at 3:28 AM Guoqing Jiang <[email protected]> wrote:
>
>
>
> On 9/19/22 9:08 AM, Yu Kuai wrote:
> > Hi,
> >
> > 在 2022/09/18 19:36, Guoqing Jiang 写道:
> >>
> >>
> >> On 9/16/22 7:34 PM, Yu Kuai wrote:
> >>> From: Yu Kuai <[email protected]>
> >>>
> >>> Currently, wait_barrier() will hold 'resync_lock' to read
> >>> 'conf->barrier',
> >>> and io can't be dispatched until 'barrier' is dropped.
> >>>
> >>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> >>> seqlock so that holding lock can be avoided in fast path.
> >>>
> >>> Signed-off-by: Yu Kuai <[email protected]>
> >>> ---
> >>> drivers/md/raid10.c | 87
> >>> ++++++++++++++++++++++++++++++---------------
> >>> drivers/md/raid10.h | 2 +-
> >>> 2 files changed, 59 insertions(+), 30 deletions(-)
> >>>
> >>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> >>> index 9a28abd19709..2daa7d57034c 100644
> >>> --- a/drivers/md/raid10.c
> >>> +++ b/drivers/md/raid10.c
> >>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
> >>> #include "raid1-10.c"
> >>> +#define NULL_CMD
> >>> +#define cmd_before(conf, cmd) \
> >>> + do { \
> >>> + write_sequnlock_irq(&(conf)->resync_lock); \
> >>> + cmd; \
> >>> + } while (0)
> >>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
> >>
> >> The two is not paired well given only cmd_before needs the 'cmd'.
> >
> > May be should I just remove cmd_after?
>
> I'd remove it but just my personal flavor.
>
> >>
> >>> +
> >>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
> >>> + wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf,
> >>> cmd), \
> >>> + cmd_after(conf))
> >>> +
> >>> +#define wait_event_barrier(conf, cond) \
> >>> + wait_event_barrier_cmd(conf, cond, NULL_CMD)
> >>
> >> What is the issue without define NULL_CMD?
> >>
> >
> > Checkpatch will complain this:
> >
> > ERROR: space prohibited before that close parenthesis ')'
> > #38: FILE: drivers/md/raid10.c:94:
> > + wait_event_barrier_cmd(conf, cond, )
> >
> > total: 1 errors, 0 warnings, 169 lines checked
>
> Hmm, freeze_array has a different usage for it, so two cmds before sleep
> and one cmd after sleep, perhaps it is the best way for now.

The current version looks good to me. Please let me know if there are any
further concerns.

Applied the set to md-next. Thanks!

Song