2022-01-06 02:45:38

by Xin Yin

[permalink] [raw]
Subject: [PATCH 0/2] fix blocks allocate issue during fast commit replay

when test fast_commit with xfstests generic/455, one failed case is
after fast commit replay, fsck raise ’multiply-claimed blocks‘ issue.
one inode's etb block may share with other file.

fast commit replay procedure may allocate etb blocks for inodes, but
it may allocate blocks in use. This patch set fix this issue.

Xin Yin (2):
ext4: prevent used blocks from being allocated during fast commit
replay
ext4: modify the logic of ext4_mb_new_blocks_simple

fs/ext4/ext4.h | 2 ++
fs/ext4/extents.c | 4 ++++
fs/ext4/fast_commit.c | 11 ++++++++---
fs/ext4/mballoc.c | 26 +++++++++++++++++---------
4 files changed, 31 insertions(+), 12 deletions(-)

--
2.20.1



2022-01-06 02:45:49

by Xin Yin

[permalink] [raw]
Subject: [PATCH 1/2] ext4: prevent used blocks from being allocated during fast commit replay

during fast commit replay procedure, we clear inode blocks bitmap in
ext4_ext_clear_bb(), this may cause ext4_mb_new_blocks_simple() allocate
blocks still in use.

make ext4_fc_record_regions() also record physical disk regions used by
inodes during replay procedure. Then ext4_mb_new_blocks_simple() can
excludes these blocks in use.

Signed-off-by: Xin Yin <[email protected]>
---
fs/ext4/ext4.h | 2 ++
fs/ext4/extents.c | 4 ++++
fs/ext4/fast_commit.c | 11 ++++++++---
3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 82fa51d6f145..7b0686758691 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2932,6 +2932,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
void ext4_fc_replay_cleanup(struct super_block *sb);
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+ ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay);

/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c3e76a5de661..9b6c76629c93 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -6096,11 +6096,15 @@ int ext4_ext_clear_bb(struct inode *inode)

ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, 0);
+ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+ 0, path[j].p_block, 1, 1);
}
ext4_ext_drop_refs(path);
kfree(path);
}
ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
+ ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+ map.m_lblk, map.m_pblk, map.m_len, 1);
}
cur = cur + map.m_len;
}
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 23d13983a281..f0cd20f5fe5e 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1567,13 +1567,15 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
* Record physical disk regions which are in use as per fast commit area. Our
* simple replay phase allocator excludes these regions from allocation.
*/
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
- ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+ ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
{
struct ext4_fc_replay_state *state;
struct ext4_fc_alloc_region *region;

state = &EXT4_SB(sb)->s_fc_replay_state;
+ if (replay && state->fc_regions_used != state->fc_regions_valid)
+ state->fc_regions_used = state->fc_regions_valid;
if (state->fc_regions_used == state->fc_regions_size) {
state->fc_regions_size +=
EXT4_FC_REPLAY_REALLOC_INCREMENT;
@@ -1591,6 +1593,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
region->pblk = pblk;
region->len = len;

+ if (replay)
+ state->fc_regions_valid++;
+
return 0;
}

@@ -1938,7 +1943,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
ret = ext4_fc_record_regions(sb,
le32_to_cpu(ext.fc_ino),
le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_len(ex), 0);
if (ret < 0)
break;
ret = JBD2_FC_REPLAY_CONTINUE;
--
2.20.1


2022-01-06 02:45:56

by Xin Yin

[permalink] [raw]
Subject: [PATCH 2/2] ext4: modify the logic of ext4_mb_new_blocks_simple

for now in ext4_mb_new_blocks_simple, if we found a block which
should be excluded then will switch to next group, this may
probably cause 'group' run out of range.

change to check next block in the same group when get a block should
be excluded. Also change the searche range to EXT4_CLUSTERS_PER_GROUP
and add error checking.

Signed-off-by: Xin Yin <[email protected]>
---
fs/ext4/mballoc.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 215b7068f548..31a00b473f3e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5757,7 +5757,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
struct super_block *sb = ar->inode->i_sb;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i = sb->s_blocksize;
+ ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+ ext4_grpblk_t i = 0;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;

@@ -5779,19 +5780,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
ext4_get_group_no_and_offset(sb,
max(ext4_group_first_block_no(sb, group), goal),
NULL, &blkoff);
- i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
+ while (1) {
+ i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
blkoff);
+ if (i >= max)
+ break;
+ if (ext4_fc_replay_check_excluded(sb,
+ ext4_group_first_block_no(sb, group) + i)) {
+ blkoff = i + 1;
+ } else
+ break;
+ }
brelse(bitmap_bh);
- if (i >= sb->s_blocksize)
- continue;
- if (ext4_fc_replay_check_excluded(sb,
- ext4_group_first_block_no(sb, group) + i))
- continue;
- break;
+ if (i < max)
+ break;
}

- if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
+ if (group >= ext4_get_groups_count(sb) || i >= max) {
+ *errp = -ENOSPC;
return 0;
+ }

block = ext4_group_first_block_no(sb, group) + i;
ext4_mb_mark_bb(sb, block, 1, 1);
--
2.20.1


2022-01-07 20:26:44

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 1/2] ext4: prevent used blocks from being allocated during fast commit replay

On Wed, Jan 5, 2022 at 6:45 PM Xin Yin <[email protected]> wrote:
>
> during fast commit replay procedure, we clear inode blocks bitmap in
> ext4_ext_clear_bb(), this may cause ext4_mb_new_blocks_simple() allocate
> blocks still in use.
>
> make ext4_fc_record_regions() also record physical disk regions used by
> inodes during replay procedure. Then ext4_mb_new_blocks_simple() can
> excludes these blocks in use.
>
> Signed-off-by: Xin Yin <[email protected]>
> ---
> fs/ext4/ext4.h | 2 ++
> fs/ext4/extents.c | 4 ++++
> fs/ext4/fast_commit.c | 11 ++++++++---
> 3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 82fa51d6f145..7b0686758691 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2932,6 +2932,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
> void ext4_fc_replay_cleanup(struct super_block *sb);
> int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
> int __init ext4_fc_init_dentry_cache(void);
> +int ext4_fc_record_regions(struct super_block *sb, int ino,
> + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay);
>
> /* mballoc.c */
> extern const struct seq_operations ext4_mb_seq_groups_ops;
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index c3e76a5de661..9b6c76629c93 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -6096,11 +6096,15 @@ int ext4_ext_clear_bb(struct inode *inode)
>
> ext4_mb_mark_bb(inode->i_sb,
> path[j].p_block, 1, 0);
> + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> + 0, path[j].p_block, 1, 1);
> }
> ext4_ext_drop_refs(path);
> kfree(path);
> }
> ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
> + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> + map.m_lblk, map.m_pblk, map.m_len, 1);
> }
> cur = cur + map.m_len;
> }
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index 23d13983a281..f0cd20f5fe5e 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -1567,13 +1567,15 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
> * Record physical disk regions which are in use as per fast commit area. Our
> * simple replay phase allocator excludes these regions from allocation.
> */
> -static int ext4_fc_record_regions(struct super_block *sb, int ino,
> - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
> +int ext4_fc_record_regions(struct super_block *sb, int ino,
> + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
Can you explain a bit why this replay parameter is needed here? This
function simply reallocs the regions array if it doesn't have enough
space. I am not sure why we need to change that behavior.
> {
> struct ext4_fc_replay_state *state;
> struct ext4_fc_alloc_region *region;
>
> state = &EXT4_SB(sb)->s_fc_replay_state;
> + if (replay && state->fc_regions_used != state->fc_regions_valid)
> + state->fc_regions_used = state->fc_regions_valid;
> if (state->fc_regions_used == state->fc_regions_size) {
> state->fc_regions_size +=
> EXT4_FC_REPLAY_REALLOC_INCREMENT;
> @@ -1591,6 +1593,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
> region->pblk = pblk;
> region->len = len;
>
> + if (replay)
> + state->fc_regions_valid++;
> +
> return 0;
> }
>
> @@ -1938,7 +1943,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
> ret = ext4_fc_record_regions(sb,
> le32_to_cpu(ext.fc_ino),
> le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
> - ext4_ext_get_actual_len(ex));
> + ext4_ext_get_actual_len(ex), 0);
> if (ret < 0)
> break;
> ret = JBD2_FC_REPLAY_CONTINUE;
> --
> 2.20.1
>

2022-01-07 20:30:24

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: modify the logic of ext4_mb_new_blocks_simple

Looks good to me.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Wed, Jan 5, 2022 at 6:45 PM Xin Yin <[email protected]> wrote:
>
> for now in ext4_mb_new_blocks_simple, if we found a block which
> should be excluded then will switch to next group, this may
> probably cause 'group' run out of range.
>
> change to check next block in the same group when get a block should
> be excluded. Also change the searche range to EXT4_CLUSTERS_PER_GROUP
> and add error checking.
>
> Signed-off-by: Xin Yin <[email protected]>
> ---
> fs/ext4/mballoc.c | 26 +++++++++++++++++---------
> 1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 215b7068f548..31a00b473f3e 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -5757,7 +5757,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
> struct super_block *sb = ar->inode->i_sb;
> ext4_group_t group;
> ext4_grpblk_t blkoff;
> - int i = sb->s_blocksize;
> + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
> + ext4_grpblk_t i = 0;
> ext4_fsblk_t goal, block;
> struct ext4_super_block *es = EXT4_SB(sb)->s_es;
>
> @@ -5779,19 +5780,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
> ext4_get_group_no_and_offset(sb,
> max(ext4_group_first_block_no(sb, group), goal),
> NULL, &blkoff);
> - i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
> + while (1) {
> + i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
> blkoff);
> + if (i >= max)
> + break;
> + if (ext4_fc_replay_check_excluded(sb,
> + ext4_group_first_block_no(sb, group) + i)) {
> + blkoff = i + 1;
> + } else
> + break;
> + }
> brelse(bitmap_bh);
> - if (i >= sb->s_blocksize)
> - continue;
> - if (ext4_fc_replay_check_excluded(sb,
> - ext4_group_first_block_no(sb, group) + i))
> - continue;
> - break;
> + if (i < max)
> + break;
> }
>
> - if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
> + if (group >= ext4_get_groups_count(sb) || i >= max) {
> + *errp = -ENOSPC;
> return 0;
> + }
>
> block = ext4_group_first_block_no(sb, group) + i;
> ext4_mb_mark_bb(sb, block, 1, 1);
> --
> 2.20.1
>

2022-01-08 02:09:21

by Xin Yin

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ext4: prevent used blocks from being allocated during fast commit replay

On Sat, Jan 8, 2022 at 4:26 AM harshad shirwadkar
<[email protected]> wrote:
>
> On Wed, Jan 5, 2022 at 6:45 PM Xin Yin <[email protected]> wrote:
> >
> > during fast commit replay procedure, we clear inode blocks bitmap in
> > ext4_ext_clear_bb(), this may cause ext4_mb_new_blocks_simple() allocate
> > blocks still in use.
> >
> > make ext4_fc_record_regions() also record physical disk regions used by
> > inodes during replay procedure. Then ext4_mb_new_blocks_simple() can
> > excludes these blocks in use.
> >
> > Signed-off-by: Xin Yin <[email protected]>
> > ---
> > fs/ext4/ext4.h | 2 ++
> > fs/ext4/extents.c | 4 ++++
> > fs/ext4/fast_commit.c | 11 ++++++++---
> > 3 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 82fa51d6f145..7b0686758691 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -2932,6 +2932,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
> > void ext4_fc_replay_cleanup(struct super_block *sb);
> > int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
> > int __init ext4_fc_init_dentry_cache(void);
> > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay);
> >
> > /* mballoc.c */
> > extern const struct seq_operations ext4_mb_seq_groups_ops;
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index c3e76a5de661..9b6c76629c93 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -6096,11 +6096,15 @@ int ext4_ext_clear_bb(struct inode *inode)
> >
> > ext4_mb_mark_bb(inode->i_sb,
> > path[j].p_block, 1, 0);
> > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > + 0, path[j].p_block, 1, 1);
> > }
> > ext4_ext_drop_refs(path);
> > kfree(path);
> > }
> > ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
> > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > + map.m_lblk, map.m_pblk, map.m_len, 1);
> > }
> > cur = cur + map.m_len;
> > }
> > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > index 23d13983a281..f0cd20f5fe5e 100644
> > --- a/fs/ext4/fast_commit.c
> > +++ b/fs/ext4/fast_commit.c
> > @@ -1567,13 +1567,15 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
> > * Record physical disk regions which are in use as per fast commit area. Our
> > * simple replay phase allocator excludes these regions from allocation.
> > */
> > -static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
> > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
> Can you explain a bit why this replay parameter is needed here? This
> function simply reallocs the regions array if it doesn't have enough
> space. I am not sure why we need to change that behavior.

ext4_fc_record_regions() originally only used during scan phase, and
set fc_regions_valid = fc_regions_use when getting a TAIL tag. Now we
also use it during the replay phase, and need to update
fc_regions_valid in this case, because ext4_fc_replay_check_excluded()
uses fc_regions_valid for regions checking.
Please correct me if I'm wrong.

> > {
> > struct ext4_fc_replay_state *state;
> > struct ext4_fc_alloc_region *region;
> >
> > state = &EXT4_SB(sb)->s_fc_replay_state;
> > + if (replay && state->fc_regions_used != state->fc_regions_valid)
> > + state->fc_regions_used = state->fc_regions_valid;
> > if (state->fc_regions_used == state->fc_regions_size) {
> > state->fc_regions_size +=
> > EXT4_FC_REPLAY_REALLOC_INCREMENT;
> > @@ -1591,6 +1593,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > region->pblk = pblk;
> > region->len = len;
> >
> > + if (replay)
> > + state->fc_regions_valid++;
> > +
> > return 0;
> > }
> >
> > @@ -1938,7 +1943,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
> > ret = ext4_fc_record_regions(sb,
> > le32_to_cpu(ext.fc_ino),
> > le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
> > - ext4_ext_get_actual_len(ex));
> > + ext4_ext_get_actual_len(ex), 0);
> > if (ret < 0)
> > break;
> > ret = JBD2_FC_REPLAY_CONTINUE;
> > --
> > 2.20.1
> >

2022-01-08 06:05:35

by harshad shirwadkar

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ext4: prevent used blocks from being allocated during fast commit replay

Ah, I see okay. Yeah, that makes sense. We do need to update
fc_regions_valid for every addition during the replay phase. I think
it may be helpful to add some comments describing this behavior in the
code. But other than that, I think what you're doing is fine.

Reviewed-by: Harshad Shirwadkar <[email protected]>

On Fri, Jan 7, 2022 at 6:09 PM Xin Yin <[email protected]> wrote:
>
> On Sat, Jan 8, 2022 at 4:26 AM harshad shirwadkar
> <[email protected]> wrote:
> >
> > On Wed, Jan 5, 2022 at 6:45 PM Xin Yin <[email protected]> wrote:
> > >
> > > during fast commit replay procedure, we clear inode blocks bitmap in
> > > ext4_ext_clear_bb(), this may cause ext4_mb_new_blocks_simple() allocate
> > > blocks still in use.
> > >
> > > make ext4_fc_record_regions() also record physical disk regions used by
> > > inodes during replay procedure. Then ext4_mb_new_blocks_simple() can
> > > excludes these blocks in use.
> > >
> > > Signed-off-by: Xin Yin <[email protected]>
> > > ---
> > > fs/ext4/ext4.h | 2 ++
> > > fs/ext4/extents.c | 4 ++++
> > > fs/ext4/fast_commit.c | 11 ++++++++---
> > > 3 files changed, 14 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > > index 82fa51d6f145..7b0686758691 100644
> > > --- a/fs/ext4/ext4.h
> > > +++ b/fs/ext4/ext4.h
> > > @@ -2932,6 +2932,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
> > > void ext4_fc_replay_cleanup(struct super_block *sb);
> > > int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
> > > int __init ext4_fc_init_dentry_cache(void);
> > > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay);
> > >
> > > /* mballoc.c */
> > > extern const struct seq_operations ext4_mb_seq_groups_ops;
> > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > > index c3e76a5de661..9b6c76629c93 100644
> > > --- a/fs/ext4/extents.c
> > > +++ b/fs/ext4/extents.c
> > > @@ -6096,11 +6096,15 @@ int ext4_ext_clear_bb(struct inode *inode)
> > >
> > > ext4_mb_mark_bb(inode->i_sb,
> > > path[j].p_block, 1, 0);
> > > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > > + 0, path[j].p_block, 1, 1);
> > > }
> > > ext4_ext_drop_refs(path);
> > > kfree(path);
> > > }
> > > ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
> > > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > > + map.m_lblk, map.m_pblk, map.m_len, 1);
> > > }
> > > cur = cur + map.m_len;
> > > }
> > > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > > index 23d13983a281..f0cd20f5fe5e 100644
> > > --- a/fs/ext4/fast_commit.c
> > > +++ b/fs/ext4/fast_commit.c
> > > @@ -1567,13 +1567,15 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
> > > * Record physical disk regions which are in use as per fast commit area. Our
> > > * simple replay phase allocator excludes these regions from allocation.
> > > */
> > > -static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
> > > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
> > Can you explain a bit why this replay parameter is needed here? This
> > function simply reallocs the regions array if it doesn't have enough
> > space. I am not sure why we need to change that behavior.
>
> ext4_fc_record_regions() originally only used during scan phase, and
> set fc_regions_valid = fc_regions_use when getting a TAIL tag. Now we
> also use it during the replay phase, and need to update
> fc_regions_valid in this case, because ext4_fc_replay_check_excluded()
> uses fc_regions_valid for regions checking.
> Please correct me if I'm wrong.
>
> > > {
> > > struct ext4_fc_replay_state *state;
> > > struct ext4_fc_alloc_region *region;
> > >
> > > state = &EXT4_SB(sb)->s_fc_replay_state;
> > > + if (replay && state->fc_regions_used != state->fc_regions_valid)
> > > + state->fc_regions_used = state->fc_regions_valid;
> > > if (state->fc_regions_used == state->fc_regions_size) {
> > > state->fc_regions_size +=
> > > EXT4_FC_REPLAY_REALLOC_INCREMENT;
> > > @@ -1591,6 +1593,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > region->pblk = pblk;
> > > region->len = len;
> > >
> > > + if (replay)
> > > + state->fc_regions_valid++;
> > > +
> > > return 0;
> > > }
> > >
> > > @@ -1938,7 +1943,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
> > > ret = ext4_fc_record_regions(sb,
> > > le32_to_cpu(ext.fc_ino),
> > > le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
> > > - ext4_ext_get_actual_len(ex));
> > > + ext4_ext_get_actual_len(ex), 0);
> > > if (ret < 0)
> > > break;
> > > ret = JBD2_FC_REPLAY_CONTINUE;
> > > --
> > > 2.20.1
> > >

2022-01-09 03:01:46

by Xin Yin

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ext4: prevent used blocks from being allocated during fast commit replay

Yes , this is a little tricky, I will add some comments for it, and
send a v2 patches set

Thanks,
Xin Yin

On Sat, Jan 8, 2022 at 2:05 PM harshad shirwadkar
<[email protected]> wrote:
>
> Ah, I see okay. Yeah, that makes sense. We do need to update
> fc_regions_valid for every addition during the replay phase. I think
> it may be helpful to add some comments describing this behavior in the
> code. But other than that, I think what you're doing is fine.
>
> Reviewed-by: Harshad Shirwadkar <[email protected]>
>
> On Fri, Jan 7, 2022 at 6:09 PM Xin Yin <[email protected]> wrote:
> >
> > On Sat, Jan 8, 2022 at 4:26 AM harshad shirwadkar
> > <[email protected]> wrote:
> > >
> > > On Wed, Jan 5, 2022 at 6:45 PM Xin Yin <[email protected]> wrote:
> > > >
> > > > during fast commit replay procedure, we clear inode blocks bitmap in
> > > > ext4_ext_clear_bb(), this may cause ext4_mb_new_blocks_simple() allocate
> > > > blocks still in use.
> > > >
> > > > make ext4_fc_record_regions() also record physical disk regions used by
> > > > inodes during replay procedure. Then ext4_mb_new_blocks_simple() can
> > > > excludes these blocks in use.
> > > >
> > > > Signed-off-by: Xin Yin <[email protected]>
> > > > ---
> > > > fs/ext4/ext4.h | 2 ++
> > > > fs/ext4/extents.c | 4 ++++
> > > > fs/ext4/fast_commit.c | 11 ++++++++---
> > > > 3 files changed, 14 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > > > index 82fa51d6f145..7b0686758691 100644
> > > > --- a/fs/ext4/ext4.h
> > > > +++ b/fs/ext4/ext4.h
> > > > @@ -2932,6 +2932,8 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
> > > > void ext4_fc_replay_cleanup(struct super_block *sb);
> > > > int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
> > > > int __init ext4_fc_init_dentry_cache(void);
> > > > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay);
> > > >
> > > > /* mballoc.c */
> > > > extern const struct seq_operations ext4_mb_seq_groups_ops;
> > > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > > > index c3e76a5de661..9b6c76629c93 100644
> > > > --- a/fs/ext4/extents.c
> > > > +++ b/fs/ext4/extents.c
> > > > @@ -6096,11 +6096,15 @@ int ext4_ext_clear_bb(struct inode *inode)
> > > >
> > > > ext4_mb_mark_bb(inode->i_sb,
> > > > path[j].p_block, 1, 0);
> > > > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > > > + 0, path[j].p_block, 1, 1);
> > > > }
> > > > ext4_ext_drop_refs(path);
> > > > kfree(path);
> > > > }
> > > > ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
> > > > + ext4_fc_record_regions(inode->i_sb, inode->i_ino,
> > > > + map.m_lblk, map.m_pblk, map.m_len, 1);
> > > > }
> > > > cur = cur + map.m_len;
> > > > }
> > > > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > > > index 23d13983a281..f0cd20f5fe5e 100644
> > > > --- a/fs/ext4/fast_commit.c
> > > > +++ b/fs/ext4/fast_commit.c
> > > > @@ -1567,13 +1567,15 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
> > > > * Record physical disk regions which are in use as per fast commit area. Our
> > > > * simple replay phase allocator excludes these regions from allocation.
> > > > */
> > > > -static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > > - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
> > > > +int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > > + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
> > > Can you explain a bit why this replay parameter is needed here? This
> > > function simply reallocs the regions array if it doesn't have enough
> > > space. I am not sure why we need to change that behavior.
> >
> > ext4_fc_record_regions() originally only used during scan phase, and
> > set fc_regions_valid = fc_regions_use when getting a TAIL tag. Now we
> > also use it during the replay phase, and need to update
> > fc_regions_valid in this case, because ext4_fc_replay_check_excluded()
> > uses fc_regions_valid for regions checking.
> > Please correct me if I'm wrong.
> >
> > > > {
> > > > struct ext4_fc_replay_state *state;
> > > > struct ext4_fc_alloc_region *region;
> > > >
> > > > state = &EXT4_SB(sb)->s_fc_replay_state;
> > > > + if (replay && state->fc_regions_used != state->fc_regions_valid)
> > > > + state->fc_regions_used = state->fc_regions_valid;
> > > > if (state->fc_regions_used == state->fc_regions_size) {
> > > > state->fc_regions_size +=
> > > > EXT4_FC_REPLAY_REALLOC_INCREMENT;
> > > > @@ -1591,6 +1593,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
> > > > region->pblk = pblk;
> > > > region->len = len;
> > > >
> > > > + if (replay)
> > > > + state->fc_regions_valid++;
> > > > +
> > > > return 0;
> > > > }
> > > >
> > > > @@ -1938,7 +1943,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
> > > > ret = ext4_fc_record_regions(sb,
> > > > le32_to_cpu(ext.fc_ino),
> > > > le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
> > > > - ext4_ext_get_actual_len(ex));
> > > > + ext4_ext_get_actual_len(ex), 0);
> > > > if (ret < 0)
> > > > break;
> > > > ret = JBD2_FC_REPLAY_CONTINUE;
> > > > --
> > > > 2.20.1
> > > >