2023-09-19 12:44:30

by Kemeng Shi

[permalink] [raw]
Subject: [PATCH v7 02/12] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

There are several reasons to add a general function ext4_mb_mark_context
to update block bitmap and group descriptor on disk:
1. pair behavior of alloc/free bits. For example,
ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
2. remove repeat code to read from disk, update and write back to disk.
3. reduce future unit test mocks to catch real IO to update structure
on disk.

Signed-off-by: Kemeng Shi <[email protected]>
---
fs/ext4/mballoc.c | 147 ++++++++++++++++++++++++----------------------
1 file changed, 77 insertions(+), 70 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cf09adfbaf11..e1320eea46e9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3953,6 +3953,80 @@ void ext4_exit_mballoc(void)
ext4_groupinfo_destroy_slabs();
}

+static int
+ext4_mb_mark_context(struct super_block *sb, bool state, ext4_group_t group,
+ ext4_grpblk_t blkoff, ext4_grpblk_t len)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext4_group_desc *gdp;
+ struct buffer_head *gdp_bh;
+ int err;
+ unsigned int i, already, changed;
+
+ bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (IS_ERR(bitmap_bh))
+ return PTR_ERR(bitmap_bh);
+
+ err = -EIO;
+ gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+ if (!gdp)
+ goto out_err;
+
+ ext4_lock_group(sb, group);
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_clusters_after_init(sb, group, gdp));
+ }
+
+ already = 0;
+ for (i = 0; i < len; i++)
+ if (mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
+ state)
+ already++;
+ changed = len - already;
+
+ if (state) {
+ mb_set_bits(bitmap_bh->b_data, blkoff, len);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_group_clusters(sb, gdp) - changed);
+ } else {
+ mb_clear_bits(bitmap_bh->b_data, blkoff, len);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_group_clusters(sb, gdp) + changed);
+ }
+
+ ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
+ ext4_group_desc_csum_set(sb, group, gdp);
+ ext4_unlock_group(sb, group);
+
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi, group);
+ struct flex_groups *fg = sbi_array_rcu_deref(sbi,
+ s_flex_groups, flex_group);
+
+ if (state)
+ atomic64_sub(changed, &fg->free_clusters);
+ else
+ atomic64_add(changed, &fg->free_clusters);
+ }
+
+ err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+ if (err)
+ goto out_err;
+ err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+ if (err)
+ goto out_err;
+
+ sync_dirty_buffer(bitmap_bh);
+ sync_dirty_buffer(gdp_bh);
+
+out_err:
+ brelse(bitmap_bh);
+ return err;
+}

/*
* Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
@@ -4079,15 +4153,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, bool state)
{
- struct buffer_head *bitmap_bh = NULL;
- struct ext4_group_desc *gdp;
- struct buffer_head *gdp_bh;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i, err = 0;
- int already;
- unsigned int clen, clen_changed, thisgrp_len;
+ int err = 0;
+ unsigned int clen, thisgrp_len;

while (len > 0) {
ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
@@ -4108,80 +4178,17 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
ext4_error(sb, "Marking blocks in system zone - "
"Block = %llu, len = %u",
block, thisgrp_len);
- bitmap_bh = NULL;
break;
}

- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
- bitmap_bh = NULL;
- break;
- }
-
- err = -EIO;
- gdp = ext4_get_group_desc(sb, group, &gdp_bh);
- if (!gdp)
- break;
-
- ext4_lock_group(sb, group);
- already = 0;
- for (i = 0; i < clen; i++)
- if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
- !state)
- already++;
-
- clen_changed = clen - already;
- if (state)
- mb_set_bits(bitmap_bh->b_data, blkoff, clen);
- else
- mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
- if (ext4_has_group_desc_csum(sb) &&
- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb, group, gdp));
- }
- if (state)
- clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
- else
- clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
-
- ext4_free_group_clusters_set(sb, gdp, clen);
- ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
- ext4_group_desc_csum_set(sb, group, gdp);
-
- ext4_unlock_group(sb, group);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, group);
- struct flex_groups *fg = sbi_array_rcu_deref(sbi,
- s_flex_groups, flex_group);
-
- if (state)
- atomic64_sub(clen_changed, &fg->free_clusters);
- else
- atomic64_add(clen_changed, &fg->free_clusters);
-
- }
-
- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
- if (err)
- break;
- sync_dirty_buffer(bitmap_bh);
- err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
- sync_dirty_buffer(gdp_bh);
+ err = ext4_mb_mark_context(sb, state, group, blkoff, clen);
if (err)
break;

block += thisgrp_len;
len -= thisgrp_len;
- brelse(bitmap_bh);
BUG_ON(len < 0);
}
-
- if (err)
- brelse(bitmap_bh);
}

/*
--
2.30.0


2023-09-27 12:46:55

by Ritesh Harjani

[permalink] [raw]
Subject: Re: [PATCH v7 02/12] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

Kemeng Shi <[email protected]> writes:

> There are several reasons to add a general function ext4_mb_mark_context
> to update block bitmap and group descriptor on disk:
> 1. pair behavior of alloc/free bits. For example,
> ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
> in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
> 2. remove repeat code to read from disk, update and write back to disk.
> 3. reduce future unit test mocks to catch real IO to update structure
> on disk.
>
> Signed-off-by: Kemeng Shi <[email protected]>
> ---
> fs/ext4/mballoc.c | 147 ++++++++++++++++++++++++----------------------
> 1 file changed, 77 insertions(+), 70 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index cf09adfbaf11..e1320eea46e9 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3953,6 +3953,80 @@ void ext4_exit_mballoc(void)
> ext4_groupinfo_destroy_slabs();
> }
>
> +static int
> +ext4_mb_mark_context(struct super_block *sb, bool state, ext4_group_t group,
> + ext4_grpblk_t blkoff, ext4_grpblk_t len)


ext4_grpblk_t is defined as int.
/* data type for block offset of block group */
typedef int ext4_grpblk_t;

I think len should be unsigned int (u32) here.

> +{
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> + struct buffer_head *bitmap_bh = NULL;
> + struct ext4_group_desc *gdp;
> + struct buffer_head *gdp_bh;
> + int err;
> + unsigned int i, already, changed;
> +
> + bitmap_bh = ext4_read_block_bitmap(sb, group);
> + if (IS_ERR(bitmap_bh))
> + return PTR_ERR(bitmap_bh);
> +
> + err = -EIO;
> + gdp = ext4_get_group_desc(sb, group, &gdp_bh);
> + if (!gdp)
> + goto out_err;
> +
> + ext4_lock_group(sb, group);
> + if (ext4_has_group_desc_csum(sb) &&
> + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_clusters_after_init(sb, group, gdp));
> + }
> +
> + already = 0;
> + for (i = 0; i < len; i++)
> + if (mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
> + state)
> + already++;
> + changed = len - already;
> +
> + if (state) {
> + mb_set_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) - changed);
> + } else {
> + mb_clear_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) + changed);
> + }
> +
> + ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> + ext4_group_desc_csum_set(sb, group, gdp);
> + ext4_unlock_group(sb, group);
> +
> + if (sbi->s_log_groups_per_flex) {
> + ext4_group_t flex_group = ext4_flex_group(sbi, group);
> + struct flex_groups *fg = sbi_array_rcu_deref(sbi,
> + s_flex_groups, flex_group);
> +
> + if (state)
> + atomic64_sub(changed, &fg->free_clusters);
> + else
> + atomic64_add(changed, &fg->free_clusters);
> + }
> +
> + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
> + if (err)
> + goto out_err;
> + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
> + if (err)
> + goto out_err;
> +
> + sync_dirty_buffer(bitmap_bh);
> + sync_dirty_buffer(gdp_bh);
> +
> +out_err:
> + brelse(bitmap_bh);
> + return err;
> +}
>
> /*
> * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
> @@ -4079,15 +4153,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
> int len, bool state)

Even ext4_mb_mark_bb should take len as unsigned int IMO.
For e.g. ext4_fc_replay_add_range() passes map.m_len which is also
unsigned int.


Otherwise the patch looks good to me. Feel free to add -

Reviewed-by: Ritesh Harjani (IBM) <[email protected]>

-ritesh

2023-09-28 05:57:41

by Ritesh Harjani

[permalink] [raw]
Subject: Re: [PATCH v7 02/12] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

Kemeng Shi <[email protected]> writes:

> on 9/27/2023 4:49 PM, Ritesh Harjani wrote:
>> Kemeng Shi <[email protected]> writes:
>>
>>> There are several reasons to add a general function ext4_mb_mark_context
>>> to update block bitmap and group descriptor on disk:
>>> 1. pair behavior of alloc/free bits. For example,
>>> ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
>>> in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
>>> 2. remove repeat code to read from disk, update and write back to disk.
>>> 3. reduce future unit test mocks to catch real IO to update structure
>>> on disk.
>>>
>>> Signed-off-by: Kemeng Shi <[email protected]>
>>> ---
>>> fs/ext4/mballoc.c | 147 ++++++++++++++++++++++++----------------------
>>> 1 file changed, 77 insertions(+), 70 deletions(-)
>>>
>>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>>> index cf09adfbaf11..e1320eea46e9 100644
>>> --- a/fs/ext4/mballoc.c
>>> +++ b/fs/ext4/mballoc.c
>>> @@ -3953,6 +3953,80 @@ void ext4_exit_mballoc(void)
>>> ext4_groupinfo_destroy_slabs();
>>> }
>>>
>>> +static int
>>> +ext4_mb_mark_context(struct super_block *sb, bool state, ext4_group_t group,
>>> + ext4_grpblk_t blkoff, ext4_grpblk_t len)
>>
>>
>> ext4_grpblk_t is defined as int.
>> /* data type for block offset of block group */
>> typedef int ext4_grpblk_t;
>>
>> I think len should be unsigned int (u32) here.
>>
> Hi Ritesh, thanks for reply and a lot suggestions to this patch and other
> patches in this series.
> I define len as ext4_grpblk_t as I think ext4_grpblk_t is supposed to fit
> block or cluster number of single group.
>

At different places the use of datatype for no. of blocks/clusters within
a group gets very confusing :(

However, IMO ext4_grpblk_t should be fine for using len argument here.
I did respond about that while reviewing in some later patches [1]

[1]: https://lore.kernel.org/linux-ext4/[email protected]/

So, I don't think we need any changes to this patch.
Reviewed-by: Ritesh Harjani (IBM) <[email protected]>

Also overall the series looks good. There are just some minor
changes suggested in 1st patch and some commit msg updates suggested for
other changes. If you send a v8, then I think that looks good to be
picked up :)

Thanks a lot for working on it & the suggested changes!

-ritesh