2023-06-29 06:40:24

by Kemeng Shi

[permalink] [raw]
Subject: [PATCH v5 1/8] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

There are several reasons to add a general function to update the block
bitmap and group descriptor on disk:
1. Pair the behavior of alloc/free bits. For example,
ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
2. Remove repeated code to read from disk, update and write back to disk.
3. Reduce the number of mocks that future unit tests need in order to
intercept the real IO that updates structures on disk.

Signed-off-by: Kemeng Shi <[email protected]>
Reviewed-by: Ojaswin Mujoo <[email protected]>
---
fs/ext4/mballoc.c | 157 +++++++++++++++++++++++++---------------------
1 file changed, 87 insertions(+), 70 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a2475b8c9fb5..58864a9116c0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3948,6 +3948,86 @@ void ext4_exit_mballoc(void)
ext4_groupinfo_destroy_slabs();
}

+struct ext4_mark_context {
+ struct super_block *sb;
+ int state;
+};
+
+static int
+ext4_mb_mark_group_bb(struct ext4_mark_context *mc, ext4_group_t group,
+ ext4_grpblk_t blkoff, ext4_grpblk_t len)
+{
+ struct super_block *sb = mc->sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext4_group_desc *gdp;
+ struct buffer_head *gdp_bh;
+ int err;
+ unsigned int i, already, changed;
+
+ bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (IS_ERR(bitmap_bh))
+ return PTR_ERR(bitmap_bh);
+
+ err = -EIO;
+ gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+ if (!gdp)
+ goto out_err;
+
+ ext4_lock_group(sb, group);
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_clusters_after_init(sb, group, gdp));
+ }
+
+ already = 0;
+ for (i = 0; i < len; i++)
+ if (mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
+ mc->state)
+ already++;
+ changed = len - already;
+
+ if (mc->state) {
+ mb_set_bits(bitmap_bh->b_data, blkoff, len);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_group_clusters(sb, gdp) - changed);
+ } else {
+ mb_clear_bits(bitmap_bh->b_data, blkoff, len);
+ ext4_free_group_clusters_set(sb, gdp,
+ ext4_free_group_clusters(sb, gdp) + changed);
+ }
+
+ ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
+ ext4_group_desc_csum_set(sb, group, gdp);
+ ext4_unlock_group(sb, group);
+
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi, group);
+ struct flex_groups *fg = sbi_array_rcu_deref(sbi,
+ s_flex_groups, flex_group);
+
+ if (mc->state)
+ atomic64_sub(changed, &fg->free_clusters);
+ else
+ atomic64_add(changed, &fg->free_clusters);
+ }
+
+ err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+ if (err)
+ goto out_err;
+ err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+ if (err)
+ goto out_err;
+
+ sync_dirty_buffer(bitmap_bh);
+ sync_dirty_buffer(gdp_bh);
+
+out_err:
+ brelse(bitmap_bh);
+ return err;
+}

/*
* Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
@@ -4074,15 +4154,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state)
{
- struct buffer_head *bitmap_bh = NULL;
- struct ext4_group_desc *gdp;
- struct buffer_head *gdp_bh;
+ struct ext4_mark_context mc = {
+ .sb = sb,
+ .state = state,
+ };
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i, err;
- int already;
- unsigned int clen, clen_changed, thisgrp_len;
+ int err;
+ unsigned int clen, thisgrp_len;

while (len > 0) {
ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
@@ -4103,80 +4183,17 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
ext4_error(sb, "Marking blocks in system zone - "
"Block = %llu, len = %u",
block, thisgrp_len);
- bitmap_bh = NULL;
break;
}

- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
- bitmap_bh = NULL;
- break;
- }
-
- err = -EIO;
- gdp = ext4_get_group_desc(sb, group, &gdp_bh);
- if (!gdp)
- break;
-
- ext4_lock_group(sb, group);
- already = 0;
- for (i = 0; i < clen; i++)
- if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
- !state)
- already++;
-
- clen_changed = clen - already;
- if (state)
- mb_set_bits(bitmap_bh->b_data, blkoff, clen);
- else
- mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
- if (ext4_has_group_desc_csum(sb) &&
- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb, group, gdp));
- }
- if (state)
- clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
- else
- clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
-
- ext4_free_group_clusters_set(sb, gdp, clen);
- ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
- ext4_group_desc_csum_set(sb, group, gdp);
-
- ext4_unlock_group(sb, group);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, group);
- struct flex_groups *fg = sbi_array_rcu_deref(sbi,
- s_flex_groups, flex_group);
-
- if (state)
- atomic64_sub(clen_changed, &fg->free_clusters);
- else
- atomic64_add(clen_changed, &fg->free_clusters);
-
- }
-
- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
- if (err)
- break;
- sync_dirty_buffer(bitmap_bh);
- err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
- sync_dirty_buffer(gdp_bh);
+ err = ext4_mb_mark_group_bb(&mc, group, blkoff, clen);
if (err)
break;

block += thisgrp_len;
len -= thisgrp_len;
- brelse(bitmap_bh);
BUG_ON(len < 0);
}
-
- if (err)
- brelse(bitmap_bh);
}

/*
--
2.30.0



2023-07-22 06:50:40

by Ritesh Harjani

[permalink] [raw]
Subject: Re: [PATCH v5 1/8] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb

Kemeng Shi <[email protected]> writes:

> There are several reasons to add a general function to update block
> bitmap and group descriptor on disk:
> 1. pair behavior of alloc/free bits. For example,
> ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
> in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
> 2. remove repeat code to read from disk, update and write back to disk.
> 3. reduce future unit test mocks to catch real IO to update structure
> on disk.

Thanks for the cleanup, and sorry that I am only now starting to review
this series. I do have some review comments, mostly to understand the
patch series a bit better.

>
> Signed-off-by: Kemeng Shi <[email protected]>
> Reviewed-by: Ojaswin Mujoo <[email protected]>
> ---
> fs/ext4/mballoc.c | 157 +++++++++++++++++++++++++---------------------
> 1 file changed, 87 insertions(+), 70 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index a2475b8c9fb5..58864a9116c0 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3948,6 +3948,86 @@ void ext4_exit_mballoc(void)
> ext4_groupinfo_destroy_slabs();
> }
>
> +struct ext4_mark_context {
> + struct super_block *sb;
> + int state;
> +};

The intention behind this structure is not totally clear from the above,
since it lacks any comments.

Can you please help me understand why we need this?
I still don't know whether we require this structure and what its
purpose is. Is it only for reducing the number of variables being passed?

Let me do more reading...

...On more reading, I was previously considering renaming it to something
like ext4_mb_mark_context, but then I realized that its naming is
similar to that of ext4_allocation_context. So we may keep the naming
as is.

So, since this structure is presumably used for marking blk bits for
mballoc, why don't we pass useful information that is relevant to
this operation, like -

ext4_mark_context {
ext4_group_t mc_group; /* block group */
ext4_grpblk_t mc_clblk; /* block in cluster units */
ext4_grpblk_t mc_cllen; /* len in cluster units */
ext4_grpblk_t mc_clupdates; /* number of clusters marked/unmarked */
unsigned int mc_flags; /* flags ... */
bool mc_state; /* to set or unset state */
};

Maybe we can pass super_block and handle as arguments, as those don't
define the ext4_mark_context for mballoc.

Since this structure is not prepared at the beginning of any function, we
may need a prepare function for it. e.g.

static void ext4_mb_prepare_mark_context(&mc, ...)
static int ext4_mb_mark_context(sb, handle, &mc); (instead of ext4_mb_mark_group_bb())

Does this sound better to you? Thoughts?

Otherwise I think having a common function for mb_mark_context looks
like a nice cleanup.

-ritesh

> +
> +static int
> +ext4_mb_mark_group_bb(struct ext4_mark_context *mc, ext4_group_t group,
> + ext4_grpblk_t blkoff, ext4_grpblk_t len)
> +{
> + struct super_block *sb = mc->sb;
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> + struct buffer_head *bitmap_bh = NULL;
> + struct ext4_group_desc *gdp;
> + struct buffer_head *gdp_bh;
> + int err;
> + unsigned int i, already, changed;
> +
> + bitmap_bh = ext4_read_block_bitmap(sb, group);
> + if (IS_ERR(bitmap_bh))
> + return PTR_ERR(bitmap_bh);
> +
> + err = -EIO;
> + gdp = ext4_get_group_desc(sb, group, &gdp_bh);
> + if (!gdp)
> + goto out_err;
> +
> + ext4_lock_group(sb, group);
> + if (ext4_has_group_desc_csum(sb) &&
> + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_clusters_after_init(sb, group, gdp));
> + }
> +
> + already = 0;
> + for (i = 0; i < len; i++)
> + if (mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
> + mc->state)
> + already++;
> + changed = len - already;
> +
> + if (mc->state) {
> + mb_set_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) - changed);
> + } else {
> + mb_clear_bits(bitmap_bh->b_data, blkoff, len);
> + ext4_free_group_clusters_set(sb, gdp,
> + ext4_free_group_clusters(sb, gdp) + changed);
> + }
> +
> + ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> + ext4_group_desc_csum_set(sb, group, gdp);
> + ext4_unlock_group(sb, group);
> +
> + if (sbi->s_log_groups_per_flex) {
> + ext4_group_t flex_group = ext4_flex_group(sbi, group);
> + struct flex_groups *fg = sbi_array_rcu_deref(sbi,
> + s_flex_groups, flex_group);
> +
> + if (mc->state)
> + atomic64_sub(changed, &fg->free_clusters);
> + else
> + atomic64_add(changed, &fg->free_clusters);
> + }
> +
> + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
> + if (err)
> + goto out_err;
> + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
> + if (err)
> + goto out_err;
> +
> + sync_dirty_buffer(bitmap_bh);
> + sync_dirty_buffer(gdp_bh);
> +
> +out_err:
> + brelse(bitmap_bh);
> + return err;
> +}
>
> /*
> * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
> @@ -4074,15 +4154,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
> int len, int state)
> {
> - struct buffer_head *bitmap_bh = NULL;
> - struct ext4_group_desc *gdp;
> - struct buffer_head *gdp_bh;
> + struct ext4_mark_context mc = {
> + .sb = sb,
> + .state = state,
> + };
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> ext4_group_t group;
> ext4_grpblk_t blkoff;
> - int i, err;
> - int already;
> - unsigned int clen, clen_changed, thisgrp_len;
> + int err;
> + unsigned int clen, thisgrp_len;
>
> while (len > 0) {
> ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
> @@ -4103,80 +4183,17 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
> ext4_error(sb, "Marking blocks in system zone - "
> "Block = %llu, len = %u",
> block, thisgrp_len);
> - bitmap_bh = NULL;
> break;
> }
>
> - bitmap_bh = ext4_read_block_bitmap(sb, group);
> - if (IS_ERR(bitmap_bh)) {
> - err = PTR_ERR(bitmap_bh);
> - bitmap_bh = NULL;
> - break;
> - }
> -
> - err = -EIO;
> - gdp = ext4_get_group_desc(sb, group, &gdp_bh);
> - if (!gdp)
> - break;
> -
> - ext4_lock_group(sb, group);
> - already = 0;
> - for (i = 0; i < clen; i++)
> - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
> - !state)
> - already++;
> -
> - clen_changed = clen - already;
> - if (state)
> - mb_set_bits(bitmap_bh->b_data, blkoff, clen);
> - else
> - mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
> - if (ext4_has_group_desc_csum(sb) &&
> - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> - ext4_free_group_clusters_set(sb, gdp,
> - ext4_free_clusters_after_init(sb, group, gdp));
> - }
> - if (state)
> - clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
> - else
> - clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
> -
> - ext4_free_group_clusters_set(sb, gdp, clen);
> - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> - ext4_group_desc_csum_set(sb, group, gdp);
> -
> - ext4_unlock_group(sb, group);
> -
> - if (sbi->s_log_groups_per_flex) {
> - ext4_group_t flex_group = ext4_flex_group(sbi, group);
> - struct flex_groups *fg = sbi_array_rcu_deref(sbi,
> - s_flex_groups, flex_group);
> -
> - if (state)
> - atomic64_sub(clen_changed, &fg->free_clusters);
> - else
> - atomic64_add(clen_changed, &fg->free_clusters);
> -
> - }
> -
> - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
> - if (err)
> - break;
> - sync_dirty_buffer(bitmap_bh);
> - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
> - sync_dirty_buffer(gdp_bh);
> + err = ext4_mb_mark_group_bb(&mc, group, blkoff, clen);
> if (err)
> break;
>
> block += thisgrp_len;
> len -= thisgrp_len;
> - brelse(bitmap_bh);
> BUG_ON(len < 0);
> }
> -
> - if (err)
> - brelse(bitmap_bh);
> }
>
> /*
> --
> 2.30.0

2023-07-25 04:00:20

by Kemeng Shi

[permalink] [raw]
Subject: Re: [PATCH v5 1/8] ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb



on 7/22/2023 2:24 PM, Ritesh Harjani wrote:
> Kemeng Shi <[email protected]> writes:
>
>> There are several reasons to add a general function to update block
>> bitmap and group descriptor on disk:
>> 1. pair behavior of alloc/free bits. For example,
>> ext4_mb_new_blocks_simple will update free_clusters in struct flex_groups
>> in ext4_mb_mark_bb while ext4_free_blocks_simple forgets this.
>> 2. remove repeat code to read from disk, update and write back to disk.
>> 3. reduce future unit test mocks to catch real IO to update structure
>> on disk.
>
> Thanks for the cleanup and sorry that I am starting to review this
> series only now. However I do have some review comments to understand a
> bit more on the patch series.
>
>>
>> Signed-off-by: Kemeng Shi <[email protected]>
>> Reviewed-by: Ojaswin Mujoo <[email protected]>
>> ---
>> fs/ext4/mballoc.c | 157 +++++++++++++++++++++++++---------------------
>> 1 file changed, 87 insertions(+), 70 deletions(-)
>>
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index a2475b8c9fb5..58864a9116c0 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -3948,6 +3948,86 @@ void ext4_exit_mballoc(void)
>> ext4_groupinfo_destroy_slabs();
>> }
>>
>> +struct ext4_mark_context {
>> + struct super_block *sb;
>> + int state;
>> +};
>
> It's not totally clear the intention behind this structure from above
> since it lacking any comments.
>
> Can you please help me understand why do we need this.
> I still don't know whether we require this structure and what is it's
> purpose. Is it only for reducing the number of variable passing?
Exactly. It's only for reducing the number of variables being passed.
> Let me do more reading...
>
> ...On more reading, I was previous considering to rename it to something
> like ext4_mb_mark_context, but then I realized the naming of this is
> something similar to ext4_allocation_context. So we may keep the naming
> as is.
Exactly again. The ext4_mark_context is based on ext4_allocation_context.
> So since this structure, presumably, is used for marking blk bits for
> mballoc. Why don't we pass useful information which is relevant for
> this operation like -
>
> ext4_mark_context {
> ext4_group_t mc_group; /* block group */
> ext4_grpblk_t mc_clblk; /* block in cluster units */
> ext4_grpblk_t mc_cllen; /* len in cluster units */
> ext4_grpblk_t mc_clupdates; /* number of clusters marked/unmarked */
> unsigned int mc_flags; /* flags ... */
> bool mc_state; /* to set or unset state */
> };
>
> Maybe, super_block and handle we can pass as an argument as those doesn't
> define the ext4_mark_context for mballoc.
Actually, I tried to put the stable arguments needed by bit marking into
ext4_mark_context, so that ext4_mark_context can be initialized once and used
multiple times. For example, if there is a function that marks multiple bit
fragments, it will use ext4_mark_context as:
struct ext4_mark_context mc = {
/* initialization */
}
/* mark fragment1 */
ext4_mb_mark_group_bb(&mc, group1, blkoff1, len1);
/* mark fragment2 */
ext4_mb_mark_group_bb(&mc, group2, blkoff2, len2);
And I think these stable arguments match the meaning of "context": the
environment that bit marking needs in order to work :).

Putting the information relevant to bit marking into ext4_mark_context is
absolutely a great choice. I will arrange ext4_mark_context in this way if you
still prefer this.

> Since this structure is prepared not at the begining of any function, we
> may need a prepare function for it. e.g.
>
> static void ext4_mb_prepare_mark_context(&mc, ...)
> static int ext4_mb_mark_context(sb, handle, &mc); (instead of ext4_mb_mark_group_bb())
>
> Does this sounds better to you? Thoughts?
>
Yes, a prepare function is a great idea. I will add this in the next version.
> Otherwise I think having a common function for mb_mark_context looks
> like a nice cleanup.
>
Thanks! This means a lot to me!


--
Best wishes
Kemeng Shi