2023-04-17 03:05:25

by Kemeng Shi

[permalink] [raw]
Subject: [PATCH v3 15/19] ext4: call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used

call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used to:
1. remove repeat code to normally update bitmap and group descriptor
on disk.
2. call ext4_mb_mark_group_bb instead of only setting bits in block bitmap
to fix the bitmap. Function ext4_mb_mark_group_bb will also update
checksum of bitmap and other counter along with the bit change to keep
the cosistent with bit change or block bitmap will be marked corrupted as
checksum of bitmap is in inconsistent state.

Signed-off-by: Kemeng Shi <[email protected]>
---
fs/ext4/mballoc.c | 90 +++++++++++++----------------------------------
1 file changed, 24 insertions(+), 66 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c3e620f6eded..bd440614db76 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3846,9 +3846,12 @@ static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle, unsigned int reserv_clstrs)
{
- struct buffer_head *bitmap_bh = NULL;
+ struct ext4_mark_context mc = {
+ .handle = handle,
+ .sb = ac->ac_sb,
+ .state = 1,
+ };
struct ext4_group_desc *gdp;
- struct buffer_head *gdp_bh;
struct ext4_sb_info *sbi;
struct super_block *sb;
ext4_fsblk_t block;
@@ -3860,32 +3863,13 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
sb = ac->ac_sb;
sbi = EXT4_SB(sb);

- bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
- if (IS_ERR(bitmap_bh)) {
- return PTR_ERR(bitmap_bh);
- }
-
- BUFFER_TRACE(bitmap_bh, "getting write access");
- err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
- EXT4_JTR_NONE);
- if (err)
- goto out_err;
-
- err = -EIO;
- gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
+ gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL);
if (!gdp)
- goto out_err;
-
+ return -EIO;
ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
ext4_free_group_clusters(sb, gdp));

- BUFFER_TRACE(gdp_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
- if (err)
- goto out_err;
-
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
@@ -3894,41 +3878,30 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
* Fix the bitmap and return EFSCORRUPTED
* We leak some of the blocks here.
*/
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- ac->ac_b_ex.fe_len);
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+ err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
+ ac->ac_b_ex.fe_start,
+ ac->ac_b_ex.fe_len,
+ 0);
if (!err)
err = -EFSCORRUPTED;
- goto out_err;
+ return err;
}

- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
#ifdef AGGRESSIVE_CHECK
- {
- int i;
- for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
- BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
- bitmap_bh->b_data));
- }
- }
+ err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
+ ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
+ EXT4_MB_BITMAP_MARKED_CHECK);
+#else
+ err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
+ ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
+ 0);
#endif
- mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- ac->ac_b_ex.fe_len);
- if (ext4_has_group_desc_csum(sb) &&
- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- ext4_free_group_clusters_set(sb, gdp,
- ext4_free_clusters_after_init(sb,
- ac->ac_b_ex.fe_group, gdp));
- }
- len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
- ext4_free_group_clusters_set(sb, gdp, len);
- ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
- ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
+ if (err && mc.changed == 0)
+ return err;

- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+#ifdef AGGRESSIVE_CHECK
+ BUG_ON(mc.changed != ac->ac_b_ex.fe_len);
+#endif
percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
/*
* Now reduce the dirty block count also. Should not go negative
@@ -3938,21 +3911,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
reserv_clstrs);

- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi,
- ac->ac_b_ex.fe_group);
- atomic64_sub(ac->ac_b_ex.fe_len,
- &sbi_array_rcu_deref(sbi, s_flex_groups,
- flex_group)->free_clusters);
- }
-
- err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
- if (err)
- goto out_err;
- err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
-
-out_err:
- brelse(bitmap_bh);
return err;
}

--
2.30.0


2023-05-14 09:49:47

by Ojaswin Mujoo

[permalink] [raw]
Subject: Re: [PATCH v3 15/19] ext4: call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used

On Mon, Apr 17, 2023 at 07:06:13PM +0800, Kemeng Shi wrote:
> call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used to:
> 1. remove repeat code to normally update bitmap and group descriptor
> on disk.
> 2. call ext4_mb_mark_group_bb instead of only setting bits in block bitmap
> to fix the bitmap. Function ext4_mb_mark_group_bb will also update
> checksum of bitmap and other counter along with the bit change to keep
> the cosistent with bit change or block bitmap will be marked corrupted as
> checksum of bitmap is in inconsistent state.
>
> Signed-off-by: Kemeng Shi <[email protected]>

Looks good, feel free to add:

Reviewed-by: Ojaswin Mujoo <[email protected]>

Just a minor suggestion below:
> ---
> fs/ext4/mballoc.c | 90 +++++++++++++----------------------------------
> 1 file changed, 24 insertions(+), 66 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index c3e620f6eded..bd440614db76 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3846,9 +3846,12 @@ static noinline_for_stack int
> ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> handle_t *handle, unsigned int reserv_clstrs)
> {
> - struct buffer_head *bitmap_bh = NULL;
> + struct ext4_mark_context mc = {
> + .handle = handle,
> + .sb = ac->ac_sb,
> + .state = 1,
> + };
> struct ext4_group_desc *gdp;
> - struct buffer_head *gdp_bh;
> struct ext4_sb_info *sbi;
> struct super_block *sb;
> ext4_fsblk_t block;
> @@ -3860,32 +3863,13 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> sb = ac->ac_sb;
> sbi = EXT4_SB(sb);
>
> - bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
> - if (IS_ERR(bitmap_bh)) {
> - return PTR_ERR(bitmap_bh);
> - }
> -
> - BUFFER_TRACE(bitmap_bh, "getting write access");
> - err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
> - EXT4_JTR_NONE);
> - if (err)
> - goto out_err;
> -
> - err = -EIO;
> - gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
> + gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL);
> if (!gdp)
> - goto out_err;
> -
> + return -EIO;
> ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
> ext4_free_group_clusters(sb, gdp));
>
> - BUFFER_TRACE(gdp_bh, "get_write_access");
> - err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
> - if (err)
> - goto out_err;
> -
> block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
> -
> len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
> if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
> ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
> @@ -3894,41 +3878,30 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> * Fix the bitmap and return EFSCORRUPTED
> * We leak some of the blocks here.
> */
> - ext4_lock_group(sb, ac->ac_b_ex.fe_group);
> - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
> - ac->ac_b_ex.fe_len);
> - ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
> - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
> + ac->ac_b_ex.fe_start,
> + ac->ac_b_ex.fe_len,
> + 0);
> if (!err)
> err = -EFSCORRUPTED;
> - goto out_err;
> + return err;
> }
>
> - ext4_lock_group(sb, ac->ac_b_ex.fe_group);
> #ifdef AGGRESSIVE_CHECK
> - {
> - int i;
> - for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
> - BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
> - bitmap_bh->b_data));
> - }
> - }
> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
> + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
> + EXT4_MB_BITMAP_MARKED_CHECK);
> +#else
> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
> + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
> + 0);
> #endif

I think, the refactoring the AGGRESSIVE_CHECK as follows makes the
intent more obvious and easier to read.

#ifdef AGGRESSIVE_CHECK

flags |= EXT4_MB_BITMAP_MARKED_CHECK;

#endif

err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
flags);

Regards,
ojaswin

> - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
> - ac->ac_b_ex.fe_len);
> - if (ext4_has_group_desc_csum(sb) &&
> - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
> - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
> - ext4_free_group_clusters_set(sb, gdp,
> - ext4_free_clusters_after_init(sb,
> - ac->ac_b_ex.fe_group, gdp));
> - }
> - len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
> - ext4_free_group_clusters_set(sb, gdp, len);
> - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
> - ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
> + if (err && mc.changed == 0)
> + return err;
>
> - ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
> +#ifdef AGGRESSIVE_CHECK
> + BUG_ON(mc.changed != ac->ac_b_ex.fe_len);
> +#endif
> percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
> /*
> * Now reduce the dirty block count also. Should not go negative
> @@ -3938,21 +3911,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
> percpu_counter_sub(&sbi->s_dirtyclusters_counter,
> reserv_clstrs);
>
> - if (sbi->s_log_groups_per_flex) {
> - ext4_group_t flex_group = ext4_flex_group(sbi,
> - ac->ac_b_ex.fe_group);
> - atomic64_sub(ac->ac_b_ex.fe_len,
> - &sbi_array_rcu_deref(sbi, s_flex_groups,
> - flex_group)->free_clusters);
> - }
> -
> - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
> - if (err)
> - goto out_err;
> - err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
> -
> -out_err:
> - brelse(bitmap_bh);
> return err;
> }
>
> --
> 2.30.0
>

2023-05-14 12:07:53

by Kemeng Shi

[permalink] [raw]
Subject: Re: [PATCH v3 15/19] ext4: call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used



on 5/14/2023 5:37 PM, Ojaswin Mujoo wrote:
> On Mon, Apr 17, 2023 at 07:06:13PM +0800, Kemeng Shi wrote:
>> call ext4_mb_mark_group_bb in ext4_mb_mark_diskspace_used to:
>> 1. remove repeat code to normally update bitmap and group descriptor
>> on disk.
>> 2. call ext4_mb_mark_group_bb instead of only setting bits in block bitmap
>> to fix the bitmap. Function ext4_mb_mark_group_bb will also update
>> checksum of bitmap and other counter along with the bit change to keep
>> the cosistent with bit change or block bitmap will be marked corrupted as
>> checksum of bitmap is in inconsistent state.
>>
>> Signed-off-by: Kemeng Shi <[email protected]>
>
> Looks good, feel free to add:
>
> Reviewed-by: Ojaswin Mujoo <[email protected]>
>
> Just a minor suggestion below:
>> ---
>> fs/ext4/mballoc.c | 90 +++++++++++++----------------------------------
>> 1 file changed, 24 insertions(+), 66 deletions(-)
>>
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index c3e620f6eded..bd440614db76 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -3846,9 +3846,12 @@ static noinline_for_stack int
>> ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
>> handle_t *handle, unsigned int reserv_clstrs)
>> {
>> - struct buffer_head *bitmap_bh = NULL;
>> + struct ext4_mark_context mc = {
>> + .handle = handle,
>> + .sb = ac->ac_sb,
>> + .state = 1,
>> + };
>> struct ext4_group_desc *gdp;
>> - struct buffer_head *gdp_bh;
>> struct ext4_sb_info *sbi;
>> struct super_block *sb;
>> ext4_fsblk_t block;
>> @@ -3860,32 +3863,13 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
>> sb = ac->ac_sb;
>> sbi = EXT4_SB(sb);
>>
>> - bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
>> - if (IS_ERR(bitmap_bh)) {
>> - return PTR_ERR(bitmap_bh);
>> - }
>> -
>> - BUFFER_TRACE(bitmap_bh, "getting write access");
>> - err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
>> - EXT4_JTR_NONE);
>> - if (err)
>> - goto out_err;
>> -
>> - err = -EIO;
>> - gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
>> + gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, NULL);
>> if (!gdp)
>> - goto out_err;
>> -
>> + return -EIO;
>> ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
>> ext4_free_group_clusters(sb, gdp));
>>
>> - BUFFER_TRACE(gdp_bh, "get_write_access");
>> - err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
>> - if (err)
>> - goto out_err;
>> -
>> block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
>> -
>> len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
>> if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
>> ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
>> @@ -3894,41 +3878,30 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
>> * Fix the bitmap and return EFSCORRUPTED
>> * We leak some of the blocks here.
>> */
>> - ext4_lock_group(sb, ac->ac_b_ex.fe_group);
>> - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
>> - ac->ac_b_ex.fe_len);
>> - ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
>> - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
>> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
>> + ac->ac_b_ex.fe_start,
>> + ac->ac_b_ex.fe_len,
>> + 0);
>> if (!err)
>> err = -EFSCORRUPTED;
>> - goto out_err;
>> + return err;
>> }
>>
>> - ext4_lock_group(sb, ac->ac_b_ex.fe_group);
>> #ifdef AGGRESSIVE_CHECK
>> - {
>> - int i;
>> - for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
>> - BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
>> - bitmap_bh->b_data));
>> - }
>> - }
>> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
>> + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
>> + EXT4_MB_BITMAP_MARKED_CHECK);
>> +#else
>> + err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
>> + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
>> + 0);
>> #endif
>
> I think, the refactoring the AGGRESSIVE_CHECK as follows makes the
> intent more obvious and easier to read.
>
> #ifdef AGGRESSIVE_CHECK
>
> flags |= EXT4_MB_BITMAP_MARKED_CHECK;
>
> #endif
>
> err = ext4_mb_mark_group_bb(&mc, ac->ac_b_ex.fe_group,
> ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len,
> flags);
Although this will add a new flags variable declartion, the AGGRESSIVE_CHECK code
looks much better. Thanks for the suggestion, I will refactoring AGGRESSIVE_CHECK
in this way in this patch and patch 16.

> Regards,
> ojaswin
>
>> - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
>> - ac->ac_b_ex.fe_len);
>> - if (ext4_has_group_desc_csum(sb) &&
>> - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
>> - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
>> - ext4_free_group_clusters_set(sb, gdp,
>> - ext4_free_clusters_after_init(sb,
>> - ac->ac_b_ex.fe_group, gdp));
>> - }
>> - len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
>> - ext4_free_group_clusters_set(sb, gdp, len);
>> - ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
>> - ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
>> + if (err && mc.changed == 0)
>> + return err;
>>
>> - ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
>> +#ifdef AGGRESSIVE_CHECK
>> + BUG_ON(mc.changed != ac->ac_b_ex.fe_len);
>> +#endif
>> percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
>> /*
>> * Now reduce the dirty block count also. Should not go negative
>> @@ -3938,21 +3911,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
>> percpu_counter_sub(&sbi->s_dirtyclusters_counter,
>> reserv_clstrs);
>>
>> - if (sbi->s_log_groups_per_flex) {
>> - ext4_group_t flex_group = ext4_flex_group(sbi,
>> - ac->ac_b_ex.fe_group);
>> - atomic64_sub(ac->ac_b_ex.fe_len,
>> - &sbi_array_rcu_deref(sbi, s_flex_groups,
>> - flex_group)->free_clusters);
>> - }
>> -
>> - err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
>> - if (err)
>> - goto out_err;
>> - err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
>> -
>> -out_err:
>> - brelse(bitmap_bh);
>> return err;
>> }
>>
>> --
>> 2.30.0
>>
>

--
Best wishes
Kemeng Shi