2022-12-19 13:14:51

by Li Dongyang

[permalink] [raw]
Subject: [PATCH] e2fsck: optimize clone_file on large devices

When cloning multiply-claimed blocks for an inode,
clone_file() uses ext2fs_block_iterate3() to iterate
every block calling clone_file_block().
clone_file_block() calls check_if_fs_cluster(), even
the block is not on the block_dup_map, which could take
a long time on a large device.

Only check if it's metadata block when we need to clone
it.

Test block_metadata_map in check_if_fs_block()
and check_if_fs_cluster(), so we don't need to go over
each bg every time. The metadata blocks are already
marked in the bitmap.

Before this patch on a 500TB device with 3 files having
3 multiply-claimed blocks between them, pass1b is stuck
for more than 48 hours without progressing,
before e2fsck was terminated.
After this patch pass1b could finish in 180 seconds.

Signed-off-by: Li Dongyang <[email protected]>
---
e2fsck/pass1b.c | 73 ++++++-------------------------------
tests/f_dup_resize/expect.1 | 3 +-
2 files changed, 13 insertions(+), 63 deletions(-)

diff --git a/e2fsck/pass1b.c b/e2fsck/pass1b.c
index 92c746c1d..950af5be0 100644
--- a/e2fsck/pass1b.c
+++ b/e2fsck/pass1b.c
@@ -90,7 +90,7 @@ static void delete_file(e2fsck_t ctx, ext2_ino_t ino,
struct dup_inode *dp, char *block_buf);
static errcode_t clone_file(e2fsck_t ctx, ext2_ino_t ino,
struct dup_inode *dp, char* block_buf);
-static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block);
+static int check_if_fs_block(e2fsck_t ctx, blk64_t block);
static int check_if_fs_cluster(e2fsck_t ctx, blk64_t cluster);

static void pass1b(e2fsck_t ctx, char *block_buf);
@@ -815,8 +815,6 @@ static int clone_file_block(ext2_filsys fs,
should_write = 0;

c = EXT2FS_B2C(fs, blockcnt);
- if (check_if_fs_cluster(ctx, EXT2FS_B2C(fs, *block_nr)))
- is_meta = 1;

if (c == cs->dup_cluster && cs->alloc_block) {
new_block = cs->alloc_block;
@@ -894,6 +892,8 @@ cluster_alloc_ok:
return BLOCK_ABORT;
}
}
+ if (check_if_fs_cluster(ctx, EXT2FS_B2C(fs, *block_nr)))
+ is_meta = 1;
cs->save_dup_cluster = (is_meta ? NULL : p);
cs->save_blocknr = *block_nr;
*block_nr = new_block;
@@ -1021,37 +1021,9 @@ errout:
* This routine returns 1 if a block overlaps with one of the superblocks,
* group descriptors, inode bitmaps, or block bitmaps.
*/
-static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block)
+static int check_if_fs_block(e2fsck_t ctx, blk64_t block)
{
- ext2_filsys fs = ctx->fs;
- blk64_t first_block;
- dgrp_t i;
-
- first_block = fs->super->s_first_data_block;
- for (i = 0; i < fs->group_desc_count; i++) {
-
- /* Check superblocks/block group descriptors */
- if (ext2fs_bg_has_super(fs, i)) {
- if (test_block >= first_block &&
- (test_block <= first_block + fs->desc_blocks))
- return 1;
- }
-
- /* Check the inode table */
- if ((ext2fs_inode_table_loc(fs, i)) &&
- (test_block >= ext2fs_inode_table_loc(fs, i)) &&
- (test_block < (ext2fs_inode_table_loc(fs, i) +
- fs->inode_blocks_per_group)))
- return 1;
-
- /* Check the bitmap blocks */
- if ((test_block == ext2fs_block_bitmap_loc(fs, i)) ||
- (test_block == ext2fs_inode_bitmap_loc(fs, i)))
- return 1;
-
- first_block += fs->super->s_blocks_per_group;
- }
- return 0;
+ return ext2fs_test_block_bitmap2(ctx->block_metadata_map, block);
}

/*
@@ -1061,37 +1033,14 @@ static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block)
static int check_if_fs_cluster(e2fsck_t ctx, blk64_t cluster)
{
ext2_filsys fs = ctx->fs;
- blk64_t first_block;
- dgrp_t i;
-
- first_block = fs->super->s_first_data_block;
- for (i = 0; i < fs->group_desc_count; i++) {
-
- /* Check superblocks/block group descriptors */
- if (ext2fs_bg_has_super(fs, i)) {
- if (cluster >= EXT2FS_B2C(fs, first_block) &&
- (cluster <= EXT2FS_B2C(fs, first_block +
- fs->desc_blocks)))
- return 1;
- }
+ blk64_t block = EXT2FS_C2B(fs, cluster);
+ int i;

- /* Check the inode table */
- if ((ext2fs_inode_table_loc(fs, i)) &&
- (cluster >= EXT2FS_B2C(fs,
- ext2fs_inode_table_loc(fs, i))) &&
- (cluster <= EXT2FS_B2C(fs,
- ext2fs_inode_table_loc(fs, i) +
- fs->inode_blocks_per_group - 1)))
+ for (i = 0; i < EXT2FS_CLUSTER_RATIO(fs); i++) {
+ if (ext2fs_test_block_bitmap2(ctx->block_metadata_map,
+ block + i))
return 1;
-
- /* Check the bitmap blocks */
- if ((cluster == EXT2FS_B2C(fs,
- ext2fs_block_bitmap_loc(fs, i))) ||
- (cluster == EXT2FS_B2C(fs,
- ext2fs_inode_bitmap_loc(fs, i))))
- return 1;
-
- first_block += fs->super->s_blocks_per_group;
}
+
return 0;
}
diff --git a/tests/f_dup_resize/expect.1 b/tests/f_dup_resize/expect.1
index e0d869795..8a2764d32 100644
--- a/tests/f_dup_resize/expect.1
+++ b/tests/f_dup_resize/expect.1
@@ -11,7 +11,8 @@ Pass 1D: Reconciling multiply-claimed blocks
(There are 1 inodes containing multiply-claimed blocks.)

File /debugfs (inode #12, mod time Mon Apr 11 00:00:00 2005)
- has 4 multiply-claimed block(s), shared with 1 file(s):
+ has 4 multiply-claimed block(s), shared with 2 file(s):
+ <filesystem metadata>
<The group descriptor inode> (inode #7, mod time Mon Apr 11 06:13:20 2005)
Clone multiply-claimed blocks? yes

--
2.37.3


2023-01-12 08:34:19

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] e2fsck: optimize clone_file on large devices

On Dec 19, 2022, at 6:05 AM, Li Dongyang <[email protected]> wrote:
>
> When cloning multiply-claimed blocks for an inode,
> clone_file() uses ext2fs_block_iterate3() to iterate
> every block calling clone_file_block().
> clone_file_block() calls check_if_fs_cluster(), even
> the block is not on the block_dup_map, which could take
> a long time on a large device.
>
> Only check if it's metadata block when we need to clone
> it.
>
> Test block_metadata_map in check_if_fs_block()
> and check_if_fs_cluster(), so we don't need to go over
> each bg every time. The metadata blocks are already
> marked in the bitmap.
>
> Before this patch on a 500TB device with 3 files having
> 3 multiply-claimed blocks between them, pass1b is stuck
> for more than 48 hours without progressing,
> before e2fsck was terminated.
> After this patch pass1b could finish in 180 seconds.
>
> Signed-off-by: Li Dongyang <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> e2fsck/pass1b.c | 73 ++++++-------------------------------
> tests/f_dup_resize/expect.1 | 3 +-
> 2 files changed, 13 insertions(+), 63 deletions(-)
>
> diff --git a/e2fsck/pass1b.c b/e2fsck/pass1b.c
> index 92c746c1d..950af5be0 100644
> --- a/e2fsck/pass1b.c
> +++ b/e2fsck/pass1b.c
> @@ -90,7 +90,7 @@ static void delete_file(e2fsck_t ctx, ext2_ino_t ino,
> struct dup_inode *dp, char *block_buf);
> static errcode_t clone_file(e2fsck_t ctx, ext2_ino_t ino,
> struct dup_inode *dp, char* block_buf);
> -static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block);
> +static int check_if_fs_block(e2fsck_t ctx, blk64_t block);
> static int check_if_fs_cluster(e2fsck_t ctx, blk64_t cluster);
>
> static void pass1b(e2fsck_t ctx, char *block_buf);
> @@ -815,8 +815,6 @@ static int clone_file_block(ext2_filsys fs,
> should_write = 0;
>
> c = EXT2FS_B2C(fs, blockcnt);
> - if (check_if_fs_cluster(ctx, EXT2FS_B2C(fs, *block_nr)))
> - is_meta = 1;
>
> if (c == cs->dup_cluster && cs->alloc_block) {
> new_block = cs->alloc_block;
> @@ -894,6 +892,8 @@ cluster_alloc_ok:
> return BLOCK_ABORT;
> }
> }
> + if (check_if_fs_cluster(ctx, EXT2FS_B2C(fs, *block_nr)))
> + is_meta = 1;
> cs->save_dup_cluster = (is_meta ? NULL : p);
> cs->save_blocknr = *block_nr;
> *block_nr = new_block;
> @@ -1021,37 +1021,9 @@ errout:
> * This routine returns 1 if a block overlaps with one of the superblocks,
> * group descriptors, inode bitmaps, or block bitmaps.
> */
> -static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block)
> +static int check_if_fs_block(e2fsck_t ctx, blk64_t block)
> {
> - ext2_filsys fs = ctx->fs;
> - blk64_t first_block;
> - dgrp_t i;
> -
> - first_block = fs->super->s_first_data_block;
> - for (i = 0; i < fs->group_desc_count; i++) {
> -
> - /* Check superblocks/block group descriptors */
> - if (ext2fs_bg_has_super(fs, i)) {
> - if (test_block >= first_block &&
> - (test_block <= first_block + fs->desc_blocks))
> - return 1;
> - }
> -
> - /* Check the inode table */
> - if ((ext2fs_inode_table_loc(fs, i)) &&
> - (test_block >= ext2fs_inode_table_loc(fs, i)) &&
> - (test_block < (ext2fs_inode_table_loc(fs, i) +
> - fs->inode_blocks_per_group)))
> - return 1;
> -
> - /* Check the bitmap blocks */
> - if ((test_block == ext2fs_block_bitmap_loc(fs, i)) ||
> - (test_block == ext2fs_inode_bitmap_loc(fs, i)))
> - return 1;
> -
> - first_block += fs->super->s_blocks_per_group;
> - }
> - return 0;
> + return ext2fs_test_block_bitmap2(ctx->block_metadata_map, block);
> }
>
> /*
> @@ -1061,37 +1033,14 @@ static int check_if_fs_block(e2fsck_t ctx, blk64_t test_block)
> static int check_if_fs_cluster(e2fsck_t ctx, blk64_t cluster)
> {
> ext2_filsys fs = ctx->fs;
> - blk64_t first_block;
> - dgrp_t i;
> -
> - first_block = fs->super->s_first_data_block;
> - for (i = 0; i < fs->group_desc_count; i++) {
> -
> - /* Check superblocks/block group descriptors */
> - if (ext2fs_bg_has_super(fs, i)) {
> - if (cluster >= EXT2FS_B2C(fs, first_block) &&
> - (cluster <= EXT2FS_B2C(fs, first_block +
> - fs->desc_blocks)))
> - return 1;
> - }
> + blk64_t block = EXT2FS_C2B(fs, cluster);
> + int i;
>
> - /* Check the inode table */
> - if ((ext2fs_inode_table_loc(fs, i)) &&
> - (cluster >= EXT2FS_B2C(fs,
> - ext2fs_inode_table_loc(fs, i))) &&
> - (cluster <= EXT2FS_B2C(fs,
> - ext2fs_inode_table_loc(fs, i) +
> - fs->inode_blocks_per_group - 1)))
> + for (i = 0; i < EXT2FS_CLUSTER_RATIO(fs); i++) {
> + if (ext2fs_test_block_bitmap2(ctx->block_metadata_map,
> + block + i))
> return 1;
> -
> - /* Check the bitmap blocks */
> - if ((cluster == EXT2FS_B2C(fs,
> - ext2fs_block_bitmap_loc(fs, i))) ||
> - (cluster == EXT2FS_B2C(fs,
> - ext2fs_inode_bitmap_loc(fs, i))))
> - return 1;
> -
> - first_block += fs->super->s_blocks_per_group;
> }
> +
> return 0;
> }
> diff --git a/tests/f_dup_resize/expect.1 b/tests/f_dup_resize/expect.1
> index e0d869795..8a2764d32 100644
> --- a/tests/f_dup_resize/expect.1
> +++ b/tests/f_dup_resize/expect.1
> @@ -11,7 +11,8 @@ Pass 1D: Reconciling multiply-claimed blocks
> (There are 1 inodes containing multiply-claimed blocks.)
>
> File /debugfs (inode #12, mod time Mon Apr 11 00:00:00 2005)
> - has 4 multiply-claimed block(s), shared with 1 file(s):
> + has 4 multiply-claimed block(s), shared with 2 file(s):
> + <filesystem metadata>
> <The group descriptor inode> (inode #7, mod time Mon Apr 11 06:13:20 2005)
> Clone multiply-claimed blocks? yes
>
> --
> 2.37.3
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP

2023-01-26 03:51:13

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] e2fsck: optimize clone_file on large devices

On Tue, 20 Dec 2022 00:05:44 +1100, Li Dongyang wrote:
> When cloning multiply-claimed blocks for an inode,
> clone_file() uses ext2fs_block_iterate3() to iterate
> every block calling clone_file_block().
> clone_file_block() calls check_if_fs_cluster(), even
> the block is not on the block_dup_map, which could take
> a long time on a large device.
>
> [...]

Applied, thanks!

[1/1] e2fsck: optimize clone_file on large devices
commit: 6cae615a47dfe37fe5fd096accb82579813a6366

Best regards,
--
Theodore Ts'o <[email protected]>