2017-02-13 17:30:59

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v2 0/4] Largedir support for e2fsprogs

The INCOMPAT_LARGEDIR feature allows larger directories to
be created, both with directory sizes over 2GB and a
maximum htree depth of 3 instead of the current limit of 2.
These features are needed in order to exceed the current
limit of approximately 10M entries in a single directory.

This is the second version of the patch set. Changes since v1:
* 4-patch series with cover letter created
* i_size_high is not allowed to be used for directories
without the LARGEDIR feature
* i_size_high checks are removed only in the patch that adds largedir
support.
* PR_2_DIR_ACL_ZERO renamed to PR_2_DIR_SIZE_HIGH_ZERO
* helper functions are added
* redundant check in update_parents() is deleted
* cleanup code is moved to first patch
* test expects wrong ref count after ln
* test manually expands a directory
* code style cleanup

Artem Blagodarenko (4):
e2fsprogs: supersede i_dir_acl with i_size_high for all cases
e2fsprogs: add support for 3-level htree
e2fsck: 3 level hash tree directory optimization
tests: 3 level hash tree test

debugfs/debugfs.c | 16 ++--
debugfs/htree.c | 3 +-
debugfs/set_fields.c | 1 -
e2fsck/e2fsck.h | 1 +
e2fsck/message.c | 4 +-
e2fsck/pass1.c | 5 +-
e2fsck/pass2.c | 77 +++++++++++++-----
e2fsck/problem.c | 6 +-
e2fsck/problem.h | 4 +-
e2fsck/rehash.c | 123 ++++++++++++++++++++++++------
ext2ed/doc/ext2ed-design.sgml | 2 +-
ext2ed/doc/ext2fs-overview.sgml | 2 +-
ext2ed/ext2.descriptors | 2 +-
lib/ext2fs/ext2_fs.h | 9 +-
lib/ext2fs/ext2fs.h | 26 ++++++-
lib/ext2fs/swapfs.c | 2 +-
misc/mke2fs.c | 3 +-
misc/tune2fs.c | 3 +-
po/at-expand.pl | 2 +-
tests/d_fallocate_blkmap/expect | 4 +-
tests/d_inline_dump/expect | 12 ++--
tests/d_special_files/expect | 10 +-
tests/f_badcluster/expect | 14 ++--
tests/f_convert_bmap/expect.1 | 2 +-
tests/f_convert_bmap_and_extent/expect.1 | 2 +-
tests/f_create_symlinks/expect | 8 +-
tests/f_large_dir/debugfs_script | 15 ++++
tests/f_large_dir/expect | 12 +++
tests/f_large_dir/name | 1 +
tests/f_large_dir/script | 28 +++++++
tests/f_recnect_bad/expect.1 | 2 +-
31 files changed, 294 insertions(+), 107 deletions(-)
create mode 100755 tests/f_large_dir/debugfs_script
create mode 100644 tests/f_large_dir/expect
create mode 100644 tests/f_large_dir/name
create mode 100644 tests/f_large_dir/script


2017-02-13 17:31:01

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v2 1/4] e2fsprogs: supersede i_dir_acl with i_size_high for all cases

From: Artem Blagodarenko <[email protected]>

This patch removes the i_dir_acl macro and its users.
The structure field is now accessed as i_size_high. This field
is useful for the largedir feature.

Signed-off-by: Alexey Lyashkov <[email protected]>
Signed-off-by: Artem Blagodarenko <[email protected]>
---
debugfs/debugfs.c | 16 +++++++---------
debugfs/set_fields.c | 1 -
e2fsck/message.c | 4 ++--
e2fsck/pass1.c | 2 +-
e2fsck/pass2.c | 6 +++---
e2fsck/problem.c | 6 +++---
e2fsck/problem.h | 4 ++--
ext2ed/doc/ext2ed-design.sgml | 2 +-
ext2ed/doc/ext2fs-overview.sgml | 2 +-
ext2ed/ext2.descriptors | 2 +-
lib/ext2fs/ext2_fs.h | 6 ++----
lib/ext2fs/swapfs.c | 2 +-
po/at-expand.pl | 2 +-
tests/d_fallocate_blkmap/expect | 4 ++--
tests/d_inline_dump/expect | 12 ++++++------
tests/d_special_files/expect | 10 +++++-----
tests/f_badcluster/expect | 14 +++++++-------
tests/f_convert_bmap/expect.1 | 2 +-
tests/f_convert_bmap_and_extent/expect.1 | 2 +-
tests/f_create_symlinks/expect | 8 ++++----
tests/f_recnect_bad/expect.1 | 2 +-
21 files changed, 52 insertions(+), 57 deletions(-)

diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
index 165f924..ba942ce 100644
--- a/debugfs/debugfs.c
+++ b/debugfs/debugfs.c
@@ -841,16 +841,15 @@ void internal_dump_inode(FILE *out, const char *prefix,
fprintf(out, "%d\n", inode->i_size);
if (os == EXT2_OS_HURD)
fprintf(out,
- "%sFile ACL: %d Directory ACL: %d Translator: %d\n",
+ "%sFile ACL: %d Translator: %d\n",
prefix,
- inode->i_file_acl, LINUX_S_ISDIR(inode->i_mode) ? inode->i_dir_acl : 0,
+ inode->i_file_acl,
inode->osd1.hurd1.h_i_translator);
else
- fprintf(out, "%sFile ACL: %llu Directory ACL: %d\n",
+ fprintf(out, "%sFile ACL: %llu\n",
prefix,
inode->i_file_acl | ((long long)
- (inode->osd2.linux2.l_i_file_acl_high) << 32),
- LINUX_S_ISDIR(inode->i_mode) ? inode->i_dir_acl : 0);
+ (inode->osd2.linux2.l_i_file_acl_high) << 32));
if (os != EXT2_OS_HURD)
fprintf(out, "%sLinks: %d Blockcount: %llu\n",
prefix, inode->i_links_count,
@@ -1347,10 +1346,9 @@ void do_modify_inode(int argc, char *argv[])
modify_u32(argv[0], "Reserved1", decimal_format, &inode.i_reserved1);
#endif
modify_u32(argv[0], "File acl", decimal_format, &inode.i_file_acl);
- if (LINUX_S_ISDIR(inode.i_mode))
- modify_u32(argv[0], "Directory acl", decimal_format, &inode.i_dir_acl);
- else
- modify_u32(argv[0], "High 32bits of size", decimal_format, &inode.i_size_high);
+
+ modify_u32(argv[0], "High 32bits of size", decimal_format,
+ &inode.i_size_high);

if (os == EXT2_OS_HURD)
modify_u32(argv[0], "Translator Block",
diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
index ff9b7b6..ca68862 100644
--- a/debugfs/set_fields.c
+++ b/debugfs/set_fields.c
@@ -212,7 +212,6 @@ static struct field_set_info inode_fields[] = {
/* Special case: i_file_acl_high is 2 bytes */
{ "file_acl", &set_inode.i_file_acl,
&set_inode.osd2.linux2.l_i_file_acl_high, 6, parse_uint },
- { "dir_acl", &set_inode.i_dir_acl, NULL, 4, parse_uint, FLAG_ALIAS },
{ "faddr", &set_inode.i_faddr, NULL, 4, parse_uint },
{ "frag", &set_inode.osd2.hurd2.h_i_frag, NULL, 1, parse_uint, FLAG_ALIAS },
{ "fsize", &set_inode.osd2.hurd2.h_i_fsize, NULL, 1, parse_uint },
diff --git a/e2fsck/message.c b/e2fsck/message.c
index 1c3fcd8..34201a3 100644
--- a/e2fsck/message.c
+++ b/e2fsck/message.c
@@ -32,7 +32,7 @@
* %IM <inode> -> i_mtime
* %IF <inode> -> i_faddr
* %If <inode> -> i_file_acl
- * %Id <inode> -> i_dir_acl
+ * %Id <inode> -> i_size_high
* %Iu <inode> -> i_uid
* %Ig <inode> -> i_gid
* %It <inode type>
@@ -320,7 +320,7 @@ static _INLINE_ void expand_inode_expression(FILE *f, ext2_filsys fs, char ch,
break;
case 'd':
fprintf(f, "%u", (LINUX_S_ISDIR(inode->i_mode) ?
- inode->i_dir_acl : 0));
+ inode->i_size_high : 0));
break;
case 'u':
fprintf(f, "%d", inode_uid(*inode));
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 8ef40f6..ce37176 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1716,7 +1716,7 @@ void e2fsck_pass1(e2fsck_t ctx)
}

if (inode->i_faddr || frag || fsize ||
- (LINUX_S_ISDIR(inode->i_mode) && inode->i_dir_acl))
+ (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
mark_inode_bad(ctx, ino);
if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
!ext2fs_has_feature_64bit(fs->super) &&
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 11c19e8..b89ebc9 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -1811,10 +1811,10 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
} else
not_fixed++;
}
- if (inode.i_dir_acl &&
+ if (inode.i_size_high &&
LINUX_S_ISDIR(inode.i_mode)) {
- if (fix_problem(ctx, PR_2_DIR_ACL_ZERO, &pctx)) {
- inode.i_dir_acl = 0;
+ if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
+ inode.i_size_high = 0;
inode_modified++;
} else
not_fixed++;
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 34a671e..8b28819 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1360,9 +1360,9 @@ static struct e2fsck_problem problem_table[] = {
N_("i_file_acl @F %If, @s zero.\n"),
PROMPT_CLEAR, 0 },

- /* i_dir_acl should be zero */
- { PR_2_DIR_ACL_ZERO,
- N_("i_dir_acl @F %Id, @s zero.\n"),
+ /* i_size_high should be zero */
+ { PR_2_DIR_SIZE_HIGH_ZERO,
+ N_("i_size_high @F %Id, @s zero.\n"),
PROMPT_CLEAR, 0 },

/* i_frag should be zero */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index 86cb614..3306560 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -808,8 +808,8 @@ struct problem_context {
/* i_file_acl should be zero */
#define PR_2_FILE_ACL_ZERO 0x02000E

-/* i_dir_acl should be zero */
-#define PR_2_DIR_ACL_ZERO 0x02000F
+/* i_size_high should be zero */
+#define PR_2_DIR_SIZE_HIGH_ZERO 0x02000F

/* i_frag should be zero */
#define PR_2_FRAG_ZERO 0x020010
diff --git a/ext2ed/doc/ext2ed-design.sgml b/ext2ed/doc/ext2ed-design.sgml
index ad2df96..7841358 100644
--- a/ext2ed/doc/ext2ed-design.sgml
+++ b/ext2ed/doc/ext2ed-design.sgml
@@ -2726,7 +2726,7 @@ struct ext2_inode {
__u32 i_block[EXT2_N_BLOCKS]; /* Pointers to blocks */
__u32 i_version; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_dir_acl; /* Directory ACL */
+ __u32 i_size_high; /* High 32bits of size */
__u32 i_faddr; /* Fragment address */
union {
struct {
diff --git a/ext2ed/doc/ext2fs-overview.sgml b/ext2ed/doc/ext2fs-overview.sgml
index a6ebf5a..900c393 100644
--- a/ext2ed/doc/ext2fs-overview.sgml
+++ b/ext2ed/doc/ext2fs-overview.sgml
@@ -487,7 +487,7 @@ struct ext2_inode {
__u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
__u32 i_version; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_dir_acl; /* Directory ACL */
+ __u32 i_size_high; /* High 32bits of size */
__u32 i_faddr; /* Fragment address */
union {
struct {
diff --git a/ext2ed/ext2.descriptors b/ext2ed/ext2.descriptors
index bf927b0..b1ac4c4 100644
--- a/ext2ed/ext2.descriptors
+++ b/ext2ed/ext2.descriptors
@@ -102,7 +102,7 @@ struct ext2_inode {
__u32 i_block[14]; /* Pointers to blocks */
__u32 i_version; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_dir_acl; /* Directory ACL */
+ __u32 i_size_high; /* High 32bits of size */
__u32 i_faddr; /* Fragment address */
__u8 l_i_frag; /* Fragment number */
__u8 l_i_fsize; /* Fragment size */
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 27a7d3a..195e366 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -398,7 +398,7 @@ struct ext2_inode {
__u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
__u32 i_generation; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_size_high; /* Formerly i_dir_acl, directory ACL */
+ __u32 i_size_high;
__u32 i_faddr; /* Fragment address */
union {
struct {
@@ -446,7 +446,7 @@ struct ext2_inode_large {
__u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
__u32 i_generation; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_size_high; /* Formerly i_dir_acl, directory ACL */
+ __u32 i_size_high;
__u32 i_faddr; /* Fragment address */
union {
struct {
@@ -484,8 +484,6 @@ struct ext2_inode_large {
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)

-#define i_dir_acl i_size_high
-
#define i_checksum_lo osd2.linux2.l_i_checksum_lo

#define inode_includes(size, field) \
diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
index d63fc55..2d05ee7 100644
--- a/lib/ext2fs/swapfs.c
+++ b/lib/ext2fs/swapfs.c
@@ -247,7 +247,7 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
has_extents = 1;
if (!hostorder && (t->i_flags & EXT4_INLINE_DATA_FL))
has_inline_data = 1;
- t->i_dir_acl = ext2fs_swab32(f->i_dir_acl);
+ t->i_size_high = ext2fs_swab32(f->i_size_high);
/*
* Extent data and inline data are swapped on access, not here
*/
diff --git a/po/at-expand.pl b/po/at-expand.pl
index bc1a744..47e4ebd 100644
--- a/po/at-expand.pl
+++ b/po/at-expand.pl
@@ -45,7 +45,7 @@ my @translator_help = (
"#. %IM <inode> -> i_mtime\n",
"#. %IF <inode> -> i_faddr\n",
"#. %If <inode> -> i_file_acl\n",
- "#. %Id <inode> -> i_dir_acl\n",
+ "#. %Id <inode> -> i_size_high\n",
"#. %Iu <inode> -> i_uid\n",
"#. %Ig <inode> -> i_gid\n",
"#. %It <str> file type\n",
diff --git a/tests/d_fallocate_blkmap/expect b/tests/d_fallocate_blkmap/expect
index 8ce79ff..f588511 100644
--- a/tests/d_fallocate_blkmap/expect
+++ b/tests/d_fallocate_blkmap/expect
@@ -18,7 +18,7 @@ debugfs: stat /a
Inode: 12 Type: regular Mode: 0666 Flags: 0x0
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 40960
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 82
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
@@ -30,7 +30,7 @@ debugfs: stat /b
Inode: 13 Type: regular Mode: 0666 Flags: 0x0
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 10240000
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 20082
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
diff --git a/tests/d_inline_dump/expect b/tests/d_inline_dump/expect
index c84f64d..f0ba471 100644
--- a/tests/d_inline_dump/expect
+++ b/tests/d_inline_dump/expect
@@ -2,7 +2,7 @@
Inode: 13 Type: regular Mode: 0644 Flags: 0x10000000
Generation: 3289262644 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 80
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec6b4:c72e3c00 -- Tue Jul 22 20:16:52 2014
@@ -18,7 +18,7 @@ Size of inline data: 80
Inode: 18 Type: regular Mode: 0644 Flags: 0x10000000
Generation: 3842229473 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 20
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec6b4:cafecc00 -- Tue Jul 22 20:16:52 2014
@@ -35,7 +35,7 @@ Size of inline data: 60
Inode: 16 Type: directory Mode: 0755 Flags: 0x10000000
Generation: 3842229469 Version: 0x00000000:00000004
User: 0 Group: 0 Size: 132
-File ACL: 7 Directory ACL: 0
+File ACL: 7
Links: 2 Blockcount: 8
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec6e3:27eac000 -- Tue Jul 22 20:17:39 2014
@@ -51,7 +51,7 @@ Size of inline data: 132
Inode: 20 Type: directory Mode: 0755 Flags: 0x10000000
Generation: 3710818931 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 60
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 2 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec6b4:ca0aa800 -- Tue Jul 22 20:16:52 2014
@@ -68,7 +68,7 @@ Size of inline data: 60
Inode: 12 Type: symlink Mode: 0777 Flags: 0x10000000
Generation: 3289262643 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 80
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec47f:724db800 -- Tue Jul 22 20:07:27 2014
@@ -83,7 +83,7 @@ Fast link dest: "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Inode: 19 Type: symlink Mode: 0777 Flags: 0x0
Generation: 3842229474 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 20
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x53cec44c:a1fcc000 -- Tue Jul 22 20:06:36 2014
diff --git a/tests/d_special_files/expect b/tests/d_special_files/expect
index f729b0f..c825932 100644
--- a/tests/d_special_files/expect
+++ b/tests/d_special_files/expect
@@ -5,7 +5,7 @@ debugfs -R ''stat foo'' -w test.img
Inode: 12 Type: symlink Mode: 0777 Flags: 0x0
Generation: 0 Version: 0x00000000
User: 0 Group: 0 Size: 3
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
@@ -17,7 +17,7 @@ debugfs -R ''stat foo2'' -w test.img
Inode: 13 Type: symlink Mode: 0777 Flags: 0x0
Generation: 0 Version: 0x00000000
User: 0 Group: 0 Size: 80
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 2
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
@@ -42,7 +42,7 @@ debugfs -R ''stat pipe'' -w test.img
Inode: 14 Type: FIFO Mode: 0000 Flags: 0x0
Generation: 0 Version: 0x00000000
User: 0 Group: 0 Size: 0
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
@@ -55,7 +55,7 @@ debugfs -R ''stat sda'' -w test.img
Inode: 15 Type: block special Mode: 0000 Flags: 0x0
Generation: 0 Version: 0x00000000
User: 0 Group: 0 Size: 0
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
@@ -67,7 +67,7 @@ debugfs -R ''stat null'' -w test.img
Inode: 16 Type: character special Mode: 0000 Flags: 0x0
Generation: 0 Version: 0x00000000
User: 0 Group: 0 Size: 0
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
diff --git a/tests/f_badcluster/expect b/tests/f_badcluster/expect
index 65a1641..75a3820 100644
--- a/tests/f_badcluster/expect
+++ b/tests/f_badcluster/expect
@@ -116,7 +116,7 @@ debugfs: stat /a
Inode: 12 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152157 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -128,7 +128,7 @@ debugfs: stat /b
Inode: 13 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152158 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -140,7 +140,7 @@ debugfs: stat /c
Inode: 14 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152159 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -152,7 +152,7 @@ debugfs: stat /d
Inode: 15 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152160 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -163,7 +163,7 @@ debugfs: stat /e
Inode: 16 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152161 Version: 0x00000001
User: 0 Group: 0 Size: 6144
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -175,7 +175,7 @@ debugfs: stat /f
Inode: 17 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152162 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
@@ -187,7 +187,7 @@ debugfs: stat /g
Inode: 18 Type: regular Mode: 0644 Flags: 0x80000
Generation: 1117152163 Version: 0x00000001
User: 0 Group: 0 Size: 3072
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 32
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
diff --git a/tests/f_convert_bmap/expect.1 b/tests/f_convert_bmap/expect.1
index 7d2ca86..0291f94 100644
--- a/tests/f_convert_bmap/expect.1
+++ b/tests/f_convert_bmap/expect.1
@@ -2,7 +2,7 @@ debugfs: stat /a
Inode: 12 Type: regular Mode: 0644 Flags: 0x0
Generation: 1573716129 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 524288
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 1030
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x5457f87a:62ae2980 -- Mon Nov 3 21:49:46 2014
diff --git a/tests/f_convert_bmap_and_extent/expect.1 b/tests/f_convert_bmap_and_extent/expect.1
index 7af91aa..eb55db7 100644
--- a/tests/f_convert_bmap_and_extent/expect.1
+++ b/tests/f_convert_bmap_and_extent/expect.1
@@ -2,7 +2,7 @@ debugfs: stat /a
Inode: 12 Type: regular Mode: 0644 Flags: 0x0
Generation: 1573716129 Version: 0x00000000:00000001
User: 0 Group: 0 Size: 524288
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 1030
Fragment: Address: 0 Number: 0 Size: 0
ctime: 0x5457f87a:62ae2980 -- Mon Nov 3 21:49:46 2014
diff --git a/tests/f_create_symlinks/expect b/tests/f_create_symlinks/expect
index dca6e92..4409385 100644
--- a/tests/f_create_symlinks/expect
+++ b/tests/f_create_symlinks/expect
@@ -20,7 +20,7 @@ debugfs -R "stat /l_30" test.img
Inode: 12 Type: symlink Mode: 0777 Flags: 0x0
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 31
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
@@ -29,7 +29,7 @@ debugfs -R "stat /l_70" test.img
Inode: 13 Type: symlink Mode: 0777 Flags: 0x10000000
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 71
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 0
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
@@ -40,7 +40,7 @@ debugfs -R "stat /l_500" test.img
Inode: 14 Type: symlink Mode: 0777 Flags: 0x80000
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 501
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 2
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
@@ -50,7 +50,7 @@ debugfs -R "stat /l_1023" test.img
Inode: 15 Type: symlink Mode: 0777 Flags: 0x80000
Generation: 0 Version: 0x00000000:00000000
User: 0 Group: 0 Project: 0 Size: 1024
-File ACL: 0 Directory ACL: 0
+File ACL: 0
Links: 1 Blockcount: 2
Fragment: Address: 0 Number: 0 Size: 0
Size of extra inode fields: 32
diff --git a/tests/f_recnect_bad/expect.1 b/tests/f_recnect_bad/expect.1
index 8ba81e6..d4f72a1 100644
--- a/tests/f_recnect_bad/expect.1
+++ b/tests/f_recnect_bad/expect.1
@@ -3,7 +3,7 @@ Pass 2: Checking directory structure
i_faddr for inode 15 (/test/quux) is 23, should be zero.
Clear? yes

-i_dir_acl for inode 15 (/test/quux) is 12, should be zero.
+i_size_high for inode 15 (/test/quux) is 12, should be zero.
Clear? yes

i_file_acl for inode 13 (/test/???) is 12, should be zero.
--
1.7.1

2017-02-13 17:31:02

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v2 2/4] e2fsprogs: add support for 3-level htree

From: Artem Blagodarenko <[email protected]>

The INCOMPAT_LARGEDIR feature allows larger directories to
be created, both with directory sizes over 2GB and a
maximum htree depth of 3 instead of the current limit of 2.
These features are needed in order to exceed the current
limit of approximately 10M entries in a single directory.

debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
added.

Signed-off-by: Alexey Lyashkov <[email protected]>
Signed-off-by: Artem Blagodarenko <[email protected]>
---
e2fsck/pass1.c | 5 +++--
e2fsck/pass2.c | 5 +++--
lib/ext2fs/ext2_fs.h | 3 ++-
lib/ext2fs/ext2fs.h | 21 ++++++++++++++++++++-
misc/mke2fs.c | 3 ++-
misc/tune2fs.c | 3 ++-
6 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index ce37176..fff7dcf 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
}

if (inode->i_faddr || frag || fsize ||
- (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
+ (!ext2fs_has_feature_large_dir(fs) &&
+ (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
mark_inode_bad(ctx, ino);
if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
!ext2fs_has_feature_64bit(fs->super) &&
@@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
return 1;

pctx->num = root->indirect_levels;
- if ((root->indirect_levels > 1) &&
+ if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
return 1;

diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index b89ebc9..139d48f 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -1058,7 +1058,8 @@ inline_read_fail:
dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
if ((root->reserved_zero ||
root->info_length < 8 ||
- root->indirect_levels > 1) &&
+ root->indirect_levels
+ > ext2_dir_htree_level(fs)) &&
fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
clear_htree(ctx, ino);
dx_dir->numblocks = 0;
@@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
} else
not_fixed++;
}
- if (inode.i_size_high &&
+ if (inode.i_size_high && !ext2fs_has_feature_large_dir(fs) &&
LINUX_S_ISDIR(inode.i_mode)) {
if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
inode.i_size_high = 0;
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 195e366..6d9a5d0 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)

#define EXT2_FEATURE_COMPAT_SUPP 0
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
- EXT4_FEATURE_INCOMPAT_MMP)
+ EXT4_FEATURE_INCOMPAT_MMP|\
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 786ded8..d714b44 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
EXT4_FEATURE_INCOMPAT_64BIT|\
EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
EXT4_FEATURE_INCOMPAT_ENCRYPT|\
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)

#define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
@@ -1924,6 +1925,24 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
}

+/* htree levels for ext4 */
+#define EXT4_HTREE_LEVEL_COMPAT 1
+#define EXT4_HTREE_LEVEL 3
+
+_INLINE_ int ext2fs_has_feature_large_dir(ext2_filsys fs)
+{
+ return EXT2_HAS_INCOMPAT_FEATURE(fs->super,
+ EXT4_FEATURE_INCOMPAT_LARGEDIR);
+}
+
+_INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
+{
+ if (ext2fs_has_feature_large_dir(fs))
+ return EXT4_HTREE_LEVEL;
+
+ return EXT4_HTREE_LEVEL_COMPAT;
+}
+
/*
* This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
*/
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 9f18c83..b2bf461 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
EXT4_FEATURE_INCOMPAT_64BIT|
EXT4_FEATURE_INCOMPAT_INLINE_DATA|
EXT4_FEATURE_INCOMPAT_ENCRYPT |
- EXT4_FEATURE_INCOMPAT_CSUM_SEED,
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED |
+ EXT4_FEATURE_INCOMPAT_LARGEDIR,
/* R/O compat */
EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 6239577..f78d105 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
EXT4_FEATURE_INCOMPAT_MMP |
EXT4_FEATURE_INCOMPAT_64BIT |
EXT4_FEATURE_INCOMPAT_ENCRYPT |
- EXT4_FEATURE_INCOMPAT_CSUM_SEED,
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED |
+ EXT4_FEATURE_INCOMPAT_LARGEDIR,
/* R/O compat */
EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
--
1.7.1

2017-02-13 17:31:06

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v2 4/4] tests: 3 level hash tree test

From: Artem Blagodarenko <[email protected]>

A test is added that recreates a directory (-fD fsck option)
holding about 47.3k files with 255-character names. This many files
cannot be stored in a 2-level htree, so 3 levels are used.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
tests/f_large_dir/debugfs_script | 15 +++++++++++++++
tests/f_large_dir/expect | 12 ++++++++++++
tests/f_large_dir/name | 1 +
tests/f_large_dir/script | 28 ++++++++++++++++++++++++++++
4 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/tests/f_large_dir/debugfs_script b/tests/f_large_dir/debugfs_script
new file mode 100755
index 0000000..b869db5
--- /dev/null
+++ b/tests/f_large_dir/debugfs_script
@@ -0,0 +1,15 @@
+#!/bin/bash
+echo "feature large_dir"
+echo "mkdir /foo"
+echo "cd /foo"
+touch foofile
+echo "write foofile foofile"
+for i in $(seq 47300); do
+ [[ $(( $i % 3 )) -eq 0 ]] && \
+ echo "expand ./"
+ [[ $(( $i % 5000 )) -eq 0 ]] && \
+ >&2 echo "$i processed"
+ new_uuid=`printf %0255X $i`
+ echo "ln foofile $new_uuid"
+done
+
diff --git a/tests/f_large_dir/expect b/tests/f_large_dir/expect
new file mode 100644
index 0000000..9c94675
--- /dev/null
+++ b/tests/f_large_dir/expect
@@ -0,0 +1,12 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 3A: Optimizing directories
+Pass 4: Checking reference counts
+Inode 13 ref count is 1, should be 47301. Fix? yes
+
+Pass 5: Checking group summary information
+
+test.img: ***** FILE SYSTEM WAS MODIFIED *****
+test.img: 13/115368 files (0.0% non-contiguous), 32839/460800 blocks
+Exit status is 1
diff --git a/tests/f_large_dir/name b/tests/f_large_dir/name
new file mode 100644
index 0000000..4b96890
--- /dev/null
+++ b/tests/f_large_dir/name
@@ -0,0 +1 @@
+optimize 3 level htree directories
diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
new file mode 100644
index 0000000..25983c2
--- /dev/null
+++ b/tests/f_large_dir/script
@@ -0,0 +1,28 @@
+OUT=$test_name.log
+EXP=$test_dir/expect
+DFSCRIPT=$test_dir/debugfs_script
+E2FSCK=../e2fsck/e2fsck
+
+TMPFILE2=/tmp/image
+cp /dev/null $OUT
+$MKE2FS -b 1024 -O large_dir,uninit_bg,dir_nlink -F $TMPFILE2 460800 > /dev/null
+$DFSCRIPT | $DEBUGFS -w -f /dev/stdin $TMPFILE2 > /dev/null
+
+$E2FSCK -yfD $TMPFILE2 > $OUT.new 2>&1
+status=$?
+echo Exit status is $status >> $OUT.new
+sed -f $cmd_dir/filter.sed -e "s;$TMPFILE2;test.img;" $OUT.new >> $OUT
+rm -f $OUT.new
+
+cmp -s $OUT $EXP
+RC=$?
+if [ $RC -eq 0 ]; then
+ echo "$test_name: $test_description: ok"
+ touch $test_name.ok
+else
+ echo "$test_name: $test_description: failed"
+ diff -u $EXP $OUT > $test_name.failed
+fi
+
+
+
--
1.7.1

2017-02-13 17:31:04

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v2 3/4] e2fsck: 3 level hash tree directory optimization

From: Artem Blagodarenko <[email protected]>

e2fsck fix for partitions with 3 level hash directories.
An additional level is added to the e2fsck -D codepath.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
debugfs/htree.c | 3 +-
e2fsck/e2fsck.h | 1 +
e2fsck/pass2.c | 68 +++++++++++++++++++++-------
e2fsck/rehash.c | 123 ++++++++++++++++++++++++++++++++++++++++----------
lib/ext2fs/ext2fs.h | 5 ++
5 files changed, 156 insertions(+), 44 deletions(-)

diff --git a/debugfs/htree.c b/debugfs/htree.c
index 54e55e2..8c18666 100644
--- a/debugfs/htree.c
+++ b/debugfs/htree.c
@@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);

- ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
+ ent = (struct ext2_dx_entry *)
+ ((char *)rootnode + rootnode->info_length);

htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
buf + current_fs->blocksize,
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index f356810..a4efbdf 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -122,6 +122,7 @@ struct dx_dirblock_info {
blk64_t phys;
int flags;
blk64_t parent;
+ blk64_t previous;
ext2_dirhash_t min_hash;
ext2_dirhash_t max_hash;
ext2_dirhash_t node_min_hash;
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 139d48f..2e2c721 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -85,6 +85,39 @@ struct check_dir_struct {
unsigned long long next_ra_off;
};

+static void update_parents(struct dx_dir_info *dx_dir, int type)
+{
+ struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
+ int b;
+
+ for (b = 0, dx_db = dx_dir->dx_block;
+ b < dx_dir->numblocks;
+ b++, dx_db++) {
+ dx_parent = &dx_dir->dx_block[dx_db->parent];
+ if (dx_db->type != type)
+ continue;
+
+ /*
+ * XXX Make sure dx_parent->min_hash > dx_db->min_hash
+ */
+ if (dx_db->flags & DX_FLAG_FIRST) {
+ dx_parent->min_hash = dx_db->min_hash;
+ if (dx_parent->previous) {
+ dx_previous =
+ &dx_dir->dx_block[dx_parent->previous];
+ dx_previous->node_max_hash =
+ dx_parent->min_hash;
+ }
+ }
+ /*
+ * XXX Make sure dx_parent->max_hash < dx_db->max_hash
+ */
+ if (dx_db->flags & DX_FLAG_LAST) {
+ dx_parent->max_hash = dx_db->max_hash;
+ }
+ }
+}
+
void e2fsck_pass2(e2fsck_t ctx)
{
struct ext2_super_block *sb = ctx->fs->super;
@@ -182,24 +215,11 @@ void e2fsck_pass2(e2fsck_t ctx)
* Find all of the first and last leaf blocks, and
* update their parent's min and max hash values
*/
- for (b=0, dx_db = dx_dir->dx_block;
- b < dx_dir->numblocks;
- b++, dx_db++) {
- if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
- !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
- continue;
- dx_parent = &dx_dir->dx_block[dx_db->parent];
- /*
- * XXX Make sure dx_parent->min_hash > dx_db->min_hash
- */
- if (dx_db->flags & DX_FLAG_FIRST)
- dx_parent->min_hash = dx_db->min_hash;
- /*
- * XXX Make sure dx_parent->max_hash < dx_db->max_hash
- */
- if (dx_db->flags & DX_FLAG_LAST)
- dx_parent->max_hash = dx_db->max_hash;
- }
+ update_parents(dx_dir, DX_DIRBLOCK_LEAF);
+
+ /* for 3 level htree: update 2 level parent's min
+ * and max hash values */
+ update_parents(dx_dir, DX_DIRBLOCK_NODE);

for (b=0, dx_db = dx_dir->dx_block;
b < dx_dir->numblocks;
@@ -642,6 +662,10 @@ static void parse_int_node(ext2_filsys fs,
dx_db->flags |= DX_FLAG_REFERENCED;
dx_db->parent = db->blockcnt;
}
+
+ dx_db->previous =
+ i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;
+
if (hash < min_hash)
min_hash = hash;
if (hash > max_hash)
@@ -949,6 +973,14 @@ static int check_dir_block(ext2_filsys fs,
return DIRENT_ABORT;
}

+ /* This will allow (at some point in the future) to punch out empty
+ * directory blocks and reduce the space used by a directory that grows
+ * very large and then the files are deleted. For now, all that is
+ * needed is to avoid e2fsck filling in these holes as part of
+ * feature flag. */
+ if (db->blk == 0 && ext2fs_has_feature_large_dir(fs))
+ return 0;
+
if (db->blk == 0 && !inline_data_size) {
if (allocate_dir_block(ctx, db, buf, &cd->pctx))
return 0;
diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index 22a58f3..29d5a54 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -603,6 +603,42 @@ static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
return (struct ext2_dx_entry *) limits;
}

+static int alloc_blocks(ext2_filsys fs,
+ struct ext2_dx_countlimit **limit,
+ struct ext2_dx_entry **prev_ent,
+ struct ext2_dx_entry **next_ent,
+ int *prev_offset, int *next_offset,
+ struct out_dir *outdir, int i,
+ int *prev_count, int *next_count)
+{
+ errcode_t retval;
+ char *block_start;
+
+ if (*limit)
+ (*limit)->limit = (*limit)->count =
+ ext2fs_cpu_to_le16((*limit)->limit);
+ *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
+ (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
+
+ if (i != 1)
+ (*prev_ent)->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+
+ retval = get_next_block(fs, outdir, &block_start);
+ if (retval)
+ return retval;
+
+ *next_ent = set_int_node(fs, block_start);
+ *limit = (struct ext2_dx_countlimit *)(*next_ent);
+ if (next_offset)
+ *next_offset = ((char *) *next_ent - outdir->buf);
+
+ *next_count = (*limit)->limit;
+ (*prev_offset) += sizeof(struct ext2_dx_entry);
+ (*prev_count)--;
+return 0;
+}
+
/*
* This function takes the leaf nodes which have been written in
* outdir, and populates the root node and any necessary interior nodes.
@@ -612,13 +648,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
ext2_ino_t ino,
ext2_ino_t parent)
{
- struct ext2_dx_root_info *root_info;
- struct ext2_dx_entry *root, *dx_ent = 0;
- struct ext2_dx_countlimit *root_limit, *limit;
+ struct ext2_dx_root_info *root_info;
+ struct ext2_dx_entry *root, *int_ent, *dx_ent = 0;
+ struct ext2_dx_countlimit *root_limit, *int_limit, *limit;
errcode_t retval;
char * block_start;
- int i, c1, c2, nblks;
- int limit_offset, root_offset;
+ int i, c1, c2, c3, nblks;
+ int limit_offset, int_offset, root_offset;

root_info = set_root_node(fs, outdir->buf, ino, parent);
root_offset = limit_offset = ((char *) root_info - outdir->buf) +
@@ -628,7 +664,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
nblks = outdir->num;

/* Write out the pointer blocks */
- if (nblks-1 <= c1) {
+ if (nblks - 1 <= c1) {
/* Just write out the root block, and we're done */
root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
for (i=1; i < nblks; i++) {
@@ -639,31 +675,23 @@ static errcode_t calculate_tree(ext2_filsys fs,
root++;
c1--;
}
- } else {
+ } else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
c2 = 0;
- limit = 0;
+ limit = NULL;
root_info->indirect_levels = 1;
for (i=1; i < nblks; i++) {
- if (c1 == 0)
+ if (c2 == 0 && c1 == 0)
return ENOSPC;
if (c2 == 0) {
- if (limit)
- limit->limit = limit->count =
- ext2fs_cpu_to_le16(limit->limit);
- root = (struct ext2_dx_entry *)
- (outdir->buf + root_offset);
- root->block = ext2fs_cpu_to_le32(outdir->num);
- if (i != 1)
- root->hash =
- ext2fs_cpu_to_le32(outdir->hashes[i]);
- if ((retval = get_next_block(fs, outdir,
- &block_start)))
+ retval = alloc_blocks(fs, &limit,
+ &root,
+ &dx_ent,
+ &root_offset,
+ NULL,
+ outdir, i,
+ &c1, &c2);
+ if (retval)
return retval;
- dx_ent = set_int_node(fs, block_start);
- limit = (struct ext2_dx_countlimit *) dx_ent;
- c2 = limit->limit;
- root_offset += sizeof(struct ext2_dx_entry);
- c1--;
}
dx_ent->block = ext2fs_cpu_to_le32(i);
if (c2 != limit->limit)
@@ -674,6 +702,51 @@ static errcode_t calculate_tree(ext2_filsys fs,
}
limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
limit->limit = ext2fs_cpu_to_le16(limit->limit);
+ } else {
+ c2 = 0;
+ c3 = 0;
+ limit = NULL;
+ int_limit = 0;
+ root_info->indirect_levels = 2;
+ for (i = 1; i < nblks; i++) {
+ if (c3 == 0 && c2 == 0 && c1 == 0)
+ return ENOSPC;
+ if (c3 == 0 && c2 == 0) {
+ retval = alloc_blocks(fs, &int_limit,
+ &root,
+ &int_ent,
+ &root_offset,
+ &int_offset,
+ outdir, i,
+ &c1, &c2);
+ if (retval)
+ return retval;
+ }
+ if (c3 == 0) {
+ retval = alloc_blocks(fs, &limit,
+ &int_ent,
+ &dx_ent,
+ &int_offset,
+ NULL,
+ outdir, i,
+ &c2, &c3);
+ if (retval)
+ return retval;
+
+ }
+ dx_ent->block = ext2fs_cpu_to_le32(i);
+ if (c3 != limit->limit)
+ dx_ent->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+ dx_ent++;
+ c3--;
+ }
+ int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
+ int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
+ limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
+ limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
}
root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index d714b44..79698ce 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1943,6 +1943,11 @@ _INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
return EXT4_HTREE_LEVEL_COMPAT;
}

+_INLINE_ int ext2fs_htree_intnode_maxrecs(ext2_filsys fs, int blocks)
+{
+ return blocks * ((fs->blocksize - 8) / sizeof(struct ext2_dx_entry));
+}
+
/*
* This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
*/
--
1.7.1

2017-02-14 20:06:24

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] e2fsprogs: supersede i_dir_acl with i_size_high for all cases


> On Feb 13, 2017, at 2:20 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> This patch removes i_dir_acl macros and macros users.
> Now the structure field can be accessed as i_size_high. This field
> is useful for largedir feature.
>
> Signed-off-by: Alexey Lyashkov <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> debugfs/debugfs.c | 16 +++++++---------
> debugfs/set_fields.c | 1 -
> e2fsck/message.c | 4 ++--
> e2fsck/pass1.c | 2 +-
> e2fsck/pass2.c | 6 +++---
> e2fsck/problem.c | 6 +++---
> e2fsck/problem.h | 4 ++--
> ext2ed/doc/ext2ed-design.sgml | 2 +-
> ext2ed/doc/ext2fs-overview.sgml | 2 +-
> ext2ed/ext2.descriptors | 2 +-
> lib/ext2fs/ext2_fs.h | 6 ++----
> lib/ext2fs/swapfs.c | 2 +-
> po/at-expand.pl | 2 +-
> tests/d_fallocate_blkmap/expect | 4 ++--
> tests/d_inline_dump/expect | 12 ++++++------
> tests/d_special_files/expect | 10 +++++-----
> tests/f_badcluster/expect | 14 +++++++-------
> tests/f_convert_bmap/expect.1 | 2 +-
> tests/f_convert_bmap_and_extent/expect.1 | 2 +-
> tests/f_create_symlinks/expect | 8 ++++----
> tests/f_recnect_bad/expect.1 | 2 +-
> 21 files changed, 52 insertions(+), 57 deletions(-)
>
> diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
> index 165f924..ba942ce 100644
> --- a/debugfs/debugfs.c
> +++ b/debugfs/debugfs.c
> @@ -841,16 +841,15 @@ void internal_dump_inode(FILE *out, const char *prefix,
> fprintf(out, "%d\n", inode->i_size);
> if (os == EXT2_OS_HURD)
> fprintf(out,
> - "%sFile ACL: %d Directory ACL: %d Translator: %d\n",
> + "%sFile ACL: %d Translator: %d\n",
> prefix,
> - inode->i_file_acl, LINUX_S_ISDIR(inode->i_mode) ? inode->i_dir_acl : 0,
> + inode->i_file_acl,
> inode->osd1.hurd1.h_i_translator);
> else
> - fprintf(out, "%sFile ACL: %llu Directory ACL: %d\n",
> + fprintf(out, "%sFile ACL: %llu\n",
> prefix,
> inode->i_file_acl | ((long long)
> - (inode->osd2.linux2.l_i_file_acl_high) << 32),
> - LINUX_S_ISDIR(inode->i_mode) ? inode->i_dir_acl : 0);
> + (inode->osd2.linux2.l_i_file_acl_high) << 32));
> if (os != EXT2_OS_HURD)
> fprintf(out, "%sLinks: %d Blockcount: %llu\n",
> prefix, inode->i_links_count,
> @@ -1347,10 +1346,9 @@ void do_modify_inode(int argc, char *argv[])
> modify_u32(argv[0], "Reserved1", decimal_format, &inode.i_reserved1);
> #endif
> modify_u32(argv[0], "File acl", decimal_format, &inode.i_file_acl);
> - if (LINUX_S_ISDIR(inode.i_mode))
> - modify_u32(argv[0], "Directory acl", decimal_format, &inode.i_dir_acl);
> - else
> - modify_u32(argv[0], "High 32bits of size", decimal_format, &inode.i_size_high);
> +
> + modify_u32(argv[0], "High 32bits of size", decimal_format,
> + &inode.i_size_high);
>
> if (os == EXT2_OS_HURD)
> modify_u32(argv[0], "Translator Block",
> diff --git a/debugfs/set_fields.c b/debugfs/set_fields.c
> index ff9b7b6..ca68862 100644
> --- a/debugfs/set_fields.c
> +++ b/debugfs/set_fields.c
> @@ -212,7 +212,6 @@ static struct field_set_info inode_fields[] = {
> /* Special case: i_file_acl_high is 2 bytes */
> { "file_acl", &set_inode.i_file_acl,
> &set_inode.osd2.linux2.l_i_file_acl_high, 6, parse_uint },
> - { "dir_acl", &set_inode.i_dir_acl, NULL, 4, parse_uint, FLAG_ALIAS },
> { "faddr", &set_inode.i_faddr, NULL, 4, parse_uint },
> { "frag", &set_inode.osd2.hurd2.h_i_frag, NULL, 1, parse_uint, FLAG_ALIAS },
> { "fsize", &set_inode.osd2.hurd2.h_i_fsize, NULL, 1, parse_uint },
> diff --git a/e2fsck/message.c b/e2fsck/message.c
> index 1c3fcd8..34201a3 100644
> --- a/e2fsck/message.c
> +++ b/e2fsck/message.c
> @@ -32,7 +32,7 @@
> * %IM <inode> -> i_mtime
> * %IF <inode> -> i_faddr
> * %If <inode> -> i_file_acl
> - * %Id <inode> -> i_dir_acl
> + * %Id <inode> -> i_size_high
> * %Iu <inode> -> i_uid
> * %Ig <inode> -> i_gid
> * %It <inode type>
> @@ -320,7 +320,7 @@ static _INLINE_ void expand_inode_expression(FILE *f, ext2_filsys fs, char ch,
> break;
> case 'd':
> fprintf(f, "%u", (LINUX_S_ISDIR(inode->i_mode) ?
> - inode->i_dir_acl : 0));
> + inode->i_size_high : 0));
> break;
> case 'u':
> fprintf(f, "%d", inode_uid(*inode));
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index 8ef40f6..ce37176 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -1716,7 +1716,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> }
>
> if (inode->i_faddr || frag || fsize ||
> - (LINUX_S_ISDIR(inode->i_mode) && inode->i_dir_acl))
> + (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
> mark_inode_bad(ctx, ino);
> if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
> !ext2fs_has_feature_64bit(fs->super) &&
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index 11c19e8..b89ebc9 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -1811,10 +1811,10 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
> } else
> not_fixed++;
> }
> - if (inode.i_dir_acl &&
> + if (inode.i_size_high &&
> LINUX_S_ISDIR(inode.i_mode)) {
> - if (fix_problem(ctx, PR_2_DIR_ACL_ZERO, &pctx)) {
> - inode.i_dir_acl = 0;
> + if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
> + inode.i_size_high = 0;
> inode_modified++;
> } else
> not_fixed++;
> diff --git a/e2fsck/problem.c b/e2fsck/problem.c
> index 34a671e..8b28819 100644
> --- a/e2fsck/problem.c
> +++ b/e2fsck/problem.c
> @@ -1360,9 +1360,9 @@ static struct e2fsck_problem problem_table[] = {
> N_("i_file_acl @F %If, @s zero.\n"),
> PROMPT_CLEAR, 0 },
>
> - /* i_dir_acl should be zero */
> - { PR_2_DIR_ACL_ZERO,
> - N_("i_dir_acl @F %Id, @s zero.\n"),
> + /* i_size_high should be zero */
> + { PR_2_DIR_SIZE_HIGH_ZERO,
> + N_("i_size_high @F %Id, @s zero.\n"),
> PROMPT_CLEAR, 0 },
>
> /* i_frag should be zero */
> diff --git a/e2fsck/problem.h b/e2fsck/problem.h
> index 86cb614..3306560 100644
> --- a/e2fsck/problem.h
> +++ b/e2fsck/problem.h
> @@ -808,8 +808,8 @@ struct problem_context {
> /* i_file_acl should be zero */
> #define PR_2_FILE_ACL_ZERO 0x02000E
>
> -/* i_dir_acl should be zero */
> -#define PR_2_DIR_ACL_ZERO 0x02000F
> +/* i_size_high should be zero */
> +#define PR_2_DIR_SIZE_HIGH_ZERO 0x02000F
>
> /* i_frag should be zero */
> #define PR_2_FRAG_ZERO 0x020010
> diff --git a/ext2ed/doc/ext2ed-design.sgml b/ext2ed/doc/ext2ed-design.sgml
> index ad2df96..7841358 100644
> --- a/ext2ed/doc/ext2ed-design.sgml
> +++ b/ext2ed/doc/ext2ed-design.sgml
> @@ -2726,7 +2726,7 @@ struct ext2_inode {
> __u32 i_block[EXT2_N_BLOCKS]; /* Pointers to blocks */
> __u32 i_version; /* File version (for NFS) */
> __u32 i_file_acl; /* File ACL */
> - __u32 i_dir_acl; /* Directory ACL */
> + __u32 i_size_high; /* High 32bits of size */
> __u32 i_faddr; /* Fragment address */
> union {
> struct {
> diff --git a/ext2ed/doc/ext2fs-overview.sgml b/ext2ed/doc/ext2fs-overview.sgml
> index a6ebf5a..900c393 100644
> --- a/ext2ed/doc/ext2fs-overview.sgml
> +++ b/ext2ed/doc/ext2fs-overview.sgml
> @@ -487,7 +487,7 @@ struct ext2_inode {
> __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
> __u32 i_version; /* File version (for NFS) */
> __u32 i_file_acl; /* File ACL */
> - __u32 i_dir_acl; /* Directory ACL */
> + __u32 i_size_high; /* High 32bits of size */
> __u32 i_faddr; /* Fragment address */
> union {
> struct {
> diff --git a/ext2ed/ext2.descriptors b/ext2ed/ext2.descriptors
> index bf927b0..b1ac4c4 100644
> --- a/ext2ed/ext2.descriptors
> +++ b/ext2ed/ext2.descriptors
> @@ -102,7 +102,7 @@ struct ext2_inode {
> __u32 i_block[14]; /* Pointers to blocks */
> __u32 i_version; /* File version (for NFS) */
> __u32 i_file_acl; /* File ACL */
> - __u32 i_dir_acl; /* Directory ACL */
> + __u32 i_size_high; /* High 32bits of size */
> __u32 i_faddr; /* Fragment address */
> __u8 l_i_frag; /* Fragment number */
> __u8 l_i_fsize; /* Fragment size */
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 27a7d3a..195e366 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -398,7 +398,7 @@ struct ext2_inode {
> __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
> __u32 i_generation; /* File version (for NFS) */
> __u32 i_file_acl; /* File ACL */
> - __u32 i_size_high; /* Formerly i_dir_acl, directory ACL */
> + __u32 i_size_high;
> __u32 i_faddr; /* Fragment address */
> union {
> struct {
> @@ -446,7 +446,7 @@ struct ext2_inode_large {
> __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
> __u32 i_generation; /* File version (for NFS) */
> __u32 i_file_acl; /* File ACL */
> - __u32 i_size_high; /* Formerly i_dir_acl, directory ACL */
> + __u32 i_size_high;
> __u32 i_faddr; /* Fragment address */
> union {
> struct {
> @@ -484,8 +484,6 @@ struct ext2_inode_large {
> #define EXT4_EPOCH_BITS 2
> #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
>
> -#define i_dir_acl i_size_high
> -
> #define i_checksum_lo osd2.linux2.l_i_checksum_lo
>
> #define inode_includes(size, field) \
> diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
> index d63fc55..2d05ee7 100644
> --- a/lib/ext2fs/swapfs.c
> +++ b/lib/ext2fs/swapfs.c
> @@ -247,7 +247,7 @@ void ext2fs_swap_inode_full(ext2_filsys fs, struct ext2_inode_large *t,
> has_extents = 1;
> if (!hostorder && (t->i_flags & EXT4_INLINE_DATA_FL))
> has_inline_data = 1;
> - t->i_dir_acl = ext2fs_swab32(f->i_dir_acl);
> + t->i_size_high = ext2fs_swab32(f->i_size_high);
> /*
> * Extent data and inline data are swapped on access, not here
> */
> diff --git a/po/at-expand.pl b/po/at-expand.pl
> index bc1a744..47e4ebd 100644
> --- a/po/at-expand.pl
> +++ b/po/at-expand.pl
> @@ -45,7 +45,7 @@ my @translator_help = (
> "#. %IM <inode> -> i_mtime\n",
> "#. %IF <inode> -> i_faddr\n",
> "#. %If <inode> -> i_file_acl\n",
> - "#. %Id <inode> -> i_dir_acl\n",
> + "#. %Id <inode> -> i_size_high\n",
> "#. %Iu <inode> -> i_uid\n",
> "#. %Ig <inode> -> i_gid\n",
> "#. %It <str> file type\n",
> diff --git a/tests/d_fallocate_blkmap/expect b/tests/d_fallocate_blkmap/expect
> index 8ce79ff..f588511 100644
> --- a/tests/d_fallocate_blkmap/expect
> +++ b/tests/d_fallocate_blkmap/expect
> @@ -18,7 +18,7 @@ debugfs: stat /a
> Inode: 12 Type: regular Mode: 0666 Flags: 0x0
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 40960
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 82
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> @@ -30,7 +30,7 @@ debugfs: stat /b
> Inode: 13 Type: regular Mode: 0666 Flags: 0x0
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 10240000
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 20082
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> diff --git a/tests/d_inline_dump/expect b/tests/d_inline_dump/expect
> index c84f64d..f0ba471 100644
> --- a/tests/d_inline_dump/expect
> +++ b/tests/d_inline_dump/expect
> @@ -2,7 +2,7 @@
> Inode: 13 Type: regular Mode: 0644 Flags: 0x10000000
> Generation: 3289262644 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 80
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec6b4:c72e3c00 -- Tue Jul 22 20:16:52 2014
> @@ -18,7 +18,7 @@ Size of inline data: 80
> Inode: 18 Type: regular Mode: 0644 Flags: 0x10000000
> Generation: 3842229473 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 20
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec6b4:cafecc00 -- Tue Jul 22 20:16:52 2014
> @@ -35,7 +35,7 @@ Size of inline data: 60
> Inode: 16 Type: directory Mode: 0755 Flags: 0x10000000
> Generation: 3842229469 Version: 0x00000000:00000004
> User: 0 Group: 0 Size: 132
> -File ACL: 7 Directory ACL: 0
> +File ACL: 7
> Links: 2 Blockcount: 8
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec6e3:27eac000 -- Tue Jul 22 20:17:39 2014
> @@ -51,7 +51,7 @@ Size of inline data: 132
> Inode: 20 Type: directory Mode: 0755 Flags: 0x10000000
> Generation: 3710818931 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 60
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 2 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec6b4:ca0aa800 -- Tue Jul 22 20:16:52 2014
> @@ -68,7 +68,7 @@ Size of inline data: 60
> Inode: 12 Type: symlink Mode: 0777 Flags: 0x10000000
> Generation: 3289262643 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 80
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec47f:724db800 -- Tue Jul 22 20:07:27 2014
> @@ -83,7 +83,7 @@ Fast link dest: "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> Inode: 19 Type: symlink Mode: 0777 Flags: 0x0
> Generation: 3842229474 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 20
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x53cec44c:a1fcc000 -- Tue Jul 22 20:06:36 2014
> diff --git a/tests/d_special_files/expect b/tests/d_special_files/expect
> index f729b0f..c825932 100644
> --- a/tests/d_special_files/expect
> +++ b/tests/d_special_files/expect
> @@ -5,7 +5,7 @@ debugfs -R ''stat foo'' -w test.img
> Inode: 12 Type: symlink Mode: 0777 Flags: 0x0
> Generation: 0 Version: 0x00000000
> User: 0 Group: 0 Size: 3
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
> @@ -17,7 +17,7 @@ debugfs -R ''stat foo2'' -w test.img
> Inode: 13 Type: symlink Mode: 0777 Flags: 0x0
> Generation: 0 Version: 0x00000000
> User: 0 Group: 0 Size: 80
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 2
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
> @@ -42,7 +42,7 @@ debugfs -R ''stat pipe'' -w test.img
> Inode: 14 Type: FIFO Mode: 0000 Flags: 0x0
> Generation: 0 Version: 0x00000000
> User: 0 Group: 0 Size: 0
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
> @@ -55,7 +55,7 @@ debugfs -R ''stat sda'' -w test.img
> Inode: 15 Type: block special Mode: 0000 Flags: 0x0
> Generation: 0 Version: 0x00000000
> User: 0 Group: 0 Size: 0
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
> @@ -67,7 +67,7 @@ debugfs -R ''stat null'' -w test.img
> Inode: 16 Type: character special Mode: 0000 Flags: 0x0
> Generation: 0 Version: 0x00000000
> User: 0 Group: 0 Size: 0
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x50f560e0 -- Tue Jan 15 14:00:00 2013
> diff --git a/tests/f_badcluster/expect b/tests/f_badcluster/expect
> index 65a1641..75a3820 100644
> --- a/tests/f_badcluster/expect
> +++ b/tests/f_badcluster/expect
> @@ -116,7 +116,7 @@ debugfs: stat /a
> Inode: 12 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152157 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -128,7 +128,7 @@ debugfs: stat /b
> Inode: 13 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152158 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -140,7 +140,7 @@ debugfs: stat /c
> Inode: 14 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152159 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -152,7 +152,7 @@ debugfs: stat /d
> Inode: 15 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152160 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -163,7 +163,7 @@ debugfs: stat /e
> Inode: 16 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152161 Version: 0x00000001
> User: 0 Group: 0 Size: 6144
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -175,7 +175,7 @@ debugfs: stat /f
> Inode: 17 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152162 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> @@ -187,7 +187,7 @@ debugfs: stat /g
> Inode: 18 Type: regular Mode: 0644 Flags: 0x80000
> Generation: 1117152163 Version: 0x00000001
> User: 0 Group: 0 Size: 3072
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 32
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x539ff5b2 -- Tue Jun 17 08:00:50 2014
> diff --git a/tests/f_convert_bmap/expect.1 b/tests/f_convert_bmap/expect.1
> index 7d2ca86..0291f94 100644
> --- a/tests/f_convert_bmap/expect.1
> +++ b/tests/f_convert_bmap/expect.1
> @@ -2,7 +2,7 @@ debugfs: stat /a
> Inode: 12 Type: regular Mode: 0644 Flags: 0x0
> Generation: 1573716129 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 524288
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 1030
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x5457f87a:62ae2980 -- Mon Nov 3 21:49:46 2014
> diff --git a/tests/f_convert_bmap_and_extent/expect.1 b/tests/f_convert_bmap_and_extent/expect.1
> index 7af91aa..eb55db7 100644
> --- a/tests/f_convert_bmap_and_extent/expect.1
> +++ b/tests/f_convert_bmap_and_extent/expect.1
> @@ -2,7 +2,7 @@ debugfs: stat /a
> Inode: 12 Type: regular Mode: 0644 Flags: 0x0
> Generation: 1573716129 Version: 0x00000000:00000001
> User: 0 Group: 0 Size: 524288
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 1030
> Fragment: Address: 0 Number: 0 Size: 0
> ctime: 0x5457f87a:62ae2980 -- Mon Nov 3 21:49:46 2014
> diff --git a/tests/f_create_symlinks/expect b/tests/f_create_symlinks/expect
> index dca6e92..4409385 100644
> --- a/tests/f_create_symlinks/expect
> +++ b/tests/f_create_symlinks/expect
> @@ -20,7 +20,7 @@ debugfs -R "stat /l_30" test.img
> Inode: 12 Type: symlink Mode: 0777 Flags: 0x0
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 31
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> @@ -29,7 +29,7 @@ debugfs -R "stat /l_70" test.img
> Inode: 13 Type: symlink Mode: 0777 Flags: 0x10000000
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 71
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 0
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> @@ -40,7 +40,7 @@ debugfs -R "stat /l_500" test.img
> Inode: 14 Type: symlink Mode: 0777 Flags: 0x80000
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 501
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 2
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> @@ -50,7 +50,7 @@ debugfs -R "stat /l_1023" test.img
> Inode: 15 Type: symlink Mode: 0777 Flags: 0x80000
> Generation: 0 Version: 0x00000000:00000000
> User: 0 Group: 0 Project: 0 Size: 1024
> -File ACL: 0 Directory ACL: 0
> +File ACL: 0
> Links: 1 Blockcount: 2
> Fragment: Address: 0 Number: 0 Size: 0
> Size of extra inode fields: 32
> diff --git a/tests/f_recnect_bad/expect.1 b/tests/f_recnect_bad/expect.1
> index 8ba81e6..d4f72a1 100644
> --- a/tests/f_recnect_bad/expect.1
> +++ b/tests/f_recnect_bad/expect.1
> @@ -3,7 +3,7 @@ Pass 2: Checking directory structure
> i_faddr for inode 15 (/test/quux) is 23, should be zero.
> Clear? yes
>
> -i_dir_acl for inode 15 (/test/quux) is 12, should be zero.
> +i_size_high for inode 15 (/test/quux) is 12, should be zero.
> Clear? yes
>
> i_file_acl for inode 13 (/test/???) is 12, should be zero.
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-14 20:10:50

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] e2fsprogs: add support for 3-level htree

On Feb 13, 2017, at 2:20 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> The INCOMPAT_LARGEDIR feature allows larger directories to
> be created, both with directory sizes over 2GB and a
> maximum htree depth of 3 instead of the current limit of 2.
> These features are needed in order to exceed the current
> limit of approximately 10M entries in a single directory.

Note that the ~10M entry limit is for 4KB blocksize. With 1KB
blocksize the entry limit is more like 100k.

> debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
> added.
>
> Signed-off-by: Alexey Lyashkov <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>

Some very minor style fixes and improvement to the commit comment
possible if the patch is resent.

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> e2fsck/pass1.c | 5 +++--
> e2fsck/pass2.c | 5 +++--
> lib/ext2fs/ext2_fs.h | 3 ++-
> lib/ext2fs/ext2fs.h | 21 ++++++++++++++++++++-
> misc/mke2fs.c | 3 ++-
> misc/tune2fs.c | 3 ++-
> 6 files changed, 32 insertions(+), 8 deletions(-)
>
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index ce37176..fff7dcf 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
> }
>
> if (inode->i_faddr || frag || fsize ||
> - (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
> + (!ext2fs_has_feature_large_dir(fs) &&
> + (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
> mark_inode_bad(ctx, ino);
> if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
> !ext2fs_has_feature_64bit(fs->super) &&
> @@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
> return 1;
>
> pctx->num = root->indirect_levels;
> - if ((root->indirect_levels > 1) &&
> + if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
> fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
> return 1;
>
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index b89ebc9..139d48f 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -1058,7 +1058,8 @@ inline_read_fail:
> dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
> if ((root->reserved_zero ||
> root->info_length < 8 ||
> - root->indirect_levels > 1) &&
> + root->indirect_levels
> + > ext2_dir_htree_level(fs)) &&

(style) '>' operator at the end of the previous line

> fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
> clear_htree(ctx, ino);
> dx_dir->numblocks = 0;
> @@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
> } else
> not_fixed++;
> }
> - if (inode.i_size_high &&
> + if (inode.i_size_high && !ext2fs_has_feature_large_dir(fs) &&
> LINUX_S_ISDIR(inode.i_mode)) {
> if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
> inode.i_size_high = 0;
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 195e366..6d9a5d0 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)
>
> #define EXT2_FEATURE_COMPAT_SUPP 0
> #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
> - EXT4_FEATURE_INCOMPAT_MMP)
> + EXT4_FEATURE_INCOMPAT_MMP|\

(style) space before that '\', though I do see it is very inconsistent here

> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
> #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
> EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 786ded8..d714b44 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
> EXT4_FEATURE_INCOMPAT_64BIT|\
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
> EXT4_FEATURE_INCOMPAT_ENCRYPT|\
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED)
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
>
> #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
> @@ -1924,6 +1925,24 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
> return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
> }
>
> +/* htree levels for ext4 */
> +#define EXT4_HTREE_LEVEL_COMPAT 1
> +#define EXT4_HTREE_LEVEL 3
> +
> +_INLINE_ int ext2fs_has_feature_large_dir(ext2_filsys fs)
> +{
> + return EXT2_HAS_INCOMPAT_FEATURE(fs->super,
> + EXT4_FEATURE_INCOMPAT_LARGEDIR);
> +}
> +
> +_INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
> +{
> + if (ext2fs_has_feature_large_dir(fs))
> + return EXT4_HTREE_LEVEL;
> +
> + return EXT4_HTREE_LEVEL_COMPAT;
> +}
> +
> /*
> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
> */
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index 9f18c83..b2bf461 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_64BIT|
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> diff --git a/misc/tune2fs.c b/misc/tune2fs.c
> index 6239577..f78d105 100644
> --- a/misc/tune2fs.c
> +++ b/misc/tune2fs.c
> @@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_MMP |
> EXT4_FEATURE_INCOMPAT_64BIT |
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-14 20:15:03

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] e2fsck: 3 level hash tree directory optimization

On Feb 13, 2017, at 2:20 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> e2fsck fix for partitions with 3 level hash directories.
> Additional level is added to e2fsck -D codepath.
>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> debugfs/htree.c | 3 +-
> e2fsck/e2fsck.h | 1 +
> e2fsck/pass2.c | 68 +++++++++++++++++++++-------
> e2fsck/rehash.c | 123 ++++++++++++++++++++++++++++++++++++++++----------
> lib/ext2fs/ext2fs.h | 5 ++
> 5 files changed, 156 insertions(+), 44 deletions(-)
>
> diff --git a/debugfs/htree.c b/debugfs/htree.c
> index 54e55e2..8c18666 100644
> --- a/debugfs/htree.c
> +++ b/debugfs/htree.c
> @@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
> fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
> fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);
>
> - ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
> + ent = (struct ext2_dx_entry *)
> + ((char *)rootnode + rootnode->info_length);
>
> htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
> buf + current_fs->blocksize,
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index f356810..a4efbdf 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -122,6 +122,7 @@ struct dx_dirblock_info {
> blk64_t phys;
> int flags;
> blk64_t parent;
> + blk64_t previous;
> ext2_dirhash_t min_hash;
> ext2_dirhash_t max_hash;
> ext2_dirhash_t node_min_hash;
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index 139d48f..2e2c721 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -85,6 +85,39 @@ struct check_dir_struct {
> unsigned long long next_ra_off;
> };
>
> +static void update_parents(struct dx_dir_info *dx_dir, int type)
> +{
> + struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
> + int b;
> +
> + for (b = 0, dx_db = dx_dir->dx_block;
> + b < dx_dir->numblocks;
> + b++, dx_db++) {
> + dx_parent = &dx_dir->dx_block[dx_db->parent];
> + if (dx_db->type != type)
> + continue;
> +
> + /*
> + * XXX Make sure dx_parent->min_hash > dx_db->min_hash
> + */
> + if (dx_db->flags & DX_FLAG_FIRST) {
> + dx_parent->min_hash = dx_db->min_hash;
> + if (dx_parent->previous) {
> + dx_previous =
> + &dx_dir->dx_block[dx_parent->previous];
> + dx_previous->node_max_hash =
> + dx_parent->min_hash;
> + }
> + }
> + /*
> + * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> + */
> + if (dx_db->flags & DX_FLAG_LAST) {
> + dx_parent->max_hash = dx_db->max_hash;
> + }
> + }
> +}
> +
> void e2fsck_pass2(e2fsck_t ctx)
> {
> struct ext2_super_block *sb = ctx->fs->super;
> @@ -182,24 +215,11 @@ void e2fsck_pass2(e2fsck_t ctx)
> * Find all of the first and last leaf blocks, and
> * update their parent's min and max hash values
> */
> - for (b=0, dx_db = dx_dir->dx_block;
> - b < dx_dir->numblocks;
> - b++, dx_db++) {
> - if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
> - !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
> - continue;
> - dx_parent = &dx_dir->dx_block[dx_db->parent];
> - /*
> - * XXX Make sure dx_parent->min_hash > dx_db->min_hash
> - */
> - if (dx_db->flags & DX_FLAG_FIRST)
> - dx_parent->min_hash = dx_db->min_hash;
> - /*
> - * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> - */
> - if (dx_db->flags & DX_FLAG_LAST)
> - dx_parent->max_hash = dx_db->max_hash;
> - }
> + update_parents(dx_dir, DX_DIRBLOCK_LEAF);
> +
> + /* for 3 level htree: update 2 level parent's min
> + * and max hash values */
> + update_parents(dx_dir, DX_DIRBLOCK_NODE);
>
> for (b=0, dx_db = dx_dir->dx_block;
> b < dx_dir->numblocks;
> @@ -642,6 +662,10 @@ static void parse_int_node(ext2_filsys fs,
> dx_db->flags |= DX_FLAG_REFERENCED;
> dx_db->parent = db->blockcnt;
> }
> +
> + dx_db->previous =
> + i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;
> +
> if (hash < min_hash)
> min_hash = hash;
> if (hash > max_hash)
> @@ -949,6 +973,14 @@ static int check_dir_block(ext2_filsys fs,
> return DIRENT_ABORT;
> }
>
> + /* This will allow (at some point in the future) to punch out empty
> + * directory blocks and reduce the space used by a directory that grows
> + * very large and then the files are deleted. For now, all that is
> + * needed is to avoid e2fsck filling in these holes as part of
> + * feature flag. */
> + if (db->blk == 0 && ext2fs_has_feature_large_dir(fs))
> + return 0;
> +
> if (db->blk == 0 && !inline_data_size) {
> if (allocate_dir_block(ctx, db, buf, &cd->pctx))
> return 0;
> diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
> index 22a58f3..29d5a54 100644
> --- a/e2fsck/rehash.c
> +++ b/e2fsck/rehash.c
> @@ -603,6 +603,42 @@ static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
> return (struct ext2_dx_entry *) limits;
> }
>
> +static int alloc_blocks(ext2_filsys fs,
> + struct ext2_dx_countlimit **limit,
> + struct ext2_dx_entry **prev_ent,
> + struct ext2_dx_entry **next_ent,
> + int *prev_offset, int *next_offset,
> + struct out_dir *outdir, int i,
> + int *prev_count, int *next_count)
> +{
> + errcode_t retval;
> + char *block_start;
> +
> + if (*limit)
> + (*limit)->limit = (*limit)->count =
> + ext2fs_cpu_to_le16((*limit)->limit);
> + *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
> + (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
> +
> + if (i != 1)
> + (*prev_ent)->hash =
> + ext2fs_cpu_to_le32(outdir->hashes[i]);
> +
> + retval = get_next_block(fs, outdir, &block_start);
> + if (retval)
> + return retval;
> +
> + *next_ent = set_int_node(fs, block_start);
> + *limit = (struct ext2_dx_countlimit *)(*next_ent);
> + if (next_offset)
> + *next_offset = ((char *) *next_ent - outdir->buf);
> +
> + *next_count = (*limit)->limit;
> + (*prev_offset) += sizeof(struct ext2_dx_entry);
> + (*prev_count)--;
> +return 0;

(style) blank line before return
(style) indent return

> +}
> +
> /*
> * This function takes the leaf nodes which have been written in
> * outdir, and populates the root node and any necessary interior nodes.
> @@ -612,13 +648,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
> ext2_ino_t ino,
> ext2_ino_t parent)
> {
> - struct ext2_dx_root_info *root_info;
> - struct ext2_dx_entry *root, *dx_ent = 0;
> - struct ext2_dx_countlimit *root_limit, *limit;
> + struct ext2_dx_root_info *root_info;
> + struct ext2_dx_entry *root, *int_ent, *dx_ent = 0;
> + struct ext2_dx_countlimit *root_limit, *int_limit, *limit;
> errcode_t retval;
> char * block_start;
> - int i, c1, c2, nblks;
> - int limit_offset, root_offset;
> + int i, c1, c2, c3, nblks;
> + int limit_offset, int_offset, root_offset;
>
> root_info = set_root_node(fs, outdir->buf, ino, parent);
> root_offset = limit_offset = ((char *) root_info - outdir->buf) +
> @@ -628,7 +664,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
> nblks = outdir->num;
>
> /* Write out the pointer blocks */
> - if (nblks-1 <= c1) {
> + if (nblks - 1 <= c1) {
> /* Just write out the root block, and we're done */
> root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
> for (i=1; i < nblks; i++) {
> @@ -639,31 +675,23 @@ static errcode_t calculate_tree(ext2_filsys fs,
> root++;
> c1--;
> }
> - } else {
> + } else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
> c2 = 0;
> - limit = 0;
> + limit = NULL;
> root_info->indirect_levels = 1;
> for (i=1; i < nblks; i++) {
> - if (c1 == 0)
> + if (c2 == 0 && c1 == 0)
> return ENOSPC;
> if (c2 == 0) {
> - if (limit)
> - limit->limit = limit->count =
> - ext2fs_cpu_to_le16(limit->limit);
> - root = (struct ext2_dx_entry *)
> - (outdir->buf + root_offset);
> - root->block = ext2fs_cpu_to_le32(outdir->num);
> - if (i != 1)
> - root->hash =
> - ext2fs_cpu_to_le32(outdir->hashes[i]);
> - if ((retval = get_next_block(fs, outdir,
> - &block_start)))
> + retval = alloc_blocks(fs, &limit,
> + &root,
> + &dx_ent,
> + &root_offset,
> + NULL,
> + outdir, i,
> + &c1, &c2);
> + if (retval)
> return retval;
> - dx_ent = set_int_node(fs, block_start);
> - limit = (struct ext2_dx_countlimit *) dx_ent;
> - c2 = limit->limit;
> - root_offset += sizeof(struct ext2_dx_entry);
> - c1--;
> }
> dx_ent->block = ext2fs_cpu_to_le32(i);
> if (c2 != limit->limit)
> @@ -674,6 +702,51 @@ static errcode_t calculate_tree(ext2_filsys fs,
> }
> limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
> limit->limit = ext2fs_cpu_to_le16(limit->limit);
> + } else {
> + c2 = 0;
> + c3 = 0;
> + limit = NULL;
> + int_limit = 0;
> + root_info->indirect_levels = 2;
> + for (i = 1; i < nblks; i++) {
> + if (c3 == 0 && c2 == 0 && c1 == 0)
> + return ENOSPC;
> + if (c3 == 0 && c2 == 0) {
> + retval = alloc_blocks(fs, &int_limit,
> + &root,

(style) continued lines should be aligned after the '(' on the previous line
(style) pack as many arguments onto each line as will fit

> + &int_ent,
> + &root_offset,
> + &int_offset,
> + outdir, i,
> + &c1, &c2);
> + if (retval)
> + return retval;
> + }
> + if (c3 == 0) {
> + retval = alloc_blocks(fs, &limit,

...

> + &int_ent,
> + &dx_ent,
> + &int_offset,
> + NULL,
> + outdir, i,
> + &c2, &c3);
> + if (retval)
> + return retval;
> +
> + }
> + dx_ent->block = ext2fs_cpu_to_le32(i);
> + if (c3 != limit->limit)
> + dx_ent->hash =
> + ext2fs_cpu_to_le32(outdir->hashes[i]);
> + dx_ent++;
> + c3--;
> + }
> + int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
> + int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
> + limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
> + limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
> }
> root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
> root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index d714b44..79698ce 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -1943,6 +1943,11 @@ _INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
> return EXT4_HTREE_LEVEL_COMPAT;
> }
>
> +_INLINE_ int ext2fs_htree_intnode_maxrecs(ext2_filsys fs, int blocks)
> +{
> + return blocks * ((fs->blocksize - 8) / sizeof(struct ext2_dx_entry));
> +}
> +
> /*
> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
> */
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-14 20:40:39

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] tests: 3 level hash tree test

On Feb 13, 2017, at 2:20 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> Test is added that recreates a directory (-fD fsck option)
> with 47.5k of 255-symbol name files. This amount of files
> can not be stored in only a 2 level htree, so 3 levels are used.
>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> tests/f_large_dir/debugfs_script | 15 +++++++++++++++
> tests/f_large_dir/expect | 12 ++++++++++++
> tests/f_large_dir/name | 1 +
> tests/f_large_dir/script | 28 ++++++++++++++++++++++++++++
> 4 files changed, 56 insertions(+), 0 deletions(-)
>
> diff --git a/tests/f_large_dir/debugfs_script b/tests/f_large_dir/debugfs_script
> new file mode 100755
> index 0000000..b869db5
> --- /dev/null
> +++ b/tests/f_large_dir/debugfs_script

Instead of making a separate script to supply the commands to debugfs,
this could be part of the test script and then just piped into debugfs?
That keeps all the logic in one place in the test script and makes it
easier to see what the test is doing.

> @@ -0,0 +1,15 @@
> +#!/bin/bash
> +echo "feature large_dir"
> +echo "mkdir /foo"
> +echo "cd /foo"
> +touch foofile
> +echo "write foofile foofile"
> +for i in $(seq 47300); do

(style) "for ((i = 0; i < 47300; i++)); do" avoids an external shell command

It might be useful to show how "47300" is derived, something like:

NAMELEN=255
DIRENT_SZ=8
BLOCKSZ=1024
DIRENT_PER_LEAF=$((BLOCKSZ / (NAMELEN + DIRENT_SZ)))
HEADER=32
INDEX_SZ=8
INDEX_L1=$(((BLOCKSZ - HEADER) / INDEX_SZ))
INDEX_L2=$(((BLOCKSZ - DIRENT_SZ) / INDEX_SZ))
ENTRIES=$((INDEX_L1 * INDEX_L2 * DIRENT_PER_LEAF))

which gives 47244 entries to overflow the L2 htree.

> + [[ $(( $i % 3 )) -eq 0 ]] && \

(style) no need for '$' inside $((...))

> + echo "expand ./"

(style) no need to continue this line

> + [[ $(( $i % 5000 )) -eq 0 ]] && \
> + >&2 echo "$i processed"

(style) same two as above

> + new_uuid=`printf %0255X $i`

(style) prefer $(...) as used above
(style) not sure I'd call this a "uuid", maybe just "filename"? Or maybe
just get rid of echo and "new_uuid" entirely and use:

printf "ln foofile %0255X\n" $i

> + echo "ln foofile $new_uuid"
> +done
> +
> diff --git a/tests/f_large_dir/expect b/tests/f_large_dir/expect
> new file mode 100644
> index 0000000..9c94675
> --- /dev/null
> +++ b/tests/f_large_dir/expect
> @@ -0,0 +1,12 @@
> +Pass 1: Checking inodes, blocks, and sizes
> +Pass 2: Checking directory structure
> +Pass 3: Checking directory connectivity
> +Pass 3A: Optimizing directories
> +Pass 4: Checking reference counts
> +Inode 13 ref count is 1, should be 47301. Fix? yes
> +
> +Pass 5: Checking group summary information
> +
> +test.img: ***** FILE SYSTEM WAS MODIFIED *****
> +test.img: 13/115368 files (0.0% non-contiguous), 32839/460800 blocks
> +Exit status is 1
> diff --git a/tests/f_large_dir/name b/tests/f_large_dir/name
> new file mode 100644
> index 0000000..4b96890
> --- /dev/null
> +++ b/tests/f_large_dir/name
> @@ -0,0 +1 @@
> +optimize 3 level htree directories
> diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
> new file mode 100644
> index 0000000..25983c2
> --- /dev/null
> +++ b/tests/f_large_dir/script
> @@ -0,0 +1,28 @@
> +OUT=$test_name.log
> +EXP=$test_dir/expect
> +DFSCRIPT=$test_dir/debugfs_script
> +E2FSCK=../e2fsck/e2fsck
> +
> +TMPFILE2=/tmp/image

Why not use the existing $TMPFILE? That is sure to be in the right location,
while hard-coding "/tmp/image" can fail for various reasons (e.g. /tmp is
too small, another test is using /tmp/image when running tests in parallel,
or worse it is some important file).

> +cp /dev/null $OUT
> +$MKE2FS -b 1024 -O large_dir,uninit_bg,dir_nlink -F $TMPFILE2 460800 > /dev/null
> +$DFSCRIPT | $DEBUGFS -w -f /dev/stdin $TMPFILE2 > /dev/null

Something like:

{
echo "feature large_dir"
echo "mkdir /foo"
echo "cd /foo"
touch foofile
echo "write foofile foofile"
for ((i = 0; i < $ENTRIES; i++)); do
[[ $((i % 3)) == 0 ]] && echo "expand ./"
[[ $((i % 5000)) == 0 ]] && echo "$i processed" 1>&2
printf "ln foofile %0255X\n" $i
done
} | $DEBUGFS -w -f /dev/stdin $TMPFILE2 > /dev/null

> +$E2FSCK -yfD $TMPFILE2 > $OUT.new 2>&1
> +status=$?
> +echo Exit status is $status >> $OUT.new
> +sed -f $cmd_dir/filter.sed -e "s;$TMPFILE2;test.img;" $OUT.new >> $OUT
> +rm -f $OUT.new
> +
> +cmp -s $OUT $EXP
> +RC=$?
> +if [ $RC -eq 0 ]; then
> + echo "$test_name: $test_description: ok"
> + touch $test_name.ok
> +else
> + echo "$test_name: $test_description: failed"
> + diff -u $EXP $OUT > $test_name.failed
> +fi
> +
> +
> +
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-15 05:42:53

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] e2fsprogs: add support for 3-level htree

On Mon, Feb 13, 2017 at 12:20:15PM +0300, Artem Blagodarenko wrote:
> From: Artem Blagodarenko <[email protected]>
>
> The INCOMPAT_LARGEDIR feature allows larger directories to
> be created, both with directory sizes over 2GB and a
> maximum htree depth of 3 instead of the current limit of 2.
> These features are needed in order to exceed the current
> limit of approximately 10M entries in a single directory.
>
> debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
> added.
>
> Signed-off-by: Alexey Lyashkov <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> e2fsck/pass1.c | 5 +++--
> e2fsck/pass2.c | 5 +++--
> lib/ext2fs/ext2_fs.h | 3 ++-
> lib/ext2fs/ext2fs.h | 21 ++++++++++++++++++++-
> misc/mke2fs.c | 3 ++-
> misc/tune2fs.c | 3 ++-
> 6 files changed, 32 insertions(+), 8 deletions(-)
>
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index ce37176..fff7dcf 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
> }
>
> if (inode->i_faddr || frag || fsize ||
> - (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
> + (!ext2fs_has_feature_large_dir(fs) &&
> + (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
> mark_inode_bad(ctx, ino);
> if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
> !ext2fs_has_feature_64bit(fs->super) &&
> @@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
> return 1;
>
> pctx->num = root->indirect_levels;
> - if ((root->indirect_levels > 1) &&
> + if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
> fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
> return 1;
>
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index b89ebc9..139d48f 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -1058,7 +1058,8 @@ inline_read_fail:
> dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
> if ((root->reserved_zero ||
> root->info_length < 8 ||
> - root->indirect_levels > 1) &&
> + root->indirect_levels
> + > ext2_dir_htree_level(fs)) &&
> fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
> clear_htree(ctx, ino);
> dx_dir->numblocks = 0;
> @@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
> } else
> not_fixed++;
> }
> - if (inode.i_size_high &&
> + if (inode.i_size_high && !ext2fs_has_feature_large_dir(fs) &&
> LINUX_S_ISDIR(inode.i_mode)) {
> if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
> inode.i_size_high = 0;
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 195e366..6d9a5d0 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)
>
> #define EXT2_FEATURE_COMPAT_SUPP 0
> #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
> - EXT4_FEATURE_INCOMPAT_MMP)
> + EXT4_FEATURE_INCOMPAT_MMP|\
> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
> #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
> EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 786ded8..d714b44 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
> EXT4_FEATURE_INCOMPAT_64BIT|\
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
> EXT4_FEATURE_INCOMPAT_ENCRYPT|\
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED)
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
>
> #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
> @@ -1924,6 +1925,24 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
> return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
> }
>
> +/* htree levels for ext4 */
> +#define EXT4_HTREE_LEVEL_COMPAT 1
> +#define EXT4_HTREE_LEVEL 3
> +
> +_INLINE_ int ext2fs_has_feature_large_dir(ext2_filsys fs)
> +{
> + return EXT2_HAS_INCOMPAT_FEATURE(fs->super,
> + EXT4_FEATURE_INCOMPAT_LARGEDIR);
> +}

Already provided, see line 955:

EXT4_FEATURE_INCOMPAT_FUNCS(largedir, 4, LARGEDIR)

defines ext2fs_has_feature_largedir().

The rest looks ok.

--D

> +
> +_INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
> +{
> + if (ext2fs_has_feature_large_dir(fs))
> + return EXT4_HTREE_LEVEL;
> +
> + return EXT4_HTREE_LEVEL_COMPAT;
> +}
> +
> /*
> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
> */
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index 9f18c83..b2bf461 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_64BIT|
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> diff --git a/misc/tune2fs.c b/misc/tune2fs.c
> index 6239577..f78d105 100644
> --- a/misc/tune2fs.c
> +++ b/misc/tune2fs.c
> @@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_MMP |
> EXT4_FEATURE_INCOMPAT_64BIT |
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> --
> 1.7.1
>

2017-02-15 13:54:48

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 3/4] e2fsck: 3 level hash tree directory optimization

From: Artem Blagodarenko <[email protected]>

e2fsck fix for partitions with 3 level hash directories.
Additional level is added to e2fsck -D codepath.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
debugfs/htree.c | 3 +-
e2fsck/e2fsck.h | 1 +
e2fsck/pass2.c | 68 ++++++++++++++++++++++--------
e2fsck/rehash.c | 115 ++++++++++++++++++++++++++++++++++++++++-----------
lib/ext2fs/ext2fs.h | 5 ++
5 files changed, 148 insertions(+), 44 deletions(-)

diff --git a/debugfs/htree.c b/debugfs/htree.c
index 54e55e2..8c18666 100644
--- a/debugfs/htree.c
+++ b/debugfs/htree.c
@@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);

- ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
+ ent = (struct ext2_dx_entry *)
+ ((char *)rootnode + rootnode->info_length);

htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
buf + current_fs->blocksize,
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index f356810..a4efbdf 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -122,6 +122,7 @@ struct dx_dirblock_info {
blk64_t phys;
int flags;
blk64_t parent;
+ blk64_t previous;
ext2_dirhash_t min_hash;
ext2_dirhash_t max_hash;
ext2_dirhash_t node_min_hash;
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 139d48f..2e2c721 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -85,6 +85,39 @@ struct check_dir_struct {
unsigned long long next_ra_off;
};

+static void update_parents(struct dx_dir_info *dx_dir, int type)
+{
+ struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
+ int b;
+
+ for (b = 0, dx_db = dx_dir->dx_block;
+ b < dx_dir->numblocks;
+ b++, dx_db++) {
+ dx_parent = &dx_dir->dx_block[dx_db->parent];
+ if (dx_db->type != type)
+ continue;
+
+ /*
+ * XXX Make sure dx_parent->min_hash > dx_db->min_hash
+ */
+ if (dx_db->flags & DX_FLAG_FIRST) {
+ dx_parent->min_hash = dx_db->min_hash;
+ if (dx_parent->previous) {
+ dx_previous =
+ &dx_dir->dx_block[dx_parent->previous];
+ dx_previous->node_max_hash =
+ dx_parent->min_hash;
+ }
+ }
+ /*
+ * XXX Make sure dx_parent->max_hash < dx_db->max_hash
+ */
+ if (dx_db->flags & DX_FLAG_LAST) {
+ dx_parent->max_hash = dx_db->max_hash;
+ }
+ }
+}
+
void e2fsck_pass2(e2fsck_t ctx)
{
struct ext2_super_block *sb = ctx->fs->super;
@@ -182,24 +215,11 @@ void e2fsck_pass2(e2fsck_t ctx)
* Find all of the first and last leaf blocks, and
* update their parent's min and max hash values
*/
- for (b=0, dx_db = dx_dir->dx_block;
- b < dx_dir->numblocks;
- b++, dx_db++) {
- if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
- !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
- continue;
- dx_parent = &dx_dir->dx_block[dx_db->parent];
- /*
- * XXX Make sure dx_parent->min_hash > dx_db->min_hash
- */
- if (dx_db->flags & DX_FLAG_FIRST)
- dx_parent->min_hash = dx_db->min_hash;
- /*
- * XXX Make sure dx_parent->max_hash < dx_db->max_hash
- */
- if (dx_db->flags & DX_FLAG_LAST)
- dx_parent->max_hash = dx_db->max_hash;
- }
+ update_parents(dx_dir, DX_DIRBLOCK_LEAF);
+
+ /* for 3 level htree: update 2 level parent's min
+ * and max hash values */
+ update_parents(dx_dir, DX_DIRBLOCK_NODE);

for (b=0, dx_db = dx_dir->dx_block;
b < dx_dir->numblocks;
@@ -642,6 +662,10 @@ static void parse_int_node(ext2_filsys fs,
dx_db->flags |= DX_FLAG_REFERENCED;
dx_db->parent = db->blockcnt;
}
+
+ dx_db->previous =
+ i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;
+
if (hash < min_hash)
min_hash = hash;
if (hash > max_hash)
@@ -949,6 +973,14 @@ static int check_dir_block(ext2_filsys fs,
return DIRENT_ABORT;
}

+ /* This will allow (at some point in the future) to punch out empty
+ * directory blocks and reduce the space used by a directory that grows
+ * very large and then the files are deleted. For now, all that is
+ * needed is to avoid e2fsck filling in these holes as part of
+ * feature flag. */
+ if (db->blk == 0 && ext2fs_has_feature_large_dir(fs))
+ return 0;
+
if (db->blk == 0 && !inline_data_size) {
if (allocate_dir_block(ctx, db, buf, &cd->pctx))
return 0;
diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index 22a58f3..7dcb386 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -603,6 +603,43 @@ static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
return (struct ext2_dx_entry *) limits;
}

+static int alloc_blocks(ext2_filsys fs,
+ struct ext2_dx_countlimit **limit,
+ struct ext2_dx_entry **prev_ent,
+ struct ext2_dx_entry **next_ent,
+ int *prev_offset, int *next_offset,
+ struct out_dir *outdir, int i,
+ int *prev_count, int *next_count)
+{
+ errcode_t retval;
+ char *block_start;
+
+ if (*limit)
+ (*limit)->limit = (*limit)->count =
+ ext2fs_cpu_to_le16((*limit)->limit);
+ *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
+ (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
+
+ if (i != 1)
+ (*prev_ent)->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+
+ retval = get_next_block(fs, outdir, &block_start);
+ if (retval)
+ return retval;
+
+ *next_ent = set_int_node(fs, block_start);
+ *limit = (struct ext2_dx_countlimit *)(*next_ent);
+ if (next_offset)
+ *next_offset = ((char *) *next_ent - outdir->buf);
+
+ *next_count = (*limit)->limit;
+ (*prev_offset) += sizeof(struct ext2_dx_entry);
+ (*prev_count)--;
+
+ return 0;
+}
+
/*
* This function takes the leaf nodes which have been written in
* outdir, and populates the root node and any necessary interior nodes.
@@ -612,13 +649,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
ext2_ino_t ino,
ext2_ino_t parent)
{
- struct ext2_dx_root_info *root_info;
- struct ext2_dx_entry *root, *dx_ent = 0;
- struct ext2_dx_countlimit *root_limit, *limit;
+ struct ext2_dx_root_info *root_info;
+ struct ext2_dx_entry *root, *int_ent, *dx_ent = 0;
+ struct ext2_dx_countlimit *root_limit, *int_limit, *limit;
errcode_t retval;
char * block_start;
- int i, c1, c2, nblks;
- int limit_offset, root_offset;
+ int i, c1, c2, c3, nblks;
+ int limit_offset, int_offset, root_offset;

root_info = set_root_node(fs, outdir->buf, ino, parent);
root_offset = limit_offset = ((char *) root_info - outdir->buf) +
@@ -628,7 +665,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
nblks = outdir->num;

/* Write out the pointer blocks */
- if (nblks-1 <= c1) {
+ if (nblks - 1 <= c1) {
/* Just write out the root block, and we're done */
root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
for (i=1; i < nblks; i++) {
@@ -639,31 +676,20 @@ static errcode_t calculate_tree(ext2_filsys fs,
root++;
c1--;
}
- } else {
+ } else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
c2 = 0;
- limit = 0;
+ limit = NULL;
root_info->indirect_levels = 1;
for (i=1; i < nblks; i++) {
- if (c1 == 0)
+ if (c2 == 0 && c1 == 0)
return ENOSPC;
if (c2 == 0) {
- if (limit)
- limit->limit = limit->count =
- ext2fs_cpu_to_le16(limit->limit);
- root = (struct ext2_dx_entry *)
- (outdir->buf + root_offset);
- root->block = ext2fs_cpu_to_le32(outdir->num);
- if (i != 1)
- root->hash =
- ext2fs_cpu_to_le32(outdir->hashes[i]);
- if ((retval = get_next_block(fs, outdir,
- &block_start)))
+ retval = alloc_blocks(fs, &limit, &root,
+ &dx_ent, &root_offset,
+ NULL, outdir, i, &c1,
+ &c2);
+ if (retval)
return retval;
- dx_ent = set_int_node(fs, block_start);
- limit = (struct ext2_dx_countlimit *) dx_ent;
- c2 = limit->limit;
- root_offset += sizeof(struct ext2_dx_entry);
- c1--;
}
dx_ent->block = ext2fs_cpu_to_le32(i);
if (c2 != limit->limit)
@@ -674,6 +700,45 @@ static errcode_t calculate_tree(ext2_filsys fs,
}
limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
limit->limit = ext2fs_cpu_to_le16(limit->limit);
+ } else {
+ c2 = 0;
+ c3 = 0;
+ limit = NULL;
+ int_limit = 0;
+ root_info->indirect_levels = 2;
+ for (i = 1; i < nblks; i++) {
+ if (c3 == 0 && c2 == 0 && c1 == 0)
+ return ENOSPC;
+ if (c3 == 0 && c2 == 0) {
+ retval = alloc_blocks(fs, &int_limit, &root,
+ &int_ent, &root_offset,
+ &int_offset, outdir, i,
+ &c1, &c2);
+ if (retval)
+ return retval;
+ }
+ if (c3 == 0) {
+ retval = alloc_blocks(fs, &limit, &int_ent,
+ &dx_ent, &int_offset,
+ NULL, outdir, i, &c2,
+ &c3);
+ if (retval)
+ return retval;
+
+ }
+ dx_ent->block = ext2fs_cpu_to_le32(i);
+ if (c3 != limit->limit)
+ dx_ent->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+ dx_ent++;
+ c3--;
+ }
+ int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
+ int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
+ limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
+ limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
}
root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index d714b44..79698ce 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1943,6 +1943,11 @@ _INLINE_ unsigned int ext2_dir_htree_level(ext2_filsys fs)
return EXT4_HTREE_LEVEL_COMPAT;
}

+_INLINE_ int ext2fs_htree_intnode_maxrecs(ext2_filsys fs, int blocks)
+{
+ return blocks * ((fs->blocksize - 8) / sizeof(struct ext2_dx_entry));
+}
+
/*
* This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
*/
--
1.7.1

2017-02-15 15:45:53

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 4/4] tests: 3 level hash tree test

From: Artem Blagodarenko <[email protected]>

Test is added that recreates a directory (-fD fsck option)
with 47.5k of 255-symbol name files. This amount of files
can not be stored in only a 2 level htree, so 3 levels are used.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
tests/f_large_dir/expect | 12 +++++++++++
tests/f_large_dir/name | 1 +
tests/f_large_dir/script | 47 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/tests/f_large_dir/expect b/tests/f_large_dir/expect
new file mode 100644
index 0000000..b099460
--- /dev/null
+++ b/tests/f_large_dir/expect
@@ -0,0 +1,12 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 3A: Optimizing directories
+Pass 4: Checking reference counts
+Inode 13 ref count is 1, should be 47245. Fix? yes
+
+Pass 5: Checking group summary information
+
+test.img: ***** FILE SYSTEM WAS MODIFIED *****
+test.img: 13/115368 files (0.0% non-contiguous), 32817/460800 blocks
+Exit status is 1
diff --git a/tests/f_large_dir/name b/tests/f_large_dir/name
new file mode 100644
index 0000000..4b96890
--- /dev/null
+++ b/tests/f_large_dir/name
@@ -0,0 +1 @@
+optimize 3 level htree directories
diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
new file mode 100644
index 0000000..e68576d
--- /dev/null
+++ b/tests/f_large_dir/script
@@ -0,0 +1,47 @@
+OUT=$test_name.log
+EXP=$test_dir/expect
+E2FSCK=../e2fsck/e2fsck
+
+NAMELEN=255
+DIRENT_SZ=8
+BLOCKSZ=1024
+DIRENT_PER_LEAF=$((BLOCKSZ / (NAMELEN + DIRENT_SZ)))
+HEADER=32
+INDEX_SZ=8
+INDEX_L1=$(((BLOCKSZ - HEADER) / INDEX_SZ))
+INDEX_L2=$(((BLOCKSZ - DIRENT_SZ) / INDEX_SZ))
+ENTRIES=$((INDEX_L1 * INDEX_L2 * DIRENT_PER_LEAF))
+
+cp /dev/null $OUT
+$MKE2FS -b 1024 -O large_dir,uninit_bg,dir_nlink -F $TMPFILE 460800 > /dev/null
+{
+ echo "feature large_dir"
+ echo "mkdir /foo"
+ echo "cd /foo"
+ touch foofile
+ echo "write foofile foofile"
+ for ((i = 0; i < $ENTRIES; i++)); do
+ [[ $(( i % DIRENT_PER_LEAF )) -eq 0 ]] && echo "expand ./"
+ [[ $(( i % 5000 )) -eq 0 ]] && >&2 echo "$i processed"
+ printf "ln foofile %0255X\n" $i
+ done
+} | $DEBUGFS -w -f /dev/stdin $TMPFILE > /dev/null
+
+$E2FSCK -yfD $TMPFILE > $OUT.new 2>&1
+status=$?
+echo Exit status is $status >> $OUT.new
+sed -f $cmd_dir/filter.sed -e "s;$TMPFILE;test.img;" $OUT.new >> $OUT
+rm -f $OUT.new
+
+cmp -s $OUT $EXP
+RC=$?
+if [ $RC -eq 0 ]; then
+ echo "$test_name: $test_description: ok"
+ touch $test_name.ok
+else
+ echo "$test_name: $test_description: failed"
+ diff -u $EXP $OUT > $test_name.failed
+fi
+
+
+
--
1.7.1

2017-02-15 17:43:21

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 2/4] e2fsprogs: add support for 3-level htree

From: Artem Blagodarenko <[email protected]>

The INCOMPAT_LARGEDIR feature allows larger directories to
be created, both with directory sizes over 2GB and a
maximum htree depth of 3 instead of the current limit of 2.
These features are needed in order to exceed the current
limit of approximately 10M entries in a single directory
for 4KB blocksize (~100k for 1KB).

debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
added.

Signed-off-by: Alexey Lyashkov <[email protected]>
Signed-off-by: Artem Blagodarenko <[email protected]>
---
e2fsck/pass1.c | 5 +++--
e2fsck/pass2.c | 5 +++--
lib/ext2fs/ext2_fs.h | 3 ++-
lib/ext2fs/ext2fs.h | 15 ++++++++++++++-
misc/mke2fs.c | 3 ++-
misc/tune2fs.c | 3 ++-
6 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index ce37176..11546d7 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
}

if (inode->i_faddr || frag || fsize ||
- (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
+ (!ext2fs_has_feature_largedir(fs->super) &&
+ (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
mark_inode_bad(ctx, ino);
if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
!ext2fs_has_feature_64bit(fs->super) &&
@@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
return 1;

pctx->num = root->indirect_levels;
- if ((root->indirect_levels > 1) &&
+ if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
return 1;

diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index b89ebc9..2f41fc4 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -1058,7 +1058,8 @@ inline_read_fail:
dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
if ((root->reserved_zero ||
root->info_length < 8 ||
- root->indirect_levels > 1) &&
+ root->indirect_levels >=
+ ext2_dir_htree_level(fs)) &&
fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
clear_htree(ctx, ino);
dx_dir->numblocks = 0;
@@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
} else
not_fixed++;
}
- if (inode.i_size_high &&
+ if (inode.i_size_high && !ext2fs_has_feature_largedir(fs->super) &&
LINUX_S_ISDIR(inode.i_mode)) {
if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
inode.i_size_high = 0;
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 195e366..66b7058 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)

#define EXT2_FEATURE_COMPAT_SUPP 0
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
- EXT4_FEATURE_INCOMPAT_MMP)
+ EXT4_FEATURE_INCOMPAT_MMP| \
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)
#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 786ded8..c68be50 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
EXT4_FEATURE_INCOMPAT_64BIT|\
EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
EXT4_FEATURE_INCOMPAT_ENCRYPT|\
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
+ EXT4_FEATURE_INCOMPAT_LARGEDIR)

#define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
@@ -1924,6 +1925,18 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
}

+/* htree levels for ext4 */
+#define EXT4_HTREE_LEVEL_COMPAT 2
+#define EXT4_HTREE_LEVEL 3
+
+static inline unsigned int ext2_dir_htree_level(ext2_filsys fs)
+{
+ if (ext2fs_has_feature_largedir(fs->super))
+ return EXT4_HTREE_LEVEL;
+
+ return EXT4_HTREE_LEVEL_COMPAT;
+}
+
/*
* This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
*/
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 9f18c83..b2bf461 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
EXT4_FEATURE_INCOMPAT_64BIT|
EXT4_FEATURE_INCOMPAT_INLINE_DATA|
EXT4_FEATURE_INCOMPAT_ENCRYPT |
- EXT4_FEATURE_INCOMPAT_CSUM_SEED,
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED |
+ EXT4_FEATURE_INCOMPAT_LARGEDIR,
/* R/O compat */
EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 6239577..f78d105 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
EXT4_FEATURE_INCOMPAT_MMP |
EXT4_FEATURE_INCOMPAT_64BIT |
EXT4_FEATURE_INCOMPAT_ENCRYPT |
- EXT4_FEATURE_INCOMPAT_CSUM_SEED,
+ EXT4_FEATURE_INCOMPAT_CSUM_SEED |
+ EXT4_FEATURE_INCOMPAT_LARGEDIR,
/* R/O compat */
EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
--
1.7.1

2017-02-15 17:45:05

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v4 3/4] e2fsck: 3 level hash tree directory optimization

From: Artem Blagodarenko <[email protected]>

e2fsck fix for partitions with 3-level hash directories.
An additional level is added to the e2fsck -D codepath.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
debugfs/htree.c | 3 +-
e2fsck/e2fsck.h | 1 +
e2fsck/pass2.c | 68 ++++++++++++++++++++++--------
e2fsck/rehash.c | 115 ++++++++++++++++++++++++++++++++++++++++-----------
lib/ext2fs/ext2fs.h | 5 ++
5 files changed, 148 insertions(+), 44 deletions(-)

diff --git a/debugfs/htree.c b/debugfs/htree.c
index 54e55e2..8c18666 100644
--- a/debugfs/htree.c
+++ b/debugfs/htree.c
@@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);

- ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
+ ent = (struct ext2_dx_entry *)
+ ((char *)rootnode + rootnode->info_length);

htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
buf + current_fs->blocksize,
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index f356810..a4efbdf 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -122,6 +122,7 @@ struct dx_dirblock_info {
blk64_t phys;
int flags;
blk64_t parent;
+ blk64_t previous;
ext2_dirhash_t min_hash;
ext2_dirhash_t max_hash;
ext2_dirhash_t node_min_hash;
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 2f41fc4..c1c4e48 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -85,6 +85,39 @@ struct check_dir_struct {
unsigned long long next_ra_off;
};

+static void update_parents(struct dx_dir_info *dx_dir, int type)
+{
+ struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
+ int b;
+
+ for (b = 0, dx_db = dx_dir->dx_block;
+ b < dx_dir->numblocks;
+ b++, dx_db++) {
+ dx_parent = &dx_dir->dx_block[dx_db->parent];
+ if (dx_db->type != type)
+ continue;
+
+ /*
+ * XXX Make sure dx_parent->min_hash > dx_db->min_hash
+ */
+ if (dx_db->flags & DX_FLAG_FIRST) {
+ dx_parent->min_hash = dx_db->min_hash;
+ if (dx_parent->previous) {
+ dx_previous =
+ &dx_dir->dx_block[dx_parent->previous];
+ dx_previous->node_max_hash =
+ dx_parent->min_hash;
+ }
+ }
+ /*
+ * XXX Make sure dx_parent->max_hash < dx_db->max_hash
+ */
+ if (dx_db->flags & DX_FLAG_LAST) {
+ dx_parent->max_hash = dx_db->max_hash;
+ }
+ }
+}
+
void e2fsck_pass2(e2fsck_t ctx)
{
struct ext2_super_block *sb = ctx->fs->super;
@@ -182,24 +215,11 @@ void e2fsck_pass2(e2fsck_t ctx)
* Find all of the first and last leaf blocks, and
* update their parent's min and max hash values
*/
- for (b=0, dx_db = dx_dir->dx_block;
- b < dx_dir->numblocks;
- b++, dx_db++) {
- if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
- !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
- continue;
- dx_parent = &dx_dir->dx_block[dx_db->parent];
- /*
- * XXX Make sure dx_parent->min_hash > dx_db->min_hash
- */
- if (dx_db->flags & DX_FLAG_FIRST)
- dx_parent->min_hash = dx_db->min_hash;
- /*
- * XXX Make sure dx_parent->max_hash < dx_db->max_hash
- */
- if (dx_db->flags & DX_FLAG_LAST)
- dx_parent->max_hash = dx_db->max_hash;
- }
+ update_parents(dx_dir, DX_DIRBLOCK_LEAF);
+
+ /* for 3 level htree: update 2 level parent's min
+ * and max hash values */
+ update_parents(dx_dir, DX_DIRBLOCK_NODE);

for (b=0, dx_db = dx_dir->dx_block;
b < dx_dir->numblocks;
@@ -642,6 +662,10 @@ static void parse_int_node(ext2_filsys fs,
dx_db->flags |= DX_FLAG_REFERENCED;
dx_db->parent = db->blockcnt;
}
+
+ dx_db->previous =
+ i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;
+
if (hash < min_hash)
min_hash = hash;
if (hash > max_hash)
@@ -949,6 +973,14 @@ static int check_dir_block(ext2_filsys fs,
return DIRENT_ABORT;
}

+ /* This will allow (at some point in the future) to punch out empty
+ * directory blocks and reduce the space used by a directory that grows
+ * very large and then the files are deleted. For now, all that is
+ * needed is to avoid e2fsck filling in these holes as part of
+ * feature flag. */
+ if (db->blk == 0 && ext2fs_has_feature_largedir(fs))
+ return 0;
+
if (db->blk == 0 && !inline_data_size) {
if (allocate_dir_block(ctx, db, buf, &cd->pctx))
return 0;
diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index 22a58f3..7dcb386 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -603,6 +603,43 @@ static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
return (struct ext2_dx_entry *) limits;
}

+static int alloc_blocks(ext2_filsys fs,
+ struct ext2_dx_countlimit **limit,
+ struct ext2_dx_entry **prev_ent,
+ struct ext2_dx_entry **next_ent,
+ int *prev_offset, int *next_offset,
+ struct out_dir *outdir, int i,
+ int *prev_count, int *next_count)
+{
+ errcode_t retval;
+ char *block_start;
+
+ if (*limit)
+ (*limit)->limit = (*limit)->count =
+ ext2fs_cpu_to_le16((*limit)->limit);
+ *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
+ (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
+
+ if (i != 1)
+ (*prev_ent)->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+
+ retval = get_next_block(fs, outdir, &block_start);
+ if (retval)
+ return retval;
+
+ *next_ent = set_int_node(fs, block_start);
+ *limit = (struct ext2_dx_countlimit *)(*next_ent);
+ if (next_offset)
+ *next_offset = ((char *) *next_ent - outdir->buf);
+
+ *next_count = (*limit)->limit;
+ (*prev_offset) += sizeof(struct ext2_dx_entry);
+ (*prev_count)--;
+
+ return 0;
+}
+
/*
* This function takes the leaf nodes which have been written in
* outdir, and populates the root node and any necessary interior nodes.
@@ -612,13 +649,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
ext2_ino_t ino,
ext2_ino_t parent)
{
- struct ext2_dx_root_info *root_info;
- struct ext2_dx_entry *root, *dx_ent = 0;
- struct ext2_dx_countlimit *root_limit, *limit;
+ struct ext2_dx_root_info *root_info;
+ struct ext2_dx_entry *root, *int_ent, *dx_ent = 0;
+ struct ext2_dx_countlimit *root_limit, *int_limit, *limit;
errcode_t retval;
char * block_start;
- int i, c1, c2, nblks;
- int limit_offset, root_offset;
+ int i, c1, c2, c3, nblks;
+ int limit_offset, int_offset, root_offset;

root_info = set_root_node(fs, outdir->buf, ino, parent);
root_offset = limit_offset = ((char *) root_info - outdir->buf) +
@@ -628,7 +665,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
nblks = outdir->num;

/* Write out the pointer blocks */
- if (nblks-1 <= c1) {
+ if (nblks - 1 <= c1) {
/* Just write out the root block, and we're done */
root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
for (i=1; i < nblks; i++) {
@@ -639,31 +676,20 @@ static errcode_t calculate_tree(ext2_filsys fs,
root++;
c1--;
}
- } else {
+ } else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
c2 = 0;
- limit = 0;
+ limit = NULL;
root_info->indirect_levels = 1;
for (i=1; i < nblks; i++) {
- if (c1 == 0)
+ if (c2 == 0 && c1 == 0)
return ENOSPC;
if (c2 == 0) {
- if (limit)
- limit->limit = limit->count =
- ext2fs_cpu_to_le16(limit->limit);
- root = (struct ext2_dx_entry *)
- (outdir->buf + root_offset);
- root->block = ext2fs_cpu_to_le32(outdir->num);
- if (i != 1)
- root->hash =
- ext2fs_cpu_to_le32(outdir->hashes[i]);
- if ((retval = get_next_block(fs, outdir,
- &block_start)))
+ retval = alloc_blocks(fs, &limit, &root,
+ &dx_ent, &root_offset,
+ NULL, outdir, i, &c1,
+ &c2);
+ if (retval)
return retval;
- dx_ent = set_int_node(fs, block_start);
- limit = (struct ext2_dx_countlimit *) dx_ent;
- c2 = limit->limit;
- root_offset += sizeof(struct ext2_dx_entry);
- c1--;
}
dx_ent->block = ext2fs_cpu_to_le32(i);
if (c2 != limit->limit)
@@ -674,6 +700,45 @@ static errcode_t calculate_tree(ext2_filsys fs,
}
limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
limit->limit = ext2fs_cpu_to_le16(limit->limit);
+ } else {
+ c2 = 0;
+ c3 = 0;
+ limit = NULL;
+ int_limit = 0;
+ root_info->indirect_levels = 2;
+ for (i = 1; i < nblks; i++) {
+ if (c3 == 0 && c2 == 0 && c1 == 0)
+ return ENOSPC;
+ if (c3 == 0 && c2 == 0) {
+ retval = alloc_blocks(fs, &int_limit, &root,
+ &int_ent, &root_offset,
+ &int_offset, outdir, i,
+ &c1, &c2);
+ if (retval)
+ return retval;
+ }
+ if (c3 == 0) {
+ retval = alloc_blocks(fs, &limit, &int_ent,
+ &dx_ent, &int_offset,
+ NULL, outdir, i, &c2,
+ &c3);
+ if (retval)
+ return retval;
+
+ }
+ dx_ent->block = ext2fs_cpu_to_le32(i);
+ if (c3 != limit->limit)
+ dx_ent->hash =
+ ext2fs_cpu_to_le32(outdir->hashes[i]);
+ dx_ent++;
+ c3--;
+ }
+ int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
+ int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
+ limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
+ limit->limit = ext2fs_cpu_to_le16(limit->limit);
+
}
root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index c68be50..baa422c 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1937,6 +1937,11 @@ static inline unsigned int ext2_dir_htree_level(ext2_filsys fs)
return EXT4_HTREE_LEVEL_COMPAT;
}

+_INLINE_ int ext2fs_htree_intnode_maxrecs(ext2_filsys fs, int blocks)
+{
+ return blocks * ((fs->blocksize - 8) / sizeof(struct ext2_dx_entry));
+}
+
/*
* This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
*/
--
1.7.1

2017-02-17 03:55:46

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v3 2/4] e2fsprogs: add support for 3-level htree


> On Feb 15, 2017, at 10:43 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> The INCOMPAT_LARGEDIR feature allows larger directories to
> be created, both with directory sizes over 2GB and a
> maximum htree depth of 3 instead of the current limit of 2.
> These features are needed in order to exceed the current
> limit of approximately 10M entries in a single directory
> for 4KB blocksize (~100k for 1KB).
>
> debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
> added.
>
> Signed-off-by: Alexey Lyashkov <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> e2fsck/pass1.c | 5 +++--
> e2fsck/pass2.c | 5 +++--
> lib/ext2fs/ext2_fs.h | 3 ++-
> lib/ext2fs/ext2fs.h | 15 ++++++++++++++-
> misc/mke2fs.c | 3 ++-
> misc/tune2fs.c | 3 ++-
> 6 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index ce37176..11546d7 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
> }
>
> if (inode->i_faddr || frag || fsize ||
> - (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
> + (!ext2fs_has_feature_largedir(fs->super) &&
> + (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
> mark_inode_bad(ctx, ino);
> if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
> !ext2fs_has_feature_64bit(fs->super) &&
> @@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
> return 1;
>
> pctx->num = root->indirect_levels;
> - if ((root->indirect_levels > 1) &&
> + if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
> fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
> return 1;
>
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index b89ebc9..2f41fc4 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -1058,7 +1058,8 @@ inline_read_fail:
> dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
> if ((root->reserved_zero ||
> root->info_length < 8 ||
> - root->indirect_levels > 1) &&
> + root->indirect_levels >=
> + ext2_dir_htree_level(fs)) &&
> fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
> clear_htree(ctx, ino);
> dx_dir->numblocks = 0;
> @@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
> } else
> not_fixed++;
> }
> - if (inode.i_size_high &&
> + if (inode.i_size_high && !ext2fs_has_feature_largedir(fs->super) &&
> LINUX_S_ISDIR(inode.i_mode)) {
> if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
> inode.i_size_high = 0;
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 195e366..66b7058 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)
>
> #define EXT2_FEATURE_COMPAT_SUPP 0
> #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
> - EXT4_FEATURE_INCOMPAT_MMP)
> + EXT4_FEATURE_INCOMPAT_MMP| \
> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
> #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
> EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 786ded8..c68be50 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
> EXT4_FEATURE_INCOMPAT_64BIT|\
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
> EXT4_FEATURE_INCOMPAT_ENCRYPT|\
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED)
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
>
> #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
> @@ -1924,6 +1925,18 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
> return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
> }
>
> +/* htree levels for ext4 */
> +#define EXT4_HTREE_LEVEL_COMPAT 2
> +#define EXT4_HTREE_LEVEL 3
> +
> +static inline unsigned int ext2_dir_htree_level(ext2_filsys fs)
> +{
> + if (ext2fs_has_feature_largedir(fs->super))
> + return EXT4_HTREE_LEVEL;
> +
> + return EXT4_HTREE_LEVEL_COMPAT;
> +}
> +
> /*
> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
> */
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index 9f18c83..b2bf461 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_64BIT|
> EXT4_FEATURE_INCOMPAT_INLINE_DATA|
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> diff --git a/misc/tune2fs.c b/misc/tune2fs.c
> index 6239577..f78d105 100644
> --- a/misc/tune2fs.c
> +++ b/misc/tune2fs.c
> @@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
> EXT4_FEATURE_INCOMPAT_MMP |
> EXT4_FEATURE_INCOMPAT_64BIT |
> EXT4_FEATURE_INCOMPAT_ENCRYPT |
> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
> /* R/O compat */
> EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-17 03:57:41

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v4 3/4] e2fsck: 3 level hash tree directory optimization


> On Feb 15, 2017, at 10:45 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> e2fsck fix for partitions with 3-level hash directories.
> An additional level is added to the e2fsck -D codepath.
>
> Signed-off-by: Artem Blagodarenko <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> debugfs/htree.c | 3 +-
> e2fsck/e2fsck.h | 1 +
> e2fsck/pass2.c | 68 ++++++++++++++++++++++--------
> e2fsck/rehash.c | 115 ++++++++++++++++++++++++++++++++++++++++-----------
> lib/ext2fs/ext2fs.h | 5 ++
> 5 files changed, 148 insertions(+), 44 deletions(-)
>
> diff --git a/debugfs/htree.c b/debugfs/htree.c
> index 54e55e2..8c18666 100644
> --- a/debugfs/htree.c
> +++ b/debugfs/htree.c
> @@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
> fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
> fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);
>
> - ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
> + ent = (struct ext2_dx_entry *)
> + ((char *)rootnode + rootnode->info_length);
>
> htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
> buf + current_fs->blocksize,
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index f356810..a4efbdf 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -122,6 +122,7 @@ struct dx_dirblock_info {
> blk64_t phys;
> int flags;
> blk64_t parent;
> + blk64_t previous;
> ext2_dirhash_t min_hash;
> ext2_dirhash_t max_hash;
> ext2_dirhash_t node_min_hash;
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index 2f41fc4..c1c4e48 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -85,6 +85,39 @@ struct check_dir_struct {
> unsigned long long next_ra_off;
> };
>
> +static void update_parents(struct dx_dir_info *dx_dir, int type)
> +{
> + struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
> + int b;
> +
> + for (b = 0, dx_db = dx_dir->dx_block;
> + b < dx_dir->numblocks;
> + b++, dx_db++) {
> + dx_parent = &dx_dir->dx_block[dx_db->parent];
> + if (dx_db->type != type)
> + continue;
> +
> + /*
> + * XXX Make sure dx_parent->min_hash > dx_db->min_hash
> + */
> + if (dx_db->flags & DX_FLAG_FIRST) {
> + dx_parent->min_hash = dx_db->min_hash;
> + if (dx_parent->previous) {
> + dx_previous =
> + &dx_dir->dx_block[dx_parent->previous];
> + dx_previous->node_max_hash =
> + dx_parent->min_hash;
> + }
> + }
> + /*
> + * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> + */
> + if (dx_db->flags & DX_FLAG_LAST) {
> + dx_parent->max_hash = dx_db->max_hash;
> + }
> + }
> +}
> +
> void e2fsck_pass2(e2fsck_t ctx)
> {
> struct ext2_super_block *sb = ctx->fs->super;
> @@ -182,24 +215,11 @@ void e2fsck_pass2(e2fsck_t ctx)
> * Find all of the first and last leaf blocks, and
> * update their parent's min and max hash values
> */
> - for (b=0, dx_db = dx_dir->dx_block;
> - b < dx_dir->numblocks;
> - b++, dx_db++) {
> - if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
> - !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
> - continue;
> - dx_parent = &dx_dir->dx_block[dx_db->parent];
> - /*
> - * XXX Make sure dx_parent->min_hash > dx_db->min_hash
> - */
> - if (dx_db->flags & DX_FLAG_FIRST)
> - dx_parent->min_hash = dx_db->min_hash;
> - /*
> - * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> - */
> - if (dx_db->flags & DX_FLAG_LAST)
> - dx_parent->max_hash = dx_db->max_hash;
> - }
> + update_parents(dx_dir, DX_DIRBLOCK_LEAF);
> +
> + /* for 3 level htree: update 2 level parent's min
> + * and max hash values */
> + update_parents(dx_dir, DX_DIRBLOCK_NODE);
>
> for (b=0, dx_db = dx_dir->dx_block;
> b < dx_dir->numblocks;
> @@ -642,6 +662,10 @@ static void parse_int_node(ext2_filsys fs,
> dx_db->flags |= DX_FLAG_REFERENCED;
> dx_db->parent = db->blockcnt;
> }
> +
> + dx_db->previous =
> + i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;
> +
> if (hash < min_hash)
> min_hash = hash;
> if (hash > max_hash)
> @@ -949,6 +973,14 @@ static int check_dir_block(ext2_filsys fs,
> return DIRENT_ABORT;
> }
>
> + /* This will allow (at some point in the future) to punch out empty
> + * directory blocks and reduce the space used by a directory that grows
> + * very large and then the files are deleted. For now, all that is
> + * needed is to avoid e2fsck filling in these holes as part of
> + * feature flag. */
> + if (db->blk == 0 && ext2fs_has_feature_largedir(fs))
> + return 0;
> +
> if (db->blk == 0 && !inline_data_size) {
> if (allocate_dir_block(ctx, db, buf, &cd->pctx))
> return 0;
> diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
> index 22a58f3..7dcb386 100644
> --- a/e2fsck/rehash.c
> +++ b/e2fsck/rehash.c
> @@ -603,6 +603,43 @@ static struct ext2_dx_entry *set_int_node(ext2_filsys fs, char *buf)
> return (struct ext2_dx_entry *) limits;
> }
>
> +static int alloc_blocks(ext2_filsys fs,
> + struct ext2_dx_countlimit **limit,
> + struct ext2_dx_entry **prev_ent,
> + struct ext2_dx_entry **next_ent,
> + int *prev_offset, int *next_offset,
> + struct out_dir *outdir, int i,
> + int *prev_count, int *next_count)
> +{
> + errcode_t retval;
> + char *block_start;
> +
> + if (*limit)
> + (*limit)->limit = (*limit)->count =
> + ext2fs_cpu_to_le16((*limit)->limit);
> + *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
> + (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
> +
> + if (i != 1)
> + (*prev_ent)->hash =
> + ext2fs_cpu_to_le32(outdir->hashes[i]);
> +
> + retval = get_next_block(fs, outdir, &block_start);
> + if (retval)
> + return retval;
> +
> + *next_ent = set_int_node(fs, block_start);
> + *limit = (struct ext2_dx_countlimit *)(*next_ent);
> + if (next_offset)
> + *next_offset = ((char *) *next_ent - outdir->buf);
> +
> + *next_count = (*limit)->limit;
> + (*prev_offset) += sizeof(struct ext2_dx_entry);
> + (*prev_count)--;
> +
> + return 0;
> +}
> +
> /*
> * This function takes the leaf nodes which have been written in
> * outdir, and populates the root node and any necessary interior nodes.
> @@ -612,13 +649,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
> ext2_ino_t ino,
> ext2_ino_t parent)
> {
> - struct ext2_dx_root_info *root_info;
> - struct ext2_dx_entry *root, *dx_ent = 0;
> - struct ext2_dx_countlimit *root_limit, *limit;
> + struct ext2_dx_root_info *root_info;
> + struct ext2_dx_entry *root, *int_ent, *dx_ent = 0;
> + struct ext2_dx_countlimit *root_limit, *int_limit, *limit;
> errcode_t retval;
> char * block_start;
> - int i, c1, c2, nblks;
> - int limit_offset, root_offset;
> + int i, c1, c2, c3, nblks;
> + int limit_offset, int_offset, root_offset;
>
> root_info = set_root_node(fs, outdir->buf, ino, parent);
> root_offset = limit_offset = ((char *) root_info - outdir->buf) +
> @@ -628,7 +665,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
> nblks = outdir->num;
>
> /* Write out the pointer blocks */
> - if (nblks-1 <= c1) {
> + if (nblks - 1 <= c1) {
> /* Just write out the root block, and we're done */
> root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
> for (i=1; i < nblks; i++) {
> @@ -639,31 +676,20 @@ static errcode_t calculate_tree(ext2_filsys fs,
> root++;
> c1--;
> }
> - } else {
> + } else if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(fs, c1)) {
> c2 = 0;
> - limit = 0;
> + limit = NULL;
> root_info->indirect_levels = 1;
> for (i=1; i < nblks; i++) {
> - if (c1 == 0)
> + if (c2 == 0 && c1 == 0)
> return ENOSPC;
> if (c2 == 0) {
> - if (limit)
> - limit->limit = limit->count =
> - ext2fs_cpu_to_le16(limit->limit);
> - root = (struct ext2_dx_entry *)
> - (outdir->buf + root_offset);
> - root->block = ext2fs_cpu_to_le32(outdir->num);
> - if (i != 1)
> - root->hash =
> - ext2fs_cpu_to_le32(outdir->hashes[i]);
> - if ((retval = get_next_block(fs, outdir,
> - &block_start)))
> + retval = alloc_blocks(fs, &limit, &root,
> + &dx_ent, &root_offset,
> + NULL, outdir, i, &c1,
> + &c2);
> + if (retval)
> return retval;
> - dx_ent = set_int_node(fs, block_start);
> - limit = (struct ext2_dx_countlimit *) dx_ent;
> - c2 = limit->limit;
> - root_offset += sizeof(struct ext2_dx_entry);
> - c1--;
> }
> dx_ent->block = ext2fs_cpu_to_le32(i);
> if (c2 != limit->limit)
> @@ -674,6 +700,45 @@ static errcode_t calculate_tree(ext2_filsys fs,
> }
> limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
> limit->limit = ext2fs_cpu_to_le16(limit->limit);
> + } else {
> + c2 = 0;
> + c3 = 0;
> + limit = NULL;
> + int_limit = 0;
> + root_info->indirect_levels = 2;
> + for (i = 1; i < nblks; i++) {
> + if (c3 == 0 && c2 == 0 && c1 == 0)
> + return ENOSPC;
> + if (c3 == 0 && c2 == 0) {
> + retval = alloc_blocks(fs, &int_limit, &root,
> + &int_ent, &root_offset,
> + &int_offset, outdir, i,
> + &c1, &c2);
> + if (retval)
> + return retval;
> + }
> + if (c3 == 0) {
> + retval = alloc_blocks(fs, &limit, &int_ent,
> + &dx_ent, &int_offset,
> + NULL, outdir, i, &c2,
> + &c3);
> + if (retval)
> + return retval;
> +
> + }
> + dx_ent->block = ext2fs_cpu_to_le32(i);
> + if (c3 != limit->limit)
> + dx_ent->hash =
> + ext2fs_cpu_to_le32(outdir->hashes[i]);
> + dx_ent++;
> + c3--;
> + }
> + int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
> + int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
> + limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
> + limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
> }
> root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
> root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index c68be50..baa422c 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -1937,6 +1937,11 @@ static inline unsigned int ext2_dir_htree_level(ext2_filsys fs)
> return EXT4_HTREE_LEVEL_COMPAT;
> }
>
> +_INLINE_ int ext2fs_htree_intnode_maxrecs(ext2_filsys fs, int blocks)
> +{
> + return blocks * ((fs->blocksize - 8) / sizeof(struct ext2_dx_entry));
> +}
> +
> /*
> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
> */
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-02-17 04:05:56

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v3 4/4] tests: 3 level hash tree test

On Feb 15, 2017, at 8:45 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Artem Blagodarenko <[email protected]>
>
> A test is added that recreates a directory (-fD fsck option)
> containing 47.5k files with 255-character names. This number of files
> cannot be stored in a 2-level htree, so 3 levels are used.
>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> tests/f_large_dir/expect | 12 +++++++++++
> tests/f_large_dir/name | 1 +
> tests/f_large_dir/script | 47 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 60 insertions(+), 0 deletions(-)
>
> diff --git a/tests/f_large_dir/expect b/tests/f_large_dir/expect
> new file mode 100644
> index 0000000..b099460
> --- /dev/null
> +++ b/tests/f_large_dir/expect
> @@ -0,0 +1,12 @@
> +Pass 1: Checking inodes, blocks, and sizes
> +Pass 2: Checking directory structure
> +Pass 3: Checking directory connectivity
> +Pass 3A: Optimizing directories
> +Pass 4: Checking reference counts
> +Inode 13 ref count is 1, should be 47245. Fix? yes
> +
> +Pass 5: Checking group summary information
> +
> +test.img: ***** FILE SYSTEM WAS MODIFIED *****
> +test.img: 13/115368 files (0.0% non-contiguous), 32817/460800 blocks
> +Exit status is 1
> diff --git a/tests/f_large_dir/name b/tests/f_large_dir/name
> new file mode 100644
> index 0000000..4b96890
> --- /dev/null
> +++ b/tests/f_large_dir/name
> @@ -0,0 +1 @@
> +optimize 3 level htree directories
> diff --git a/tests/f_large_dir/script b/tests/f_large_dir/script
> new file mode 100644
> index 0000000..e68576d
> --- /dev/null
> +++ b/tests/f_large_dir/script
> @@ -0,0 +1,47 @@
> +OUT=$test_name.log
> +EXP=$test_dir/expect
> +E2FSCK=../e2fsck/e2fsck
> +
> +NAMELEN=255
> +DIRENT_SZ=8
> +BLOCKSZ=1024
> +DIRENT_PER_LEAF=$((BLOCKSZ / (NAMELEN + DIRENT_SZ)))
> +HEADER=32
> +INDEX_SZ=8
> +INDEX_L1=$(((BLOCKSZ - HEADER) / INDEX_SZ))
> +INDEX_L2=$(((BLOCKSZ - DIRENT_SZ) / INDEX_SZ))
> +ENTRIES=$((INDEX_L1 * INDEX_L2 * DIRENT_PER_LEAF))
> +
> +cp /dev/null $OUT
> +$MKE2FS -b 1024 -O large_dir,uninit_bg,dir_nlink -F $TMPFILE 460800 > /dev/null

This could use "-b $BLOCKSZ" for consistency, and determine the filesystem size
from the number of leaf blocks, though I'm not sure what that would be offhand.
It could also use "-N 64" or similar to reduce the number of inode blocks allocated.

Just a suggestion, I don't think it is critical and the test is OK as-is.

Reviewed-by: Andreas Dilger <[email protected]>

> +{
> + echo "feature large_dir"
> + echo "mkdir /foo"
> + echo "cd /foo"
> + touch foofile
> + echo "write foofile foofile"
> + for ((i = 0; i < $ENTRIES; i++)); do
> + [[ $(( i % DIRENT_PER_LEAF )) -eq 0 ]] && echo "expand ./"
> + [[ $(( i % 5000 )) -eq 0 ]] && >&2 echo "$i processed"
> + printf "ln foofile %0255X\n" $i
> + done
> +} | $DEBUGFS -w -f /dev/stdin $TMPFILE > /dev/null
> +
> +$E2FSCK -yfD $TMPFILE > $OUT.new 2>&1
> +status=$?
> +echo Exit status is $status >> $OUT.new
> +sed -f $cmd_dir/filter.sed -e "s;$TMPFILE;test.img;" $OUT.new >> $OUT
> +rm -f $OUT.new
> +
> +cmp -s $OUT $EXP
> +RC=$?
> +if [ $RC -eq 0 ]; then
> + echo "$test_name: $test_description: ok"
> + touch $test_name.ok
> +else
> + echo "$test_name: $test_description: failed"
> + diff -u $EXP $OUT > $test_name.failed
> +fi
> +
> +
> +
> --
> 1.7.1
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-04-12 09:12:17

by Artem Blagodarenko

[permalink] [raw]
Subject: Re: [PATCH v3 2/4] e2fsprogs: add support for 3-level htree

Hello,

Ted, have you had a chance to look at these patches yet?

Best regards,
Artem Blagodarenko


> On 17 Feb 2017, at 06:55, Andreas Dilger <[email protected]> wrote:
>
>>
>> On Feb 15, 2017, at 10:43 AM, Artem Blagodarenko <[email protected]> wrote:
>>
>> From: Artem Blagodarenko <[email protected]>
>>
>> The INCOMPAT_LARGEDIR feature allows larger directories to
>> be created, both with directory sizes over 2GB and a
>> maximum htree depth of 3 instead of the current limit of 2.
>> These features are needed in order to exceed the current
>> limit of approximately 10M entries in a single directory
>> for 4KB blocksize (~100k for 1KB).
>>
>> debugfs, e2fsck, ext2fs, mke2fs and tune2fs support is
>> added.
>>
>> Signed-off-by: Alexey Lyashkov <[email protected]>
>> Signed-off-by: Artem Blagodarenko <[email protected]>
>
> Reviewed-by: Andreas Dilger <[email protected]>
>
>> ---
>> e2fsck/pass1.c | 5 +++--
>> e2fsck/pass2.c | 5 +++--
>> lib/ext2fs/ext2_fs.h | 3 ++-
>> lib/ext2fs/ext2fs.h | 15 ++++++++++++++-
>> misc/mke2fs.c | 3 ++-
>> misc/tune2fs.c | 3 ++-
>> 6 files changed, 26 insertions(+), 8 deletions(-)
>>
>> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
>> index ce37176..11546d7 100644
>> --- a/e2fsck/pass1.c
>> +++ b/e2fsck/pass1.c
>> @@ -1716,7 +1716,8 @@ void e2fsck_pass1(e2fsck_t ctx)
>> }
>>
>> if (inode->i_faddr || frag || fsize ||
>> - (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high))
>> + (!ext2fs_has_feature_largedir(fs->super) &&
>> + (LINUX_S_ISDIR(inode->i_mode) && inode->i_size_high)))
>> mark_inode_bad(ctx, ino);
>> if ((fs->super->s_creator_os != EXT2_OS_HURD) &&
>> !ext2fs_has_feature_64bit(fs->super) &&
>> @@ -2469,7 +2470,7 @@ static int handle_htree(e2fsck_t ctx, struct problem_context *pctx,
>> return 1;
>>
>> pctx->num = root->indirect_levels;
>> - if ((root->indirect_levels > 1) &&
>> + if ((root->indirect_levels > ext2_dir_htree_level(fs)) &&
>> fix_problem(ctx, PR_1_HTREE_DEPTH, pctx))
>> return 1;
>>
>> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
>> index b89ebc9..2f41fc4 100644
>> --- a/e2fsck/pass2.c
>> +++ b/e2fsck/pass2.c
>> @@ -1058,7 +1058,8 @@ inline_read_fail:
>> dx_db->flags |= DX_FLAG_FIRST | DX_FLAG_LAST;
>> if ((root->reserved_zero ||
>> root->info_length < 8 ||
>> - root->indirect_levels > 1) &&
>> + root->indirect_levels >=
>> + ext2_dir_htree_level(fs)) &&
>> fix_problem(ctx, PR_2_HTREE_BAD_ROOT, &cd->pctx)) {
>> clear_htree(ctx, ino);
>> dx_dir->numblocks = 0;
>> @@ -1811,7 +1812,7 @@ int e2fsck_process_bad_inode(e2fsck_t ctx, ext2_ino_t dir,
>> } else
>> not_fixed++;
>> }
>> - if (inode.i_size_high &&
>> + if (inode.i_size_high && !ext2fs_has_feature_largedir(fs->super) &&
>> LINUX_S_ISDIR(inode.i_mode)) {
>> if (fix_problem(ctx, PR_2_DIR_SIZE_HIGH_ZERO, &pctx)) {
>> inode.i_size_high = 0;
>> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
>> index 195e366..66b7058 100644
>> --- a/lib/ext2fs/ext2_fs.h
>> +++ b/lib/ext2fs/ext2_fs.h
>> @@ -921,7 +921,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, 4, ENCRYPT)
>>
>> #define EXT2_FEATURE_COMPAT_SUPP 0
>> #define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
>> - EXT4_FEATURE_INCOMPAT_MMP)
>> + EXT4_FEATURE_INCOMPAT_MMP| \
>> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
>> #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
>> EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
>> EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
>> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
>> index 786ded8..c68be50 100644
>> --- a/lib/ext2fs/ext2fs.h
>> +++ b/lib/ext2fs/ext2fs.h
>> @@ -588,7 +588,8 @@ typedef struct ext2_icount *ext2_icount_t;
>> EXT4_FEATURE_INCOMPAT_64BIT|\
>> EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
>> EXT4_FEATURE_INCOMPAT_ENCRYPT|\
>> - EXT4_FEATURE_INCOMPAT_CSUM_SEED)
>> + EXT4_FEATURE_INCOMPAT_CSUM_SEED|\
>> + EXT4_FEATURE_INCOMPAT_LARGEDIR)
>>
>> #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
>> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|\
>> @@ -1924,6 +1925,18 @@ _INLINE_ blk_t ext2fs_inode_data_blocks(ext2_filsys fs,
>> return (blk_t) ext2fs_inode_data_blocks2(fs, inode);
>> }
>>
>> +/* htree levels for ext4 */
>> +#define EXT4_HTREE_LEVEL_COMPAT 2
>> +#define EXT4_HTREE_LEVEL 3
>> +
>> +static inline unsigned int ext2_dir_htree_level(ext2_filsys fs)
>> +{
>> + if (ext2fs_has_feature_largedir(fs->super))
>> + return EXT4_HTREE_LEVEL;
>> +
>> + return EXT4_HTREE_LEVEL_COMPAT;
>> +}
>> +
>> /*
>> * This is an efficient, overflow safe way of calculating ceil((1.0 * a) / b)
>> */
>> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
>> index 9f18c83..b2bf461 100644
>> --- a/misc/mke2fs.c
>> +++ b/misc/mke2fs.c
>> @@ -1081,7 +1081,8 @@ static __u32 ok_features[3] = {
>> EXT4_FEATURE_INCOMPAT_64BIT|
>> EXT4_FEATURE_INCOMPAT_INLINE_DATA|
>> EXT4_FEATURE_INCOMPAT_ENCRYPT |
>> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
>> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
>> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
>> /* R/O compat */
>> EXT2_FEATURE_RO_COMPAT_LARGE_FILE|
>> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
>> diff --git a/misc/tune2fs.c b/misc/tune2fs.c
>> index 6239577..f78d105 100644
>> --- a/misc/tune2fs.c
>> +++ b/misc/tune2fs.c
>> @@ -156,7 +156,8 @@ static __u32 ok_features[3] = {
>> EXT4_FEATURE_INCOMPAT_MMP |
>> EXT4_FEATURE_INCOMPAT_64BIT |
>> EXT4_FEATURE_INCOMPAT_ENCRYPT |
>> - EXT4_FEATURE_INCOMPAT_CSUM_SEED,
>> + EXT4_FEATURE_INCOMPAT_CSUM_SEED |
>> + EXT4_FEATURE_INCOMPAT_LARGEDIR,
>> /* R/O compat */
>> EXT2_FEATURE_RO_COMPAT_LARGE_FILE |
>> EXT4_FEATURE_RO_COMPAT_HUGE_FILE|
>> --
>> 1.7.1
>>
>
>
> Cheers, Andreas

2017-04-13 15:57:37

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH v3 2/4] e2fsprogs: add support for 3-level htree

I've applied the latest version of these patches to the next branch.
My plan is to release 1.43.5 on the maint branch, and the 3-level
htree will be for an eventual 1.44 release (as it is a new feature).

Cheers,

- Ted