2007-10-04 05:51:09

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] jbd2: fix commit code to properly abort journal

From: Jan Kara <[email protected]>

We should really call journal_abort() and not __journal_abort_hard() in
case of errors. The latter call does not record the error in the journal
superblock and thus filesystem won't be marked as with errors later (and
user could happily mount it without any warning).

Signed-off-by: Jan Kara <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/jbd2/commit.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b898ee4..6986f33 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_unlock(&journal->j_list_lock);

if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);

jbd2_journal_write_revoke_records(journal, commit_transaction);

@@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)

descriptor = jbd2_journal_get_descriptor_buffer(journal);
if (!descriptor) {
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, -EIO);
continue;
}

@@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
and repeat this loop: we'll fall into the
refile-on-abort condition above. */
if (err) {
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
continue;
}

@@ -757,7 +757,7 @@ wait_for_iobuf:
err = -EIO;

if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);

/* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this
--
1.5.3.2.81.g17ed


2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Fix sparse warnings

From: Aneesh Kumar K.V <[email protected]>

Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/ext4/inode.c | 6 ++++--
include/linux/ext4_fs.h | 14 +++++++-------
2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f283522..a2e1ea4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3167,12 +3167,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
iloc, handle);
if (ret) {
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
- if (mnt_count != sbi->s_es->s_mnt_count) {
+ if (mnt_count !=
+ le16_to_cpu(sbi->s_es->s_mnt_count)) {
ext4_warning(inode->i_sb, __FUNCTION__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
- mnt_count = sbi->s_es->s_mnt_count;
+ mnt_count =
+ le16_to_cpu(sbi->s_es->s_mnt_count);
}
}
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index b77b59f..722d4ef 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -574,13 +574,13 @@ struct ext4_super_block {
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
- __u16 s_min_extra_isize; /* All inodes have at least # bytes */
- __u16 s_want_extra_isize; /* New inodes should reserve # bytes */
- __u32 s_flags; /* Miscellaneous flags */
- __u16 s_raid_stride; /* RAID stride */
- __u16 s_mmp_interval; /* # seconds to wait in MMP checking */
- __u64 s_mmp_block; /* Block for multi-mount protection */
- __u32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */
+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
+ __le32 s_flags; /* Miscellaneous flags */
+ __le16 s_raid_stride; /* RAID stride */
+ __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
+ __le64 s_mmp_block; /* Block for multi-mount protection */
+ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u32 s_reserved[163]; /* Padding to the end of the block */
};

--
1.5.3.2.81.g17ed

2007-10-04 05:51:09

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert bg_inode_bitmap and bg_inode_table

From: Aneesh Kumar K.V <[email protected]>

Convert bg_inode_bitmap and bg_inode_table to bg_inode_bitmap_lo
and bg_inode_table_lo. This helps in finding BUGs due to
direct partial access of these split 64 bit values

Also fix one direct partial access

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/balloc.c | 2 +-
fs/ext4/super.c | 12 ++++++------
include/linux/ext4_fs.h | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d1a8882..7a24b3f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -103,7 +103,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
/* Set bits for block and inode bitmaps, and inode table */
ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
- for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+ for (bit = (ext4_inode_table(sb, gdp) - start),
bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 02a2418..7548408 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -78,17 +78,17 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_inode_bitmap) |
+ return le32_to_cpu(bg->bg_inode_bitmap_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_table(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_inode_table) |
+ return le32_to_cpu(bg->bg_inode_table_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

void ext4_block_bitmap_set(struct super_block *sb,
@@ -102,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb,
void ext4_inode_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_inode_bitmap = cpu_to_le32((u32)blk);
+ bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}
@@ -110,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb,
void ext4_inode_table_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_inode_table = cpu_to_le32((u32)blk);
+ bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index c26e30e..5eb4953 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -106,8 +106,8 @@
struct ext4_group_desc
{
__le32 bg_block_bitmap_lo; /* Blocks bitmap block */
- __le32 bg_inode_bitmap; /* Inodes bitmap block */
- __le32 bg_inode_table; /* Inodes table block */
+ __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
+ __le32 bg_inode_table_lo; /* Inodes table block */
__le16 bg_free_blocks_count; /* Free blocks count */
__le16 bg_free_inodes_count; /* Free inodes count */
__le16 bg_used_dirs_count; /* Directories count */
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count

From: Aneesh Kumar K.V <[email protected]>

Convert s_r_blocks_count and s_free_blocks_count to
s_r_blocks_count_lo and s_free_blocks_count_lo

This helps in finding BUGs due to direct partial access of
these split 64 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/super.c | 2 +-
include/linux/ext4_fs.h | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dc7fe4c..dbd114c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2635,7 +2635,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
- es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
+ ext4_free_blocks_count_set(es, buf->f_bfree);
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
buf->f_bavail = 0;
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 339412d..fb31c1a 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -502,8 +502,8 @@ do { \
struct ext4_super_block {
/*00*/ __le32 s_inodes_count; /* Inodes count */
__le32 s_blocks_count_lo; /* Blocks count */
- __le32 s_r_blocks_count; /* Reserved blocks count */
- __le32 s_free_blocks_count; /* Free blocks count */
+ __le32 s_r_blocks_count_lo; /* Reserved blocks count */
+ __le32 s_free_blocks_count_lo; /* Free blocks count */
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
@@ -1007,13 +1007,13 @@ static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_r_blocks_count);
+ le32_to_cpu(es->s_r_blocks_count_lo);
}

static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_free_blocks_count);
+ le32_to_cpu(es->s_free_blocks_count_lo);
}

static inline void ext4_blocks_count_set(struct ext4_super_block *es,
@@ -1026,14 +1026,14 @@ static inline void ext4_blocks_count_set(struct ext4_super_block *es,
static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
ext4_fsblk_t blk)
{
- es->s_free_blocks_count = cpu_to_le32((u32)blk);
+ es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
}

static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
ext4_fsblk_t blk)
{
- es->s_r_blocks_count = cpu_to_le32((u32)blk);
+ es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
}

--
1.5.3.2.81.g17ed

2007-10-04 05:51:09

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert ext4_extent_idx.ei_leaf to ext4_extent_idx.ei_leaf_lo

From: Aneesh Kumar K.V <[email protected]>

Convert ext4_extent_idx.ei_leaf ext4_extent_idx.ei_leaf_lo
This helps in finding BUGs due to direct partial access of
these split 48 bit values.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 4 ++--
include/linux/ext4_fs_extents.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c03056a..8528774 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;

- block = le32_to_cpu(ix->ei_leaf);
+ block = le32_to_cpu(ix->ei_leaf_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
return block;
}
@@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
*/
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
- ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index cb2dfbb..d2045a2 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -83,7 +83,7 @@ struct ext4_extent {
*/
struct ext4_extent_idx {
__le32 ei_block; /* index covers logical blocks from 'block' */
- __le32 ei_leaf; /* pointer to the physical block of the next *
+ __le32 ei_leaf_lo; /* pointer to the physical block of the next *
* level. leaf or next index could be there */
__le16 ei_leaf_hi; /* high 16 bits of physical block */
__u16 ei_unused;
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo

From: Aneesh Kumar K.V <[email protected]>

Convert s_blocks_count to s_blocks_count_lo
This helps in finding BUGs due to direct partial access of
these split 64 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/super.c | 4 ++--
include/linux/ext4_fs.h | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7548408..dc7fe4c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2593,7 +2593,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)

if (test_opt(sb, MINIX_DF)) {
sbi->s_overhead_last = 0;
- } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+ } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
unsigned long ngroups = sbi->s_groups_count, i;
ext4_fsblk_t overhead = 0;
smp_rmb();
@@ -2628,7 +2628,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
overhead += ngroups * (2 + sbi->s_itb_per_group);
sbi->s_overhead_last = overhead;
smp_wmb();
- sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
+ sbi->s_blocks_last = ext4_blocks_count(es);
}

buf->f_type = EXT4_SUPER_MAGIC;
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 5eb4953..339412d 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -501,7 +501,7 @@ do { \
*/
struct ext4_super_block {
/*00*/ __le32 s_inodes_count; /* Inodes count */
- __le32 s_blocks_count; /* Blocks count */
+ __le32 s_blocks_count_lo; /* Blocks count */
__le32 s_r_blocks_count; /* Reserved blocks count */
__le32 s_free_blocks_count; /* Free blocks count */
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
@@ -1001,7 +1001,7 @@ extern void ext4_inode_table_set(struct super_block *sb,
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
- le32_to_cpu(es->s_blocks_count);
+ le32_to_cpu(es->s_blocks_count_lo);
}

static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
@@ -1019,7 +1019,7 @@ static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
static inline void ext4_blocks_count_set(struct ext4_super_block *es,
ext4_fsblk_t blk)
{
- es->s_blocks_count = cpu_to_le32((u32)blk);
+ es->s_blocks_count_lo = cpu_to_le32((u32)blk);
es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
}

--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo

From: Aneesh Kumar K.V <[email protected]>

Convert bg_block_bitmap to bg_block_bitmap_lo
This helps in catching some BUGS due to direct
partial access of these split fields.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/super.c | 6 +++---
include/linux/ext4_fs.h | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d8bb279..02a2418 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,9 +70,9 @@ static void ext4_write_super_lockfs(struct super_block *sb);
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_block_bitmap) |
+ return le32_to_cpu(bg->bg_block_bitmap_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -94,7 +94,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
void ext4_block_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_block_bitmap = cpu_to_le32((u32)blk);
+ bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index ab608e8..c26e30e 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,7 +105,7 @@
*/
struct ext4_group_desc
{
- __le32 bg_block_bitmap; /* Blocks bitmap block */
+ __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
__le32 bg_inode_bitmap; /* Inodes bitmap block */
__le32 bg_inode_table; /* Inodes table block */
__le16 bg_free_blocks_count; /* Free blocks count */
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] JBD2: debug code cleanup.

From: Jose R. Santos <[email protected]>

Mostly stolen from akpm's JBD cleanup patch.

- use `#ifdef foo' instead of `#if defined(foo)'

- Make journal_enable_debug __read_mostly just for the heck of it

- Make jbd_debugfs_dir and jbd_debug static

- debugfs_remove(NULL) is legal: remove unneeded tests

- remove unnecessary empty loops

Signed-off-by: Jose R. Santos <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/jbd2/journal.c | 20 ++++++--------------
1 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ff07bff..6ddc553 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1864,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
/*
* debugfs tunables
*/
-#if defined(CONFIG_JBD2_DEBUG)
-u8 jbd2_journal_enable_debug;
+#ifdef CONFIG_JBD2_DEBUG
+u8 jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);
-#endif
-
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)

#define JBD2_DEBUG_NAME "jbd2-debug"

-struct dentry *jbd2_debugfs_dir, *jbd2_debug;
+static struct dentry *jbd2_debugfs_dir;
+static struct dentry *jbd2_debug;

static void __init jbd2_create_debugfs_entry(void)
{
@@ -1886,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void)

static void __exit jbd2_remove_debugfs_entry(void)
{
- if (jbd2_debug)
- debugfs_remove(jbd2_debug);
- if (jbd2_debugfs_dir)
- debugfs_remove(jbd2_debugfs_dir);
+ debugfs_remove(jbd2_debug);
+ debugfs_remove(jbd2_debugfs_dir);
}

#else

static void __init jbd2_create_debugfs_entry(void)
{
- do {
- } while (0);
}

static void __exit jbd2_remove_debugfs_entry(void)
{
- do {
- } while (0);
}

#endif
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo

From: Aneesh Kumar K.V <[email protected]>

Convert ext4_extent.ee_start to ext4_extent.ee_start_lo
This helps in finding BUGs due to direct partial access of
these split 48 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 8 +++-----
include/linux/ext4_fs_extents.h | 2 +-
2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2be404f..c03056a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;

- block = le32_to_cpu(ex->ee_start);
+ block = le32_to_cpu(ex->ee_start_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
return block;
}
@@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
*/
static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
- ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

@@ -1409,8 +1409,7 @@ has_space:
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
- nearex->ee_start = newext->ee_start;
- nearex->ee_start_hi = newext->ee_start_hi;
+ ext4_ext_store_pblock(nearex, ext_pblock(newext));
nearex->ee_len = newext->ee_len;

merge:
@@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
}
/* ex2: iblock to iblock + maxblocks-1 : initialised */
ex2->ee_block = cpu_to_le32(iblock);
- ex2->ee_start = cpu_to_le32(newblock);
ext4_ext_store_pblock(ex2, newblock);
ex2->ee_len = cpu_to_le16(allocated);
if (ex2 != ex)
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index 81406f3..cb2dfbb 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -74,7 +74,7 @@ struct ext4_extent {
__le32 ee_block; /* first logical block extent covers */
__le16 ee_len; /* number of blocks covered by extent */
__le16 ee_start_hi; /* high 16 bits of physical block */
- __le32 ee_start; /* low 32 bits of physical block */
+ __le32 ee_start_lo; /* low 32 bits of physical block */
};

/*
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: sparse fixes

From: Aneesh Kumar K.V <[email protected]>

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/fsync.c | 2 +-
fs/ext4/inode.c | 2 +-
fs/ext4/xattr.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a167d7..8d50879 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
struct inode *inode = dentry->d_inode;
int ret = 0;

- J_ASSERT(ext4_journal_current_handle() == 0);
+ J_ASSERT(ext4_journal_current_handle() == NULL);

/*
* data=writeback:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2e1ea4..07afd4a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
}
if (buffer_new(&dummy)) {
J_ASSERT(create != 0);
- J_ASSERT(handle != 0);
+ J_ASSERT(handle != NULL);

/*
* Now that we do not always journal data, we should
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 12c7d65..8638730 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1120,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
int total_ino, total_blk;
void *base, *start, *end;
int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
- int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
+ int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);

down_write(&EXT4_I(inode)->xattr_sem);
retry:
@@ -1292,7 +1292,7 @@ retry:

i.name = b_entry_name;
i.value = buffer;
- i.value_len = cpu_to_le32(size);
+ i.value_len = size;
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto cleanup;
--
1.5.3.2.81.g17ed

2007-10-04 05:51:10

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: Avoid rec_len overflow with 64KB block size

From: Jan Kara <[email protected]>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk. The patch also converts some places
to use ext4_next_entry() when we are changing them anyway.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext4/dir.c | 12 ++++----
fs/ext4/namei.c | 76 ++++++++++++++++++++++------------------------
include/linux/ext4_fs.h | 20 ++++++++++++
3 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2ba49ff..8236352 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
unsigned long offset)
{
const char * error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
+ const int rlen = ext4_rec_len_from_disk(de->rec_len);

if (rlen < EXT4_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
@@ -172,10 +172,10 @@ revalidate:
* least that it is non-zero. A
* failure will be detected in the
* dirent test below. */
- if (le16_to_cpu(de->rec_len) <
- EXT4_DIR_REC_LEN(1))
+ if (ext4_rec_len_from_disk(de->rec_len)
+ < EXT4_DIR_REC_LEN(1))
break;
- i += le16_to_cpu(de->rec_len);
+ i += ext4_rec_len_from_disk(de->rec_len);
}
offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -197,7 +197,7 @@ revalidate:
ret = stored;
goto out;
}
- offset += le16_to_cpu(de->rec_len);
+ offset += ext4_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) {
/* We might block in the next section
* if the data destination is
@@ -219,7 +219,7 @@ revalidate:
goto revalidate;
stored ++;
}
- filp->f_pos += le16_to_cpu(de->rec_len);
+ filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
}
offset = 0;
brelse (bh);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 94ee6f3..bc3825d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -280,7 +280,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
space += EXT4_DIR_REC_LEN(de->name_len);
names++;
}
- de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext4_next_entry(de);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -551,7 +551,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
*/
static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
{
- return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
+ return (struct ext4_dir_entry_2 *)((char*)p +
+ ext4_rec_len_from_disk(p->rec_len));
}

/*
@@ -720,7 +721,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
cond_resched();
}
/* XXX: do we need to check rec_len == 0 case? -Chris */
- de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext4_next_entry(de);
}
return count;
}
@@ -820,7 +821,7 @@ static inline int search_dirblock(struct buffer_head * bh,
return 1;
}
/* prevent looping on a bad block */
- de_len = le16_to_cpu(de->rec_len);
+ de_len = ext4_rec_len_from_disk(de->rec_len);
if (de_len <= 0)
return -1;
offset += de_len;
@@ -1128,7 +1129,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
rec_len = EXT4_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len =
- cpu_to_le16(rec_len);
+ ext4_rec_len_to_disk(rec_len);
de->inode = 0;
map++;
to += rec_len;
@@ -1147,13 +1148,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)

prev = to = de;
while ((char*)de < base + size) {
- next = (struct ext4_dir_entry_2 *) ((char *) de +
- le16_to_cpu(de->rec_len));
+ next = ext4_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT4_DIR_REC_LEN(de->name_len);
if (de > to)
memmove(to, de, rec_len);
- to->rec_len = cpu_to_le16(rec_len);
+ to->rec_len = ext4_rec_len_to_disk(rec_len);
prev = to;
to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
}
@@ -1227,8 +1227,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split);
de = dx_pack_dirents(data1,blocksize);
- de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
- de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+ de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
+ de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));

@@ -1297,7 +1297,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return -EEXIST;
}
nlen = EXT4_DIR_REC_LEN(de->name_len);
- rlen = le16_to_cpu(de->rec_len);
+ rlen = ext4_rec_len_from_disk(de->rec_len);
if ((de->inode? rlen - nlen: rlen) >= reclen)
break;
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@@ -1316,11 +1316,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,

/* By now the buffer is marked for journaling */
nlen = EXT4_DIR_REC_LEN(de->name_len);
- rlen = le16_to_cpu(de->rec_len);
+ rlen = ext4_rec_len_from_disk(de->rec_len);
if (de->inode) {
struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
- de1->rec_len = cpu_to_le16(rlen - nlen);
- de->rec_len = cpu_to_le16(nlen);
+ de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
+ de->rec_len = ext4_rec_len_to_disk(nlen);
de = de1;
}
de->file_type = EXT4_FT_UNKNOWN;
@@ -1397,17 +1397,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

/* The 0th block becomes the root, move the dirents out */
fde = &root->dotdot;
- de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+ de = (struct ext4_dir_entry_2 *)((char *)fde +
+ ext4_rec_len_from_disk(fde->rec_len));
len = ((char *) root) + blocksize - (char *) de;
memcpy (data1, de, len);
de = (struct ext4_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
+ while ((char *)(de2 = ext4_next_entry(de)) < top)
de = de2;
- de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+ de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
- de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2));
+ de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -1487,7 +1488,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
return retval;
de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0;
- de->rec_len = cpu_to_le16(blocksize);
+ de->rec_len = ext4_rec_len_to_disk(blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}

@@ -1550,7 +1551,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup;
node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries;
- node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+ node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext4_journal_get_write_access(handle, frame->bh);
@@ -1648,9 +1649,9 @@ static int ext4_delete_entry (handle_t *handle,
BUFFER_TRACE(bh, "get_write_access");
ext4_journal_get_write_access(handle, bh);
if (pde)
- pde->rec_len =
- cpu_to_le16(le16_to_cpu(pde->rec_len) +
- le16_to_cpu(de->rec_len));
+ pde->rec_len = ext4_rec_len_to_disk(
+ ext4_rec_len_from_disk(pde->rec_len) +
+ ext4_rec_len_from_disk(de->rec_len));
else
de->inode = 0;
dir->i_version++;
@@ -1658,10 +1659,9 @@ static int ext4_delete_entry (handle_t *handle,
ext4_journal_dirty_metadata(handle, bh);
return 0;
}
- i += le16_to_cpu(de->rec_len);
+ i += ext4_rec_len_from_disk(de->rec_len);
pde = de;
- de = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext4_next_entry(de);
}
return -ENOENT;
}
@@ -1824,13 +1824,12 @@ retry:
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
- de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len));
+ de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
- de = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext4_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
- de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
+ de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1882,8 +1881,7 @@ static int empty_dir (struct inode * inode)
return 1;
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
- de1 = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de1 = ext4_next_entry(de);
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1894,9 +1892,9 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 1;
}
- offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
- de = (struct ext4_dir_entry_2 *)
- ((char *) de1 + le16_to_cpu(de1->rec_len));
+ offset = ext4_rec_len_from_disk(de->rec_len) +
+ ext4_rec_len_from_disk(de1->rec_len);
+ de = ext4_next_entry(de1);
while (offset < inode->i_size ) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1925,9 +1923,8 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 0;
}
- offset += le16_to_cpu(de->rec_len);
- de = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ offset += ext4_rec_len_from_disk(de->rec_len);
+ de = ext4_next_entry(de);
}
brelse (bh);
return 1;
@@ -2282,8 +2279,7 @@ retry:
}

#define PARENT_INO(buffer) \
- ((struct ext4_dir_entry_2 *) ((char *) buffer + \
- le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
+ (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)

/*
* Anybody can rename anything with this: the permission checks are left to the
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 5d90616..ab608e8 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -767,6 +767,26 @@ struct ext4_dir_entry_2 {
#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
~EXT4_DIR_ROUND)
+#define EXT4_MAX_REC_LEN ((1<<16)-1)
+
+static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+ if (len == EXT4_MAX_REC_LEN)
+ return 1 << 16;
+ return len;
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len)
+{
+ if (len == (1 << 16))
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else if (len > (1 << 16))
+ BUG();
+ return cpu_to_le16(len);
+}
+
/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext2

From: Takashi Sato <[email protected]>

This patch set supports large block size(>4k, <=64k) in ext2,
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext2 without some changes to the directory handling
code. The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem. The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
[1/2] ext2: enlarge blocksize
- Allow blocksize up to pagesize

[2/2] ext2: fix rec_len overflow
- prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext2, and able to handle empty directory block.

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext2/super.c | 3 ++-
include/linux/ext2_fs.h | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 639a32c..765c805 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);

if (!sb_set_blocksize(sb, blocksize)) {
- printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
+ printk(KERN_ERR "EXT2-fs: bad blocksize %d.\n",
+ blocksize);
goto failed_sbi;
}

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 153d755..910a705 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -86,8 +86,8 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
* Macro-instructions used to manage several block sizes
*/
#define EXT2_MIN_BLOCK_SIZE 1024
-#define EXT2_MAX_BLOCK_SIZE 4096
-#define EXT2_MIN_BLOCK_LOG_SIZE 10
+#define EXT2_MAX_BLOCK_SIZE 65536
+#define EXT2_MIN_BLOCK_LOG_SIZE 10
#ifdef __KERNEL__
# define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext2: Avoid rec_len overflow with 64KB block size

From: Jan Kara <[email protected]>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext2/dir.c | 43 +++++++++++++++++++++++++++++++------------
include/linux/ext2_fs.h | 1 +
2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2bf49d7..1329bdb 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -26,6 +26,24 @@

typedef struct ext2_dir_entry_2 ext2_dirent;

+static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+ if (len == EXT2_MAX_REC_LEN)
+ return 1 << 16;
+ return len;
+}
+
+static inline __le16 ext2_rec_len_to_disk(unsigned len)
+{
+ if (len == (1 << 16))
+ return cpu_to_le16(EXT2_MAX_REC_LEN);
+ else if (len > (1 << 16))
+ BUG();
+ return cpu_to_le16(len);
+}
+
/*
* ext2 uses block-sized chunks. Arguably, sector-sized ones would be
* more robust, but we have what we have
@@ -95,7 +113,7 @@ static void ext2_check_page(struct page *page)
}
for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
p = (ext2_dirent *)(kaddr + offs);
- rec_len = le16_to_cpu(p->rec_len);
+ rec_len = ext2_rec_len_from_disk(p->rec_len);

if (rec_len < EXT2_DIR_REC_LEN(1))
goto Eshort;
@@ -193,7 +211,8 @@ static inline int ext2_match (int len, const char * const name,
*/
static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
{
- return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
+ return (ext2_dirent *)((char*)p +
+ ext2_rec_len_from_disk(p->rec_len));
}

static inline unsigned
@@ -305,7 +324,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
return 0;
}
}
- filp->f_pos += le16_to_cpu(de->rec_len);
+ filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
}
ext2_put_page(page);
}
@@ -413,7 +432,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
struct page *page, struct inode *inode)
{
unsigned from = (char *) de - (char *) page_address(page);
- unsigned to = from + le16_to_cpu(de->rec_len);
+ unsigned to = from + ext2_rec_len_from_disk(de->rec_len);
int err;

lock_page(page);
@@ -469,7 +488,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
/* We hit i_size */
name_len = 0;
rec_len = chunk_size;
- de->rec_len = cpu_to_le16(chunk_size);
+ de->rec_len = ext2_rec_len_to_disk(chunk_size);
de->inode = 0;
goto got_it;
}
@@ -483,7 +502,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
if (ext2_match (namelen, name, de))
goto out_unlock;
name_len = EXT2_DIR_REC_LEN(de->name_len);
- rec_len = le16_to_cpu(de->rec_len);
+ rec_len = ext2_rec_len_from_disk(de->rec_len);
if (!de->inode && rec_len >= reclen)
goto got_it;
if (rec_len >= name_len + reclen)
@@ -504,8 +523,8 @@ got_it:
goto out_unlock;
if (de->inode) {
ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
- de1->rec_len = cpu_to_le16(rec_len - name_len);
- de->rec_len = cpu_to_le16(name_len);
+ de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+ de->rec_len = ext2_rec_len_to_disk(name_len);
de = de1;
}
de->name_len = namelen;
@@ -536,7 +555,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
struct inode *inode = mapping->host;
char *kaddr = page_address(page);
unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
- unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
+ unsigned to = ((char*)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len);
ext2_dirent * pde = NULL;
ext2_dirent * de = (ext2_dirent *) (kaddr + from);
int err;
@@ -557,7 +576,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
err = mapping->a_ops->prepare_write(NULL, page, from, to);
BUG_ON(err);
if (pde)
- pde->rec_len = cpu_to_le16(to-from);
+ pde->rec_len = ext2_rec_len_to_disk(to-from);
dir->inode = 0;
err = ext2_commit_chunk(page, from, to);
inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
@@ -591,14 +610,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
memset(kaddr, 0, chunk_size);
de = (struct ext2_dir_entry_2 *)kaddr;
de->name_len = 1;
- de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
+ de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1));
memcpy (de->name, ".\0\0", 4);
de->inode = cpu_to_le32(inode->i_ino);
ext2_set_de_type (de, inode);

de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));
de->name_len = 2;
- de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
+ de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1));
de->inode = cpu_to_le32(parent->i_ino);
memcpy (de->name, "..\0", 4);
ext2_set_de_type (de, inode);
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 910a705..41063d5 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -557,5 +557,6 @@ enum {
#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1)
#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \
~EXT2_DIR_ROUND)
+#define EXT2_MAX_REC_LEN ((1<<16)-1)

#endif /* _LINUX_EXT2_FS_H */
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4

From: Takashi Sato <[email protected]>

This patch set supports large block size(>4k, <=64k) in ext4,
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext4 without some changes to the directory handling
code. The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem. The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
[1/2] ext4: enlarge blocksize
- Allow blocksize up to pagesize

[2/2] ext4: fix rec_len overflow
- prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext4dev, and able to handle empty directory block.

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext4/super.c | 5 +++++
include/linux/ext4_fs.h | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 619db84..d8bb279 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1548,6 +1548,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto out_fail;
}

+ if (!sb_set_blocksize(sb, blocksize)) {
+ printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
+ goto out_fail;
+ }
+
/*
* The ext4 superblock will not be buffer aligned for other than 1kB
* block sizes. We need to calculate the offset from buffer start.
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 46b304d..5d90616 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -73,8 +73,8 @@
* Macro-instructions used to manage several block sizes
*/
#define EXT4_MIN_BLOCK_SIZE 1024
-#define EXT4_MAX_BLOCK_SIZE 4096
-#define EXT4_MIN_BLOCK_LOG_SIZE 10
+#define EXT4_MAX_BLOCK_SIZE 65536
+#define EXT4_MIN_BLOCK_LOG_SIZE 10
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Once ext4 will not implement fragment, it is believed it will never be

From: Coly Li <[email protected]>

implement in future. Therefore fragment related source code in ext4 should
be obsoleted -- no one will use it.

This patch obsolete fragment from ext4. Another patch posted on linux-ext4
removing fragment supporting from e2fsprogs.

Signed-off-by: Coly Li <[email protected]>
Acked-by: Andreas Dilger <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/ext4/ialloc.c | 5 -----
fs/ext4/inode.c | 10 ----------
fs/ext4/super.c | 15 ---------------
include/linux/ext4_fs.h | 35 ++++++-----------------------------
include/linux/ext4_fs_i.h | 5 -----
include/linux/ext4_fs_sb.h | 3 ---
6 files changed, 6 insertions(+), 67 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 427f830..b8b538d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -576,11 +576,6 @@ got:
/* dirsync only applies to directories */
if (!S_ISDIR(mode))
ei->i_flags &= ~EXT4_DIRSYNC_FL;
-#ifdef EXT4_FRAGMENTS
- ei->i_faddr = 0;
- ei->i_frag_no = 0;
- ei->i_frag_size = 0;
-#endif
ei->i_file_acl = 0;
ei->i_dir_acl = 0;
ei->i_dtime = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4848e0..f283522 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2645,11 +2645,6 @@ void ext4_read_inode(struct inode * inode)
}
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-#ifdef EXT4_FRAGMENTS
- ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
- ei->i_frag_no = raw_inode->i_frag;
- ei->i_frag_size = raw_inode->i_fsize;
-#endif
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
@@ -2794,11 +2789,6 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
-#ifdef EXT4_FRAGMENTS
- raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
- raw_inode->i_frag = ei->i_frag_no;
- raw_inode->i_fsize = ei->i_frag_size;
-#endif
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 42cbdb5..420d39d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1655,14 +1655,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
- sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
- le32_to_cpu(es->s_log_frag_size);
- if (blocksize != sbi->s_frag_size) {
- printk(KERN_ERR
- "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
- sbi->s_frag_size, blocksize);
- goto failed_mount;
- }
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -1676,7 +1668,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
} else
sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
if (EXT4_INODE_SIZE(sb) == 0)
goto cantfind_ext4;
@@ -1700,12 +1691,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group);
goto failed_mount;
}
- if (sbi->s_frags_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #fragments per group too big: %lu\n",
- sbi->s_frags_per_group);
- goto failed_mount;
- }
if (sbi->s_inodes_per_group > blocksize * 8) {
printk (KERN_ERR
"EXT4-fs: #inodes per group too big: %lu\n",
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..3baeb99 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,20 +105,6 @@
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))

/*
- * Macro-instructions used to manage fragments
- */
-#define EXT4_MIN_FRAG_SIZE 1024
-#define EXT4_MAX_FRAG_SIZE 4096
-#define EXT4_MIN_FRAG_LOG_SIZE 10
-#ifdef __KERNEL__
-# define EXT4_FRAG_SIZE(s) (EXT4_SB(s)->s_frag_size)
-# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_SB(s)->s_frags_per_block)
-#else
-# define EXT4_FRAG_SIZE(s) (EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size)
-# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s))
-#endif
-
-/*
* Structure of a blocks group descriptor
*/
struct ext4_group_desc
@@ -311,27 +297,24 @@ struct ext4_inode {
__le32 i_generation; /* File version (for NFS) */
__le32 i_file_acl; /* File ACL */
__le32 i_dir_acl; /* Directory ACL */
- __le32 i_faddr; /* Fragment address */
+ __le32 i_obso_faddr; /* Obsoleted fragment address */
union {
struct {
- __u8 l_i_frag; /* Fragment number */
- __u8 l_i_fsize; /* Fragment size */
+ __le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
__le16 l_i_file_acl_high;
__le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */
__u32 l_i_reserved2;
} linux2;
struct {
- __u8 h_i_frag; /* Fragment number */
- __u8 h_i_fsize; /* Fragment size */
+ __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
__u16 h_i_mode_high;
__u16 h_i_uid_high;
__u16 h_i_gid_high;
__u32 h_i_author;
} hurd2;
struct {
- __u8 m_i_frag; /* Fragment number */
- __u8 m_i_fsize; /* Fragment size */
+ __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
__le16 m_i_file_acl_high;
__u32 m_i_reserved2[2];
} masix2;
@@ -419,8 +402,6 @@ do { \

#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
-#define i_frag osd2.linux2.l_i_frag
-#define i_fsize osd2.linux2.l_i_fsize
#define i_file_acl_high osd2.linux2.l_i_file_acl_high
#define i_uid_low i_uid
#define i_gid_low i_gid
@@ -431,8 +412,6 @@ do { \
#elif defined(__GNU__)

#define i_translator osd1.hurd1.h_i_translator
-#define i_frag osd2.hurd2.h_i_frag;
-#define i_fsize osd2.hurd2.h_i_fsize;
#define i_uid_high osd2.hurd2.h_i_uid_high
#define i_gid_high osd2.hurd2.h_i_gid_high
#define i_author osd2.hurd2.h_i_author
@@ -440,8 +419,6 @@ do { \
#elif defined(__masix__)

#define i_reserved1 osd1.masix1.m_i_reserved1
-#define i_frag osd2.masix2.m_i_frag
-#define i_fsize osd2.masix2.m_i_fsize
#define i_file_acl_high osd2.masix2.m_i_file_acl_high
#define i_reserved2 osd2.masix2.m_i_reserved2

@@ -528,9 +505,9 @@ struct ext4_super_block {
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
- __le32 s_log_frag_size; /* Fragment size */
+ __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
- __le32 s_frags_per_group; /* # Fragments per group */
+ __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */
/*30*/ __le32 s_wtime; /* Write time */
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 1a511e9..86ddfe2 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -78,11 +78,6 @@ struct ext4_ext_cache {
struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_flags;
-#ifdef EXT4_FRAGMENTS
- __u32 i_faddr;
- __u8 i_frag_no;
- __u8 i_frag_size;
-#endif
ext4_fsblk_t i_file_acl;
__u32 i_dir_acl;
__u32 i_dtime;
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 1b2ffee..a978fba 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -28,11 +28,8 @@
* third extended-fs super-block data in memory
*/
struct ext4_sb_info {
- unsigned long s_frag_size; /* Size of a fragment in bytes */
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
- unsigned long s_frags_per_block;/* Number of fragments per block */
unsigned long s_inodes_per_block;/* Number of inodes per block */
- unsigned long s_frags_per_group;/* Number of fragments in a group */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX

From: Eric Sandeen <[email protected]>

CONFIG_EXT4_INDEX is not an exposed config option in the kernel, and it is
unconditionally defined in ext4_fs.h. tune2fs is already able to turn off
dir indexing, so at this point it's just cluttering up the code. Remove
it.

Signed-off-by: Eric Sandeen <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/ext4/dir.c | 7 -------
fs/ext4/namei.c | 20 --------------------
include/linux/ext4_fs.h | 14 ++------------
3 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ab01c0..2ba49ff 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file, /* BKL held */
-#ifdef CONFIG_EXT4_INDEX
.release = ext4_release_dir,
-#endif
};


@@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,

sb = inode->i_sb;

-#ifdef CONFIG_EXT4_INDEX
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
@@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
*/
EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
}
-#endif
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);

@@ -232,7 +228,6 @@ out:
return ret;
}

-#ifdef CONFIG_EXT4_INDEX
/*
* These functions convert from the major/minor hash to an f_pos
* value.
@@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)

return 0;
}
-
-#endif
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fdb862..94ee6f3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,6 @@ struct dx_map_entry
u16 size;
};

-#ifdef CONFIG_EXT4_INDEX
static inline unsigned dx_get_block (struct dx_entry *entry);
static void dx_set_block (struct dx_entry *entry, unsigned value);
static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
dx_set_block(new, block);
dx_set_count(entries, count + 1);
}
-#endif
-

static void ext4_update_dx_flag(struct inode *inode)
{
@@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
name = dentry->d_name.name;
if (namelen > EXT4_NAME_LEN)
return NULL;
-#ifdef CONFIG_EXT4_INDEX
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dentry, res_dir, &err);
/*
@@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
return bh;
dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
}
-#endif
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
start = EXT4_I(dir)->i_dir_start_lookup;
if (start >= nblocks)
@@ -957,7 +952,6 @@ cleanup_and_exit:
return ret;
}

-#ifdef CONFIG_EXT4_INDEX
static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
struct ext4_dir_entry_2 **res_dir, int *err)
{
@@ -1025,7 +1019,6 @@ errout:
dx_release (frames);
return NULL;
}
-#endif

static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
@@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}

-#ifdef CONFIG_EXT4_INDEX
/*
* Move count entries from end of map between two memory locations.
* Returns pointer to last entry moved.
@@ -1266,8 +1258,6 @@ errout:
*error = err;
return NULL;
}
-#endif
-

/*
* Add a new entry into a directory (leaf) block. If de is non-NULL,
@@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return 0;
}

-#ifdef CONFIG_EXT4_INDEX
/*
* This converts a one block unindexed directory to a 3 block indexed
* directory, and adds the dentry to the indexed directory.
@@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
-#endif

/*
* ext4_add_entry()
@@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
struct ext4_dir_entry_2 *de;
struct super_block * sb;
int retval;
-#ifdef CONFIG_EXT4_INDEX
int dx_fallback=0;
-#endif
unsigned blocksize;
u32 block, blocks;

@@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
-#ifdef CONFIG_EXT4_INDEX
if (is_dx(dir)) {
retval = ext4_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
}
-#endif
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0, offset = 0; block < blocks; block++) {
bh = ext4_bread(handle, dir, block, 0, &retval);
@@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
if (retval != -ENOSPC)
return retval;

-#ifdef CONFIG_EXT4_INDEX
if (blocks == 1 && !dx_fallback &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
return make_indexed_dir(handle, dentry, inode, bh);
-#endif
brelse(bh);
}
bh = ext4_append(handle, dir, &block, &retval);
@@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}

-#ifdef CONFIG_EXT4_INDEX
/*
* Returns 0 for success, or a negative error value
*/
@@ -1644,7 +1625,6 @@ cleanup:
dx_release(frames);
return err;
}
-#endif

/*
* ext4_delete_entry deletes a directory entry by merging it with the
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 3baeb99..151738a 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -36,10 +36,6 @@
/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
#define EXT4_MAX_RESERVE_BLOCKS 1027
#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
-/*
- * Always enable hashed directories
- */
-#define CONFIG_EXT4_INDEX

/*
* Debug code
@@ -766,17 +762,11 @@ struct ext4_dir_entry_2 {
* (c) Daniel Phillips, 2001
*/

-#ifdef CONFIG_EXT4_INDEX
- #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
- EXT4_FEATURE_COMPAT_DIR_INDEX) && \
+#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
+ EXT4_FEATURE_COMPAT_DIR_INDEX) && \
(EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-#else
- #define is_dx(dir) 0
-#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX)
-#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
-#endif

/* Legal values for the dx_root hash_version field: */

--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext3: Avoid rec_len overflow with 64KB block size

From: Jan Kara <[email protected]>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk. The patch also converts some places
to use ext3_next_entry() when we are changing them anyway.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext3/dir.c | 10 +++---
fs/ext3/namei.c | 90 ++++++++++++++++++++++------------------------
include/linux/ext3_fs.h | 20 ++++++++++
3 files changed, 68 insertions(+), 52 deletions(-)

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c00723a..3c4c43a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,7 +69,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
unsigned long offset)
{
const char * error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
+ const int rlen = ext3_rec_len_from_disk(de->rec_len);

if (rlen < EXT3_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
@@ -177,10 +177,10 @@ revalidate:
* least that it is non-zero. A
* failure will be detected in the
* dirent test below. */
- if (le16_to_cpu(de->rec_len) <
+ if (ext3_rec_len_from_disk(de->rec_len) <
EXT3_DIR_REC_LEN(1))
break;
- i += le16_to_cpu(de->rec_len);
+ i += ext3_rec_len_from_disk(de->rec_len);
}
offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -201,7 +201,7 @@ revalidate:
ret = stored;
goto out;
}
- offset += le16_to_cpu(de->rec_len);
+ offset += ext3_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) {
/* We might block in the next section
* if the data destination is
@@ -223,7 +223,7 @@ revalidate:
goto revalidate;
stored ++;
}
- filp->f_pos += le16_to_cpu(de->rec_len);
+ filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
}
offset = 0;
brelse (bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index c1fa190..2c38eb6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -144,6 +144,15 @@ struct dx_map_entry
u16 size;
};

+/*
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
+{
+ return (struct ext3_dir_entry_2 *)((char*)p +
+ ext3_rec_len_from_disk(p->rec_len));
+}
+
#ifdef CONFIG_EXT3_INDEX
static inline unsigned dx_get_block (struct dx_entry *entry);
static void dx_set_block (struct dx_entry *entry, unsigned value);
@@ -281,7 +290,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext3_next_entry(de);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -548,14 +557,6 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,


/*
- * p is at least 6 bytes before the end of page
- */
-static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
-{
- return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
-}
-
-/*
* This function fills a red-black tree with information from a
* directory block. It returns the number directory entries loaded
* into the tree. If there is an error it is returned in err.
@@ -721,7 +722,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
cond_resched();
}
/* XXX: do we need to check rec_len == 0 case? -Chris */
- de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext3_next_entry(de);
}
return count;
}
@@ -825,7 +826,7 @@ static inline int search_dirblock(struct buffer_head * bh,
return 1;
}
/* prevent looping on a bad block */
- de_len = le16_to_cpu(de->rec_len);
+ de_len = ext3_rec_len_from_disk(de->rec_len);
if (de_len <= 0)
return -1;
offset += de_len;
@@ -1138,7 +1139,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
rec_len = EXT3_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len);
((struct ext3_dir_entry_2 *) to)->rec_len =
- cpu_to_le16(rec_len);
+ ext3_rec_len_to_disk(rec_len);
de->inode = 0;
map++;
to += rec_len;
@@ -1157,13 +1158,12 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)

prev = to = de;
while ((char*)de < base + size) {
- next = (struct ext3_dir_entry_2 *) ((char *) de +
- le16_to_cpu(de->rec_len));
+ next = ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
memmove(to, de, rec_len);
- to->rec_len = cpu_to_le16(rec_len);
+ to->rec_len = ext3_rec_len_to_disk(rec_len);
prev = to;
to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
}
@@ -1237,8 +1237,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split);
de = dx_pack_dirents(data1,blocksize);
- de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
- de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+ de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
+ de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2);
dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));

@@ -1309,7 +1309,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return -EEXIST;
}
nlen = EXT3_DIR_REC_LEN(de->name_len);
- rlen = le16_to_cpu(de->rec_len);
+ rlen = ext3_rec_len_from_disk(de->rec_len);
if ((de->inode? rlen - nlen: rlen) >= reclen)
break;
de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
@@ -1328,11 +1328,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,

/* By now the buffer is marked for journaling */
nlen = EXT3_DIR_REC_LEN(de->name_len);
- rlen = le16_to_cpu(de->rec_len);
+ rlen = ext3_rec_len_from_disk(de->rec_len);
if (de->inode) {
struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
- de1->rec_len = cpu_to_le16(rlen - nlen);
- de->rec_len = cpu_to_le16(nlen);
+ de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
+ de->rec_len = ext3_rec_len_to_disk(nlen);
de = de1;
}
de->file_type = EXT3_FT_UNKNOWN;
@@ -1410,17 +1410,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

/* The 0th block becomes the root, move the dirents out */
fde = &root->dotdot;
- de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+ de = (struct ext3_dir_entry_2 *)((char *)fde +
+ ext3_rec_len_from_disk(fde->rec_len));
len = ((char *) root) + blocksize - (char *) de;
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
+ while ((char *)(de2 = ext3_next_entry(de)) < top)
de = de2;
- de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+ de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
- de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
+ de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
@@ -1507,7 +1508,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
return retval;
de = (struct ext3_dir_entry_2 *) bh->b_data;
de->inode = 0;
- de->rec_len = cpu_to_le16(blocksize);
+ de->rec_len = ext3_rec_len_to_disk(blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}

@@ -1571,7 +1572,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup;
node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries;
- node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+ node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext3_journal_get_write_access(handle, frame->bh);
@@ -1670,9 +1671,9 @@ static int ext3_delete_entry (handle_t *handle,
BUFFER_TRACE(bh, "get_write_access");
ext3_journal_get_write_access(handle, bh);
if (pde)
- pde->rec_len =
- cpu_to_le16(le16_to_cpu(pde->rec_len) +
- le16_to_cpu(de->rec_len));
+ pde->rec_len = ext3_rec_len_to_disk(
+ ext3_rec_len_from_disk(pde->rec_len) +
+ ext3_rec_len_from_disk(de->rec_len));
else
de->inode = 0;
dir->i_version++;
@@ -1680,10 +1681,9 @@ static int ext3_delete_entry (handle_t *handle,
ext3_journal_dirty_metadata(handle, bh);
return 0;
}
- i += le16_to_cpu(de->rec_len);
+ i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = (struct ext3_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1817,13 +1817,12 @@ retry:
de = (struct ext3_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
- de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
+ de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = (struct ext3_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de = ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
- de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
+ de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1875,8 +1874,7 @@ static int empty_dir (struct inode * inode)
return 1;
}
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = (struct ext3_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ de1 = ext3_next_entry(de);
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1887,9 +1885,9 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 1;
}
- offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
- de = (struct ext3_dir_entry_2 *)
- ((char *) de1 + le16_to_cpu(de1->rec_len));
+ offset = ext3_rec_len_from_disk(de->rec_len) +
+ ext3_rec_len_from_disk(de1->rec_len);
+ de = ext3_next_entry(de1);
while (offset < inode->i_size ) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1918,9 +1916,8 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 0;
}
- offset += le16_to_cpu(de->rec_len);
- de = (struct ext3_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
+ offset += ext3_rec_len_from_disk(de->rec_len);
+ de = ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2274,8 +2271,7 @@ retry:
}

#define PARENT_INO(buffer) \
- ((struct ext3_dir_entry_2 *) ((char *) buffer + \
- le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode
+ (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)

/*
* Anybody can rename anything with this: the permission checks are left to the
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 7aa5556..d9e378d 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -660,6 +660,26 @@ struct ext3_dir_entry_2 {
#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
~EXT3_DIR_ROUND)
+#define EXT3_MAX_REC_LEN ((1<<16)-1)
+
+static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+ if (len == EXT3_MAX_REC_LEN)
+ return 1 << 16;
+ return len;
+}
+
+static inline __le16 ext3_rec_len_to_disk(unsigned len)
+{
+ if (len == (1 << 16))
+ return cpu_to_le16(EXT3_MAX_REC_LEN);
+ else if (len > (1 << 16))
+ BUG();
+ return cpu_to_le16(len);
+}
+
/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3

From: Takashi Sato <[email protected]>

This patch set supports large block size(>4k, <=64k) in ext3
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext3 without some changes to the directory handling
code. The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem. The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
[1/2] ext3: enlarge blocksize
- Allow blocksize up to pagesize

[2/2] ext3: fix rec_len overflow
- prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext3, and able to handle empty directory block.

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext3/super.c | 6 +++++-
include/linux/ext3_fs.h | 4 ++--
2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9537316..b4bfd36 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1549,7 +1549,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}

brelse (bh);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
+ blocksize);
+ goto out_fail;
+ }
logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index ece49a8..7aa5556 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -76,8 +76,8 @@
* Macro-instructions used to manage several block sizes
*/
#define EXT3_MIN_BLOCK_SIZE 1024
-#define EXT3_MAX_BLOCK_SIZE 4096
-#define EXT3_MIN_BLOCK_LOG_SIZE 10
+#define EXT3_MAX_BLOCK_SIZE 65536
+#define EXT3_MIN_BLOCK_LOG_SIZE 10
#ifdef __KERNEL__
# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Ext4: Uninitialized Block Groups

From: Andreas Dilger <[email protected]>

In pass1 of e2fsck, every inode table in the fileystem is scanned and checked,
regardless of whether it is in use. This is this the most time consuming part
of the filesystem check. The unintialized block group feature can greatly
reduce e2fsck time by eliminating checking of uninitialized inodes.

With this feature, there is a a high water mark of used inodes for each block
group. Block and inode bitmaps can be uninitialized on disk via a flag in the
group descriptor to avoid reading or scanning them at e2fsck time. A checksum
of each group descriptor is used to ensure that corruption in the group
descriptor's bit flags does not cause incorrect operation.

The feature is enabled through a mkfs option

mke2fs /dev/ -O uninit_groups

A patch adding support for uninitialized block groups to e2fsprogs tools has
been posted to the linux-ext4 mailing list.

The patches have been stress tested with fsstress and fsx. In performance
tests testing e2fsck time, we have seen that e2fsck time on ext3 grows
linearly with the total number of inodes in the filesytem. In ext4 with the
uninitialized block groups feature, the e2fsck time is constant, based
solely on the number of used inodes rather than the total inode count.
Since typical ext4 filesystems only use 1-10% of their inodes, this feature can
greatly reduce e2fsck time for users. With performance improvement of 2-20
times, depending on how full the filesystem is.

The attached graph shows the major improvements in e2fsck times in filesystems
with a large total inode count, but few inodes in use.

In each group descriptor if we have

EXT4_BG_INODE_UNINIT set in bg_flags:
Inode table is not initialized/used in this group. So we can skip
the consistency check during fsck.
EXT4_BG_BLOCK_UNINIT set in bg_flags:
No block in the group is used. So we can skip the block bitmap
verification for this group.

We also add two new fields to group descriptor as a part of
uninitialized group patch.

__le16 bg_itable_unused; /* Unused inodes count */
__le16 bg_checksum; /* crc16(sb_uuid+group+desc) */


bg_itable_unused:

If we have EXT4_BG_INODE_UNINIT not set in bg_flags
then bg_itable_unused will give the offset within
the inode table till the inodes are used. This can be
used by fsck to skip list of inodes that are marked unused.


bg_checksum:
Now that we depend on bg_flags and bg_itable_unused to determine
the block and inode usage, we need to make sure group descriptor
is not corrupt. We add checksum to group descriptor to
detect corruption. If the descriptor is found to be corrupt, we
mark all the blocks and inodes in the group used.


Signed-off-by: Avantika Mathur <[email protected]>
Signed-off-by: Andreas Dilger <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/Kconfig | 1 +
fs/ext4/balloc.c | 92 ++++++++++++++++++++++++++++-
fs/ext4/group.h | 29 +++++++++
fs/ext4/ialloc.c | 146 ++++++++++++++++++++++++++++++++++++++++++++---
fs/ext4/resize.c | 2 +
fs/ext4/super.c | 47 +++++++++++++++
include/linux/ext4_fs.h | 16 ++++-
7 files changed, 317 insertions(+), 16 deletions(-)
create mode 100644 fs/ext4/group.h

diff --git a/fs/Kconfig b/fs/Kconfig
index f9eed6d..97eef97 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,7 @@ config EXT4DEV_FS
tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
depends on EXPERIMENTAL
select JBD2
+ select CRC16
help
Ext4dev is a predecessor filesystem of the next generation
extended fs ext4, based on ext3 filesystem code. It will be
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e53b4af..d1a8882 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>

+#include "group.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -42,6 +43,74 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,

}

+/* Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ int block_group, struct ext4_group_desc *gdp)
+{
+ unsigned long start;
+ int bit, bit_max;
+ unsigned free_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (bh) {
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If checksum is bad mark all blocks used to prevent allocation
+ * essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum bad for group %u\n", block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+ memset(bh->b_data, 0, sb->s_blocksize);
+ }
+
+ /* Check for superblock and gdt backups in this group */
+ bit_max = ext4_bg_has_super(sb, block_group);
+
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+ sbi->s_desc_per_block) {
+ if (bit_max) {
+ bit_max += ext4_bg_num_gdb(sb, block_group);
+ bit_max +=
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+ }
+ } else { /* For META_BG_BLOCK_GROUPS */
+ int group_rel = (block_group -
+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+ EXT4_DESC_PER_BLOCK(sb);
+ if (group_rel == 0 || group_rel == 1 ||
+ (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+ bit_max += 1;
+ }
+
+ /* Last and first groups are always initialized */
+ free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
+
+ if (bh) {
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+ start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(sbi->s_es->s_first_data_block);
+
+ /* Set bits for block and inode bitmaps, and inode table */
+ ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+ ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+ }
+
+ return free_blocks - sbi->s_itb_per_group - 2;
+}
+
/*
* The free blocks are managed by bitmaps. A file system contains several
* blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -110,16 +179,29 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
*
* Return buffer_head on success or NULL in case of failure.
*/
-static struct buffer_head *
+struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
struct ext4_group_desc * desc;
struct buffer_head * bh = NULL;

- desc = ext4_get_group_desc (sb, block_group, NULL);
+ desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
- bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ bh = sb_getblk(sb, ext4_block_bitmap(sb, desc));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_block_bitmap(sb, bh, block_group,
+ desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, ext4_block_bitmap(sb,desc));
+ }
if (!bh)
ext4_error (sb, "read_block_bitmap",
"Cannot read block bitmap - "
@@ -586,6 +668,7 @@ do_more:
desc->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
group_freed);
+ desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);

@@ -1644,8 +1727,11 @@ allocated:
ret_block, goal_hits, goal_attempts);

spin_lock(sb_bgl_lock(sbi, group_no));
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_mod(&sbi->s_freeblocks_counter, -num);

diff --git a/fs/ext4/group.h b/fs/ext4/group.h
new file mode 100644
index 0000000..9310979
--- /dev/null
+++ b/fs/ext4/group.h
@@ -0,0 +1,29 @@
+/*
+ * linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <[email protected]>
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+#if defined(CONFIG_CRC16)
+#include <linux/crc16.h>
+#endif
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+ unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc) \
+ ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b8b538d..1fa418c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -28,6 +28,7 @@

#include "xattr.h"
#include "acl.h"
+#include "group.h"

/*
* ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
* the free blocks count in the block.
*/

+/*
+ * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * need to use it within a single byte (to ensure we get endianness right).
+ * We can use memset for the rest of the bitmap as there are no other users.
+ */
+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+ int i;
+
+ if (start_bit >= end_bit)
+ return;
+
+ ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
+ ext4_set_bit(i, bitmap);
+ if (i < end_bit)
+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+}
+
+/* Initializes an uninitialized inode bitmap */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int block_group,
+ struct ext4_group_desc *gdp)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If checksum is bad mark all blocks and inodes use to prevent
+ * allocation, essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+ block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+
+ memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+ bh->b_data);
+
+ return EXT4_INODES_PER_GROUP(sb);
+}

/*
* Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
-
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_inode_bitmap(sb, bh, block_group,
+ desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ }
if (!bh)
ext4_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi,
+ block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
@@ -438,7 +499,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
struct ext4_sb_info *sbi;
int err = 0;
struct inode *ret;
- int i;
+ int i, free = 0;

/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -520,11 +581,13 @@ repeat_in_this_group:
goto out;

got:
- ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error (sb, "ext4_new_inode",
- "reserved inode or inode > inodes count - "
- "block_group = %d, inode=%lu", group, ino);
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __FUNCTION__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %d, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO;
goto fail;
}
@@ -532,13 +595,78 @@ got:
BUFFER_TRACE(bh2, "get_write_access");
err = ext4_journal_get_write_access(handle, bh2);
if (err) goto fail;
+
+ /* We may have to initialize the block bitmap if it isn't already */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+ gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+ BUFFER_TRACE(block_bh, "get block bitmap access");
+ err = ext4_journal_get_write_access(handle, block_bh);
+ if (err) {
+ brelse(block_bh);
+ goto fail;
+ }
+
+ free = 0;
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* recheck and clear flag under lock if we still need to */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ free = ext4_free_blocks_after_init(sb, group, gdp);
+ gdp->bg_free_blocks_count = cpu_to_le16(free);
+ }
+ spin_unlock(sb_bgl_lock(sbi, group));
+
+ /* Don't need to dirty bitmap block if we didn't change it */
+ if (free) {
+ BUFFER_TRACE(block_bh, "dirty block bitmap");
+ err = ext4_journal_dirty_metadata(handle, block_bh);
+ }
+
+ brelse(block_bh);
+ if (err)
+ goto fail;
+ }
+
spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+ /* When marking the block group with
+ * ~EXT4_BG_INODE_UNINIT we don't want to depend
+ * on the value of bg_itable_unsed even though
+ * mke2fs could have initialized the same for us.
+ * Instead we calculated the value below
+ */
+
+ free = 0;
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ *
+ */
+ if (ino > free)
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
}
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -560,7 +688,7 @@ got:
inode->i_gid = current->fsgid;
inode->i_mode = mode;

- inode->i_ino = ino;
+ inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index aa11d7d..3359450 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/slab.h>

+#include "group.h"

#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);

/*
* Make the new blocks and inodes valid next. We do this before
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 420d39d..b59610d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/log2.h>
+#include <linux/crc16.h>

#include <asm/uaccess.h>

#include "xattr.h"
#include "acl.h"
#include "namei.h"
+#include "group.h"

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -1237,6 +1239,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
return res;
}

+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ __u16 crc = 0;
+
+ if (sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ int offset = offsetof(struct ext4_group_desc, bg_checksum);
+ __le32 le_group = cpu_to_le32(block_group);
+
+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+ crc = crc16(crc, (__u8 *)gdp, offset);
+ offset += sizeof(gdp->bg_checksum); /* skip checksum */
+ /* for checksum of struct ext4_group_desc do the rest...*/
+ if ((sbi->s_es->s_feature_incompat &
+ cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+ offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ crc = crc16(crc, (__u8 *)gdp + offset,
+ le16_to_cpu(sbi->s_es->s_desc_size) -
+ offset);
+ }
+
+ return cpu_to_le16(crc);
+}
+
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ if ((sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+ (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+ return 0;
+
+ return 1;
+}
+
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors (struct super_block * sb)
{
@@ -1291,6 +1330,14 @@ static int ext4_check_descriptors (struct super_block * sb)
i, inode_table);
return 0;
}
+ if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum for group %d failed (%u!=%u)\n", i,
+ le16_to_cpu(ext4_group_desc_csum(sbi, i,
+ gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ return 0;
+ }
first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 151738a..b77b59f 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,19 +105,25 @@
*/
struct ext4_group_desc
{
- __le32 bg_block_bitmap; /* Blocks bitmap block */
- __le32 bg_inode_bitmap; /* Inodes bitmap block */
+ __le32 bg_block_bitmap; /* Blocks bitmap block */
+ __le32 bg_inode_bitmap; /* Inodes bitmap block */
__le32 bg_inode_table; /* Inodes table block */
__le16 bg_free_blocks_count; /* Free blocks count */
__le16 bg_free_inodes_count; /* Free inodes count */
__le16 bg_used_dirs_count; /* Directories count */
- __u16 bg_flags;
- __u32 bg_reserved[3];
+ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
+ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
+ __le16 bg_itable_unused; /* Unused inodes count */
+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */
};

+#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
+
#ifdef __KERNEL__
#include <linux/ext4_fs_i.h>
#include <linux/ext4_fs_sb.h>
@@ -665,6 +671,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040

@@ -684,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
--
1.5.3.2.81.g17ed

2007-10-04 05:51:11

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] This feature relaxes check restrictions on where each block groups meta

From: Jose R. Santos <[email protected]>

data is located within the storage media. This allows for the allocation
of bitmaps or inode tables outside the block group boundaries in cases
where bad blocks forces us to look for new blocks which the owning block
group can not satisfy. This will also allow for new meta-data allocation
schemes to improve performance and scalability.

Signed-off-by: Jose R. Santos <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---
fs/ext4/super.c | 9 +++++++--
include/linux/ext4_fs.h | 4 +++-
2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59610d..619db84 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1287,13 +1287,17 @@ static int ext4_check_descriptors (struct super_block * sb)
ext4_fsblk_t inode_table;
struct ext4_group_desc * gdp = NULL;
int desc_block = 0;
+ int flexbg_flag = 0;
int i;

+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ flexbg_flag = 1;
+
ext4_debug ("Checking group descriptors");

for (i = 0; i < sbi->s_groups_count; i++)
{
- if (i == sbi->s_groups_count - 1)
+ if (i == sbi->s_groups_count - 1 || flexbg_flag)
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1338,7 +1342,8 @@ static int ext4_check_descriptors (struct super_block * sb)
le16_to_cpu(gdp->bg_checksum));
return 0;
}
- first_block += EXT4_BLOCKS_PER_GROUP(sb);
+ if (!flexbg_flag)
+ first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 722d4ef..46b304d 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -682,13 +682,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200

#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
EXT4_FEATURE_INCOMPAT_RECOVER| \
EXT4_FEATURE_INCOMPAT_META_BG| \
EXT4_FEATURE_INCOMPAT_EXTENTS| \
- EXT4_FEATURE_INCOMPAT_64BIT)
+ EXT4_FEATURE_INCOMPAT_64BIT| \
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
--
1.5.3.2.81.g17ed