2008-11-20 18:30:13

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 1/5] ext4: unlock group before ext4_error

Otherwise ext4_error will cause BUG because of
scheduling in atomic context.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/mballoc.c | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 772e05b..039a5a6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);

+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %u)\n",
inode ? inode->i_ino : 0, blocknr,
first + i, e4b->bd_group);
+ ext4_unlock_group(sb, e4b->bd_group);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;

if (free != grp->bb_free) {
+ ext4_unlock_group(sb, group);
ext4_error(sb, __func__,
"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
@@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
* If we intent to continue, we consider group descritor
* corrupt and update bb_free using bitmap value
*/
+ ext4_lock_group(sb, group);
grp->bb_free = free;
}

@@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* we have free blocks
*/
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "%d free blocks as per "
"group info. But bitmap says 0\n",
free);
+ ext4_lock_group(sb, e4b->bd_group);
break;
}

mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "%d free blocks as per "
"group info. But got %d blocks\n",
free, ex.fe_len);
@@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* indicate that the bitmap is corrupt. So exit
* without claiming the space.
*/
+ ext4_lock_group(sb, e4b->bd_group);
break;
}

@@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
bit = next + 1;
}
if (free != pa->pa_free) {
+ ext4_unlock_group(sb, group);
printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
@@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
* pa is already deleted so we use the value obtained
* from the bitmap and continue.
*/
+ ext4_lock_group(sb, group);
}
atomic_add(free, &sbi->s_mb_discarded);

@@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__,
"Double free of blocks %d (%d %d)\n",
block, entry->start_blk, entry->count);
+ ext4_unlock_group(sb, e4b->bd_group);
return 0;
}
}
--
1.6.0.4.735.gea4f



2008-11-20 18:29:19

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values.

Rename the lower bits with suffix _lo and add helper
to access the values. Also rename bg_itable_unused_hi
to bg_pad as in e2fsprogs. bg_itable_unused_hi is never
used before

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/balloc.c | 11 ++++---
fs/ext4/ext4.h | 24 +++++++++++++--
fs/ext4/ialloc.c | 83 ++++++++++++++++++++++++++++-------------------------
fs/ext4/inode.c | 2 +-
fs/ext4/mballoc.c | 15 +++++----
fs/ext4/resize.c | 4 +-
fs/ext4/super.c | 64 +++++++++++++++++++++++++++++++++++++++-
7 files changed, 143 insertions(+), 60 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d268f5f..efe68d9 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __func__,
"Checksum bad for group %u\n", block_group);
- gdp->bg_free_blocks_count = 0;
- gdp->bg_free_inodes_count = 0;
- gdp->bg_itable_unused = 0;
+ ext4_free_blks_set(sb, gdp, 0);
+ ext4_free_inodes_set(sb, gdp, 0);
+ ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
return 0;
}
@@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
}
}
spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
+ blocks_freed += ext4_free_blks_count(sb, desc);
+ ext4_free_blks_set(sb, desc, blocks_freed);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
@@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+ desc_count += ext4_free_blks_count(sb, gdp);
}

return desc_count;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 03aba86..2570883 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -161,9 +161,9 @@ struct ext4_group_desc
__le32 bg_block_bitmap_lo; /* Blocks bitmap block */
__le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
__le32 bg_inode_table_lo; /* Inodes table block */
- __le16 bg_free_blocks_count; /* Free blocks count */
- __le16 bg_free_inodes_count; /* Free inodes count */
- __le16 bg_used_dirs_count; /* Directories count */
+ __le16 bg_free_blocks_count_lo;/* Free blocks count */
+ __le16 bg_free_inodes_count_lo;/* Free inodes count */
+ __le16 bg_used_dirs_count_lo; /* Directories count */
__le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
__u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
__le16 bg_itable_unused; /* Unused inodes count */
@@ -174,7 +174,7 @@ struct ext4_group_desc
__le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
__le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__le16 bg_used_dirs_count_hi; /* Directories count MSB */
- __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
+ __le16 bg_pad;
__u32 bg_reserved2[3];
};

@@ -1194,12 +1194,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
struct ext4_group_desc *bg);
+extern __u32 ext4_free_blks_count(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern __u32 ext4_free_inodes_count(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern __u32 ext4_used_dirs_count(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern __u16 ext4_itable_unused_count(struct super_block *sb,
+ struct ext4_group_desc *bg);
extern void ext4_block_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_table_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
+extern void ext4_free_blks_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_inodes_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count);
+extern void ext4_used_dirs_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count);
+extern void ext4_itable_unused_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u16 count);
/* extents.c */
extern int ext4_ext_journal_restart(handle_t *handle, int needed);
/* defrag.c */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4039af6..84f060b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,9 +76,9 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __func__, "Checksum bad for group %u\n",
block_group);
- gdp->bg_free_blocks_count = 0;
- gdp->bg_free_inodes_count = 0;
- gdp->bg_itable_unused = 0;
+ ext4_free_blks_set(sb, gdp, 0);
+ ext4_free_inodes_set(sb, gdp, 0);
+ ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
return 0;
}
@@ -168,7 +168,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
- int fatal = 0, err;
+ int fatal = 0, err, count;
ext4_group_t flex_group;

if (atomic_read(&inode->i_count) > 1) {
@@ -236,9 +236,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)

if (gdp) {
spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&gdp->bg_free_inodes_count, 1);
- if (is_directory)
- le16_add_cpu(&gdp->bg_used_dirs_count, -1);
+ count = ext4_free_inodes_count(sb, gdp) + 1;
+ ext4_free_inodes_set(sb, gdp, count);
+ if (is_directory) {
+ count = ext4_used_dirs_count(sb, gdp) - 1;
+ ext4_used_dirs_set(sb, gdp, count);
+ }
gdp->bg_checksum = ext4_group_desc_csum(sbi,
block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -291,13 +294,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,

for (group = 0; group < ngroups; group++) {
desc = ext4_get_group_desc(sb, group, NULL);
- if (!desc || !desc->bg_free_inodes_count)
+ if (!desc || !ext4_free_inodes_count(sb, desc))
continue;
- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+ if (ext4_free_inodes_count(sb, desc) < avefreei)
continue;
if (!best_desc ||
- (le16_to_cpu(desc->bg_free_blocks_count) >
- le16_to_cpu(best_desc->bg_free_blocks_count))) {
+ (ext4_free_blks_count(sb, desc) >
+ ext4_free_blks_count(sb, best_desc))) {
*best_group = group;
best_desc = desc;
ret = 0;
@@ -369,7 +372,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
for (i = best_flex * flex_size; i < ngroups &&
i < (best_flex + 1) * flex_size; i++) {
desc = ext4_get_group_desc(sb, i, &bh);
- if (le16_to_cpu(desc->bg_free_inodes_count)) {
+ if (ext4_free_inodes_count(sb, desc)) {
*best_group = i;
goto out;
}
@@ -443,17 +446,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
- if (!desc || !desc->bg_free_inodes_count)
+ if (!desc || !ext4_free_inodes_count(sb, desc))
continue;
- if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+ if (ext4_used_dirs_count(sb, desc) >= best_ndir)
continue;
- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+ if (ext4_free_inodes_count(sb, desc) < avefreei)
continue;
- if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+ if (ext4_free_blks_count(sb, desc) < avefreeb)
continue;
*group = grp;
ret = 0;
- best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+ best_ndir = ext4_used_dirs_count(sb, desc);
}
if (ret == 0)
return ret;
@@ -479,13 +482,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for (i = 0; i < ngroups; i++) {
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
- if (!desc || !desc->bg_free_inodes_count)
+ if (!desc || !ext4_free_inodes_count(sb, desc))
continue;
- if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+ if (ext4_used_dirs_count(sb, desc) >= max_dirs)
continue;
- if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+ if (ext4_free_inodes_count(sb, desc) < min_inodes)
continue;
- if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+ if (ext4_free_blks_count(sb, desc) < min_blocks)
continue;
return 0;
}
@@ -494,8 +497,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for (i = 0; i < ngroups; i++) {
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && desc->bg_free_inodes_count &&
- le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+ if (desc && ext4_free_inodes_count(sb, desc) &&
+ ext4_free_inodes_count(sb, desc) >= avefreei)
return 0;
}

@@ -524,8 +527,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*/
*group = parent_group;
desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
- le16_to_cpu(desc->bg_free_blocks_count))
+ if (desc && ext4_free_inodes_count(sb, desc) &&
+ ext4_free_blks_count(sb, desc))
return 0;

/*
@@ -548,8 +551,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
if (*group >= ngroups)
*group -= ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
- le16_to_cpu(desc->bg_free_blocks_count))
+ if (desc && ext4_free_inodes_count(sb, desc) &&
+ ext4_free_blks_count(sb, desc))
return 0;
}

@@ -562,7 +565,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
if (++*group >= ngroups)
*group = 0;
desc = ext4_get_group_desc(sb, *group, NULL);
- if (desc && le16_to_cpu(desc->bg_free_inodes_count))
+ if (desc && ext4_free_inodes_count(sb, desc))
return 0;
}

@@ -591,7 +594,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct ext4_super_block *es;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
- int ret2, err = 0;
+ int ret2, err = 0, count;
struct inode *ret;
ext4_group_t i;
int free = 0;
@@ -717,7 +720,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
free = ext4_free_blocks_after_init(sb, group, gdp);
- gdp->bg_free_blocks_count = cpu_to_le16(free);
+ ext4_free_blks_set(sb, gdp, free);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
gdp);
}
@@ -751,7 +754,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
free = 0;
} else {
free = EXT4_INODES_PER_GROUP(sb) -
- le16_to_cpu(gdp->bg_itable_unused);
+ ext4_itable_unused_count(sb, gdp);
}

/*
@@ -761,13 +764,15 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
*
*/
if (ino > free)
- gdp->bg_itable_unused =
- cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ ext4_itable_unused_set(sb, gdp,
+ (EXT4_INODES_PER_GROUP(sb) - ino));
}

- le16_add_cpu(&gdp->bg_free_inodes_count, -1);
+ count = ext4_free_inodes_count(sb, gdp) - 1;
+ ext4_free_inodes_set(sb, gdp, count);
if (S_ISDIR(mode)) {
- le16_add_cpu(&gdp->bg_used_dirs_count, 1);
+ count = ext4_used_dirs_count(sb, gdp) + 1;
+ ext4_used_dirs_set(sb, gdp, count);
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
@@ -981,7 +986,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+ desc_count += ext4_free_inodes_count(sb, gdp);
brelse(bitmap_bh);
bitmap_bh = ext4_read_inode_bitmap(sb, i);
if (!bitmap_bh)
@@ -989,7 +994,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)

x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
- i, le16_to_cpu(gdp->bg_free_inodes_count), x);
+ i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x;
}
brelse(bitmap_bh);
@@ -1003,7 +1008,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+ desc_count += ext4_free_inodes_count(sb, gdp);
cond_resched();
}
return desc_count;
@@ -1020,7 +1025,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- count += le16_to_cpu(gdp->bg_used_dirs_count);
+ count += ext4_used_dirs_count(sb, gdp);
}
return count;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 76b1d3a..2d31597 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3987,7 +3987,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
num = EXT4_INODES_PER_GROUP(sb);
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- num -= le16_to_cpu(gdp->bg_itable_unused);
+ num -= ext4_itable_unused_count(sb, gdp);
table += num / inodes_per_block;
if (end > table)
end = table;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6155c94..685c483 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2494,7 +2494,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
ext4_free_blocks_after_init(sb, group, desc);
} else {
meta_group_info[i]->bb_free =
- le16_to_cpu(desc->bg_free_blocks_count);
+ ext4_free_blks_count(sb, desc);
}

INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -3066,12 +3066,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- gdp->bg_free_blocks_count =
- cpu_to_le16(ext4_free_blocks_after_init(sb,
- ac->ac_b_ex.fe_group,
- gdp));
+ ext4_free_blks_set(sb, gdp,
+ ext4_free_blocks_after_init(sb,
+ ac->ac_b_ex.fe_group, gdp));
}
- le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
+ len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
+ ext4_free_blks_set(sb, gdp, len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -4863,7 +4863,8 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
}

spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&gdp->bg_free_blocks_count, count);
+ ret = ext4_free_blks_count(sb, gdp) + count;
+ ext4_free_blks_set(sb, gdp, ret);
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 939cf26..6e61613 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -862,8 +862,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
- gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
- gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+ ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+ ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);

/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ddfa29b..3e62e09 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -93,6 +93,36 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

+__u32 ext4_free_blks_count(struct super_block *sb,
+ struct ext4_group_desc *bg)
+{
+ return le16_to_cpu(bg->bg_free_blocks_count_lo) |
+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+ (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+}
+
+__u32 ext4_free_inodes_count(struct super_block *sb,
+ struct ext4_group_desc *bg)
+{
+ return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+ (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+}
+
+__u32 ext4_used_dirs_count(struct super_block *sb,
+ struct ext4_group_desc *bg)
+{
+ return le16_to_cpu(bg->bg_used_dirs_count_lo) |
+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+ (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+}
+
+__u16 ext4_itable_unused_count(struct super_block *sb,
+ struct ext4_group_desc *bg)
+{
+ return le16_to_cpu(bg->bg_itable_unused);
+}
+
void ext4_block_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
@@ -117,6 +147,36 @@ void ext4_inode_table_set(struct super_block *sb,
bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

+void ext4_free_blks_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count)
+{
+ bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+ bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_free_inodes_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count)
+{
+ bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+ bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_used_dirs_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u32 count)
+{
+ bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+ bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_itable_unused_set(struct super_block *sb,
+ struct ext4_group_desc *bg, __u16 count)
+{
+ bg->bg_itable_unused = cpu_to_le16((__u16)count);
+}
+
/*
* Wrappers for jbd2_journal_start/end.
*
@@ -1478,9 +1538,9 @@ static int ext4_fill_flex_info(struct super_block *sb)

flex_group = ext4_flex_group(sbi, i);
sbi->s_flex_groups[flex_group].free_inodes +=
- le16_to_cpu(gdp->bg_free_inodes_count);
+ ext4_free_inodes_count(sb, gdp);
sbi->s_flex_groups[flex_group].free_blocks +=
- le16_to_cpu(gdp->bg_free_blocks_count);
+ ext4_free_blks_count(sb, gdp);
}

return 1;
--
1.6.0.4.735.gea4f


2008-11-20 18:31:31

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used

We need to make sure we update the block bitmap and clear
EXT4_BG_BLOCK_UNINIT flag with sb_bgl_lock held. We look
at EXT4_BG_BLOCK_UNINIT and reinit the block bitmap each
time in ext4_read_block_bitmap (introduced by
c806e68f5647109350ec546fee5b526962970fd2 )

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/mballoc.c | 17 ++++++++++++-----
1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0a53d1b..6155c94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1071,6 +1071,8 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
__u32 *addr;

len = cur + len;
+ if (lock)
+ spin_lock(lock); \
while (cur < len) {
if ((cur & 31) == 0 && (len - cur) >= 32) {
/* fast path: clear whole word at once */
@@ -1079,9 +1081,11 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
cur += 32;
continue;
}
- mb_clear_bit_atomic(lock, cur, bm);
+ mb_clear_bit(cur, bm);
cur++;
}
+ if (lock)
+ spin_unlock(lock); \
}

static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
@@ -1089,6 +1093,8 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
__u32 *addr;

len = cur + len;
+ if (lock)
+ spin_lock(lock); \
while (cur < len) {
if ((cur & 31) == 0 && (len - cur) >= 32) {
/* fast path: set whole word at once */
@@ -1097,9 +1103,11 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
cur += 32;
continue;
}
- mb_set_bit_atomic(lock, cur, bm);
+ mb_set_bit(cur, bm);
cur++;
}
+ if (lock)
+ spin_unlock(lock); \
}

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
@@ -3053,10 +3061,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
}
}
#endif
- mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
- ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);

2008-11-20 18:34:24

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/5] ext4: unlock group before ext4_error

Hi All,

I intent to sent only three patches. I had some debug
patches in between so git format-patch generated patch
number wrongly. I have also the inode uninit flag clearing race
patch. But I am finding a hang during rename. So didn't sent
the patch in the series. Attaching below for reference. Once
I fix the rename hang I will send the patch again.

commit 0181ece15c1e89c2825e581f03abe746fdebb7cf
Author: Aneesh Kumar K.V <[email protected]>
Date: Thu Nov 20 23:45:02 2008 +0530

ext4: Fix the race between read_inode_bitmap and ext4_new_inode

We need to make sure we update the inode bitmap and clear
EXT4_BG_INODE_UNINIT flag with sb_bgl_lock held. We look
at EXT4_BG_INODE_UNINIT and reinit the inode bitmap each
time in ext4_read_inode_bitmap (introduced by
c806e68f5647109350ec546fee5b526962970fd2 )

Signed-off-by: Aneesh Kumar K.V <[email protected]>

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 84f060b..99b1772 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -572,6 +572,70 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
return -1;
}

+static int ext4_claim_inode(struct super_block *sb,
+ struct buffer_head *inode_bitmap_bh,
+ unsigned long ino, ext4_group_t group, int mode)
+{
+ int free = 0, retval = 0, count;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+
+ spin_lock(sb_bgl_lock(sbi, group));
+ if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
+ /* not a free inode */
+ retval = 1;
+ goto err_ret;
+ }
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino >= EXT4_INODES_PER_GROUP(sb)) {
+ spin_unlock(sb_bgl_lock(sbi, group));
+ ext4_error(sb, __func__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %u, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
+ return 1;
+ }
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+ /* When marking the block group with
+ * ~EXT4_BG_INODE_UNINIT we don't want to depend
+ * on the value of bg_itable_unused even though
+ * mke2fs could have initialized the same for us.
+ * Instead we calculated the value below
+ */
+
+ free = 0;
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ }
+
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ *
+ */
+ if (ino > free)
+ ext4_itable_unused_set(sb, gdp,
+ (EXT4_INODES_PER_GROUP(sb) - ino));
+ }
+ count = ext4_free_inodes_count(sb, gdp) - 1;
+ ext4_free_inodes_set(sb, gdp, count);
+ if (S_ISDIR(mode)) {
+ count = ext4_used_dirs_count(sb, gdp) + 1;
+ ext4_used_dirs_set(sb, gdp, count);
+ }
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+err_ret:
+ spin_unlock(sb_bgl_lock(sbi, group));
+ return retval;
+}
+
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
@@ -585,8 +649,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
{
struct super_block *sb;
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *bh2;
+ struct buffer_head *inode_bitmap_bh = NULL;
+ struct buffer_head *group_desc_bh;
ext4_group_t group = 0;
unsigned long ino = 0;
struct inode *inode;
@@ -594,7 +658,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct ext4_super_block *es;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
- int ret2, err = 0, count;
+ int ret2, err = 0;
struct inode *ret;
ext4_group_t i;
int free = 0;
@@ -634,40 +698,48 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
for (i = 0; i < sbi->s_groups_count; i++) {
err = -EIO;

- gdp = ext4_get_group_desc(sb, group, &bh2);
+ gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp)
goto fail;

- brelse(bitmap_bh);
- bitmap_bh = ext4_read_inode_bitmap(sb, group);
- if (!bitmap_bh)
+ brelse(inode_bitmap_bh);
+ inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
+ if (!inode_bitmap_bh)
goto fail;

ino = 0;

repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
- bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
+ inode_bitmap_bh->b_data,
+ EXT4_INODES_PER_GROUP(sb), ino);
if (ino < EXT4_INODES_PER_GROUP(sb)) {

- BUFFER_TRACE(bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bitmap_bh);
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle,
+ inode_bitmap_bh);
if (err)
goto fail;

- if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
- ino, bitmap_bh->b_data)) {
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle,
+ group_desc_bh);
+ if (err)
+ goto fail;
+ if (!ext4_claim_inode(sb, inode_bitmap_bh,
+ ino, group, mode)) {
/* we won it */
- BUFFER_TRACE(bitmap_bh,
+ BUFFER_TRACE(inode_bitmap_bh,
"call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle,
- bitmap_bh);
+ inode_bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
- jbd2_journal_release_buffer(handle, bitmap_bh);
+ jbd2_journal_release_buffer(handle, inode_bitmap_bh);
+ jbd2_journal_release_buffer(handle, group_desc_bh);

if (++ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
@@ -687,30 +759,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
goto out;

got:
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- ext4_error(sb, __func__,
- "reserved inode or inode > inodes count - "
- "block_group = %u, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- err = -EIO;
- goto fail;
- }
-
- BUFFER_TRACE(bh2, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh2);
- if (err) goto fail;
-
/* We may have to initialize the block bitmap if it isn't already */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
+ struct buffer_head *block_bitmap_bh;

- BUFFER_TRACE(block_bh, "get block bitmap access");
- err = ext4_journal_get_write_access(handle, block_bh);
+ block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+ BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
+ err = ext4_journal_get_write_access(handle, block_bitmap_bh);
if (err) {
- brelse(block_bh);
+ brelse(block_bitmap_bh);
goto fail;
}

@@ -728,57 +786,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)

/* Don't need to dirty bitmap block if we didn't change it */
if (free) {
- BUFFER_TRACE(block_bh, "dirty block bitmap");
- err = ext4_journal_dirty_metadata(handle, block_bh);
+ BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
+ err = ext4_journal_dirty_metadata(handle,
+ block_bitmap_bh);
}

- brelse(block_bh);
+ brelse(block_bitmap_bh);
if (err)
goto fail;
}
-
- spin_lock(sb_bgl_lock(sbi, group));
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
-
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp);
- }
-
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
- */
- if (ino > free)
- ext4_itable_unused_set(sb, gdp,
- (EXT4_INODES_PER_GROUP(sb) - ino));
- }
-
- count = ext4_free_inodes_count(sb, gdp) - 1;
- ext4_free_inodes_set(sb, gdp, count);
- if (S_ISDIR(mode)) {
- count = ext4_used_dirs_count(sb, gdp) + 1;
- ext4_used_dirs_set(sb, gdp, count);
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
- spin_unlock(sb_bgl_lock(sbi, group));
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
- if (err) goto fail;
+ BUFFER_TRACE(group_desc_bh, "call ext4_journal_dirty_metadata");
+ err = ext4_journal_dirty_metadata(handle, group_desc_bh);
+ if (err)
+ goto fail;

percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
@@ -876,7 +896,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
iput(inode);
ret = ERR_PTR(err);
really_out:
- brelse(bitmap_bh);
+ brelse(inode_bitmap_bh);
return ret;

fail_free_drop:
@@ -887,7 +907,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
- brelse(bitmap_bh);
+ brelse(inode_bitmap_bh);
return ERR_PTR(err);
}



2008-11-20 18:35:36

by Peter Staubach

[permalink] [raw]
Subject: Re: [PATCH 1/5] ext4: unlock group before ext4_error

Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/mballoc.c | 12 ++++++++++++
> 1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 772e05b..039a5a6 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
> blocknr +=
> le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
>
> + ext4_unlock_group(sb, e4b->bd_group);
> ext4_error(sb, __func__, "double-free of inode"
> " %lu's block %llu(bit %u in group %u)\n",
> inode ? inode->i_ino : 0, blocknr,
> first + i, e4b->bd_group);
> + ext4_unlock_group(sb, e4b->bd_group);
>

This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it?

Thanx...

ps

> }
> mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
> }
> @@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
> grp->bb_fragments = fragments;
>
> if (free != grp->bb_free) {
> + ext4_unlock_group(sb, group);
> ext4_error(sb, __func__,
> "EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
> group, free, grp->bb_free);
> @@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
> * If we intent to continue, we consider group descritor
> * corrupt and update bb_free using bitmap value
> */
> + ext4_lock_group(sb, group);
> grp->bb_free = free;
> }
>
> @@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
> * free blocks even though group info says we
> * we have free blocks
> */
> + ext4_unlock_group(sb, e4b->bd_group);
> ext4_error(sb, __func__, "%d free blocks as per "
> "group info. But bitmap says 0\n",
> free);
> + ext4_lock_group(sb, e4b->bd_group);
> break;
> }
>
> mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
> BUG_ON(ex.fe_len <= 0);
> if (free < ex.fe_len) {
> + ext4_unlock_group(sb, e4b->bd_group);
> ext4_error(sb, __func__, "%d free blocks as per "
> "group info. But got %d blocks\n",
> free, ex.fe_len);
> @@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
> * indicate that the bitmap is corrupt. So exit
> * without claiming the space.
> */
> + ext4_lock_group(sb, e4b->bd_group);
> break;
> }
>
> @@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
> bit = next + 1;
> }
> if (free != pa->pa_free) {
> + ext4_unlock_group(sb, group);
> printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
> pa, (unsigned long) pa->pa_lstart,
> (unsigned long) pa->pa_pstart,
> @@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
> * pa is already deleted so we use the value obtained
> * from the bitmap and continue.
> */
> + ext4_lock_group(sb, group);
> }
> atomic_add(free, &sbi->s_mb_discarded);
>
> @@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
> else if (block >= (entry->start_blk + entry->count))
> n = &(*n)->rb_right;
> else {
> + ext4_unlock_group(sb, e4b->bd_group);
> ext4_error(sb, __func__,
> "Double free of blocks %d (%d %d)\n",
> block, entry->start_blk, entry->count);
> + ext4_unlock_group(sb, e4b->bd_group);
> return 0;
> }
> }
>


2008-11-20 18:48:32

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values.

On Thu, Nov 20, 2008 at 11:56:20PM +0530, Aneesh Kumar K.V wrote:
> Rename the lower bits with suffix _lo and add helper
> to access the values. Also rename bg_itable_unused_hi
> to bg_pad as in e2fsprogs. bg_itable_unused_hi is never
> used before
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/balloc.c | 11 ++++---
> fs/ext4/ext4.h | 24 +++++++++++++--
> fs/ext4/ialloc.c | 83 ++++++++++++++++++++++++++++-------------------------
> fs/ext4/inode.c | 2 +-
> fs/ext4/mballoc.c | 15 +++++----
> fs/ext4/resize.c | 4 +-
> fs/ext4/super.c | 64 +++++++++++++++++++++++++++++++++++++++-
> 7 files changed, 143 insertions(+), 60 deletions(-)
>
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index d268f5f..efe68d9 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
> if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
> ext4_error(sb, __func__,
> "Checksum bad for group %u\n", block_group);
> - gdp->bg_free_blocks_count = 0;
> - gdp->bg_free_inodes_count = 0;
> - gdp->bg_itable_unused = 0;
> + ext4_free_blks_set(sb, gdp, 0);
> + ext4_free_inodes_set(sb, gdp, 0);
> + ext4_itable_unused_set(sb, gdp, 0);
> memset(bh->b_data, 0xff, sb->s_blocksize);
> return 0;
> }
> @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
> }
> }
> spin_lock(sb_bgl_lock(sbi, block_group));
> - le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
> + blocks_freed += ext4_free_blks_count(sb, desc);
> + ext4_free_blks_set(sb, desc, blocks_freed);
> desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
> spin_unlock(sb_bgl_lock(sbi, block_group));
> percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
> @@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
> gdp = ext4_get_group_desc(sb, i, NULL);
> if (!gdp)
> continue;
> - desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
> + desc_count += ext4_free_blks_count(sb, gdp);
> }
>

blocks_freed is used later so use count;

@@ -372,7 +372,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
struct ext4_group_desc *desc;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
- int err = 0, ret;
+ int err = 0, ret, count;
ext4_grpblk_t blocks_freed;
struct ext4_group_info *grp;

@@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
}
}
spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
+ count = blocks_freed + ext4_free_blks_count(sb, desc);
+ ext4_free_blks_set(sb, desc, count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);


2008-11-20 18:49:15

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/5] ext4: unlock group before ext4_error

On Thu, Nov 20, 2008 at 01:35:11PM -0500, Peter Staubach wrote:
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 772e05b..039a5a6 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>> blocknr +=
>> le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
>> + ext4_unlock_group(sb, e4b->bd_group);
>> ext4_error(sb, __func__, "double-free of inode"
>> " %lu's block %llu(bit %u in group %u)\n",
>> inode ? inode->i_ino : 0, blocknr,
>> first + i, e4b->bd_group);
>> + ext4_unlock_group(sb, e4b->bd_group);
>>
>
> This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it?

Yes, I need to update at two place

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 039a5a6..728221e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -465,7 +465,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
" %lu's block %llu(bit %u in group %u)\n",
inode ? inode->i_ino : 0, blocknr,
first + i, e4b->bd_group);
- ext4_unlock_group(sb, e4b->bd_group);
+ ext4_lock_group(sb, e4b->bd_group);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -4621,7 +4621,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_error(sb, __func__,
"Double free of blocks %d (%d %d)\n",
block, entry->start_blk, entry->count);
- ext4_unlock_group(sb, e4b->bd_group);
+ ext4_lock_group(sb, e4b->bd_group);
return 0;
}
}

2008-11-20 22:39:11

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/5] ext4: unlock group before ext4_error

On Thu, Nov 20, 2008 at 11:56:18PM +0530, Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.

Granted that it isn't necessarily safe as it is when errors-behaviour
is set to continue, that is the default on a large number of
filesystems. And unlocking the group, calling the ext4_error(), and
then relocking the group can't possibly be safe; after all, we're
holding the lock for a reason!

In the errors=continue case, we don't actually need to unlock and
relock the group, since all we need to do is to printk a message to
the console. In the errors=panic case, we'll never return; and in the
errors=remount-ro case, relocking is kind of pointless but harmless,
since once the journal is aborted, there's no way the allocation is
going to succeed anyway, and a subsequent call will return an error.

This gets ugly to get right. Since the situation where we need to
call ext4_error() while holding a spinlock which should be preserved
in the errors=continue case seems to be unique to mballoc, maybe
something like this?

static int ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
const char *function,
const char *fmt, ...)
{

va_start(args, fmt);
printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
vprintk(fmt, args);
printk("\n");
va_end(args);

if (test_opt(sb, ERROR_CONT)) {
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, es, 0);
return 0;
}
ext4_unlock_group(sb, grp);
ext4_handle_error(sb);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
* filesystem will have already been marked read/only and the
* journal has been aborted. We return 1 as a hint to callers
* who might what to use the return value from
* ext4_grp_locked_error() to distinguish beween the
* ERRORS_CONT and ERRORS_RO case, and perhaps return more
* aggressively from the ext4 function in question, with a
* more appropriate error code.
*/
ext4_lock_group(sb, grp);
}

This requires access to two static functions in fs/ext4/super.c, so
probably the best thing to do is to put this function in
fs/ext4/super.c, and move the definition of ext4_[un]lock_group from
mballoc.h to ext4.h.

Comments?

- Ted