2012-08-10 03:17:07

by Dave Jones

[permalink] [raw]
Subject: 3.5.1 ext4_ sleeping while atomic bug.

BUG: sleeping function called from invalid context at include/linux/buffer_head.h:333
in_atomic(): 1, irqs_disabled(): 0, pid: 9894, name: fstest
3 locks held by fstest/9894:
#0: (&type->i_mutex_dir_key#4/1){+.+.+.}, at: [<ffffffff811d5dae>] kern_path_create+0x7e/0x140
#1: (&ei->i_data_sem){++++..}, at: [<ffffffff81252e76>] ext4_map_blocks+0xb6/0x250
#2: (&(&bgl->locks[i].lock)->rlock){+.+...}, at: [<ffffffff8124a5e7>] ext4_validate_block_bitmap+0x77/0x230
Pid: 9894, comm: fstest Not tainted 3.5.1-1.fc17.x86_64.debug #1
Call Trace:
[<ffffffff8109cd0a>] __might_sleep+0x18a/0x240
[<ffffffff811fb430>] __sync_dirty_buffer+0x30/0xf0
[<ffffffff811fb503>] sync_dirty_buffer+0x13/0x20
[<ffffffff81273018>] ext4_commit_super+0x1e8/0x260
[<ffffffff81273283>] save_error_info+0x23/0x30
[<ffffffff81274539>] __ext4_error+0x89/0xa0
[<ffffffff8124a5e7>] ? ext4_validate_block_bitmap+0x77/0x230
[<ffffffff8124a72b>] ext4_validate_block_bitmap+0x1bb/0x230
[<ffffffff8124b0ae>] ext4_read_block_bitmap_nowait+0x8e/0x3b0
[<ffffffff812891c0>] ext4_mb_init_cache+0x160/0x990
[<ffffffff810d16bd>] ? trace_hardirqs_on_caller+0x10d/0x1a0
[<ffffffff81289b16>] ext4_mb_init_group+0x126/0x250
[<ffffffff81289d56>] ext4_mb_good_group+0x116/0x130
[<ffffffff8128c493>] ext4_mb_regular_allocator+0x1a3/0x420
[<ffffffff811aa920>] ? kmem_cache_alloc+0xe0/0x290
[<ffffffff8128e2c1>] ext4_mb_new_blocks+0x4f1/0xb90
[<ffffffff811fad9f>] ? __find_get_block+0xaf/0x220
[<ffffffff81293e7e>] ext4_alloc_branch+0x42e/0x690
[<ffffffff816c6030>] ? _raw_spin_unlock_irq+0x30/0x50
[<ffffffff812949a7>] ext4_ind_map_blocks+0x1e7/0x990
[<ffffffff816c348a>] ? down_write+0x9a/0xb0
[<ffffffff81252e76>] ? ext4_map_blocks+0xb6/0x250
[<ffffffff81252ea5>] ext4_map_blocks+0xe5/0x250
[<ffffffff8125306b>] ext4_getblk+0x5b/0x1f0
[<ffffffff81253218>] ext4_bread+0x18/0xa0
[<ffffffff8125e527>] ext4_mkdir+0x147/0x3d0
[<ffffffff811d2536>] vfs_mkdir+0xa6/0x130
[<ffffffff811d6c6e>] sys_mkdirat+0xbe/0xd0
[<ffffffff811d6c99>] sys_mkdir+0x19/0x20
[<ffffffff816cefe9>] system_call_fastpath+0x16/0x1b



2012-08-10 18:24:17

by Theodore Ts'o

[permalink] [raw]
Subject: Re: 3.5.1 ext4_ sleeping while atomic bug.

Hi Dave,

Thanks for the bug report! The following should address the bug which
you found.

- Ted

From 05ca87aa00121756b5d41f3d71eb8b51bed3bc92 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <[email protected]>
Date: Fri, 10 Aug 2012 13:57:52 -0400
Subject: [PATCH] ext4: don't call ext4_error while block group is locked

While in ext4_validate_block_bitmap(), if a block allocation bitmap
is found to be invalid, we call ext4_error() while the block group is
still locked. This causes ext4_commit_super() to call a function
which might sleep while in an atomic context.

There's no need to keep the block group locked at this point, so hoist
the ext4_error() call up to ext4_validate_block_bitmap() and release
the block group spinlock before calling ext4_error().

The reported stack trace can be found at:

http://article.gmane.org/gmane.comp.file-systems.ext4/33731

Reported-by: Dave Jones <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
Cc: [email protected]
---
fs/ext4/balloc.c | 62 +++++++++++++++++++++++++++++++++-----------------------
fs/ext4/bitmap.c | 1 -
2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d23b31c..1b50890 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}

-static int ext4_valid_block_bitmap(struct super_block *sb,
- struct ext4_group_desc *desc,
- unsigned int block_group,
- struct buffer_head *bh)
+/*
+ * Return the block number which was discovered to be invalid, or 0 if
+ * the block bitmap is valid.
+ */
+static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
+ struct ext4_group_desc *desc,
+ unsigned int block_group,
+ struct buffer_head *bh)
{
ext4_grpblk_t offset;
ext4_grpblk_t next_zero_bit;
- ext4_fsblk_t bitmap_blk;
+ ext4_fsblk_t blk;
ext4_fsblk_t group_first_block;

if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
@@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
* or it has to also read the block group where the bitmaps
* are located to verify they are set.
*/
- return 1;
+ return 0;
}
group_first_block = ext4_group_first_block_no(sb, block_group);

/* check whether block bitmap block number is set */
- bitmap_blk = ext4_block_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_block_bitmap(sb, desc);
+ offset = blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
- goto err_out;
+ return blk;

/* check whether the inode bitmap block number is set */
- bitmap_blk = ext4_inode_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_inode_bitmap(sb, desc);
+ offset = blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
- goto err_out;
+ return blk;

/* check whether the inode table block number is set */
- bitmap_blk = ext4_inode_table(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_inode_table(sb, desc);
+ offset = blk - group_first_block;
next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
offset + EXT4_SB(sb)->s_itb_per_group,
offset);
- if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
- /* good bitmap for inode tables */
- return 1;
-
-err_out:
- ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
- block_group, bitmap_blk);
+ if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
+ /* bad bitmap for inode tables */
+ return blk;
return 0;
}

@@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb,
unsigned int block_group,
struct buffer_head *bh)
{
+ ext4_fsblk_t blk;
+
if (buffer_verified(bh))
return;

ext4_lock_group(sb, block_group);
- if (ext4_valid_block_bitmap(sb, desc, block_group, bh) &&
- ext4_block_bitmap_csum_verify(sb, block_group, desc, bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8))
- set_buffer_verified(bh);
+ blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
+ block_group, blk);
+ return;
+ }
+ if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
+ desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+ return;
+ }
+ set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
}

diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index f8716ea..5c2d181 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
if (provided == calculated)
return 1;

- ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
return 0;
}

--
1.7.12.rc0.22.gcdd159b