2007-09-01 02:48:49

by Eric Sandeen

[permalink] [raw]
Subject: [RESEND][PATCH] dir_index: error out instead of BUG on corrupt hash dir limit

(resend, this one got lost? Got an acked-by from Andreas
last go-round)

(Andrew, Ted, should I be splitting out ext3 and ext4 patches and
sending separately...?)

Thanks,
-Eric

----------

A corrupt ondisk hash dir limit will trip an assert in dx_probe,
which calls BUG(). Instead, we can just issue the warning and
fail dx_probe like the other 3 tests just before it. Thanks
to aviro for suggesting this... Tested with a hand-crafted
corrupt ext3 image, issues:

EXT3-fs warning (device loop0): dx_probe: Corrupt limit in dir inode 14337

vs. previous:

Assertion failure in dx_probe() at fs/ext3/namei.c:383: "dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length)"
------------[ cut here ]------------
kernel BUG at fs/ext3/namei.c:383!
...


Signed-off-by: Eric Sandeen <[email protected]>

Index: linux-2.6.22-rc4/fs/ext3/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext3/namei.c
+++ linux-2.6.22-rc4/fs/ext3/namei.c
@@ -379,8 +379,16 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n", dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
Index: linux-2.6.22-rc4/fs/ext4/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/namei.c
+++ linux-2.6.22-rc4/fs/ext4/namei.c
@@ -379,8 +379,16 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n", dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{



2007-09-09 13:26:17

by Duane Griffin

[permalink] [raw]
Subject: Re: [RESEND][PATCH] dir_index: error out instead of BUG on corrupt hash dir limit

Hi Eric,

On Fri, Aug 31, 2007 at 09:48:43PM -0500, Eric Sandeen wrote:
> (resend, this one got lost? Got an acked-by from Andreas
> last go-round)

Sorry I missed this first time around. I came up with a very similar
fix recently, following a gentoo bug report. However there are a few
more asserts later that you aren't currently handling. Below is an
incremental patch on top of yours that converts them too. Note that one
of them is in an if (0) block and maybe should be left alone -- what do
you think?

I tested all the changed code paths, except the if (0) one, using a
utility that appropriately corrupts ext3 images. The source code is
attached to the gentoo bug report here:

http://bugs.gentoo.org/show_bug.cgi?id=183207

Signed-off-by: Duane Griffin <[email protected]>

Index: linux-2.6-git/fs/ext3/namei.c
===================================================================
--- linux-2.6-git.orig/fs/ext3/namei.c
+++ linux-2.6-git/fs/ext3/namei.c
@@ -393,7 +393,15 @@ dx_probe(struct dentry *dentry, struct i
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n",
+ dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -419,7 +427,11 @@ dx_probe(struct dentry *dentry, struct i
break;
}
}
- assert (at == p - 1);
+ if (at != p - 1) {
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
}

at = p - 1;
@@ -431,8 +443,16 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n",
+ dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
Index: linux-2.6-git/fs/ext4/namei.c
===================================================================
--- linux-2.6-git.orig/fs/ext4/namei.c
+++ linux-2.6-git/fs/ext4/namei.c
@@ -393,7 +393,15 @@ dx_probe(struct dentry *dentry, struct i
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n",
+ dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -419,7 +427,11 @@ dx_probe(struct dentry *dentry, struct i
break;
}
}
- assert (at == p - 1);
+ if (at != p - 1) {
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
}

at = p - 1;
@@ -431,8 +443,16 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt limit in dir inode %ld\n",
+ dir->i_ino);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2007-09-10 14:59:34

by Eric Sandeen

[permalink] [raw]
Subject: Re: [RESEND][PATCH] dir_index: error out instead of BUG on corrupt hash dir limit

Duane Griffin wrote:
> Hi Eric,
>
> On Fri, Aug 31, 2007 at 09:48:43PM -0500, Eric Sandeen wrote:
>> (resend, this one got lost? Got an acked-by from Andreas
>> last go-round)
>
> Sorry I missed this first time around. I came up with a very similar
> fix recently, following a gentoo bug report. However there are a few
> more asserts later that you aren't currently handling. Below is an
> incremental patch on top of yours that converts them too.

Ah, good point... I focused a bit too much on the single problem at hand
didn't I. :)

> Note that one
> of them is in an if (0) block and maybe should be left alone -- what do
> you think?

If it's just there for debug, maybe leaving an assert is ok, to get a
dump & system state etc. If it is converted, a printk would probably be
good so you know you're falling back, otherwise that extra checking is a
bit pointless if it's silent.

> I tested all the changed code paths, except the if (0) one, using a
> utility that appropriately corrupts ext3 images.
> The source code is
> attached to the gentoo bug report here:
>
> http://bugs.gentoo.org/show_bug.cgi?id=183207
>
> Signed-off-by: Duane Griffin <[email protected]>

Looks good, thanks for not ignoring the other asserts. ;-) I wonder if
we should fix up all the new error condition printk's a bit to be more
descriptive of the problem at hand; for example, the one I sent should
maybe say:

+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt root limit in dir inode %ld\n", dir->i_ino);

I wanted to leave the word "corrupt" in there, or at least something to
clue in the user that maybe fsck is in order...

Thanks,
-Eric

2007-09-10 22:06:24

by Duane Griffin

[permalink] [raw]
Subject: Re: [RESEND][PATCH] dir_index: error out instead of BUG on corrupt hash dir limit

On 10/09/2007, Eric Sandeen <[email protected]> wrote:
> Duane Griffin wrote:
> > Sorry I missed this first time around. I came up with a very similar
> > fix recently, following a gentoo bug report. However there are a few
> > more asserts later that you aren't currently handling. Below is an
> > incremental patch on top of yours that converts them too.
>
> Ah, good point... I focused a bit too much on the single problem at hand
> didn't I. :)

Easy to do :)

> > Note that one
> > of them is in an if (0) block and maybe should be left alone -- what do
> > you think?
>
> If it's just there for debug, maybe leaving an assert is ok, to get a
> dump & system state etc. If it is converted, a printk would probably be
> good so you know you're falling back, otherwise that extra checking is a
> bit pointless if it's silent.

Good point. Perhaps best to just back that part out.

> I wonder if
> we should fix up all the new error condition printk's a bit to be more
> descriptive of the problem at hand; for example, the one I sent should
> maybe say:
>
> + ext3_warning(dir->i_sb, __FUNCTION__,
> + "Corrupt root limit in dir inode %ld\n", dir->i_ino);
>
> I wanted to leave the word "corrupt" in there, or at least something to
> clue in the user that maybe fsck is in order...

I struggled with the wording, too. I originally went with "Invalid dx
limit/count", but wasn't terribly happy with it. "Corrupt" is more
accurate and informative. Perhaps the warning should also explicitly
recommend running fsck:

"Corrupt root limit in dir inode %ld, running e2fsck is recommended\n"

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2007-09-10 22:41:58

by Eric Sandeen

[permalink] [raw]
Subject: [PATCH V2] dir_index: error out instead of BUG on corrupt dx dirs

Duane Griffin wrote:

> "Corrupt root limit in dir inode %ld, running e2fsck is recommended\n"
>
>
Probably good, for anything that was read from disk, certainly.

I don't know if it's worth differentiating messages for different types
of corruption (root block vs. others, etc...) - I guess the other error
cases do.

Maybe it'd be best to just consolidate the fsck suggestion message under
fail: ?

Here's a patch rolling up yours with mine + discussed changes, and
consolidating the fsck suggestion message.

How's it look to you? Suppose I'd better run this a bit to be sure it's
not hitting any common cases and issuing new warnings...?

ThAnks!
-Eric

---------------------------------

Convert asserts (BUGs) in dx_probe from bad on-disk data to recoverable
errors with helpful warnings. With help catching other asserts
from Duane Griffin <[email protected]>

Signed-off-by: Eric Sandeen <[email protected]>

Index: linux-2.6.22-rc4/fs/ext3/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext3/namei.c
+++ linux-2.6.22-rc4/fs/ext3/namei.c
@@ -379,13 +379,28 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +438,15 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +454,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.\n", dir->i_ino);
return NULL;
}

Index: linux-2.6.22-rc4/fs/ext4/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/namei.c
+++ linux-2.6.22-rc4/fs/ext4/namei.c
@@ -379,13 +379,28 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +438,15 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit\n");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +454,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.\n", dir->i_ino);
return NULL;
}


2007-09-11 01:35:06

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH V2] dir_index: error out instead of BUG on corrupt dx dirs

On 10/09/2007, Eric Sandeen <[email protected]> wrote:
> I don't know if it's worth differentiating messages for different types
> of corruption (root block vs. others, etc...) - I guess the other error
> cases do.

Might be useful for a developer wanting to know exactly which error
check was triggering. Unlikely to be of much interest or use to the
user, so I wouldn't worry too much about the exact wording.

> Here's a patch rolling up yours with mine + discussed changes, and
> consolidating the fsck suggestion message.
>
> How's it look to you? Suppose I'd better run this a bit to be sure it's
> not hitting any common cases and issuing new warnings...?

The warnings shouldn't include explicit newlines, aside from that it
looks good to me. I've tested it with the corruption utility and all
combinations (count & limit, root & indirect) seem to work correctly.

> Signed-off-by: Eric Sandeen <[email protected]>

Acked-by: Duane Griffin <[email protected]>

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2007-09-11 01:42:37

by Eric Sandeen

[permalink] [raw]
Subject: [PATCH V3] dir_index: error out instead of BUG on corrupt dx dirs

Duane Griffin wrote:

> The warnings shouldn't include explicit newlines
Urgh, right you are. Once more, with feeling! Thanks for running it through
your utility.

-Eric

---------------------

Convert asserts (BUGs) in dx_probe from bad on-disk data to recoverable
errors with helpful warnings. With help catching other asserts
from Duane Griffin <[email protected]>

Signed-off-by: Eric Sandeen <[email protected]>
Acked-by: Duane Griffin <[email protected]>


Index: linux-2.6.22-rc4/fs/ext3/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext3/namei.c
+++ linux-2.6.22-rc4/fs/ext3/namei.c
@@ -379,13 +379,28 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +438,15 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +454,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}

Index: linux-2.6.22-rc4/fs/ext4/namei.c
===================================================================
--- linux-2.6.22-rc4.orig/fs/ext4/namei.c
+++ linux-2.6.22-rc4/fs/ext4/namei.c
@@ -379,13 +379,28 @@ dx_probe(struct dentry *dentry, struct i

entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +438,15 @@ dx_probe(struct dentry *dentry, struct i
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +454,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}