2008-06-07 19:20:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems

On Sat, 7 Jun 2008 09:19:33 -0700 (PDT) [email protected] wrote:

>
> http://bugzilla.kernel.org/show_bug.cgi?id=10882

Various ext3 crashes when mounting corrupted images.


2008-06-21 01:54:27

by Duane Griffin

[permalink] [raw]
Subject: [PATCH] ext3: validate directory entry data before use

Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.

Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.

Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.

This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <[email protected]>
--

Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).

Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.

---

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..f092208 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}

/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};

-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;

printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
@@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry("ext3_htree_fill_tree",
+ dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ dir, de, bh, offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)

prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1196,21 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;

+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (char *) de + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry("do_split", dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1397,8 +1433,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ while (1) {
+ de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
+ if (de2 == NULL || de2 >= top) {
+ break;
+ }
de = de2;
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1655,7 +1698,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1835,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1868,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1889,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry("empty_dir", dir, de, bh, offset);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1909,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1890,7 +1939,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2117,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;

retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;

retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2293,7 @@ retry:
}

#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)

/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2346,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;

2008-06-21 15:55:01

by Duane Griffin

[permalink] [raw]
Subject: [PATCH, v2] ext3: validate directory entry data before use

Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.

Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.

Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.

This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <[email protected]>
--

This is version 2. The original patch was an earlier version causing
warnings that I sent by mistake. This version just fixes those.

Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).

Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.

---

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..ea0236b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}

/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};

-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;

printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
@@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry("ext3_htree_fill_tree",
+ dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ dir, de, bh, offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)

prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1196,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;

+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (struct ext3_dir_entry_2 *) data1 +
+ dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry("do_split", dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ while (1) {
+ de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
+ if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
+ break;
+ }
de = de2;
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1655,7 +1699,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1836,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1869,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1890,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry("empty_dir", dir, de, bh, 0);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1910,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1890,7 +1940,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2121,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;

retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;

retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2297,7 @@ retry:
}

#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)

/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2350,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;

2008-06-21 16:13:56

by Jochen Voß

[permalink] [raw]
Subject: Re: [PATCH, v2] ext3: validate directory entry data before use

Hi Duane,

2008/6/21 Duane Griffin <[email protected]>:
> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + while (1) {
> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This looks very strange!

> + break;
> + }
> de = de2;
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);

All the best,
Jochen
--
http://seehuhn.de/

2008-06-21 16:31:44

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH, v2] ext3: validate directory entry data before use

2008/6/21 Jochen Vo? <[email protected]>:
> Hi Duane,
>
> 2008/6/21 Duane Griffin <[email protected]>:
>> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
>> memcpy (data1, de, len);
>> de = (struct ext3_dir_entry_2 *) data1;
>> top = data1 + len;
>> - while ((char *)(de2 = ext3_next_entry(de)) < top)
>> +
>> + while (1) {
>> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
>> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> This looks very strange!

Aargh, I just can't seem to get this patch out cleanly! That looks
like a vi typo, thanks for catching it so quickly. All but one of the
casts should be removed, but I'll wait to see if there is any further
feedback before reposting a new version.

> All the best,
> Jochen

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-23 21:56:43

by Duane Griffin

[permalink] [raw]
Subject: [PATCH] ext3: handle corrupted orphan list at mount

If the orphan node list includes valid, untruncatable nodes with nlink > 0
the ext3_orphan_cleanup loop which attempts to delete them will not do so,
causing it to loop forever. Fix by checking for such nodes in the
ext3_orphan_get function.

This patch fixes the second case (image hdb.20000009.softlockup.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <[email protected]>
--

Note that we can still end up in an infinite loop if the ext3_truncate
errors out early (and keeps doing so). I'm not sure if we should worry
about that. If we did want to handle it then we could change ext3_truncate
to return success/failure and exit the ext3_orphan_cleanup.

---

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 7712682..bc030f4 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
if (IS_ERR(inode))
goto iget_failed;

+ /*
+ * If the orphans has i_nlinks > 0 then it should be able to be
+ * truncated, otherwise it won't be removed from the orphan list
+ * during processing and an infinite loop will result.
+ */
+ if (inode->i_nlink && !ext3_can_truncate(inode))
+ goto bad_orphan;
+
if (NEXT_ORPHAN(inode) > max_ino)
goto bad_orphan;
brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf..7b7e234 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2253,6 +2253,14 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
}
}

+int ext3_can_truncate(struct inode *inode)
+{
+ return (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ !ext3_inode_is_fast_symlink(inode) &&
+ !(IS_APPEND(inode) || IS_IMMUTABLE(inode));
+}
+
/*
* ext3_truncate()
*
@@ -2297,12 +2305,7 @@ void ext3_truncate(struct inode *inode)
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;

- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext3_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!ext3_can_truncate(inode))
return;

/*
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c5403..80171ee 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);

2008-06-24 00:09:19

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle corrupted orphan list at mount

2008/6/23 Sami Liedes <[email protected]>:
> You people working hard to fix bugs and implement great filesystems
> almost makes me feel bad for trying to break your code :)

Not at all, it gives me something productive to do with my time! ;)

> Here I get (on x86 gcc 4.3.1):
>
> fs/ext3/ialloc.c: In function 'ext3_orphan_get':
> fs/ext3/ialloc.c:701: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int'
>
> So it probably should be %u or something.

Thanks, fixed for the next version.

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-24 00:13:52

by Sami Liedes

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle corrupted orphan list at mount

On Mon, Jun 23, 2008 at 10:56:40PM +0100, Duane Griffin wrote:
> If the orphan node list includes valid, untruncatable nodes with nlink > 0
> the ext3_orphan_cleanup loop which attempts to delete them will not do so,
> causing it to loop forever. Fix by checking for such nodes in the
> ext3_orphan_get function.

You people working hard to fix bugs and implement great filesystems
almost makes me feel bad for trying to break your code :)

> diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
> index 7712682..bc030f4 100644
> --- a/fs/ext3/ialloc.c
> +++ b/fs/ext3/ialloc.c
[...]
> @@ -690,6 +698,7 @@ bad_orphan:
> printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
> NEXT_ORPHAN(inode));
> printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
> + printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
^^^

Here I get (on x86 gcc 4.3.1):

fs/ext3/ialloc.c: In function 'ext3_orphan_get':
fs/ext3/ialloc.c:701: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int'

So it probably should be %u or something.

Sami

2008-06-24 06:36:11

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] ext3: validate directory entry data before use

On Jun 21, 2008 02:54 +0100, Duane Griffin wrote:
> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <[email protected]>
> --
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.

Just as a note, and not to detract from the validity of this patch -
in the ext2 page-based directory code is somewhat more efficient in this
area. It checks each page a single time when it is first read from disk,
marks the page as checked, and then never has to check the page again.

This wasn't implemented for ext3 because it never changed from buffer-
based directories to page-based directories due to the dependence on
buffer heads for the journaling code.

It would be possible to implement this for ext3/ext4 readdir/lookup by
replacing use of ext3_bread() with a new ext3_bread_dir() - a copy of
ext3_bread() that validates the buffer if it actually needs ll_rw_block()
to read the buffer from disk.

I also note in ext3_readdir() for non-indexed directories that the readahead
doesn't check for !buffer_uptodate(bh) before forcing a read of the next
block.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-06-24 13:47:23

by Duane Griffin

[permalink] [raw]
Subject: [PATCH] ext3: handle deleting corrupted indirect blocks

While freeing indirect blocks we attach a journal head to the parent buffer
head, free the blocks, then journal the parent. If the indirect block list
is corrupted and points to the parent the journal head will be detached
when the block is cleared, causing an OOPS.

Check for that explicitly and handle it gracefully.

This patch fixes the third case (image hdb.20000057.nullderef.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <[email protected]>
---

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf..8019bf2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,

if (this_bh) {
BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, this_bh);
+
+ /*
+ * The buffer head should have an attached journal head at this
+ * point. However, if the data is corrupted and an indirect
+ * block pointed to itself, it would have been detached when
+ * the block was cleared. Check for this instead of OOPSing.
+ */
+ if (bh2jh(this_bh))
+ ext3_journal_dirty_metadata(handle, this_bh);
+ else
+ ext3_error(inode->i_sb, "ext3_free_data",
+ "circular indirect block detected, "
+ "inode=%lu, block="E3FSBLK,
+ inode->i_ino, this_bh->b_blocknr);
}
}


2008-06-24 13:58:49

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle deleting corrupted indirect blocks

Duane Griffin wrote:
> While freeing indirect blocks we attach a journal head to the parent buffer
> head, free the blocks, then journal the parent. If the indirect block list
> is corrupted and points to the parent the journal head will be detached
> when the block is cleared, causing an OOPS.
>
> Check for that explicitly and handle it gracefully.
>
> This patch fixes the third case (image hdb.20000057.nullderef.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <[email protected]>
> ---
>
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 6ae4ecf..8019bf2 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
>
> if (this_bh) {
> BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
> - ext3_journal_dirty_metadata(handle, this_bh);
> +
> + /*
> + * The buffer head should have an attached journal head at this
> + * point. However, if the data is corrupted and an indirect
> + * block pointed to itself, it would have been detached when
> + * the block was cleared. Check for this instead of OOPSing.
> + */
> + if (bh2jh(this_bh))

Should it also (or only) be checking for buffer_jbd rather than testing
bh2jh which is just shorthand for "is b_private non-null?"

Also maybe I should know this intuitively by now, but what is the path
where b_private / BH_JBD got cleared on this corrupted image?

Thanks,
-Eric

> + ext3_journal_dirty_metadata(handle, this_bh);
> + else
> + ext3_error(inode->i_sb, "ext3_free_data",
> + "circular indirect block detected, "
> + "inode=%lu, block="E3FSBLK,
> + inode->i_ino, this_bh->b_blocknr);
> }
> }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2008-06-24 14:17:04

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle deleting corrupted indirect blocks

2008/6/24 Eric Sandeen <[email protected]>:
>> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
>> index 6ae4ecf..8019bf2 100644
>> --- a/fs/ext3/inode.c
>> +++ b/fs/ext3/inode.c
>> @@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
>>
>> if (this_bh) {
>> BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
>> - ext3_journal_dirty_metadata(handle, this_bh);
>> +
>> + /*
>> + * The buffer head should have an attached journal head at this
>> + * point. However, if the data is corrupted and an indirect
>> + * block pointed to itself, it would have been detached when
>> + * the block was cleared. Check for this instead of OOPSing.
>> + */
>> + if (bh2jh(this_bh))
>
> Should it also (or only) be checking for buffer_jbd rather than testing
> bh2jh which is just shorthand for "is b_private non-null?"

I don't think so. The bug was occurring because journal_dirty_metadata
dereferences b_private (via bh2jh) unconditionally. In practice
checking with buffer_jbd should be equivalent, but this way seems more
robust since it is checking the actual pointer being accessed rather
than a separate bit, albeit one that should be in sync with it.

> Also maybe I should know this intuitively by now, but what is the path
> where b_private / BH_JBD got cleared on this corrupted image?

Immediately above the change, in the ext3_free_data function, we call
ext3_clear_blocks to clear the indirect blocks in this parent block.
If one of those blocks happens to actually be the parent block it will
clear b_private / BH_JBD.

I did the check at the end rather than earlier as it seemed more
elegant. I don't think there should be much practical difference,
although it is possible the FS may not be quite so badly corrupted if
we did it the other way (and didn't clear the block at all). To be
honest, I'm not convinced there aren't other similar failure modes
lurking in this code, although I couldn't find any with a quick
review.

Just let me know if you think it should be done another way.

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-24 16:08:12

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle corrupted orphan list at mount

> If the orphan node list includes valid, untruncatable nodes with nlink > 0
> the ext3_orphan_cleanup loop which attempts to delete them will not do so,
> causing it to loop forever. Fix by checking for such nodes in the
> ext3_orphan_get function.
>
> This patch fixes the second case (image hdb.20000009.softlockup.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <[email protected]>
> --
>
> Note that we can still end up in an infinite loop if the ext3_truncate
> errors out early (and keeps doing so). I'm not sure if we should worry
> about that. If we did want to handle it then we could change ext3_truncate
> to return success/failure and exit the ext3_orphan_cleanup.
>
> ---
>
> diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
> index 7712682..bc030f4 100644
> --- a/fs/ext3/ialloc.c
> +++ b/fs/ext3/ialloc.c
> @@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
> if (IS_ERR(inode))
> goto iget_failed;
>
> + /*
> + * If the orphans has i_nlinks > 0 then it should be able to be
> + * truncated, otherwise it won't be removed from the orphan list
> + * during processing and an infinite loop will result.
> + */
> + if (inode->i_nlink && !ext3_can_truncate(inode))
> + goto bad_orphan;
> +
Maybe I miss something but shouldn't above rather be ||?

Honza

> if (NEXT_ORPHAN(inode) > max_ino)
> goto bad_orphan;
> brelse(bitmap_bh);
> @@ -690,6 +698,7 @@ bad_orphan:
> printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
> NEXT_ORPHAN(inode));
> printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
> + printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
> /* Avoid freeing blocks if we got a bad deleted inode */
> if (inode->i_nlink == 0)
> inode->i_blocks = 0;
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 6ae4ecf..7b7e234 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -2253,6 +2253,14 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
> }
> }
>
> +int ext3_can_truncate(struct inode *inode)
> +{
> + return (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> + S_ISLNK(inode->i_mode)) &&
> + !ext3_inode_is_fast_symlink(inode) &&
> + !(IS_APPEND(inode) || IS_IMMUTABLE(inode));
> +}
> +
> /*
> * ext3_truncate()
> *
> @@ -2297,12 +2305,7 @@ void ext3_truncate(struct inode *inode)
> unsigned blocksize = inode->i_sb->s_blocksize;
> struct page *page;
>
> - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> - S_ISLNK(inode->i_mode)))
> - return;
> - if (ext3_inode_is_fast_symlink(inode))
> - return;
> - if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
> + if (!ext3_can_truncate(inode))
> return;
>
> /*
> diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
> index 36c5403..80171ee 100644
> --- a/include/linux/ext3_fs.h
> +++ b/include/linux/ext3_fs.h
> @@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
> extern void ext3_dirty_inode(struct inode *);
> extern int ext3_change_inode_journal_flag(struct inode *, int);
> extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
> +extern int ext3_can_truncate(struct inode *inode);
> extern void ext3_truncate (struct inode *);
> extern void ext3_set_inode_flags(struct inode *);
> extern void ext3_get_inode_flags(struct ext3_inode_info *);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Jan Kara <[email protected]>
SuSE CR Labs

2008-06-24 17:16:23

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle corrupted orphan list at mount

2008/6/24 Jan Kara <[email protected]>:
>> + /*
>> + * If the orphans has i_nlinks > 0 then it should be able to be
>> + * truncated, otherwise it won't be removed from the orphan list
>> + * during processing and an infinite loop will result.
>> + */
>> + if (inode->i_nlink && !ext3_can_truncate(inode))
>> + goto bad_orphan;
>> +
> Maybe I miss something but shouldn't above rather be ||?

No, it is correct. If i_nlink == 0 the orphan will be deleted in the
cleanup loop by the iput. If i_nlink > 0 then ext3_truncate is called,
which usually calls ext3_orphan_del on the way out, thereby removing
the node from the orphan list. However, if it exits too early
(basically if the ext3_can_truncate check fails, although there are
other failure conditions such as OOM that can also cause it to exit
early) then it doesn't, hence we end up in the infinite loop. So the
check here says, if this node is not going to be deleted or truncated
then it is invalid.

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-24 17:18:58

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle corrupted orphan list at mount

On Tue 24-06-08 18:16:21, Duane Griffin wrote:
> 2008/6/24 Jan Kara <[email protected]>:
> >> + /*
> >> + * If the orphans has i_nlinks > 0 then it should be able to be
> >> + * truncated, otherwise it won't be removed from the orphan list
> >> + * during processing and an infinite loop will result.
> >> + */
> >> + if (inode->i_nlink && !ext3_can_truncate(inode))
> >> + goto bad_orphan;
> >> +
> > Maybe I miss something but shouldn't above rather be ||?
>
> No, it is correct. If i_nlink == 0 the orphan will be deleted in the
> cleanup loop by the iput. If i_nlink > 0 then ext3_truncate is called,
> which usually calls ext3_orphan_del on the way out, thereby removing
> the node from the orphan list. However, if it exits too early
> (basically if the ext3_can_truncate check fails, although there are
> other failure conditions such as OOM that can also cause it to exit
> early) then it doesn't, hence we end up in the infinite loop. So the
> check here says, if this node is not going to be deleted or truncated
> then it is invalid.
Ah, OK, I forgot that you want to handle also truncation without deletion
of the inode itself. Thanks for explanation.

Honza

--
Jan Kara <[email protected]>
SUSE Labs, CR

2008-06-24 21:06:47

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle deleting corrupted indirect blocks

On Tue, 24 Jun 2008 14:47:20 +0100
"Duane Griffin" <[email protected]> wrote:

> While freeing indirect blocks we attach a journal head to the parent buffer
> head, free the blocks, then journal the parent. If the indirect block list
> is corrupted and points to the parent the journal head will be detached
> when the block is cleared, causing an OOPS.
>
> Check for that explicitly and handle it gracefully.
>
> This patch fixes the third case (image hdb.20000057.nullderef.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Thanks.

Quite a few minorish ext3 fixes are coming in lately. Is anyone
checking whether they are needed in ext4 and if so, porting them
over?

-mm's current queue is:

ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch
jbd-replace-potentially-false-assertion-with-if-block.patch
jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch
ext3-improve-some-code-in-rb-tree-part-of-dirc.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
ext3-remove-double-definitions-of-xattr-macros.patch
ext3-handle-corrupted-orphan-list-at-mount.patch
ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
ext3-handle-corrupted-orphan-list-at-mount-fix.patch
ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch
ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch
ext3-handle-deleting-corrupted-indirect-blocks.patch


2008-06-25 00:12:58

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle deleting corrupted indirect blocks


On Tue, 2008-06-24 at 14:05 -0700, Andrew Morton wrote:
> On Tue, 24 Jun 2008 14:47:20 +0100
> "Duane Griffin" <[email protected]> wrote:
>
> > While freeing indirect blocks we attach a journal head to the parent buffer
> > head, free the blocks, then journal the parent. If the indirect block list
> > is corrupted and points to the parent the journal head will be detached
> > when the block is cleared, causing an OOPS.
> >
> > Check for that explicitly and handle it gracefully.
> >
> > This patch fixes the third case (image hdb.20000057.nullderef.gz)
> > reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Thanks.
>
> Quite a few minorish ext3 fixes are coming in lately. Is anyone
> checking whether they are needed in ext4 and if so, porting them
> over?
>
Hi Andrew, thanks for the reminder, I checked and think there are a few
latest ext3 fixes need to port to ext4..


> -mm's current queue is:
>
> ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
> ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
> ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch

These three (ext4 version) have been pushed to Linus tree(May 14th)

> jbd-replace-potentially-false-assertion-with-if-block.patch

Ext4 has this fix in upstream already

> jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
> jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch

Pushed to Linus already

> ext3-improve-some-code-in-rb-tree-part-of-dirc.patch

Ext4 version is queued in ext4 patch queue

> jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
jbd2 version is not needed with new ordered mode rewrite

> ext3-remove-double-definitions-of-xattr-macros.patch
ext4 version is in ext4 patch queue

> ext3-handle-corrupted-orphan-list-at-mount.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
> ext3-handle-corrupted-orphan-list-at-mount-fix.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch
> ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch
> ext3-handle-deleting-corrupted-indirect-blocks.patch
>

These haven't port to ext4 yet. I could port them to ext4 if no one
wants to do so.

Mingming
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2008-06-25 00:15:11

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH] ext3: handle deleting corrupted indirect blocks

2008/6/24 Andrew Morton <[email protected]>:
> Quite a few minorish ext3 fixes are coming in lately. Is anyone
> checking whether they are needed in ext4 and if so, porting them
> over?

I was planning on doing my ones, once they'd gone through review and
been accepted into your tree. I'd be happy to do others too, if
needed.

> -mm's current queue is:
>
> ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
> ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
> ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch

> jbd-replace-potentially-false-assertion-with-if-block.patch
> jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
> jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch

These three are mine and should have ext4 patches queued or upstream already.

> ext3-improve-some-code-in-rb-tree-part-of-dirc.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
> ext3-remove-double-definitions-of-xattr-macros.patch

> ext3-handle-corrupted-orphan-list-at-mount.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
> ext3-handle-corrupted-orphan-list-at-mount-fix.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch

These four are new ones from me. They don't have ext4 versions yet but
I'll prepare them soon.

> ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch

> ext3-handle-deleting-corrupted-indirect-blocks.patch

Ditto this one.

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-25 10:08:36

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH, v2] ext3: validate directory entry data before use

Hi,

> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <[email protected]>
> --
>
> This is version 2. The original patch was an earlier version causing
> warnings that I sent by mistake. This version just fixes those.
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.
The patch looks sane to me, only of few mostly coding style nits..

> ---
>
> diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
> index 0b8cf80..ea0236b 100644
> --- a/fs/ext3/namei.c
> +++ b/fs/ext3/namei.c
> @@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
> * p is at least 6 bytes before the end of page
> */
> static inline struct ext3_dir_entry_2 *
> -ext3_next_entry(struct ext3_dir_entry_2 *p)
> +__ext3_next_entry(struct ext3_dir_entry_2 *p)
> {
> return (struct ext3_dir_entry_2 *)((char *)p +
> ext3_rec_len_from_disk(p->rec_len));
> }
>
> /*
> + * Checks the directory entry looks sane before using it to find the next one.
> + * Returns NULL and reports an error if an invalid entry is passed.
> + */
> +static inline struct ext3_dir_entry_2 *
> +ext3_next_entry(const char *func, struct inode *dir,
> + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> +{
> + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> + return __ext3_next_entry(de);
> + else
> + return NULL;
> +}
Andrew might complain that the above function is too big to be
inlined. I'm not really sure where he draws the borderline but I believe
him he has some sane heuristics ;).

> +
> +/*
> * Future: use high four bits of block for coalesce-on-delete flags
> * Mask them off for now.
> */
> @@ -271,15 +285,17 @@ struct stats
> unsigned bcount;
> };
>
> -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
> - int size, int show_names)
> +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
> + struct buffer_head *bh, int size,
> + int show_names)
> {
> + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
> unsigned names = 0, space = 0;
> char *base = (char *) de;
> struct dx_hash_info h = *hinfo;
>
> printk("names: ");
> - while ((char *) de < base + size)
> + while (de && (char *) de < base + size)
> {
> if (de->inode)
> {
> @@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
> space += EXT3_DIR_REC_LEN(de->name_len);
> names++;
> }
> - de = ext3_next_entry(de);
> + de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
Why don't you use __func__?


> }
> printk("(%i)\n", names);
> return (struct stats) { names, space, 1 };
> @@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
> if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
> stats = levels?
> dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
> - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
> + dx_show_leaf(hinfo, dir, bh, blocksize, 0);
> names += stats.names;
> space += stats.space;
> bcount += stats.bcount;
> @@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
> top = (struct ext3_dir_entry_2 *) ((char *) de +
> dir->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de)) {
> + for (; de < top; de = __ext3_next_entry(de)) {
> if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
Here should be __func__ as well - not your fault, I know... Anyway,
maybe you could do global replacement for this ;)

> (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
> +((char *)de - bh->b_data))) {
> @@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
> }
> if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
> de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
> - de = ext3_next_entry(de);
> + de = ext3_next_entry("ext3_htree_fill_tree",
> + dir, de, frames[0].bh, 0);
> + if (de == NULL) {
> + err = -EIO;
> + goto errout;
> + }
> if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
> goto errout;
> count++;
> @@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
> count++;
> cond_resched();
> }
> - /* XXX: do we need to check rec_len == 0 case? -Chris */
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return count;
> }
> @@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de))
> - if (ext3_match (namelen, name, de)) {
> + for (; de < top; de = __ext3_next_entry(de)) {
> + int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
> + + ((char *) de - bh->b_data);
> +
> if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh,
> - (block<<EXT3_BLOCK_SIZE_BITS(sb))
> - +((char *)de - bh->b_data))) {
> - brelse (bh);
> + dir, de, bh, offset)) {
> + brelse(bh);
> *err = ERR_BAD_DX_DIR;
> goto errout;
> }
> - *res_dir = de;
> - dx_release (frames);
> - return bh;
> +
> + if (ext3_match(namelen, name, de)) {
> + *res_dir = de;
> + dx_release(frames);
> + return bh;
> + }
> }
> brelse (bh);
> +
> /* Check to see if we should continue to search */
> - retval = ext3_htree_next_block(dir, hash, frame,
> - frames, NULL);
> + retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
> +
> if (retval < 0) {
> ext3_warning(sb, __func__,
> "error reading index page in directory #%lu",
> @@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
>
> prev = to = de;
> while ((char*)de < base + size) {
> - next = ext3_next_entry(de);
> + next = __ext3_next_entry(de);
> if (de->inode && de->name_len) {
> rec_len = EXT3_DIR_REC_LEN(de->name_len);
> if (de > to)
> @@ -1172,9 +1196,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
> struct dx_map_entry *map;
> char *data1 = (*bh)->b_data, *data2;
> unsigned split, move, size, i;
> - struct ext3_dir_entry_2 *de = NULL, *de2;
> + struct ext3_dir_entry_2 *de, *de2;
> int err = 0;
>
> + /* Verify directory entries are sane before trying anything else. */
> + de = (struct ext3_dir_entry_2 *) data1;
> + de2 = (struct ext3_dir_entry_2 *) data1 +
> + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
> + while (de < de2) {
> + de = ext3_next_entry("do_split", dir, de, *bh, 0);
> + if (de == NULL) {
> + brelse(*bh);
> + *bh = NULL;
> + goto errout;
> + }
> + }
> +
> bh2 = ext3_append (handle, dir, &newblock, &err);
> if (!(bh2)) {
> brelse(*bh);
> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + while (1) {
> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
> + break;
> + }
Apart from (char *) thing, you also don't need braces above. Maybe the
whole while loop would be nicer as:
de2 = de;
while (de2 != NULL && de2 < top) {
de = de2;
de2 = ext3_next_entry(__func__, dir, de, bh, 0);
}

> de = de2;
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);
> @@ -1655,7 +1699,7 @@ static int ext3_delete_entry (handle_t *handle,
> }
> i += ext3_rec_len_from_disk(de->rec_len);
> pde = de;
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return -ENOENT;
> }
> @@ -1792,7 +1836,7 @@ retry:
> de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
> strcpy (de->name, ".");
> ext3_set_de_type(dir->i_sb, de, S_IFDIR);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> de->inode = cpu_to_le32(dir->i_ino);
> de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(1));
> @@ -1825,7 +1869,7 @@ out_stop:
> /*
> * routine to check that the specified directory is empty (for rmdir)
> */
> -static int empty_dir (struct inode * inode)
> +static int empty_dir(struct inode *dir, struct inode *inode)
> {
> unsigned long offset;
> struct buffer_head * bh;
> @@ -1846,8 +1890,14 @@ static int empty_dir (struct inode * inode)
> inode->i_ino);
> return 1;
> }
> +
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> - de1 = ext3_next_entry(de);
> + de1 = ext3_next_entry("empty_dir", dir, de, bh, 0);
> + if (de1 == NULL) {
> + brelse(bh);
> + return 1;
> + }
> +
> if (le32_to_cpu(de->inode) != inode->i_ino ||
> !le32_to_cpu(de1->inode) ||
> strcmp (".", de->name) ||
> @@ -1860,8 +1910,8 @@ static int empty_dir (struct inode * inode)
> }
> offset = ext3_rec_len_from_disk(de->rec_len) +
> ext3_rec_len_from_disk(de1->rec_len);
> - de = ext3_next_entry(de1);
> - while (offset < inode->i_size ) {
> + de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
> + while (de && offset < inode->i_size) {
> if (!bh ||
> (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
> err = 0;
> @@ -1890,7 +1940,7 @@ static int empty_dir (struct inode * inode)
> return 0;
> }
> offset += ext3_rec_len_from_disk(de->rec_len);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> brelse (bh);
> return 1;
> @@ -2068,7 +2121,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
> goto end_rmdir;
>
> retval = -ENOTEMPTY;
> - if (!empty_dir (inode))
> + if (!empty_dir(dir, inode))
> goto end_rmdir;
>
> retval = ext3_delete_entry(handle, dir, de, bh);
> @@ -2244,7 +2297,7 @@ retry:
> }
>
> #define PARENT_INO(buffer) \
> - (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
> + (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
>
> /*
> * Anybody can rename anything with this: the permission checks are left to the
> @@ -2297,7 +2350,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
> if (S_ISDIR(old_inode->i_mode)) {
> if (new_inode) {
> retval = -ENOTEMPTY;
> - if (!empty_dir (new_inode))
> + if (!empty_dir(new_dir, new_inode))
> goto end_rename;
> }
> retval = -EIO;

Honza
--
Jan Kara <[email protected]>
SuSE CR Labs

2008-06-25 11:30:13

by Duane Griffin

[permalink] [raw]
Subject: Re: [PATCH, v2] ext3: validate directory entry data before use

On Wed, Jun 25, 2008 at 12:08:35PM +0200, Jan Kara wrote:
> The patch looks sane to me, only of few mostly coding style nits..

Thanks for the review!

> > +static inline struct ext3_dir_entry_2 *
> > +ext3_next_entry(const char *func, struct inode *dir,
> > + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> > +{
> > + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> > + return __ext3_next_entry(de);
> > + else
> > + return NULL;
> > +}
> Andrew might complain that the above function is too big to be
> inlined. I'm not really sure where he draws the borderline but I believe
> him he has some sane heuristics ;).

Oh, I'd certainly believe him! ;) Say the word and I'll change it.

> > - de = ext3_next_entry(de);
> > + de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
> Why don't you use __func__?

Good question. Fixed.

> > - for (; de < top; de = ext3_next_entry(de)) {
> > + for (; de < top; de = __ext3_next_entry(de)) {
> > if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
> Here should be __func__ as well - not your fault, I know... Anyway,
> maybe you could do global replacement for this ;)

Done, fixing a couple of incorrect strings along the way, thereby
proving the usefulness of the exercise. A macro would make things
slightly tidier, but I'm not sure it is worth doing. Let me know if you
think so and I'll add it.

> > + while (1) {
> > + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> > + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
> > + break;
> > + }
> Apart from (char *) thing, you also don't need braces above. Maybe the
> whole while loop would be nicer as:
> de2 = de;
> while (de2 != NULL && de2 < top) {
> de = de2;
> de2 = ext3_next_entry(__func__, dir, de, bh, 0);
> }

Agreed, much nicer. Updated accordingly.

> Jan Kara <[email protected]>
> SuSE CR Labs

Cheers,
Duane.

--
"I never could learn to drink that blood and call it wine" - Bob Dylan

2008-06-25 12:11:48

by Duane Griffin

[permalink] [raw]
Subject: [PATCH, v3] ext3: validate directory entry data before use

Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.

Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.

Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.

This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.

Signed-off-by: Duane Griffin <[email protected]>
--

This is version 3. It includes some tidy-ups from v2 as suggested by
Jochen Voss and Jan Kara. It also replaces hard-coded function name strings
with __func__ in all calls to ext3_check_dir_entry, including one in an
otherwise untouched file. I don't think a separate patch for this is
necessary, but if it would be preferred I'd be happy to split it out.

I've removed some whitespace in a couple of places in order to fit lines
into 80 columns, so there are a couple of checkpatch warnings. Let me know
if you think it would be better to split the lines or go over 80 cols.

Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).

Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.

---

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd..2a8ab33 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -187,7 +187,7 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
- if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+ if (!ext3_check_dir_entry (__func__, inode, de,
bh, offset)) {
/* On error, skip the f_pos to the
next block. */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..bb35255 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}

/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};

-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;

printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry(__func__, dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,8 +599,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
- if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+ for (; de < top; de = __ext3_next_entry(de)) {
+ if (!ext3_check_dir_entry(__func__, dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
@@ -658,7 +674,11 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry(__func__, dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +746,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -822,8 +841,7 @@ static inline int search_dirblock(struct buffer_head * bh,
if ((char *) de + namelen <= dlimit &&
ext3_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh, offset))
+ if (!ext3_check_dir_entry(__func__, dir, de, bh,offset))
return -1;
*res_dir = de;
return 1;
@@ -991,24 +1009,27 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
+ if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1162,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)

prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1193,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;

+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (struct ext3_dir_entry_2 *) data1 +
+ dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry(__func__, dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1281,8 +1315,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext3_dir_entry_2 *)bh->b_data;
top = bh->b_data + dir->i_sb->s_blocksize - reclen;
while ((char *) de <= top) {
- if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
- bh, offset)) {
+ if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
brelse (bh);
return -EIO;
}
@@ -1397,8 +1430,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ de2 = de;
+ while (de2 != NULL && de2 < top) {
de = de2;
+ de2 = ext3_next_entry(__func__, dir, de, bh, 0);
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1637,7 +1675,7 @@ static int ext3_delete_entry (handle_t *handle,
pde = NULL;
de = (struct ext3_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
+ if (!ext3_check_dir_entry(__func__, dir, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
@@ -1655,7 +1693,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1830,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1863,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1884,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry(__func__, dir, de, bh, 0);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1904,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry(__func__, dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1879,7 +1923,7 @@ static int empty_dir (struct inode * inode)
}
de = (struct ext3_dir_entry_2 *) bh->b_data;
}
- if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+ if (!ext3_check_dir_entry(__func__, inode, de, bh, offset)) {
de = (struct ext3_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -1890,7 +1934,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2112,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;

retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;

retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2288,7 @@ retry:
}

#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)

/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2341,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;

2008-06-25 12:18:44

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH, v3] ext3: validate directory entry data before use

On Wed 25-06-08 13:11:43, Duane Griffin wrote:
> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <[email protected]>
For what it's worth, you can add Acked-by: Jan Kara <[email protected]>

Honza
> --
>
> This is version 3. It includes some tidy-ups from v2 as suggested by
> Jochen Voss and Jan Kara. It also replaces hard-coded function name strings
> with __func__ in all calls to ext3_check_dir_entry, including one in an
> otherwise untouched file. I don't think a separate patch for this is
> necessary, but if it would be preferred I'd be happy to split it out.
>
> I've removed some whitespace in a couple of places in order to fit lines
> into 80 columns, so there are a couple of checkpatch warnings. Let me know
> if you think it would be better to split the lines or go over 80 cols.
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.
>
> ---
>
> diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
> index 8ca3bfd..2a8ab33 100644
> --- a/fs/ext3/dir.c
> +++ b/fs/ext3/dir.c
> @@ -187,7 +187,7 @@ revalidate:
> while (!error && filp->f_pos < inode->i_size
> && offset < sb->s_blocksize) {
> de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
> - if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
> + if (!ext3_check_dir_entry (__func__, inode, de,
> bh, offset)) {
> /* On error, skip the f_pos to the
> next block. */
> diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
> index 0b8cf80..bb35255 100644
> --- a/fs/ext3/namei.c
> +++ b/fs/ext3/namei.c
> @@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
> * p is at least 6 bytes before the end of page
> */
> static inline struct ext3_dir_entry_2 *
> -ext3_next_entry(struct ext3_dir_entry_2 *p)
> +__ext3_next_entry(struct ext3_dir_entry_2 *p)
> {
> return (struct ext3_dir_entry_2 *)((char *)p +
> ext3_rec_len_from_disk(p->rec_len));
> }
>
> /*
> + * Checks the directory entry looks sane before using it to find the next one.
> + * Returns NULL and reports an error if an invalid entry is passed.
> + */
> +static inline struct ext3_dir_entry_2 *
> +ext3_next_entry(const char *func, struct inode *dir,
> + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> +{
> + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> + return __ext3_next_entry(de);
> + else
> + return NULL;
> +}
> +
> +/*
> * Future: use high four bits of block for coalesce-on-delete flags
> * Mask them off for now.
> */
> @@ -271,15 +285,17 @@ struct stats
> unsigned bcount;
> };
>
> -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
> - int size, int show_names)
> +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
> + struct buffer_head *bh, int size,
> + int show_names)
> {
> + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
> unsigned names = 0, space = 0;
> char *base = (char *) de;
> struct dx_hash_info h = *hinfo;
>
> printk("names: ");
> - while ((char *) de < base + size)
> + while (de && (char *) de < base + size)
> {
> if (de->inode)
> {
> @@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
> space += EXT3_DIR_REC_LEN(de->name_len);
> names++;
> }
> - de = ext3_next_entry(de);
> + de = ext3_next_entry(__func__, dir, de, bh, 0);
> }
> printk("(%i)\n", names);
> return (struct stats) { names, space, 1 };
> @@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
> if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
> stats = levels?
> dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
> - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
> + dx_show_leaf(hinfo, dir, bh, blocksize, 0);
> names += stats.names;
> space += stats.space;
> bcount += stats.bcount;
> @@ -583,8 +599,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
> top = (struct ext3_dir_entry_2 *) ((char *) de +
> dir->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de)) {
> - if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
> + for (; de < top; de = __ext3_next_entry(de)) {
> + if (!ext3_check_dir_entry(__func__, dir, de, bh,
> (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
> +((char *)de - bh->b_data))) {
> /* On error, skip the f_pos to the next block. */
> @@ -658,7 +674,11 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
> }
> if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
> de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
> - de = ext3_next_entry(de);
> + de = ext3_next_entry(__func__, dir, de, frames[0].bh, 0);
> + if (de == NULL) {
> + err = -EIO;
> + goto errout;
> + }
> if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
> goto errout;
> count++;
> @@ -726,8 +746,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
> count++;
> cond_resched();
> }
> - /* XXX: do we need to check rec_len == 0 case? -Chris */
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return count;
> }
> @@ -822,8 +841,7 @@ static inline int search_dirblock(struct buffer_head * bh,
> if ((char *) de + namelen <= dlimit &&
> ext3_match (namelen, name, de)) {
> /* found a match - just to be sure, do a full check */
> - if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh, offset))
> + if (!ext3_check_dir_entry(__func__, dir, de, bh,offset))
> return -1;
> *res_dir = de;
> return 1;
> @@ -991,24 +1009,27 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de))
> - if (ext3_match (namelen, name, de)) {
> - if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh,
> - (block<<EXT3_BLOCK_SIZE_BITS(sb))
> - +((char *)de - bh->b_data))) {
> - brelse (bh);
> + for (; de < top; de = __ext3_next_entry(de)) {
> + int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
> + + ((char *) de - bh->b_data);
> +
> + if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
> + brelse(bh);
> *err = ERR_BAD_DX_DIR;
> goto errout;
> }
> - *res_dir = de;
> - dx_release (frames);
> - return bh;
> +
> + if (ext3_match(namelen, name, de)) {
> + *res_dir = de;
> + dx_release(frames);
> + return bh;
> + }
> }
> brelse (bh);
> +
> /* Check to see if we should continue to search */
> - retval = ext3_htree_next_block(dir, hash, frame,
> - frames, NULL);
> + retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
> +
> if (retval < 0) {
> ext3_warning(sb, __func__,
> "error reading index page in directory #%lu",
> @@ -1141,7 +1162,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
>
> prev = to = de;
> while ((char*)de < base + size) {
> - next = ext3_next_entry(de);
> + next = __ext3_next_entry(de);
> if (de->inode && de->name_len) {
> rec_len = EXT3_DIR_REC_LEN(de->name_len);
> if (de > to)
> @@ -1172,9 +1193,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
> struct dx_map_entry *map;
> char *data1 = (*bh)->b_data, *data2;
> unsigned split, move, size, i;
> - struct ext3_dir_entry_2 *de = NULL, *de2;
> + struct ext3_dir_entry_2 *de, *de2;
> int err = 0;
>
> + /* Verify directory entries are sane before trying anything else. */
> + de = (struct ext3_dir_entry_2 *) data1;
> + de2 = (struct ext3_dir_entry_2 *) data1 +
> + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
> + while (de < de2) {
> + de = ext3_next_entry(__func__, dir, de, *bh, 0);
> + if (de == NULL) {
> + brelse(*bh);
> + *bh = NULL;
> + goto errout;
> + }
> + }
> +
> bh2 = ext3_append (handle, dir, &newblock, &err);
> if (!(bh2)) {
> brelse(*bh);
> @@ -1281,8 +1315,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *)bh->b_data;
> top = bh->b_data + dir->i_sb->s_blocksize - reclen;
> while ((char *) de <= top) {
> - if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
> - bh, offset)) {
> + if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
> brelse (bh);
> return -EIO;
> }
> @@ -1397,8 +1430,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + de2 = de;
> + while (de2 != NULL && de2 < top) {
> de = de2;
> + de2 = ext3_next_entry(__func__, dir, de, bh, 0);
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);
> @@ -1637,7 +1675,7 @@ static int ext3_delete_entry (handle_t *handle,
> pde = NULL;
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> while (i < bh->b_size) {
> - if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
> + if (!ext3_check_dir_entry(__func__, dir, de, bh, i))
> return -EIO;
> if (de == de_del) {
> BUFFER_TRACE(bh, "get_write_access");
> @@ -1655,7 +1693,7 @@ static int ext3_delete_entry (handle_t *handle,
> }
> i += ext3_rec_len_from_disk(de->rec_len);
> pde = de;
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return -ENOENT;
> }
> @@ -1792,7 +1830,7 @@ retry:
> de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
> strcpy (de->name, ".");
> ext3_set_de_type(dir->i_sb, de, S_IFDIR);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> de->inode = cpu_to_le32(dir->i_ino);
> de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(1));
> @@ -1825,7 +1863,7 @@ out_stop:
> /*
> * routine to check that the specified directory is empty (for rmdir)
> */
> -static int empty_dir (struct inode * inode)
> +static int empty_dir(struct inode *dir, struct inode *inode)
> {
> unsigned long offset;
> struct buffer_head * bh;
> @@ -1846,8 +1884,14 @@ static int empty_dir (struct inode * inode)
> inode->i_ino);
> return 1;
> }
> +
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> - de1 = ext3_next_entry(de);
> + de1 = ext3_next_entry(__func__, dir, de, bh, 0);
> + if (de1 == NULL) {
> + brelse(bh);
> + return 1;
> + }
> +
> if (le32_to_cpu(de->inode) != inode->i_ino ||
> !le32_to_cpu(de1->inode) ||
> strcmp (".", de->name) ||
> @@ -1860,8 +1904,8 @@ static int empty_dir (struct inode * inode)
> }
> offset = ext3_rec_len_from_disk(de->rec_len) +
> ext3_rec_len_from_disk(de1->rec_len);
> - de = ext3_next_entry(de1);
> - while (offset < inode->i_size ) {
> + de = ext3_next_entry(__func__, dir, de1, bh, offset);
> + while (de && offset < inode->i_size) {
> if (!bh ||
> (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
> err = 0;
> @@ -1879,7 +1923,7 @@ static int empty_dir (struct inode * inode)
> }
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> }
> - if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
> + if (!ext3_check_dir_entry(__func__, inode, de, bh, offset)) {
> de = (struct ext3_dir_entry_2 *)(bh->b_data +
> sb->s_blocksize);
> offset = (offset | (sb->s_blocksize - 1)) + 1;
> @@ -1890,7 +1934,7 @@ static int empty_dir (struct inode * inode)
> return 0;
> }
> offset += ext3_rec_len_from_disk(de->rec_len);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> brelse (bh);
> return 1;
> @@ -2068,7 +2112,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
> goto end_rmdir;
>
> retval = -ENOTEMPTY;
> - if (!empty_dir (inode))
> + if (!empty_dir(dir, inode))
> goto end_rmdir;
>
> retval = ext3_delete_entry(handle, dir, de, bh);
> @@ -2244,7 +2288,7 @@ retry:
> }
>
> #define PARENT_INO(buffer) \
> - (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
> + (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
>
> /*
> * Anybody can rename anything with this: the permission checks are left to the
> @@ -2297,7 +2341,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
> if (S_ISDIR(old_inode->i_mode)) {
> if (new_inode) {
> retval = -ENOTEMPTY;
> - if (!empty_dir (new_inode))
> + if (!empty_dir(new_dir, new_inode))
> goto end_rename;
> }
> retval = -EIO;
--
Jan Kara <[email protected]>
SUSE Labs, CR