2012-12-04 12:11:30

by Forrest Liu

[permalink] [raw]
Subject: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Extent indexes are not updated correctly in ext4_ext_rm_idx() when the
depth of the extent tree is greater than 1.

Signed-off-by: Forrest Liu <[email protected]>
---
fs/ext4/extents.c | 24 ++++++++++++++++++++----
1 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d3dd618..b10b8c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2190,13 +2190,15 @@ errout:
* removes index from the index block.
*/
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+ struct ext4_ext_path *path, int depth)
{
int err;
ext4_fsblk_t leaf;
+ __le32 border;

/* free index block */
- path--;
+ depth--;
+ path = path + depth;
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,

ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+ border = path->p_idx->ei_block;
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;
+ path->p_idx->ei_block = border;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
return err;
}

@@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* if this leaf is free, then we should
* remove it from index block above */
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
- err = ext4_ext_rm_idx(handle, inode, path + depth);
+ err = ext4_ext_rm_idx(handle, inode, path, depth);

out:
return err;
@@ -2760,7 +2776,7 @@ again:
/* index is empty, remove it;
* handle must be already prepared by the
* truncatei_leaf() */
- err = ext4_ext_rm_idx(handle, inode, path + i);
+ err = ext4_ext_rm_idx(handle, inode, path, i);
}
/* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
--
1.7.5.4



2012-12-04 12:29:24

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

This problem is easy to reproduce.

Create a file with size larger than 1GB.

dd if=/dev/zero of=/test_file bs=1M count=1024

Punch every even block in test_file, and then use debugfs to dump the
extents. The following is the dumped result:

2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
0/ 2 2/ 2 231201 - 262143 3901486 30943
1/ 2 1/ 46 231201 - 231880 3901488 680
2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
2/ 2 2/340 231203 - 231203 3917603 - 3917603 1

Punch blocks #231779 ~ #231201 to remove the extent index, and then use
debugfs to dump the extents. The following is the dumped result:

2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
0/ 2 2/ 2 231201 - 262143 3901486 30943
-------> the logical block index was not updated when the extent index was removed
1/ 2 1/ 45 231881 - 232560 3901490 680
2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
2/ 2 2/340 231883 - 231883 3918283 - 3918283 1

Thanks,
Forrest

2012/12/4 Forrest Liu <[email protected]>:
> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
> of extent tree is greater than 1.
>
> Signed-off-by: Forrest Liu <[email protected]>
> ---
> fs/ext4/extents.c | 24 ++++++++++++++++++++----
> 1 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index d3dd618..b10b8c0 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2190,13 +2190,15 @@ errout:
> * removes index from the index block.
> */
> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> - struct ext4_ext_path *path)
> + struct ext4_ext_path *path, int depth)
> {
> int err;
> ext4_fsblk_t leaf;
> + __le32 border;
>
> /* free index block */
> - path--;
> + depth--;
> + path = path + depth;
> leaf = ext4_idx_pblock(path->p_idx);
> if (unlikely(path->p_hdr->eh_entries == 0)) {
> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>
> ext4_free_blocks(handle, inode, NULL, leaf, 1,
> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> +
> + border = path->p_idx->ei_block;
> + while (--depth >= 0) {
> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> + break;
> + path--;
> + err = ext4_ext_get_access(handle, inode, path);
> + if (err)
> + break;
> + path->p_idx->ei_block = border;
> + err = ext4_ext_dirty(handle, inode, path);
> + if (err)
> + break;
> + }
> return err;
> }
>
> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> /* if this leaf is free, then we should
> * remove it from index block above */
> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> - err = ext4_ext_rm_idx(handle, inode, path + depth);
> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>
> out:
> return err;
> @@ -2760,7 +2776,7 @@ again:
> /* index is empty, remove it;
> * handle must be already prepared by the
> * truncatei_leaf() */
> - err = ext4_ext_rm_idx(handle, inode, path + i);
> + err = ext4_ext_rm_idx(handle, inode, path, i);
> }
> /* root level has p_bh == NULL, brelse() eats this */
> brelse(path[i].p_bh);
> --
> 1.7.5.4
>

2012-12-05 06:13:14

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

2012/12/4 forrest <[email protected]>:
> This problem is easily to reproduce
>
> Create a file with size larger than 1GB.
>
> dd if=/dev/zero of=/test_file bs=1M count=1024
>
> Punch every even block in test_file, and then use debugfs to dump
> extents, followings is dumped result
>
> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
> 0/ 2 2/ 2 231201 - 262143 3901486 30943
> 1/ 2 1/ 46 231201 - 231880 3901488 680
> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>
> Punch blocks #231779 ~#231201 , to remove extent index, and then use
> debugfs to dump extents, followings is dumped result
>
> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
> 0/ 2 2/ 2 231201 - 262143 3901486 30943
> -------> logical block index didn't update when remove extent index
> 1/ 2 1/ 45 231881 - 232560 3901490 680
> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1

After blocks #231202~#231880 have been punched out, these blocks can't be
mapped again. If we try to map a block in #231202~#231880, we get error
messages like the following.

EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
offset 231203 with max blocks 1 with error -5
EXT4-fs (md2): This should not happen!! Data will be lost

Regards,
Forrest

>
> 2012/12/4 Forrest Liu <[email protected]>:
>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>> of extent tree is greater than 1.
>>
>> Signed-off-by: Forrest Liu <[email protected]>
>> ---
>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index d3dd618..b10b8c0 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2190,13 +2190,15 @@ errout:
>> * removes index from the index block.
>> */
>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>> - struct ext4_ext_path *path)
>> + struct ext4_ext_path *path, int depth)
>> {
>> int err;
>> ext4_fsblk_t leaf;
>> + __le32 border;
>>
>> /* free index block */
>> - path--;
>> + depth--;
>> + path = path + depth;
>> leaf = ext4_idx_pblock(path->p_idx);
>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>
>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>> +
>> + border = path->p_idx->ei_block;
>> + while (--depth >= 0) {
>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> + break;
>> + path--;
>> + err = ext4_ext_get_access(handle, inode, path);
>> + if (err)
>> + break;
>> + path->p_idx->ei_block = border;
>> + err = ext4_ext_dirty(handle, inode, path);
>> + if (err)
>> + break;
>> + }
>> return err;
>> }
>>
>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>> /* if this leaf is free, then we should
>> * remove it from index block above */
>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>
>> out:
>> return err;
>> @@ -2760,7 +2776,7 @@ again:
>> /* index is empty, remove it;
>> * handle must be already prepared by the
>> * truncatei_leaf() */
>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>> }
>> /* root level has p_bh == NULL, brelse() eats this */
>> brelse(path[i].p_bh);
>> --
>> 1.7.5.4
>>

2012-12-05 07:53:41

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

I have tested it and the problem is real.
It happens when depth >= 2 and the hole punch completely removes any of
the first extent indexes at depth 1.
The change in ei_block is not propagated backwards to the index at depth 0.

However, AFAICS, the problematic call to ext4_ext_rm_idx is the one from
ext4_ext_rm_leaf, and there is no need to change
ext4_ext_remove_space or ext4_ext_rm_idx.

Forrest, what do you think about the change below?
Basically it's your code, but it has been added to ext4_ext_rm_leaf after
removing the index.

@@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,

/* if this leaf is free, then we should
* remove it from index block above */
- if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
err = ext4_ext_rm_idx(handle, inode, path + depth);
+ /* If this was the first extent index in node,
+ * propagates the ei_block updation info to top */
+ if(!err) {
+ --depth;
+ path = path + depth;
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;
+ path->p_idx->ei_block = border;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
+ }
+ }
+

On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
> 2012/12/4 forrest <[email protected]>:
>> This problem is easily to reproduce
>>
>> Create a file with size larger than 1GB.
>>
>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>
>> Punch every even block in test_file, and then use debugfs to dump
>> extents, followings is dumped result
>>
>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>
>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>> debugfs to dump extents, followings is dumped result
>>
>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>> -------> logical block index didn't update when remove extent index
>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>
> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
> If we do map a block in #231202~#231880, we will get error messages
> like followings.
>
> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
> offset 231203 with max blocks 1 with error -5
> EXT4-fs (md2): This should not happen!! Data will be lost
>
> Regards,
> Forrest
>
>>
>> 2012/12/4 Forrest Liu <[email protected]>:
>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>> of extent tree is greater than 1.
>>>
>>> Signed-off-by: Forrest Liu <[email protected]>
>>> ---
>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index d3dd618..b10b8c0 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -2190,13 +2190,15 @@ errout:
>>> * removes index from the index block.
>>> */
>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>> - struct ext4_ext_path *path)
>>> + struct ext4_ext_path *path, int depth)
>>> {
>>> int err;
>>> ext4_fsblk_t leaf;
>>> + __le32 border;
>>>
>>> /* free index block */
>>> - path--;
>>> + depth--;
>>> + path = path + depth;
>>> leaf = ext4_idx_pblock(path->p_idx);
>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>
>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>> +
>>> + border = path->p_idx->ei_block;
>>> + while (--depth >= 0) {
>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> + break;
>>> + path--;
>>> + err = ext4_ext_get_access(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + path->p_idx->ei_block = border;
>>> + err = ext4_ext_dirty(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + }
>>> return err;
>>> }
>>>
>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>> /* if this leaf is free, then we should
>>> * remove it from index block above */
>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>
>>> out:
>>> return err;
>>> @@ -2760,7 +2776,7 @@ again:
>>> /* index is empty, remove it;
>>> * handle must be already prepared by the
>>> * truncatei_leaf() */
>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>> }
>>> /* root level has p_bh == NULL, brelse() eats this */
>>> brelse(path[i].p_bh);
>>> --
>>> 1.7.5.4
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-05 08:09:05

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Sorry, one typo.
It should be
+ path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
instead of
+ path->p_idx->ei_block = border;

On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <[email protected]> wrote:
> I have tested it and the problem is real.
> It happens when depth => 2 and punch hole completely removes any of
> the first extent indexes at depth 1.
> The change in ei_block is not propagated backwards to index at depth 0.
>
> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
> ext4_ext_rm_leaf and there is no need to change in
> ext4_ext_remove_space or in ext4_ext_rm_idx.
>
> Forrest, what do you think about below change ?
> basically its your code, but its been added to ext4_ext_rm_leaf after
> removing index.
>
> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>
> /* if this leaf is free, then we should
> * remove it from index block above */
> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
> err = ext4_ext_rm_idx(handle, inode, path + depth);
> + /* If this was the first extent index in node,
> + * propagates the ei_block updation info to top */
> + if(!err) {
> + --depth;
> + path = path + depth;
> + while (--depth >= 0) {
> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> + break;
> + path--;
> + err = ext4_ext_get_access(handle, inode, path);
> + if (err)
> + break;
> + path->p_idx->ei_block = border;
> + err = ext4_ext_dirty(handle, inode, path);
> + if (err)
> + break;
> + }
> + }
> + }
> +
>
> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
>> 2012/12/4 forrest <[email protected]>:
>>> This problem is easily to reproduce
>>>
>>> Create a file with size larger than 1GB.
>>>
>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>
>>> Punch every even block in test_file, and then use debugfs to dump
>>> extents, followings is dumped result
>>>
>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>>
>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>> debugfs to dump extents, followings is dumped result
>>>
>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>> -------> logical block index didn't update when remove extent index
>>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>>
>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>> If we do map a block in #231202~#231880, we will get error messages
>> like followings.
>>
>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>> offset 231203 with max blocks 1 with error -5
>> EXT4-fs (md2): This should not happen!! Data will be lost
>>
>> Regards,
>> Forrest
>>
>>>
>>> 2012/12/4 Forrest Liu <[email protected]>:
>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>> of extent tree is greater than 1.
>>>>
>>>> Signed-off-by: Forrest Liu <[email protected]>
>>>> ---
>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>> index d3dd618..b10b8c0 100644
>>>> --- a/fs/ext4/extents.c
>>>> +++ b/fs/ext4/extents.c
>>>> @@ -2190,13 +2190,15 @@ errout:
>>>> * removes index from the index block.
>>>> */
>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>> - struct ext4_ext_path *path)
>>>> + struct ext4_ext_path *path, int depth)
>>>> {
>>>> int err;
>>>> ext4_fsblk_t leaf;
>>>> + __le32 border;
>>>>
>>>> /* free index block */
>>>> - path--;
>>>> + depth--;
>>>> + path = path + depth;
>>>> leaf = ext4_idx_pblock(path->p_idx);
>>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>
>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>> +
>>>> + border = path->p_idx->ei_block;
>>>> + while (--depth >= 0) {
>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>> + break;
>>>> + path--;
>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>> + if (err)
>>>> + break;
>>>> + path->p_idx->ei_block = border;
>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>> + if (err)
>>>> + break;
>>>> + }
>>>> return err;
>>>> }
>>>>
>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>> /* if this leaf is free, then we should
>>>> * remove it from index block above */
>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>
>>>> out:
>>>> return err;
>>>> @@ -2760,7 +2776,7 @@ again:
>>>> /* index is empty, remove it;
>>>> * handle must be already prepared by the
>>>> * truncatei_leaf() */
>>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>>> }
>>>> /* root level has p_bh == NULL, brelse() eats this */
>>>> brelse(path[i].p_bh);
>>>> --
>>>> 1.7.5.4
>>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-06 11:40:50

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Hi Ashish,
Thanks for your review and for fixing the hole punch bugs.

We can't propagate ei_block only in ext4_ext_rm_leaf.

There are two cases in which ext4_ext_rm_idx is called:
1) An extent block has no entries; ext4_ext_rm_leaf calls
ext4_ext_rm_idx to release the extent block.

2) An extent index block has no entries; ext4_ext_remove_space
calls ext4_ext_rm_idx to release the extent index block.

Consider removing a third-level extent index block that is indexed by
a second-level extent index, where that second-level extent index
is the first entry in its block.

In this case, ei_block will not be propagated to the top level correctly
if we only do the propagation in ext4_ext_rm_leaf.

Regards,
Forrest

2012/12/5 Ashish Sangwan <[email protected]>:
> Sorry 1 typo.
> It should be
> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
> instead of
> + path->p_idx->ei_block = border;
>
> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <[email protected]> wrote:
>> I have tested it and the problem is real.
>> It happens when depth => 2 and punch hole completely removes any of
>> the first extent indexes at depth 1.
>> The change in ei_block is not propagated backwards to index at depth 0.
>>
>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>> ext4_ext_rm_leaf and there is no need to change in
>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>
>> Forrest, what do you think about below change ?
>> basically its your code, but its been added to ext4_ext_rm_leaf after
>> removing index.
>>
>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>
>> /* if this leaf is free, then we should
>> * remove it from index block above */
>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>> err = ext4_ext_rm_idx(handle, inode, path + depth);
>> + /* If this was the first extent index in node,
>> + * propagates the ei_block updation info to top */
>> + if(!err) {
>> + --depth;
>> + path = path + depth;
>> + while (--depth >= 0) {
>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> + break;
>> + path--;
>> + err = ext4_ext_get_access(handle, inode, path);
>> + if (err)
>> + break;
>> + path->p_idx->ei_block = border;
>> + err = ext4_ext_dirty(handle, inode, path);
>> + if (err)
>> + break;
>> + }
>> + }
>> + }
>> +
>>
>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
>>> 2012/12/4 forrest <[email protected]>:
>>>> This problem is easily to reproduce
>>>>
>>>> Create a file with size larger than 1GB.
>>>>
>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>
>>>> Punch every even block in test_file, and then use debugfs to dump
>>>> extents, followings is dumped result
>>>>
>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>>>
>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>> debugfs to dump extents, followings is dumped result
>>>>
>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>> -------> logical block index didn't update when remove extent index
>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>>>
>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>> If we do map a block in #231202~#231880, we will get error messages
>>> like followings.
>>>
>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>> offset 231203 with max blocks 1 with error -5
>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>
>>> Regards,
>>> Forrest
>>>
>>>>
>>>> 2012/12/4 Forrest Liu <[email protected]>:
>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>> of extent tree is greater than 1.
>>>>>
>>>>> Signed-off-by: Forrest Liu <[email protected]>
>>>>> ---
>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>> index d3dd618..b10b8c0 100644
>>>>> --- a/fs/ext4/extents.c
>>>>> +++ b/fs/ext4/extents.c
>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>> * removes index from the index block.
>>>>> */
>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>> - struct ext4_ext_path *path)
>>>>> + struct ext4_ext_path *path, int depth)
>>>>> {
>>>>> int err;
>>>>> ext4_fsblk_t leaf;
>>>>> + __le32 border;
>>>>>
>>>>> /* free index block */
>>>>> - path--;
>>>>> + depth--;
>>>>> + path = path + depth;
>>>>> leaf = ext4_idx_pblock(path->p_idx);
>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>
>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>> +
>>>>> + border = path->p_idx->ei_block;
>>>>> + while (--depth >= 0) {
>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>> + break;
>>>>> + path--;
>>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>>> + if (err)
>>>>> + break;
>>>>> + path->p_idx->ei_block = border;
>>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>>> + if (err)
>>>>> + break;
>>>>> + }
>>>>> return err;
>>>>> }
>>>>>
>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>> /* if this leaf is free, then we should
>>>>> * remove it from index block above */
>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>
>>>>> out:
>>>>> return err;
>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>> /* index is empty, remove it;
>>>>> * handle must be already prepared by the
>>>>> * truncatei_leaf() */
>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>> }
>>>>> /* root level has p_bh == NULL, brelse() eats this */
>>>>> brelse(path[i].p_bh);
>>>>> --
>>>>> 1.7.5.4
>>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-06 13:16:43

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <[email protected]> wrote:
> Hi Ashish,
> Thanks for your review and fixed hole punch bugs.
>
> We can't propogate ei_block only in ext4_ext_rm_leave.
>
> There have two cases to call ext4_ext_rm_idx
> 1) extent block has no entry, ext4_ext_rm_leave calls
> ext4_ext_rm_idx to release extent block.
>

1.1) And when the extent block is removed, the corresponding index
entry also has to be removed. If this was the first index entry,
all the other index entries are also updated. This update will
continue until we reach the root of the extent tree OR the currently
updated index entry is not the first entry in its node.

> 2) extent index block has no entry, ext4_ext_remove_space
> calls ext4_ext_rm_idx to release extent index block.
>
> Consider remove third level of extent index block that indexed by
> second level of extent index, and the extent index in second level
> is first entry in block.
>
So, I think the above case would be handled by point (1.1), i.e. the
earlier index update done from within ext4_ext_rm_leaf.
The earlier ei_block update will reach the top of the tree.

> In this case, ei_block will not propogate to top level correctly,
> if we only do propogation in ext4_ext_rm_leave.

And thanks for pointing out the problem. Can you reproduce it with my code
suggestion, even with depth 3?
If not, then please resend the patch with the suggested changes.
The intent here is to solve the problem without changing any function
signature and with minimal changes.

Regards,
Ashish
>
> Regards,
> Forrest
>
> 2012/12/5 Ashish Sangwan <[email protected]>:
>> Sorry 1 typo.
>> It should be
>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>> instead of
>> + path->p_idx->ei_block = border;
>>
>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <[email protected]> wrote:
>>> I have tested it and the problem is real.
>>> It happens when depth => 2 and punch hole completely removes any of
>>> the first extent indexes at depth 1.
>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>
>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>> ext4_ext_rm_leaf and there is no need to change in
>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>
>>> Forrest, what do you think about below change ?
>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>> removing index.
>>>
>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>
>>> /* if this leaf is free, then we should
>>> * remove it from index block above */
>>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>> err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> + /* If this was the first extent index in node,
>>> + * propagates the ei_block updation info to top */
>>> + if(!err) {
>>> + --depth;
>>> + path = path + depth;
>>> + while (--depth >= 0) {
>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> + break;
>>> + path--;
>>> + err = ext4_ext_get_access(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + path->p_idx->ei_block = border;
>>> + err = ext4_ext_dirty(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + }
>>> + }
>>> + }
>>> +
>>>
>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
>>>> 2012/12/4 forrest <[email protected]>:
>>>>> This problem is easily to reproduce
>>>>>
>>>>> Create a file with size larger than 1GB.
>>>>>
>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>
>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>> extents, followings is dumped result
>>>>>
>>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>>>>
>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>> debugfs to dump extents, followings is dumped result
>>>>>
>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>> -------> logical block index didn't update when remove extent index
>>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>>>>
>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>> If we do map a block in #231202~#231880, we will get error messages
>>>> like followings.
>>>>
>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>> offset 231203 with max blocks 1 with error -5
>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>
>>>> Regards,
>>>> Forrest
>>>>
>>>>>
>>>>> 2012/12/4 Forrest Liu <[email protected]>:
>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>> of extent tree is greater than 1.
>>>>>>
>>>>>> Signed-off-by: Forrest Liu <[email protected]>
>>>>>> ---
>>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>>>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>> index d3dd618..b10b8c0 100644
>>>>>> --- a/fs/ext4/extents.c
>>>>>> +++ b/fs/ext4/extents.c
>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>> * removes index from the index block.
>>>>>> */
>>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>> - struct ext4_ext_path *path)
>>>>>> + struct ext4_ext_path *path, int depth)
>>>>>> {
>>>>>> int err;
>>>>>> ext4_fsblk_t leaf;
>>>>>> + __le32 border;
>>>>>>
>>>>>> /* free index block */
>>>>>> - path--;
>>>>>> + depth--;
>>>>>> + path = path + depth;
>>>>>> leaf = ext4_idx_pblock(path->p_idx);
>>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>
>>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>> +
>>>>>> + border = path->p_idx->ei_block;
>>>>>> + while (--depth >= 0) {
>>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>> + break;
>>>>>> + path--;
>>>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>>>> + if (err)
>>>>>> + break;
>>>>>> + path->p_idx->ei_block = border;
>>>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>>>> + if (err)
>>>>>> + break;
>>>>>> + }
>>>>>> return err;
>>>>>> }
>>>>>>
>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>> /* if this leaf is free, then we should
>>>>>> * remove it from index block above */
>>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>
>>>>>> out:
>>>>>> return err;
>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>> /* index is empty, remove it;
>>>>>> * handle must be already prepared by the
>>>>>> * truncatei_leaf() */
>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>> }
>>>>>> /* root level has p_bh == NULL, brelse() eats this */
>>>>>> brelse(path[i].p_bh);
>>>>>> --
>>>>>> 1.7.5.4
>>>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>> the body of a message to [email protected]
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-06 14:44:46

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Hi Ashish,

There is a chance of doing only a trivial update, and this case will be
covered in ext4_ext_remove_space.

+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
err = ext4_ext_rm_idx(handle, inode, path + depth);
+ /* If this was the first extent index in node,
+ * propagates the ei_block updation info to top */
+ if(!err) {
+ --depth;
+ path = path + depth;
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;

trivial update, when (path + 1)->p_hdr->eh_entries == 0

+ path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
+ }
+ }
+

I will try to reproduce the problem tomorrow.

Thanks,
Forrest

2012/12/6 Ashish Sangwan <[email protected]>:
> On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <[email protected]> wrote:
>> Hi Ashish,
>> Thanks for your review and fixed hole punch bugs.
>>
>> We can't propogate ei_block only in ext4_ext_rm_leave.
>>
>> There have two cases to call ext4_ext_rm_idx
>> 1) extent block has no entry, ext4_ext_rm_leave calls
>> ext4_ext_rm_idx to release extent block.
>>
>
> 1.1) And when the extent block is removed, the corresponding index
> entry has also to be removed. And if this was the first index entry,
> all the other index entries, are also updated. This updation will
> continue till we reach the root of extent tree OR the currently
> updated index entry is not the first entry in the node.
>
>> 2) extent index block has no entry, ext4_ext_remove_space
>> calls ext4_ext_rm_idx to release extent index block.
>>
>> Consider remove third level of extent index block that indexed by
>> second level of extent index, and the extent index in second level
>> is first entry in block.
>>
> So, I think the above case would be handled by point (1.1) i.e. the
> earlier index updation done from within ext4_ext_rm_leaf.
> The earlier ei_block updation will reach to the top of tree.
>
>> In this case, ei_block will not propogate to top level correctly,
>> if we only do propogation in ext4_ext_rm_leave.
>
> And thanks for pointing the problem. Can you reproduce it with my code
> suggesstion even with depth 3?
> If no, than please resend the patch with suggested changes.
> The intent here is to solve the problem without changing any function
> signature and with minimal changes.
>
> Regards,
> Ashish
>>
>> Regards,
>> Forrest
>>
>> 2012/12/5 Ashish Sangwan <[email protected]>:
>>> Sorry 1 typo.
>>> It should be
>>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>>> instead of
>>> + path->p_idx->ei_block = border;
>>>
>>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <[email protected]> wrote:
>>>> I have tested it and the problem is real.
>>>> It happens when depth => 2 and punch hole completely removes any of
>>>> the first extent indexes at depth 1.
>>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>>
>>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>>> ext4_ext_rm_leaf and there is no need to change in
>>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>>
>>>> Forrest, what do you think about below change ?
>>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>>> removing index.
>>>>
>>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>
>>>> /* if this leaf is free, then we should
>>>> * remove it from index block above */
>>>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>>> err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>> + /* If this was the first extent index in node,
>>>> + * propagates the ei_block updation info to top */
>>>> + if(!err) {
>>>> + --depth;
>>>> + path = path + depth;
>>>> + while (--depth >= 0) {
>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>> + break;
>>>> + path--;
>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>> + if (err)
>>>> + break;
>>>> + path->p_idx->ei_block = border;
>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>> + if (err)
>>>> + break;
>>>> + }
>>>> + }
>>>> + }
>>>> +
>>>>
>>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
>>>>> 2012/12/4 forrest <[email protected]>:
>>>>>> This problem is easily to reproduce
>>>>>>
>>>>>> Create a file with size larger than 1GB.
>>>>>>
>>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>>
>>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>>> extents, followings is dumped result
>>>>>>
>>>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>>>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>>>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>>>>>
>>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>>> debugfs to dump extents, followings is dumped result
>>>>>>
>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>>> -------> logical block index didn't update when remove extent index
>>>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>>>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>>>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>>>>>
>>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>>> If we do map a block in #231202~#231880, we will get error messages
>>>>> like followings.
>>>>>
>>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>>> offset 231203 with max blocks 1 with error -5
>>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>>
>>>>> Regards,
>>>>> Forrest
>>>>>
>>>>>>
>>>>>> 2012/12/4 Forrest Liu <[email protected]>:
>>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>>> of extent tree is greater than 1.
>>>>>>>
>>>>>>> Signed-off-by: Forrest Liu <[email protected]>
>>>>>>> ---
>>>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>>>>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>>> index d3dd618..b10b8c0 100644
>>>>>>> --- a/fs/ext4/extents.c
>>>>>>> +++ b/fs/ext4/extents.c
>>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>>> * removes index from the index block.
>>>>>>> */
>>>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>> - struct ext4_ext_path *path)
>>>>>>> + struct ext4_ext_path *path, int depth)
>>>>>>> {
>>>>>>> int err;
>>>>>>> ext4_fsblk_t leaf;
>>>>>>> + __le32 border;
>>>>>>>
>>>>>>> /* free index block */
>>>>>>> - path--;
>>>>>>> + depth--;
>>>>>>> + path = path + depth;
>>>>>>> leaf = ext4_idx_pblock(path->p_idx);
>>>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>
>>>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>>> +
>>>>>>> + border = path->p_idx->ei_block;
>>>>>>> + while (--depth >= 0) {
>>>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>>> + break;
>>>>>>> + path--;
>>>>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>>>>> + if (err)
>>>>>>> + break;
>>>>>>> + path->p_idx->ei_block = border;
>>>>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>>>>> + if (err)
>>>>>>> + break;
>>>>>>> + }
>>>>>>> return err;
>>>>>>> }
>>>>>>>
>>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>>> /* if this leaf is free, then we should
>>>>>>> * remove it from index block above */
>>>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>>
>>>>>>> out:
>>>>>>> return err;
>>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>>> /* index is empty, remove it;
>>>>>>> * handle must be already prepared by the
>>>>>>> * truncatei_leaf() */
>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>>> }
>>>>>>> /* root level has p_bh == NULL, brelse() eats this */
>>>>>>> brelse(path[i].p_bh);
>>>>>>> --
>>>>>>> 1.7.5.4
>>>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>>> the body of a message to [email protected]
>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-06 15:45:33

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On 12/4/12 6:11 AM, Forrest Liu wrote:
> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
> of extent tree is greater than 1.

This is interesting; we had 2 reports of similar corruption on the
list, I wonder if the application in question was doing hole punching.
I didn't expect that they were, so TBH I was pretty much ignoring
the hole-punch cases for parent index updates. Hm. I'll have
to look into that.

Could you turn your testcase into an xfstest regression test?

-Eric

> Signed-off-by: Forrest Liu <[email protected]>
> ---
> fs/ext4/extents.c | 24 ++++++++++++++++++++----
> 1 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index d3dd618..b10b8c0 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2190,13 +2190,15 @@ errout:
> * removes index from the index block.
> */
> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> - struct ext4_ext_path *path)
> + struct ext4_ext_path *path, int depth)
> {
> int err;
> ext4_fsblk_t leaf;
> + __le32 border;
>
> /* free index block */
> - path--;
> + depth--;
> + path = path + depth;
> leaf = ext4_idx_pblock(path->p_idx);
> if (unlikely(path->p_hdr->eh_entries == 0)) {
> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>
> ext4_free_blocks(handle, inode, NULL, leaf, 1,
> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> +
> + border = path->p_idx->ei_block;
> + while (--depth >= 0) {
> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> + break;
> + path--;
> + err = ext4_ext_get_access(handle, inode, path);
> + if (err)
> + break;
> + path->p_idx->ei_block = border;
> + err = ext4_ext_dirty(handle, inode, path);
> + if (err)
> + break;
> + }
> return err;
> }
>
> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> /* if this leaf is free, then we should
> * remove it from index block above */
> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> - err = ext4_ext_rm_idx(handle, inode, path + depth);
> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>
> out:
> return err;
> @@ -2760,7 +2776,7 @@ again:
> /* index is empty, remove it;
> * handle must be already prepared by the
> * truncatei_leaf() */
> - err = ext4_ext_rm_idx(handle, inode, path + i);
> + err = ext4_ext_rm_idx(handle, inode, path, i);
> }
> /* root level has p_bh == NULL, brelse() eats this */
> brelse(path[i].p_bh);
>


2012-12-06 15:48:09

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On 12/6/12 9:45 AM, Eric Sandeen wrote:
> On 12/4/12 6:11 AM, Forrest Liu wrote:
>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>> of extent tree is greater than 1.
>
> This is interesting; we had 2 reports of similar corruption on the
> list, I wonder if the application in question was doing hole punching.
> I didn't expect that they were, so TBH I was pretty much ignoring
> the hole-punch cases for parent index updates. Hm. I'll have
> to look into that.
>
> Could you turn your testcase into an xfstest regression test?

Also, please note that I sent an e2fsck patch to try to fix this
problem after the fact; it'd be great if in your testing, you could
also confirm that e2fsck w/ my patch fixes it correctly.

Thanks,
-Eric

> -Eric
>
>> Signed-off-by: Forrest Liu <[email protected]>
>> ---
>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index d3dd618..b10b8c0 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2190,13 +2190,15 @@ errout:
>> * removes index from the index block.
>> */
>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>> - struct ext4_ext_path *path)
>> + struct ext4_ext_path *path, int depth)
>> {
>> int err;
>> ext4_fsblk_t leaf;
>> + __le32 border;
>>
>> /* free index block */
>> - path--;
>> + depth--;
>> + path = path + depth;
>> leaf = ext4_idx_pblock(path->p_idx);
>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>
>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>> +
>> + border = path->p_idx->ei_block;
>> + while (--depth >= 0) {
>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> + break;
>> + path--;
>> + err = ext4_ext_get_access(handle, inode, path);
>> + if (err)
>> + break;
>> + path->p_idx->ei_block = border;
>> + err = ext4_ext_dirty(handle, inode, path);
>> + if (err)
>> + break;
>> + }
>> return err;
>> }
>>
>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>> /* if this leaf is free, then we should
>> * remove it from index block above */
>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>
>> out:
>> return err;
>> @@ -2760,7 +2776,7 @@ again:
>> /* index is empty, remove it;
>> * handle must be already prepared by the
>> * truncatei_leaf() */
>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>> }
>> /* root level has p_bh == NULL, brelse() eats this */
>> brelse(path[i].p_bh);
>>
>


2012-12-07 05:11:47

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <[email protected]> wrote:
> Hi Ashish,
>
> There have a chance to do a trivial update, and this case will be
> covered in ext4_ext_remove_space.
>
> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
> err = ext4_ext_rm_idx(handle, inode, path + depth);
> + /* If this was the first extent index in node,
> + * propagates the ei_block updation info to top */
> + if(!err) {
> + --depth;
> + path = path + depth;
> + while (--depth >= 0) {
> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
> + break;
> + path--;
> + err = ext4_ext_get_access(handle, inode, path);
> + if (err)
> + break;
>
> trivial update, when (path + 1)->p_idx->eh_entries == 0
>
> + path->p_idx->ei_block =(path +
> 1)->p_idx->ei_block ;
> + err = ext4_ext_dirty(handle, inode, path);
> + if (err)
> + break;
> + }
> + }
> + }
> +
>
> I will trying to reproduce the problem tomorrow.
I think it will work fine at least up to depth = 2.
It would be great if you could check with depth > 2.
If the problem still occurs, we can think about it further.

>
> Thanks,
> Forrest
>
> 2012/12/6 Ashish Sangwan <[email protected]>:
>> On Thu, Dec 6, 2012 at 5:05 PM, Forrest Liu <[email protected]> wrote:
>>> Hi Ashish,
>>> Thanks for your review and fixed hole punch bugs.
>>>
>>> We can't propogate ei_block only in ext4_ext_rm_leave.
>>>
>>> There have two cases to call ext4_ext_rm_idx
>>> 1) extent block has no entry, ext4_ext_rm_leave calls
>>> ext4_ext_rm_idx to release extent block.
>>>
>>
>> 1.1) And when the extent block is removed, the corresponding index
>> entry has also to be removed. And if this was the first index entry,
>> all the other index entries, are also updated. This updation will
>> continue till we reach the root of extent tree OR the currently
>> updated index entry is not the first entry in the node.
>>
>>> 2) extent index block has no entry, ext4_ext_remove_space
>>> calls ext4_ext_rm_idx to release extent index block.
>>>
>>> Consider remove third level of extent index block that indexed by
>>> second level of extent index, and the extent index in second level
>>> is first entry in block.
>>>
>> So, I think the above case would be handled by point (1.1) i.e. the
>> earlier index updation done from within ext4_ext_rm_leaf.
>> The earlier ei_block updation will reach to the top of tree.
>>
>>> In this case, ei_block will not propogate to top level correctly,
>>> if we only do propogation in ext4_ext_rm_leave.
>>
>> And thanks for pointing the problem. Can you reproduce it with my code
>> suggesstion even with depth 3?
>> If no, than please resend the patch with suggested changes.
>> The intent here is to solve the problem without changing any function
>> signature and with minimal changes.
>>
>> Regards,
>> Ashish
>>>
>>> Regards,
>>> Forrest
>>>
>>> 2012/12/5 Ashish Sangwan <[email protected]>:
>>>> Sorry 1 typo.
>>>> It should be
>>>> + path->p_idx->ei_block =(path + 1)->p_idx->ei_block ;
>>>> instead of
>>>> + path->p_idx->ei_block = border;
>>>>
>>>> On Wed, Dec 5, 2012 at 1:23 PM, Ashish Sangwan <[email protected]> wrote:
>>>>> I have tested it and the problem is real.
>>>>> It happens when depth => 2 and punch hole completely removes any of
>>>>> the first extent indexes at depth 1.
>>>>> The change in ei_block is not propagated backwards to index at depth 0.
>>>>>
>>>>> However, AFAICS, the problematic call to ext4_ext_rm_idx is from
>>>>> ext4_ext_rm_leaf and there is no need to change in
>>>>> ext4_ext_remove_space or in ext4_ext_rm_idx.
>>>>>
>>>>> Forrest, what do you think about below change ?
>>>>> basically its your code, but its been added to ext4_ext_rm_leaf after
>>>>> removing index.
>>>>>
>>>>> @@ -2448,8 +2464,28 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>
>>>>> /* if this leaf is free, then we should
>>>>> * remove it from index block above */
>>>>> - if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>>>>> err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>> + /* If this was the first extent index in node,
>>>>> + * propagates the ei_block updation info to top */
>>>>> + if(!err) {
>>>>> + --depth;
>>>>> + path = path + depth;
>>>>> + while (--depth >= 0) {
>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>> + break;
>>>>> + path--;
>>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>>> + if (err)
>>>>> + break;
>>>>> + path->p_idx->ei_block = border;
>>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>>> + if (err)
>>>>> + break;
>>>>> + }
>>>>> + }
>>>>> + }
>>>>> +
>>>>>
>>>>> On Wed, Dec 5, 2012 at 11:43 AM, Forrest Liu <[email protected]> wrote:
>>>>>> 2012/12/4 forrest <[email protected]>:
>>>>>>> This problem is easily to reproduce
>>>>>>>
>>>>>>> Create a file with size larger than 1GB.
>>>>>>>
>>>>>>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>>>>>>
>>>>>>> Punch every even block in test_file, and then use debugfs to dump
>>>>>>> extents, followings is dumped result
>>>>>>>
>>>>>>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>>>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>>>>>>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>>>>>>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>>>>>>
>>>>>>> Punch blocks #231779 ~#231201 , to remove extent index, and then use
>>>>>>> debugfs to dump extents, followings is dumped result
>>>>>>>
>>>>>>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>>>>>>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>>>>>>> -------> logical block index didn't update when remove extent index
>>>>>>> 1/ 2 1/ 45 231881 - 232560 3901490 680
>>>>>>> 2/ 2 1/340 231881 - 231881 3918281 - 3918281 1
>>>>>>> 2/ 2 2/340 231883 - 231883 3918283 - 3918283 1
>>>>>>
>>>>>> After blocks #231202~#231880 had been punch out, theses blocks can't map again.
>>>>>> If we do map a block in #231202~#231880, we will get error messages
>>>>>> like followings.
>>>>>>
>>>>>> EXT4-fs error (device md2): ext4_ext_search_left:1239: inode #13: comm
>>>>>> flush-9:2: ix (231201) != EXT_FIRST_INDEX (1) (depth 0)!
>>>>>> EXT4-fs (md2): delayed block allocation failed for inode 13 at logical
>>>>>> offset 231203 with max blocks 1 with error -5
>>>>>> EXT4-fs (md2): This should not happen!! Data will be lost
>>>>>>
>>>>>> Regards,
>>>>>> Forrest
>>>>>>
>>>>>>>
>>>>>>> 2012/12/4 Forrest Liu <[email protected]>:
>>>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>>>> of extent tree is greater than 1.
>>>>>>>>
>>>>>>>> Signed-off-by: Forrest Liu <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>>>>>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>>>>>>> index d3dd618..b10b8c0 100644
>>>>>>>> --- a/fs/ext4/extents.c
>>>>>>>> +++ b/fs/ext4/extents.c
>>>>>>>> @@ -2190,13 +2190,15 @@ errout:
>>>>>>>> * removes index from the index block.
>>>>>>>> */
>>>>>>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>> - struct ext4_ext_path *path)
>>>>>>>> + struct ext4_ext_path *path, int depth)
>>>>>>>> {
>>>>>>>> int err;
>>>>>>>> ext4_fsblk_t leaf;
>>>>>>>> + __le32 border;
>>>>>>>>
>>>>>>>> /* free index block */
>>>>>>>> - path--;
>>>>>>>> + depth--;
>>>>>>>> + path = path + depth;
>>>>>>>> leaf = ext4_idx_pblock(path->p_idx);
>>>>>>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>>>>>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>>>>>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>>>>>>
>>>>>>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>>>>>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>>>>>>> +
>>>>>>>> + border = path->p_idx->ei_block;
>>>>>>>> + while (--depth >= 0) {
>>>>>>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>>>>>>> + break;
>>>>>>>> + path--;
>>>>>>>> + err = ext4_ext_get_access(handle, inode, path);
>>>>>>>> + if (err)
>>>>>>>> + break;
>>>>>>>> + path->p_idx->ei_block = border;
>>>>>>>> + err = ext4_ext_dirty(handle, inode, path);
>>>>>>>> + if (err)
>>>>>>>> + break;
>>>>>>>> + }
>>>>>>>> return err;
>>>>>>>> }
>>>>>>>>
>>>>>>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>>>>>>> /* if this leaf is free, then we should
>>>>>>>> * remove it from index block above */
>>>>>>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>>>>>>
>>>>>>>> out:
>>>>>>>> return err;
>>>>>>>> @@ -2760,7 +2776,7 @@ again:
>>>>>>>> /* index is empty, remove it;
>>>>>>>> * handle must be already prepared by the
>>>>>>>> * truncatei_leaf() */
>>>>>>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>>>>>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>>>>>>> }
>>>>>>>> /* root level has p_bh == NULL, brelse() eats this */
>>>>>>>> brelse(path[i].p_bh);
>>>>>>>> --
>>>>>>>> 1.7.5.4
>>>>>>>>
>>>>>> --
>>>>>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>>>>>> the body of a message to [email protected]
>>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-07 05:53:13

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <[email protected]> wrote:
> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>> of extent tree is greater than 1.
>>
>> This is interesting; we had 2 reports of similar corruption on the
>> list, I wonder if the application in question was doing hole punching.
>> I didn't expect that they were, so TBH I was pretty much ignoring
>> the hole-punch cases for parent index updates. Hm. I'll have
>> to look into that.
>>
>> Could you turn your testcase into an xfstest regression test?
>
> Also, please note that I sent an e2fsck patch to try to fix this
> problem after the fact; it'd be great if in your testing, you could
> also confirm that e2fsck w/ my patch fixes it correctly.
>
I checked your patch.
This was the extent tree situation after removing 1st extent index:
debugfs: ex abc
Level Entries Logical Physical Length Flags
0/ 2 1/ 1 0 - 8399 32857 8400
1/ 2 1/ 4 2048 - 4081 4138 2034
2/ 2 1/339 2048 - 2053 69632 - 69637 6
2/ 2 2/339 2054 - 2059 69656 - 69661 6

E2fsck's output with your patch=>
Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
e2fsck 1.42.6.1 (06-DEC-2012)
Pass 1: Checking inodes, blocks, and sizes
Interior extent node level 0 of inode 31:
Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes
Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
Block bitmap differences: -4138 -(73712--73717) -73728
-(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383)
-(98390--98395) -(98402--98407)
< Similarly, this was a huge list of around 50-60 lines, so I skip to the end >
910--106915) -(106922--106927) -(106934--106939) -(106946--106951)
-(106958--106963) -(122872--122879)
Fix<y>? yes
Free blocks count wrong for group #0 (14308, counted=14309).
Fix<y>? yes
Free blocks count wrong for group #2 (12297, counted=12304).
Fix<y>? yes
Free blocks count wrong for group #3 (9770, counted=14084).
Fix<y>? yes
Free blocks count wrong (52407, counted=56729).
Fix<y>? yes
/dev/sda1: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks
Linux#>

> Thanks,
> -Eric
>
>> -Eric
>>
>>> Signed-off-by: Forrest Liu <[email protected]>
>>> ---
>>> fs/ext4/extents.c | 24 ++++++++++++++++++++----
>>> 1 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>>> index d3dd618..b10b8c0 100644
>>> --- a/fs/ext4/extents.c
>>> +++ b/fs/ext4/extents.c
>>> @@ -2190,13 +2190,15 @@ errout:
>>> * removes index from the index block.
>>> */
>>> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>> - struct ext4_ext_path *path)
>>> + struct ext4_ext_path *path, int depth)
>>> {
>>> int err;
>>> ext4_fsblk_t leaf;
>>> + __le32 border;
>>>
>>> /* free index block */
>>> - path--;
>>> + depth--;
>>> + path = path + depth;
>>> leaf = ext4_idx_pblock(path->p_idx);
>>> if (unlikely(path->p_hdr->eh_entries == 0)) {
>>> EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
>>> @@ -2221,6 +2223,20 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
>>>
>>> ext4_free_blocks(handle, inode, NULL, leaf, 1,
>>> EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
>>> +
>>> + border = path->p_idx->ei_block;
>>> + while (--depth >= 0) {
>>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>>> + break;
>>> + path--;
>>> + err = ext4_ext_get_access(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + path->p_idx->ei_block = border;
>>> + err = ext4_ext_dirty(handle, inode, path);
>>> + if (err)
>>> + break;
>>> + }
>>> return err;
>>> }
>>>
>>> @@ -2557,7 +2573,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>>> /* if this leaf is free, then we should
>>> * remove it from index block above */
>>> if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
>>> - err = ext4_ext_rm_idx(handle, inode, path + depth);
>>> + err = ext4_ext_rm_idx(handle, inode, path, depth);
>>>
>>> out:
>>> return err;
>>> @@ -2760,7 +2776,7 @@ again:
>>> /* index is empty, remove it;
>>> * handle must be already prepared by the
>>> * truncatei_leaf() */
>>> - err = ext4_ext_rm_idx(handle, inode, path + i);
>>> + err = ext4_ext_rm_idx(handle, inode, path, i);
>>> }
>>> /* root level has p_bh == NULL, brelse() eats this */
>>> brelse(path[i].p_bh);
>>>
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2012-12-07 12:14:00

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

2012/12/7 Ashish Sangwan <[email protected]>:
> On Thu, Dec 6, 2012 at 8:06 PM, Forrest Liu <[email protected]> wrote:
>> Hi Ashish,
>>
>> There have a chance to do a trivial update, and this case will be
>> covered in ext4_ext_remove_space.
>>
>> + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) {
>> err = ext4_ext_rm_idx(handle, inode, path + depth);
>> + /* If this was the first extent index in node,
>> + * propagates the ei_block updation info to top */
>> + if(!err) {
>> + --depth;
>> + path = path + depth;
>> + while (--depth >= 0) {
>> + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
>> + break;
>> + path--;
>> + err = ext4_ext_get_access(handle, inode, path);
>> + if (err)
>> + break;
>>
>> trivial update, when (path + 1)->p_idx->eh_entries == 0
>>
>> + path->p_idx->ei_block =(path +
>> 1)->p_idx->ei_block ;
>> + err = ext4_ext_dirty(handle, inode, path);
>> + if (err)
>> + break;
>> + }
>> + }
>> + }
>> +
>>
>> I will trying to reproduce the problem tomorrow.
> I think it will work fine atleast till depth = 2.
> It would be great if you could check with depth > 2.
> If still having this problem, we can think more.

Hi Ashish,

First, create a file larger than 4GB, and then punch out every even block.
Use the ex command in debugfs to dump the extents; the result is below.

Level Entries Logical Physical Length Flags
0/ 3 1/ 1 1 - 1048575 40013 1048575
1/ 3 1/ 5 1 - 231160 40012 231160
2/ 3 1/340 1 - 680 40011 680
3/ 3 1/340 1 - 1 2228225 - 2228225 1
3/ 3 2/340 3 - 3 2228227 - 2228227 1
3/ 3 3/340 5 - 5 2228229 - 2228229 1


punch out 231159, 231157, ..., 1..


Level Entries Logical Physical Length Flags
0/ 3 1/ 1 1 - 1048575 40013 1048575
1/ 3 1/ 4 231161 - 462320 2457604 231160
2/ 3 1/340 231161 - 231840 2457606 680
3/ 3 1/340 231161 - 231161 2459385 - 2459385 1


Then we get an incorrect logical index if the correction is done only in ext4_ext_rm_leaf.


Thanks,
Forrest

2012-12-07 12:26:05

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

2012/12/7 Ashish Sangwan <[email protected]>:
> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <[email protected]> wrote:
>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>> of extent tree is greater than 1.
>>>
>>> This is interesting; we had 2 reports of similar corruption on the
>>> list, I wonder if the application in question was doing hole punching.
>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>> the hole-punch cases for parent index updates. Hm. I'll have
>>> to look into that.
>>> Could you turn your testcase into an xfstest regression test?

Hi Eric,
I will check how to do that.
>>
>> Also, please note that I sent an e2fsck patch to try to fix this
>> problem after the fact; it'd be great if in your testing, you could
>> also confirm that e2fsck w/ my patch fixes it correctly.
>>
> I checked you patch.
> This was the extent tree situation after removing 1st extent index:
> debugfs: ex abc
> Level Entries Logical Physical Length Flags
> 0/ 2 1/ 1 0 - 8399 32857 8400
> 1/ 2 1/ 4 2048 - 4081 4138 2034
> 2/ 2 1/339 2048 - 2053 69632 - 69637 6
> 2/ 2 2/339 2054 - 2059 69656 - 69661 6
>
> E2fsck's output with your patch=>
> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
> e2fsck 1.42.6.1 (06-DEC-2012)
> Pass 1: Checking inodes, blocks, and sizes
> Interior extent node level 0 of inode 31:
> Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes
> Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes

I got a similar result; pb->num_blocks is incorrect if
ext2fs_extent_fix_parents is called.

> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> Block bitmap differences: -4138 -(73712--73717) -73728
> -(98342--98347) -(98354--98359) -(98366--98371) -(98378--98383)
> -(98390--98395) -(98402--98407)
> < Similarly this was huge list of around 50-60 lines, so I skip to last >
> 910--106915) -(106922--106927) -(106934--106939) -(106946--106951)
> -(106958--106963) -(122872--122879)
> Fix<y>? yes
> Free blocks count wrong for group #0 (14308, counted=14309).
> Fix<y>? yes
> Free blocks count wrong for group #2 (12297, counted=12304).
> Fix<y>? yes
> Free blocks count wrong for group #3 (9770, counted=14084).
> Fix<y>? yes
> Free blocks count wrong (52407, counted=56729).
> Fix<y>? yes
> /dev/sda1: ***** FILE SYSTEM WAS MODIFIED *****
> /dev/sda1: 9872/126592 files (0.1% non-contiguous), 69775/126504 blocks
> Linux#>
>
>> Thanks,
>> -Eric
>>
>>> -Eric
>>>

2012-12-07 22:17:19

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On 12/7/12 6:26 AM, Forrest Liu wrote:
> 2012/12/7 Ashish Sangwan <[email protected]>:
>> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <[email protected]> wrote:
>>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>> of extent tree is greater than 1.
>>>>
>>>> This is interesting; we had 2 reports of similar corruption on the
>>>> list, I wonder if the application in question was doing hole punching.
>>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>>> the hole-punch cases for parent index updates. Hm. I'll have
>>>> to look into that.
>>>> Could you turn your testcase into an xfstest regression test?
>
> Hi Eric,
> I will check how to do that.
>>>
>>> Also, please note that I sent an e2fsck patch to try to fix this
>>> problem after the fact; it'd be great if in your testing, you could
>>> also confirm that e2fsck w/ my patch fixes it correctly.
>>>
>> I checked you patch.
>> This was the extent tree situation after removing 1st extent index:
>> debugfs: ex abc
>> Level Entries Logical Physical Length Flags
>> 0/ 2 1/ 1 0 - 8399 32857 8400
>> 1/ 2 1/ 4 2048 - 4081 4138 2034
>> 2/ 2 1/339 2048 - 2053 69632 - 69637 6
>> 2/ 2 2/339 2054 - 2059 69656 - 69661 6
>>
>> E2fsck's output with your patch=>
>> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
>> e2fsck 1.42.6.1 (06-DEC-2012)
>> Pass 1: Checking inodes, blocks, and sizes
>> Interior extent node level 0 of inode 31:
>> Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes
>> Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes
>
> I got similar result, pb->num_blocks is incorrect if
> ext2fs_extent_fix_parents called.

Maybe I should ask what it looks like without my patch? I didn't
think my patch would change anything at all about block count
or use.

-Eric


2012-12-07 22:29:27

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Hi Forrest,

Can you share an updated version of your patch (with Ashish's
modification if you've tried testing that)? And also can you send us
some automated way to create your test case? That might make it
easier for us all to test the same thing and all be on the same page.

I assume you have some C program which is doing all of the
punching?

I started trying to create something via a script like this:

touch testfile
fallocate -l 1G testfile
for i in $(seq 0 8192 1073741824) ; do fallocate -p -o $i -l 4k testfile ; done

.. where fallocate is from contrib/fallocate.c in the e2fsprogs
sources (not the fallocate from util-linux) with the attached patch to
add support for the -p option to do the punching.... but then I
decided it might be better if we all standardized on the same test
case. Ideally, if the test case is small enough, we can put it in the
commit message, or at the very least we should look into getting it into
xfstests, as Eric has suggested.

Cheers,

- Ted

commit 2658b9616fe63d2fd9e2654676877ab014639a92
Author: Theodore Ts'o <[email protected]>
Date: Thu Dec 6 11:21:44 2012 -0500

contrib/fallocate: add support for punch functionality

Also fix the -o option so it works correctly (instead of core
dumping).

Signed-off-by: "Theodore Ts'o" <[email protected]>

diff --git a/contrib/fallocate.c b/contrib/fallocate.c
index 0e8319f..1436b70 100644
--- a/contrib/fallocate.c
+++ b/contrib/fallocate.c
@@ -35,6 +35,7 @@

// #include <linux/falloc.h>
#define FALLOC_FL_KEEP_SIZE 0x01
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */

void usage(void)
{
@@ -94,12 +95,17 @@ int main(int argc, char **argv)
int error;
int tflag = 0;

- while ((opt = getopt(argc, argv, "nl:ot")) != -1) {
+ while ((opt = getopt(argc, argv, "npl:o:t")) != -1) {
switch(opt) {
case 'n':
/* do not change filesize */
falloc_mode = FALLOC_FL_KEEP_SIZE;
break;
+ case 'p':
+ /* punch mode */
+ falloc_mode = (FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_KEEP_SIZE);
+ break;
case 'l':
length = cvtnum(optarg);
break;

2012-12-08 17:31:05

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

On 12/4/12 6:29 AM, forrest wrote:
> This problem is easily to reproduce
>
> Create a file with size larger than 1GB.
>
> dd if=/dev/zero of=/test_file bs=1M count=1024
>
> Punch every even block in test_file, and then use debugfs to dump
> extents, followings is dumped result
>
> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
> 0/ 2 2/ 2 231201 - 262143 3901486 30943
> 1/ 2 1/ 46 231201 - 231880 3901488 680
> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>
> Punch blocks #231779 ~#231201 ,

Can you explain what you mean by the above? Which blocks get punched?

-Eric

> to remove extent index, and then use
> debugfs to dump extents, followings is dumped result



2012-12-08 18:56:56

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

2012/12/9 Eric Sandeen <[email protected]>:
> On 12/4/12 6:29 AM, forrest wrote:
>> This problem is easily to reproduce
>>
>> Create a file with size larger than 1GB.
>>
>> dd if=/dev/zero of=/test_file bs=1M count=1024
>>
>> Punch every even block in test_file, and then use debugfs to dump
>> extents, followings is dumped result
>>
>> 2/ 2 339/340 231197 - 231197 3917597 - 3917597 1
>> 2/ 2 340/340 231199 - 231199 3917599 - 3917599 1
>> 0/ 2 2/ 2 231201 - 262143 3901486 30943
>> 1/ 2 1/ 46 231201 - 231880 3901488 680
>> 2/ 2 1/340 231201 - 231201 3917601 - 3917601 1
>> 2/ 2 2/340 231203 - 231203 3917603 - 3917603 1
>>
>> Punch blocks #231779 ~#231201 ,
>
> Can you explain what you mean by the above? Which blocks get punched?

Hi Eric,

At first, I found that the code below may have a problem:

ext4_ext_rm_leaf ()
{
...
//// When eh->eh_entries equals zero, the logical start value
//// will not be updated.
//// If the parent is the first index in its block, this causes a problem.
if (correct_index && eh->eh_entries)
err = ext4_ext_correct_indexes(handle, inode, path);
}

To prove that, I tried to trigger this case (eh->eh_entries = 0)

1) create a file whose size is greater than 1GB

2) punch all even blocks 0, 2, 4, 6, 8, ....... (how many blocks depends on
the file size) one by one, so that the extent tree reaches a depth of 2

3) use debugfs to find an extent index entry at level 1 (assuming the leaves
are at level 2) whose logical start value is equal to that of its parent node.

In the example, the entry found is
1/ 2 1/ 46 231201 - 231880 3901488 680

4) punch the blocks covered by the extent index found in step 3, one by one.
In the example,
231879, 231877, ..., 231201..

5) Then we can use debugfs to check whether the logical start value is correct

In the example, 231201 is not equal to 231881.

0/ 2 2/ 2 231201 - 262143 3901486 30943
1/ 2 1/ 45 231881 - 232560 3901490 680

Thanks,
-Forrest

>
> -Eric
>
>> to remove extent index, and then use
>> debugfs to dump extents, followings is dumped result
>
>

2012-12-08 19:01:09

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

2012/12/8 Eric Sandeen <[email protected]>:
> On 12/7/12 6:26 AM, Forrest Liu wrote:
>> 2012/12/7 Ashish Sangwan <[email protected]>:
>>> On Thu, Dec 6, 2012 at 9:18 PM, Eric Sandeen <[email protected]> wrote:
>>>> On 12/6/12 9:45 AM, Eric Sandeen wrote:
>>>>> On 12/4/12 6:11 AM, Forrest Liu wrote:
>>>>>> Extent indexes didn't update correctly in ext4_ext_rm_idx, when depth
>>>>>> of extent tree is greater than 1.
>>>>>
>>>>> This is interesting; we had 2 reports of similar corruption on the
>>>>> list, I wonder if the application in question was doing hole punching.
>>>>> I didn't expect that they were, so TBH I was pretty much ignoring
>>>>> the hole-punch cases for parent index updates. Hm. I'll have
>>>>> to look into that.
>>>>> Could you turn your testcase into an xfstest regression test?
>>
>> Hi Eric,
>> I will check how to do that.
>>>>
>>>> Also, please note that I sent an e2fsck patch to try to fix this
>>>> problem after the fact; it'd be great if in your testing, you could
>>>> also confirm that e2fsck w/ my patch fixes it correctly.
>>>>
>>> I checked you patch.
>>> This was the extent tree situation after removing 1st extent index:
>>> debugfs: ex abc
>>> Level Entries Logical Physical Length Flags
>>> 0/ 2 1/ 1 0 - 8399 32857 8400
>>> 1/ 2 1/ 4 2048 - 4081 4138 2034
>>> 2/ 2 1/339 2048 - 2053 69632 - 69637 6
>>> 2/ 2 2/339 2054 - 2059 69656 - 69661 6
>>>
>>> E2fsck's output with your patch=>
>>> Linux#> /dtv/usb/sdb1/e2fsck /dev/sda1 -f
>>> e2fsck 1.42.6.1 (06-DEC-2012)
>>> Pass 1: Checking inodes, blocks, and sizes
>>> Interior extent node level 0 of inode 31:
>>> Logical start 0 does not match logical start 2048 at next level. Fix<y>? yes
>>> Inode 31, i_blocks is 50856, should be 16280. Fix<y>? yes
>>
>> I got similar result, pb->num_blocks is incorrect if
>> ext2fs_extent_fix_parents called.
>
> Maybe I should ask what it looks like without my patch? I didn't
> think my patch would change anything at all about block count
> or use.
>
> -Eric
>
When I choose not to repair the logical start value problem, there is
no message like

Inode 31, i_blocks is 50856, should be 16280. Fix<y>?

-Forrest

2012-12-08 19:30:00

by Forrest Liu

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

Hi Ted,
I have tested the patch with Ashish's modification, and it still has a
problem when the depth of the extent tree is greater than 2.

I will make a test case with your patch.

Thanks,
Forrest


2012/12/8 Theodore Ts'o <[email protected]>:
> Hi Forrest,
>
> Can you share an updated version of your patch (with Ashish's
> modification if you've tried testing that)? And also can you send us
> some automated way to create your test case? That might make it
> easier for us all to test the same thing and all be on the same page.
>
> I assume you have some C program which which is doing all of the
> punching?
>
> I started trying to create something via a script like this:
>
> touch testfile
> fallocate -l 1G testfile
> for i in $(seq 0 8192 1073741824) ; do fallocate -p -o $i -l 4k testfile ; done
>
> .. where fallocate is from contrib/fallocate.c in the e2fsprogs
> sources (not the fallocate from util-linux) with the attached patch to
> add support for the -p option to do the pnuching.... but then I
> decided it might be better if we all standardized on the same test
> case. Ideally, if the test case is small enough, we can put it in the
> commit message, or at very least we should look into getting it into
> xfstests, and Eric has suggested.
>
> Cheers,
>
> - Ted
>
> commit 2658b9616fe63d2fd9e2654676877ab014639a92
> Author: Theodore Ts'o <[email protected]>
> Date: Thu Dec 6 11:21:44 2012 -0500
>
> contrib/fallocate: add support for punch functionality
>
> Also fix the -o option so it works correctly (instead of core
> dumping).
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
>
> diff --git a/contrib/fallocate.c b/contrib/fallocate.c
> index 0e8319f..1436b70 100644
> --- a/contrib/fallocate.c
> +++ b/contrib/fallocate.c
> @@ -35,6 +35,7 @@
>
> // #include <linux/falloc.h>
> #define FALLOC_FL_KEEP_SIZE 0x01
> +#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
>
> void usage(void)
> {
> @@ -94,12 +95,17 @@ int main(int argc, char **argv)
> int error;
> int tflag = 0;
>
> - while ((opt = getopt(argc, argv, "nl:ot")) != -1) {
> + while ((opt = getopt(argc, argv, "npl:o:t")) != -1) {
> switch(opt) {
> case 'n':
> /* do not change filesize */
> falloc_mode = FALLOC_FL_KEEP_SIZE;
> break;
> + case 'p':
> + /* punch mode */
> + falloc_mode = (FALLOC_FL_PUNCH_HOLE |
> + FALLOC_FL_KEEP_SIZE);
> + break;
> case 'l':
> length = cvtnum(optarg);
> break;

2012-12-10 06:22:40

by Ashish Sangwan

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix extent tree corruption that incurred by hole punch

> Hi Ashish,
>
> First, create a file lager than 4GB, and then punch out every even blocks;
> Use ex command in debugfs to dump extents, belowing is the result.
>
> Level Entries Logical Physical Length Flags
> 0/ 3 1/ 1 1 - 1048575 40013 1048575
> 1/ 3 1/ 5 1 - 231160 40012 231160
> 2/ 3 1/340 1 - 680 40011 680
> 3/ 3 1/340 1 - 1 2228225 - 2228225 1
> 3/ 3 2/340 3 - 3 2228227 - 2228227 1
> 3/ 3 3/340 5 - 5 2228229 - 2228229 1
>
>
> punch out 231159, 231157, ..., 1..
>
>
> Level Entries Logical Physical Length Flags
> 0/ 3 1/ 1 1 - 1048575 40013 1048575
> 1/ 3 1/ 4 231161 - 462320 2457604 231160
> 2/ 3 1/340 231161 - 231840 2457606 680
> 3/ 3 1/340 231161 - 231161 2459385 - 2459385 1
>
>
> Then, we get incorrect logical index, if only do correction in ext4_ext_rm_leaf.

OK, in your patch we can do without the variable border and instead use:
path->p_idx->ei_block = (path+1)->p_idx->ei_block;

Rest of the patch seems Ok.

Thanks,
Ashish
>
>
> Thanks,
> Forrest