The ext4_ext_get_blocks() and ext4_ext_insert_extent() routines do not
check for extent overlap, when a new extent needs to be inserted in an
inode. An overlap is possible when the new extent being inserted has
ee_block that is not part of any of the existing extents, but the
tail/center portion of this new extent _is_. This is possible only when
we are writing/preallocating blocks across a hole.
This problem was first sighted while stress testing (using modified
fsx-linux stress test) persistent preallocation patches that I posted
earlier. Though I am not able to reproduce this bug (extent overlap)
without the persistent preallocation patches (because a write through a
hole results in get_blocks() of a single block at a time), but I think
that it is an independant problem and should be solved with a separate
patch. Hence this patch.
Comments please. Thanks!
Signed-off-by: Amit Arora ([email protected])
---
fs/ext4/extents.c | 71 +++++++++++++++++++++++++++++++++++++---
include/linux/ext4_fs_extents.h | 1
2 files changed, 68 insertions(+), 4 deletions(-)
Index: linux-2.6.19.prealloc/fs/ext4/extents.c
===================================================================
--- linux-2.6.19.prealloc.orig/fs/ext4/extents.c 2007-01-02 14:21:57.000000000 +0530
+++ linux-2.6.19.prealloc/fs/ext4/extents.c 2007-01-02 14:22:00.000000000 +0530
@@ -1119,6 +1119,44 @@
}
/*
+ * ext4_ext_check_overlap:
+ * check if a portion of the "newext" extent overlaps with an
+ * existing extent.
+ */
+struct ext4_extent * ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext)
+{
+ struct ext4_ext_path *path;
+ struct ext4_extent *ex;
+ unsigned int depth, b1, b2, len1;
+
+ b1 = le32_to_cpu(newext->ee_block);
+ len1 = le16_to_cpu(newext->ee_len);
+ path = ext4_ext_find_extent(inode, b1, NULL);
+ if (IS_ERR(path))
+ return NULL;
+
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex)
+ return NULL;
+
+ b2 = ext4_ext_next_allocated_block(path);
+ if (b2 == EXT_MAX_BLOCK)
+ return NULL;
+ path = ext4_ext_find_extent(inode, b2, path);
+ if (IS_ERR(path))
+ return NULL;
+ BUG_ON(path[depth].p_hdr == NULL);
+ ex = path[depth].p_ext;
+
+ if (b1 + len1 > b2)
+ return ex;
+
+ return NULL;
+}
+
+/*
* ext4_ext_insert_extent:
* tries to merge requsted extent into the existing extent or
* inserts requested extent as new one into the tree,
@@ -1129,7 +1167,7 @@
struct ext4_extent *newext)
{
struct ext4_extent_header * eh;
- struct ext4_extent *ex, *fex;
+ struct ext4_extent *ex, *fex, *rex;
struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL;
int depth, len, err, next;
@@ -1139,6 +1177,18 @@
ex = path[depth].p_ext;
BUG_ON(path[depth].p_hdr == NULL);
+ /* check for overlap */
+ rex = ext4_ext_check_overlap(inode, newext);
+ if (rex) {
+ printk(KERN_ERR "ERROR: ex=%u/%u overlaps newext=%u/%u\n",
+ le32_to_cpu(rex->ee_block),
+ le16_to_cpu(rex->ee_len),
+ le32_to_cpu(newext->ee_block),
+ le16_to_cpu(newext->ee_len));
+ ext4_ext_show_leaf(inode, path);
+ BUG();
+ }
+
/* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append %d block to %d:%d (from %llu)\n",
@@ -1921,7 +1971,7 @@
int create, int extend_disksize)
{
struct ext4_ext_path *path = NULL;
- struct ext4_extent newex, *ex;
+ struct ext4_extent newex, *ex, *ex2;
ext4_fsblk_t goal, newblock;
int err = 0, depth;
unsigned long allocated = 0;
@@ -1984,6 +2034,10 @@
*/
if (ee_len > EXT_MAX_LEN)
goto out2;
+
+ if (iblock < ee_block && iblock + max_blocks >= ee_block)
+ allocated = ee_block - iblock;
+
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
@@ -2016,7 +2070,17 @@
/* allocate new block */
goal = ext4_ext_find_goal(inode, path, iblock);
- allocated = max_blocks;
+
+ /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
+ newex.ee_block = cpu_to_le32(iblock);
+ if (!allocated) {
+ newex.ee_len = cpu_to_le16(max_blocks);
+ ex2 = ext4_ext_check_overlap(inode, &newex);
+ if (ex2)
+ allocated = le32_to_cpu(ex2->ee_block) - iblock;
+ else
+ allocated = max_blocks;
+ }
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
if (!newblock)
goto out2;
@@ -2024,7 +2088,6 @@
goal, newblock, allocated);
/* try to insert new extent into found leaf and return */
- newex.ee_block = cpu_to_le32(iblock);
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
===================================================================
--- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h 2007-01-02 14:21:57.000000000 +0530
+++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h 2007-01-02 14:22:00.000000000 +0530
@@ -190,6 +190,7 @@
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern struct ext4_extent * ext4_ext_check_overlap(struct inode *, struct ext4_extent *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
--
Regards,
Amit Arora
>>>>> Amit K Arora (AKA) writes:
AKA> The ext4_ext_get_blocks() and ext4_ext_insert_extent() routines do not
AKA> check for extent overlap, when a new extent needs to be inserted in an
AKA> inode. An overlap is possible when the new extent being inserted has
AKA> ee_block that is not part of any of the existing extents, but the
AKA> tail/center portion of this new extent _is_. This is possible only when
AKA> we are writing/preallocating blocks across a hole.
not sure I understand ... you shouldn't insert an extent that overlap
any existing extent. when you write block(s), you first check is
it already allocated and insert new extent only if it's not. for
preallocated block(s), you should adapt existing extent(s) so that
they don't overlap new extent you're inserting. am I missing something?
also, I think that modification of existing extent(s) (not merging)
isn't safe.
thanks, Alex
On Tue, Jan 02, 2007 at 12:25:21PM +0300, Alex Tomas (AT) wrote:
> >>>>> Amit K Arora (AKA) writes:
>
> AKA> The ext4_ext_get_blocks() and ext4_ext_insert_extent() routines do not
> AKA> check for extent overlap, when a new extent needs to be inserted in an
> AKA> inode. An overlap is possible when the new extent being inserted has
> AKA> ee_block that is not part of any of the existing extents, but the
> AKA> tail/center portion of this new extent _is_. This is possible only when
> AKA> we are writing/preallocating blocks across a hole.
>
> AT> not sure I understand ... you shouldn't insert an extent that overlap
> AT> any existing extent. when you write block(s), you first check is
> AT> it already allocated and insert new extent only if it's not.
You are right. That is what this patch does.
The current ext4 code is inserting an overlapped extent in a particular
scenario (explained above). The suggested patch fixes this by having a
check in get_blocks() for _not_ inserting an extent that may overlap
with an existing one.
> AT> for preallocated block(s), you should adapt existing extent(s) so that
> AT> they don't overlap new extent you're inserting. am I missing something?
The patch makes the new extent being inserted adjust its length based on any
existing extent that may overlap, so that the overlap does not happen at
all.
> AT> also, I think that modification of existing extent(s) (not merging)
> AT> isn't safe.
The existing extent(s) are not being modified in any way here. We check
if there is an overlap between the new extent being inserted by
get_blocks(), with an existing one. If there is, we update the new extent
(being inserted) accordingly. The existing extent is not touched (unless
the insert_extent() does a merge, if possible).
Please let me know if the intentions are still not clear here. Thanks!
Regards,
Amit Arora
On Tue, 2007-01-02 at 14:39 +0530, Amit K. Arora wrote:
> This problem was first sighted while stress testing (using modified
> fsx-linux stress test) persistent preallocation patches that I posted
> earlier. Though I am not able to reproduce this bug (extent overlap)
> without the persistent preallocation patches (because a write through a
> hole results in get_blocks() of a single block at a time), but I think
> that it is an independant problem and should be solved with a separate
> patch. Hence this patch.
>
Ah...without preallocation, I guess the problem still will be uncovered
when we have delayed allocation, that makes multiple block allocation
across hole possible.
> /*
> + * ext4_ext_check_overlap:
> + * check if a portion of the "newext" extent overlaps with an
> + * existing extent.
> + */
> +struct ext4_extent * ext4_ext_check_overlap(struct inode *inode,
> + struct ext4_extent *newext)
> +{
> + struct ext4_ext_path *path;
> + struct ext4_extent *ex;
> + unsigned int depth, b1, b2, len1;
> +
> + b1 = le32_to_cpu(newext->ee_block);
> + len1 = le16_to_cpu(newext->ee_len);
> + path = ext4_ext_find_extent(inode, b1, NULL);
> + if (IS_ERR(path))
> + return NULL;
> +
> + depth = ext_depth(inode);
> + ex = path[depth].p_ext;
> + if (!ex)
> + return NULL;
> +
I am confused, when we come here, isn't we confirmed that we need block
allocation, thus there is no extent start from b1?
> + b2 = ext4_ext_next_allocated_block(path);
> + if (b2 == EXT_MAX_BLOCK)
> +
> return NULL;
> + path = ext4_ext_find_extent(inode, b2, path);
> + if (IS_ERR(path))
> + return NULL;
> + BUG_ON(path[depth].p_hdr == NULL);
> + ex = path[depth].p_ext;
> +
How useful to have the next extent pointer?It seems only used to print
out warning messages. I am a little concerned about the expensive
ext4_ext_find_extent(). After all ext4_ext_next_allocated_block()
already returns the start block of next extent, isn't it?
> + if (b1 + len1 > b2)
> + return ex;
> +
> + return NULL;
> +}
> +
> +/*
> * ext4_ext_insert_extent:
> * tries to merge requsted extent into the existing extent or
> * inserts requested extent as new one into the tree,
> @@ -1129,7 +1167,7 @@
> struct ext4_extent *newext)
> {
> struct ext4_extent_header * eh;
> - struct ext4_extent *ex, *fex;
> + struct ext4_extent *ex, *fex, *rex;
> struct ext4_extent *nearex; /* nearest extent */
> struct ext4_ext_path *npath = NULL;
> int depth, len, err, next;
> @@ -1139,6 +1177,18 @@
> ex = path[depth].p_ext;
> BUG_ON(path[depth].p_hdr == NULL);
>
> + /* check for overlap */
> + rex = ext4_ext_check_overlap(inode, newext);
> + if (rex) {
> + printk(KERN_ERR "ERROR: ex=%u/%u overlaps newext=%u/%u\n",
> + le32_to_cpu(rex->ee_block),
> + le16_to_cpu(rex->ee_len),
> + le32_to_cpu(newext->ee_block),
> + le16_to_cpu(newext->ee_len));
> + ext4_ext_show_leaf(inode, path);
> + BUG();
> + }
> +
> /* try to insert block into found extent and return */
> if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
> ext_debug("append %d block to %d:%d (from %llu)\n",
> @@ -1921,7 +1971,7 @@
> int create, int extend_disksize)
> {
> struct ext4_ext_path *path = NULL;
> - struct ext4_extent newex, *ex;
> + struct ext4_extent newex, *ex, *ex2;
> ext4_fsblk_t goal, newblock;
> int err = 0, depth;
> unsigned long allocated = 0;
> @@ -1984,6 +2034,10 @@
> */
> if (ee_len > EXT_MAX_LEN)
> goto out2;
> +
> + if (iblock < ee_block && iblock + max_blocks >= ee_block)
> + allocated = ee_block - iblock;
> +
> /* if found extent covers block, simply return it */
> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> newblock = iblock - ee_block + ee_start;
> @@ -2016,7 +2070,17 @@
>
> /* allocate new block */
> goal = ext4_ext_find_goal(inode, path, iblock);
> - allocated = max_blocks;
> +
> + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
> + newex.ee_block = cpu_to_le32(iblock);
> + if (!allocated) {
> + newex.ee_len = cpu_to_le16(max_blocks);
> + ex2 = ext4_ext_check_overlap(inode, &newex);
> + if (ex2)
> + allocated = le32_to_cpu(ex2->ee_block) - iblock;
> + else
> + allocated = max_blocks;
> + }
> newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
> if (!newblock)
> goto out2;
> @@ -2024,7 +2088,6 @@
> goal, newblock, allocated);
>
> /* try to insert new extent into found leaf and return */
> - newex.ee_block = cpu_to_le32(iblock);
> ext4_ext_store_pblock(&newex, newblock);
> newex.ee_len = cpu_to_le16(allocated);
> err = ext4_ext_insert_extent(handle, inode, path, &newex);
> Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
> ===================================================================
> --- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h 2007-01-02 14:21:57.000000000 +0530
> +++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h 2007-01-02 14:22:00.000000000 +0530
> @@ -190,6 +190,7 @@
>
> extern int ext4_extent_tree_init(handle_t *, struct inode *);
> extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
> +extern struct ext4_extent * ext4_ext_check_overlap(struct inode *, struct ext4_extent *);
> extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
> extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
> extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
> --
> Regards,
> Amit Arora
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jan 02, 2007 at 05:35:28PM -0800, Mingming Cao wrote:
> > +struct ext4_extent * ext4_ext_check_overlap(struct inode *inode,
> > + struct ext4_extent *newext)
> > +{
> > + struct ext4_ext_path *path;
> > + struct ext4_extent *ex;
> > + unsigned int depth, b1, b2, len1;
> > +
> > + b1 = le32_to_cpu(newext->ee_block);
> > + len1 = le16_to_cpu(newext->ee_len);
> > + path = ext4_ext_find_extent(inode, b1, NULL);
> > + if (IS_ERR(path))
> > + return NULL;
> > +
> > + depth = ext_depth(inode);
> > + ex = path[depth].p_ext;
> > + if (!ex)
> > + return NULL;
> > +
>
> I am confused, when we come here, isn't we confirmed that we need block
> allocation, thus there is no extent start from b1?
Yes, we are sure here that there is no extent which covers b1 block.
Since I couldn't find a direct way to get the next extent (extent on the
right from the "would be" position of the new extent in the tree), we
make a call to ext4_ext_find_extent() to get the extent on the left, and
then use this to call ext4_ext_next_allocated_block() to get the logical
block number (LBN) of the "next" extent in the tree. This LBN is
compared with the LBN of the new extent plus its length, to detect an
overlap.
>
> > + b2 = ext4_ext_next_allocated_block(path);
> > + if (b2 == EXT_MAX_BLOCK)
> > +
> > return NULL;
> > + path = ext4_ext_find_extent(inode, b2, path);
> > + if (IS_ERR(path))
> > + return NULL;
> > + BUG_ON(path[depth].p_hdr == NULL);
> > + ex = path[depth].p_ext;
> > +
>
> How useful to have the next extent pointer?It seems only used to print
> out warning messages. I am a little concerned about the expensive
> ext4_ext_find_extent(). After all ext4_ext_next_allocated_block()
> already returns the start block of next extent, isn't it?
Ok, agreed. Will get rid of this extra code.
--
Regards,
Amit Arora
>>>>> Amit K Arora (AKA) writes:
AKA> On Tue, Jan 02, 2007 at 12:25:21PM +0300, Alex Tomas (AT) wrote:
>> >>>>> Amit K Arora (AKA) writes:
>>
AKA> The ext4_ext_get_blocks() and ext4_ext_insert_extent() routines do not
AKA> check for extent overlap, when a new extent needs to be inserted in an
AKA> inode. An overlap is possible when the new extent being inserted has
AKA> ee_block that is not part of any of the existing extents, but the
AKA> tail/center portion of this new extent _is_. This is possible only when
AKA> we are writing/preallocating blocks across a hole.
>>
AT> not sure I understand ... you shouldn't insert an extent that overlap
AT> any existing extent. when you write block(s), you first check is
AT> it already allocated and insert new extent only if it's not.
AKA> You are right. That is what this patch does.
AKA> The current ext4 code is inserting an overlapped extent in a particular
AKA> scenario (explained above). The suggested patch fixes this by having a
AKA> check in get_blocks() for _not_ inserting an extent that may overlap
AKA> with an existing one.
I think that stuff that converts uninitialized blocks
to initialized ones should be a separate codepath and
shouldn't be done in the insert path. and an insert
(basic tree manipulation) should BUG_ON() one tries
to add extent with a block which is already covered
by the tree.
IMHO, get_blocks() should look like:
path = find_path()
if (found extent covers request block(s)) {
if (extent is uninitialized) {
convert();
}
}
where
function convert()
{
/* adopt existing extent so that it
* doesn't cover requested blocks */
/* insert head or tail of existing
* extent, if necessary */
/* insert new extent of initialized blocks */
}
thanks, Alex
Alex Tomas wrote:
>>>>>>Amit K Arora (AKA) writes:
>
>
> AKA> On Tue, Jan 02, 2007 at 12:25:21PM +0300, Alex Tomas (AT) wrote:
> >> >>>>> Amit K Arora (AKA) writes:
> >>
> AKA> The ext4_ext_get_blocks() and ext4_ext_insert_extent() routines do not
> AKA> check for extent overlap, when a new extent needs to be inserted in an
> AKA> inode. An overlap is possible when the new extent being inserted has
> AKA> ee_block that is not part of any of the existing extents, but the
> AKA> tail/center portion of this new extent _is_. This is possible only when
> AKA> we are writing/preallocating blocks across a hole.
> >>
> AT> not sure I understand ... you shouldn't insert an extent that overlap
> AT> any existing extent. when you write block(s), you first check is
> AT> it already allocated and insert new extent only if it's not.
>
> AKA> You are right. That is what this patch does.
> AKA> The current ext4 code is inserting an overlapped extent in a particular
> AKA> scenario (explained above). The suggested patch fixes this by having a
> AKA> check in get_blocks() for _not_ inserting an extent that may overlap
> AKA> with an existing one.
>
> I think that stuff that converts uninitialized blocks
> to initialized ones should be a separate codepath and
> shouldn't be done in the insert path. and an insert
> (basic tree manipulation) should BUG_ON() one tries
> to add extent with a block which is already covered
> by the tree.
>
> IMHO, get_blocks() should look like:
>
> path = find_path()
> if (found extent covers request block(s)) {
> if (extent is uninitialized) {
> convert();
> }
> }
>
> where
> function convert()
> {
> /* adopt existing extent so that it
> * doesn't cover requested blocks */
>
> /* insert head or tail of existing
> * extent, if necessary */
>
> /* insert new extent of initialized blocks */
> }
>
> thanks, Alex
I was thing about the same thing. The current ext4_ext_get_blocks()
function becomes very bulky. The code to convert uninitialized blocks to
initialized ones is pretty selfcontained, and worth the effort to put it
into a seperate function.
But the bug Amit pointed here is unrelated to the code convert
uninitialized blocks to initialized ones. Rather, it's related to do
multiple block allocation across on a window with parts already have
blocks allocated. Without the check, the current code just simply
allocate the requested extent and insert it into the tree which might
overlap with existing extent.
Mingming
On Wed, Jan 03, 2007 at 10:07:01AM -0800, Mingming Cao wrote:
> Alex Tomas wrote:
> >I think that stuff that converts uninitialized blocks
> >to initialized ones should be a separate codepath and
> >shouldn't be done in the insert path. and an insert
> >(basic tree manipulation) should BUG_ON() one tries
> >to add extent with a block which is already covered
> >by the tree.
> >
> >IMHO, get_blocks() should look like:
> >
> > path = find_path()
> > if (found extent covers request block(s)) {
> > if (extent is uninitialized) {
> > convert();
> > }
> > }
> >
> >where
> > function convert()
> > {
> > /* adopt existing extent so that it
> > * doesn't cover requested blocks */
> >
> > /* insert head or tail of existing
> > * extent, if necessary */
> >
> > /* insert new extent of initialized blocks */
> > }
> >
> >thanks, Alex
>
> I was thing about the same thing. The current ext4_ext_get_blocks()
> function becomes very bulky. The code to convert uninitialized blocks to
> initialized ones is pretty selfcontained, and worth the effort to put it
> into a seperate function.
Ok. I will move this code to a new function.
--
Regards,
Amit Arora
On Wed, Jan 03, 2007 at 11:36:01AM +0530, Amit K. Arora wrote:
> >
> > > + b2 = ext4_ext_next_allocated_block(path);
> > > + if (b2 == EXT_MAX_BLOCK)
> > > +
> > > return NULL;
> > > + path = ext4_ext_find_extent(inode, b2, path);
> > > + if (IS_ERR(path))
> > > + return NULL;
> > > + BUG_ON(path[depth].p_hdr == NULL);
> > > + ex = path[depth].p_ext;
> > > +
> >
> > How useful to have the next extent pointer?It seems only used to print
> > out warning messages. I am a little concerned about the expensive
> > ext4_ext_find_extent(). After all ext4_ext_next_allocated_block()
> > already returns the start block of next extent, isn't it?
>
> Ok, agreed. Will get rid of this extra code.
Here is the new patch. Please review.
Thanks!
Signed-off-by: Amit Arora <[email protected]>
---
fs/ext4/extents.c | 71 ++++++++++++++++++++++++++++++++++++++--
include/linux/ext4_fs_extents.h | 1
2 files changed, 70 insertions(+), 2 deletions(-)
Index: linux-2.6.19.prealloc/fs/ext4/extents.c
===================================================================
--- linux-2.6.19.prealloc.orig/fs/ext4/extents.c
+++ linux-2.6.19.prealloc/fs/ext4/extents.c
@@ -1119,6 +1119,45 @@ ext4_can_extents_be_merged(struct inode
}
/*
+ * ext4_ext_check_overlap:
+ * check if a portion of the "newext" extent overlaps with an
+ * existing extent.
+ *
+ * Returns 1 if it finds an extent which may overlap with the
+ * new extent, and puts the logical block number of the first block
+ * of this extent at a location pointed by the "block" argument.
+ * If there is no such extent, it returns 0.
+ * Returns errno, incase of an error.
+ */
+int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ unsigned long *block)
+{
+ struct ext4_ext_path *path;
+ unsigned int depth, b1, len1;
+ int ret = 0;
+
+ b1 = le32_to_cpu(newext->ee_block);
+ len1 = le16_to_cpu(newext->ee_len);
+ path = ext4_ext_find_extent(inode, b1, NULL);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ BUG_ON(path[depth].p_ext == NULL && depth != 0);
+
+ *block = ext4_ext_next_allocated_block(path);
+ if (*block == EXT_MAX_BLOCK)
+ goto out;
+
+ if (b1 + len1 > *block)
+ ret = 1;
+out:
+ return ret;
+}
+
+/*
* ext4_ext_insert_extent:
* tries to merge requsted extent into the existing extent or
* inserts requested extent as new one into the tree,
@@ -1133,12 +1172,25 @@ int ext4_ext_insert_extent(handle_t *han
struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL;
int depth, len, err, next;
+ unsigned long oblock;
BUG_ON(newext->ee_len == 0);
depth = ext_depth(inode);
ex = path[depth].p_ext;
BUG_ON(path[depth].p_hdr == NULL);
+ /* check for overlap */
+ err = ext4_ext_check_overlap(inode, newext, &oblock);
+ if (err > 0) {
+ printk(KERN_ERR "ERROR: newext=%u/%u overlaps with an "
+ "existing extent, which starts with %lu\n",
+ le32_to_cpu(newext->ee_block),
+ le16_to_cpu(newext->ee_len),
+ oblock);
+ ext4_ext_show_leaf(inode, path);
+ BUG();
+ }
+
/* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append %d block to %d:%d (from %llu)\n",
@@ -1984,6 +2036,10 @@ int ext4_ext_get_blocks(handle_t *handle
*/
if (ee_len > EXT_MAX_LEN)
goto out2;
+
+ if (iblock < ee_block && iblock + max_blocks >= ee_block)
+ allocated = ee_block - iblock;
+
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
@@ -2016,7 +2072,19 @@ int ext4_ext_get_blocks(handle_t *handle
/* allocate new block */
goal = ext4_ext_find_goal(inode, path, iblock);
- allocated = max_blocks;
+
+ /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
+ newex.ee_block = cpu_to_le32(iblock);
+ if (!allocated) {
+ newex.ee_len = cpu_to_le16(max_blocks);
+ err = ext4_ext_check_overlap(inode, &newex, &allocated);
+ if (err < 0)
+ goto out2;
+ else if (!err)
+ allocated = max_blocks;
+ else
+ allocated = allocated - iblock;
+ }
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
if (!newblock)
goto out2;
@@ -2024,7 +2092,6 @@ int ext4_ext_get_blocks(handle_t *handle
goal, newblock, allocated);
/* try to insert new extent into found leaf and return */
- newex.ee_block = cpu_to_le32(iblock);
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
===================================================================
--- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h
+++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
@@ -190,6 +190,7 @@ ext4_ext_invalidate_cache(struct inode *
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, unsigned long *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
--
Regards,
Amit Arora
>>>>> Mingming Cao (MC) writes:
MC> But the bug Amit pointed here is unrelated to the code convert
MC> uninitialized blocks to initialized ones. Rather, it's related to do
MC> multiple block allocation across on a window with parts already have
MC> blocks allocated. Without the check, the current code just simply
MC> allocate the requested extent and insert it into the tree which might
MC> overlap with existing extent.
correct. thanks for catching. in delayed allocation patch
get_blocks() isn't used and ext4_ext_walk_space() works
right in this case.
thanks, Alex
>>>>> Amit K Arora (AKA) writes:
AKA> +int ext4_ext_check_overlap(struct inode *inode,
AKA> + struct ext4_extent *newext,
AKA> + unsigned long *block)
AKA> +{
AKA> + struct ext4_ext_path *path;
AKA> + unsigned int depth, b1, len1;
AKA> + int ret = 0;
AKA> +
AKA> + b1 = le32_to_cpu(newext->ee_block);
AKA> + len1 = le16_to_cpu(newext->ee_len);
AKA> + path = ext4_ext_find_extent(inode, b1, NULL);
AKA> + if (IS_ERR(path)) {
AKA> + ret = PTR_ERR(path);
AKA> + goto out;
AKA> + }
AKA> + depth = ext_depth(inode);
AKA> + BUG_ON(path[depth].p_ext == NULL && depth != 0);
AKA> +
AKA> + *block = ext4_ext_next_allocated_block(path);
AKA> + if (*block == EXT_MAX_BLOCK)
AKA> + goto out;
AKA> +
AKA> + if (b1 + len1 > *block)
AKA> + ret = 1;
AKA> +out:
AKA> + return ret;
ext4_ext_find_extent() allocates 'path', I'd expect
kfree() in the function.
thanks, Alex
>>>>> Amit K Arora (AKA) writes:
AKA> +int ext4_ext_check_overlap(struct inode *inode,
AKA> + struct ext4_extent *newext,
AKA> + unsigned long *block)
AKA> +{
AKA> + struct ext4_ext_path *path;
AKA> + unsigned int depth, b1, len1;
AKA> + int ret = 0;
AKA> +
AKA> + b1 = le32_to_cpu(newext->ee_block);
AKA> + len1 = le16_to_cpu(newext->ee_len);
AKA> + path = ext4_ext_find_extent(inode, b1, NULL);
AKA> + if (IS_ERR(path)) {
AKA> + ret = PTR_ERR(path);
AKA> + goto out;
AKA> + }
AKA> + depth = ext_depth(inode);
AKA> + BUG_ON(path[depth].p_ext == NULL && depth != 0);
AKA> +
AKA> + *block = ext4_ext_next_allocated_block(path);
AKA> + if (*block == EXT_MAX_BLOCK)
AKA> + goto out;
AKA> +
AKA> + if (b1 + len1 > *block)
AKA> + ret = 1;
AKA> +out:
AKA> + return ret;
AKA> +}
I'm also not sure we need ext4_ext_find_extent() here.
there are two possibilities:
1) extent in found path covers block(s) before requested ones
then ext4_ext_next_allocated_block(path) can be used
2) extent in found path covers block(s) after request ones
then ee_block from that extent can be used.
thanks, Alex
PS. if I remember the code right .. ;)
On Thu, Jan 04, 2007 at 01:25:35PM +0300, Alex Tomas wrote:
> >>>>> Amit K Arora (AKA) writes:
>
> AKA> +int ext4_ext_check_overlap(struct inode *inode,
> AKA> + struct ext4_extent *newext,
> AKA> + unsigned long *block)
> AKA> +{
> AKA> + struct ext4_ext_path *path;
> AKA> + unsigned int depth, b1, len1;
> AKA> + int ret = 0;
> AKA> +
> AKA> + b1 = le32_to_cpu(newext->ee_block);
> AKA> + len1 = le16_to_cpu(newext->ee_len);
> AKA> + path = ext4_ext_find_extent(inode, b1, NULL);
> AKA> + if (IS_ERR(path)) {
> AKA> + ret = PTR_ERR(path);
> AKA> + goto out;
> AKA> + }
> AKA> + depth = ext_depth(inode);
> AKA> + BUG_ON(path[depth].p_ext == NULL && depth != 0);
> AKA> +
> AKA> + *block = ext4_ext_next_allocated_block(path);
> AKA> + if (*block == EXT_MAX_BLOCK)
> AKA> + goto out;
> AKA> +
> AKA> + if (b1 + len1 > *block)
> AKA> + ret = 1;
> AKA> +out:
> AKA> + return ret;
>
> <AT> ext4_ext_find_extent() allocates 'path', I'd expect
> <AT> kfree() in the function.
Ok. I will add following after "out:"
if (path) {
ext4_ext_drop_refs(path);
kfree(path);
}
And also set "path = NULL;" before calling "goto out", incase
ext4_ext_find_extent() returns an error.
--
Regards,
Amit Arora
On Thu, Jan 04, 2007 at 01:39:24PM +0300, Alex Tomas (AT) wrote:
> >>>>> Amit K Arora (AKA) writes:
>
> AKA> +int ext4_ext_check_overlap(struct inode *inode,
> AKA> + struct ext4_extent *newext,
> AKA> + unsigned long *block)
> AKA> +{
> AKA> + struct ext4_ext_path *path;
> AKA> + unsigned int depth, b1, len1;
> AKA> + int ret = 0;
> AKA> +
> AKA> + b1 = le32_to_cpu(newext->ee_block);
> AKA> + len1 = le16_to_cpu(newext->ee_len);
> AKA> + path = ext4_ext_find_extent(inode, b1, NULL);
> AKA> + if (IS_ERR(path)) {
> AKA> + ret = PTR_ERR(path);
> AKA> + goto out;
> AKA> + }
> AKA> + depth = ext_depth(inode);
> AKA> + BUG_ON(path[depth].p_ext == NULL && depth != 0);
> AKA> +
> AKA> + *block = ext4_ext_next_allocated_block(path);
> AKA> + if (*block == EXT_MAX_BLOCK)
> AKA> + goto out;
> AKA> +
> AKA> + if (b1 + len1 > *block)
> AKA> + ret = 1;
> AKA> +out:
> AKA> + return ret;
> AKA> +}
> AT> I'm also not sure we need ext4_ext_find_extent() here.
Do you mean ext4_ext_next_allocated_block() above ? We anyhow have to
call find_extent() to get the possible neighbouring extent.
> AT> there are two possibilities:
>
> AT> 1) extent in found path covers block(s) before requested ones
> AT> then ext4_ext_next_allocated_block(path) can be used
>
> AT> 2) extent in found path covers block(s) after request ones
> AT> then ee_block from that extent can be used.
You are right. In the case the requested block(s) lie within a hole, when
this hole starts from the begining of the file, this will be true. i.e.,
find_blocks() will return the extent after the requested block(s). In all
other cases, it will return the extent before the requested block(s)
(assuming there is no existing extent which covers the start of the
requested blocks).
Will change the code accordingly to handle this corner case. Thanks for
pointing this out !
--
Regards,
Amit Arora
>>>>> Amit K Arora (AKA) writes:
AT> I'm also not sure we need ext4_ext_find_extent() here.
AKA> Do you mean ext4_ext_next_allocated_block() above ? We anyhow have to
AKA> call find_extent() to get the possible neighbouring extent.
no, I exactly meant ext4_ext_find_extent(). it's expensive
compared to ext4_ext_next_allocated_block().
and if I understand right, you don't need whole extent, you
just need to know next allocated block, which can be retrieved
from index even. this is what ext4_ext_next_allocated_block() does.
AT> there are two possibilities:
>>
AT> 1) extent in found path covers block(s) before requested ones
AT> then ext4_ext_next_allocated_block(path) can be used
>>
AT> 2) extent in found path covers block(s) after request ones
AT> then ee_block from that extent can be used.
AKA> You are right. In the case the requested block(s) lie within a hole, when
AKA> this hole starts from the begining of the file, this will be true. i.e.,
AKA> find_blocks() will return the extent after the requested block(s). In all
AKA> other cases, it will return the extent before the requested block(s)
AKA> (assuming there is no existing extent which covers the start of the
AKA> requested blocks).
existing blocks are to be handled properly in get_blocks():
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
ee_block, ee_len, newblock);
ext4_ext_put_in_cache(inode, ee_block, ee_len,
ee_start, EXT4_EXT_CACHE_EXTENT);
AKA> Will change the code accordingly to handle this corner case. Thanks for
AKA> pointing this out !
my pleasure.
thanks, Alex
On Thu, Jan 04, 2007 at 02:37:40PM +0300, Alex Tomas (AT) wrote:
> >>>>> Amit K Arora (AKA) writes:
> AT> I'm also not sure we need ext4_ext_find_extent() here.
> AKA> Do you mean ext4_ext_next_allocated_block() above ? We anyhow have to
> AKA> call find_extent() to get the possible neighbouring extent.
> AT> no, I exactly meant ext4_ext_find_extent(). it's expensive
> AT> compared to ext4_ext_next_allocated_block().
> AT> and if I understand right, you don't need whole extent, you
> AT> just need to know next allocated block, which can be retrieved
> AT> from index even. this is what ext4_ext_next_allocated_block() does.
Ok, Thanks!
Here is the updated patch.
Signed-off-by: Amit Arora <[email protected]>
---
fs/ext4/extents.c | 67 ++++++++++++++++++++++++++++++++++++++--
include/linux/ext4_fs_extents.h | 1
2 files changed, 66 insertions(+), 2 deletions(-)
Index: linux-2.6.19.prealloc/fs/ext4/extents.c
===================================================================
--- linux-2.6.19.prealloc.orig/fs/ext4/extents.c
+++ linux-2.6.19.prealloc/fs/ext4/extents.c
@@ -1119,6 +1119,43 @@ ext4_can_extents_be_merged(struct inode
}
/*
+ * ext4_ext_check_overlap:
+ * check if a portion of the "newext" extent overlaps with an
+ * existing extent.
+ *
+ * If there is an overlap discovered, it returns the (logical) block
+ * number of the first block in the next extent (the existing extent
+ * which covers few of the new requested blocks)
+ * If there is no overlap found, it returns 0.
+ */
+unsigned int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ struct ext4_ext_path *path)
+{
+ unsigned int depth, b1, len1, b2;
+
+ b1 = le32_to_cpu(newext->ee_block);
+ len1 = le16_to_cpu(newext->ee_len);
+ depth = ext_depth(inode);
+ if (!path[depth].p_ext)
+ goto out;
+ b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+
+ /* get the next allocated block if the extent in the path
+ * is before the requested block(s) */
+ if (b2 < b1) {
+ b2 = ext4_ext_next_allocated_block(path);
+ if (b2 == EXT_MAX_BLOCK)
+ goto out;
+ }
+
+ if (b1 + len1 > b2)
+ return b2;
+out:
+ return 0;
+}
+
+/*
* ext4_ext_insert_extent:
* tries to merge requsted extent into the existing extent or
* inserts requested extent as new one into the tree,
@@ -1133,12 +1170,25 @@ int ext4_ext_insert_extent(handle_t *han
struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL;
int depth, len, err, next;
+ unsigned int oblock;
BUG_ON(newext->ee_len == 0);
depth = ext_depth(inode);
ex = path[depth].p_ext;
BUG_ON(path[depth].p_hdr == NULL);
+ /* check for overlap */
+ oblock = ext4_ext_check_overlap(inode, newext, path);
+ if (oblock) {
+ printk(KERN_ERR "ERROR: newext=%u/%u overlaps with an "
+ "existing extent, which starts with %u\n",
+ le32_to_cpu(newext->ee_block),
+ le16_to_cpu(newext->ee_len),
+ oblock);
+ ext4_ext_show_leaf(inode, path);
+ BUG();
+ }
+
/* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append %d block to %d:%d (from %llu)\n",
@@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
*/
if (ee_len > EXT_MAX_LEN)
goto out2;
+
+ if (iblock < ee_block && iblock + max_blocks >= ee_block)
+ allocated = ee_block - iblock;
+
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
@@ -2016,7 +2070,17 @@ int ext4_ext_get_blocks(handle_t *handle
/* allocate new block */
goal = ext4_ext_find_goal(inode, path, iblock);
- allocated = max_blocks;
+
+ /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
+ newex.ee_block = cpu_to_le32(iblock);
+ if (!allocated) {
+ newex.ee_len = cpu_to_le16(max_blocks);
+ allocated = ext4_ext_check_overlap(inode, &newex, path);
+ if (allocated)
+ allocated = allocated - iblock;
+ else
+ allocated = max_blocks;
+ }
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
if (!newblock)
goto out2;
@@ -2024,7 +2088,6 @@ int ext4_ext_get_blocks(handle_t *handle
goal, newblock, allocated);
/* try to insert new extent into found leaf and return */
- newex.ee_block = cpu_to_le32(iblock);
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
===================================================================
--- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h
+++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
@@ -190,6 +190,7 @@ ext4_ext_invalidate_cache(struct inode *
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
Alex Tomas wrote:
>>>>>>Mingming Cao (MC) writes:
>
>
> MC> But the bug Amit pointed here is unrelated to the code convert
> MC> uninitialized blocks to initialized ones. Rather, it's related to do
> MC> multiple block allocation across on a window with parts already have
> MC> blocks allocated. Without the check, the current code just simply
> MC> allocate the requested extent and insert it into the tree which might
> MC> overlap with existing extent.
>
> correct. thanks for catching. in delayed allocation patch
> get_blocks() isn't used and ext4_ext_walk_space() works
> right in this case.
>
Yep, I realized that yesterday. That explains we never see this overlap
problem when we testing extent+delalloc+mballoc last year.
ext4_ext_walk_space did almost all the overlap check. I think we
could reuse that code.
Mingming
> thanks, Alex
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi, Amit,
Have you looked at ext4_ext_walk_space()? It calculate the right extent
length to allocate to avoid overlap before calling block allocation
callback function is called.
Amit K. Arora wrote:
> /*
> + * ext4_ext_check_overlap:
> + * check if a portion of the "newext" extent overlaps with an
> + * existing extent.
> + *
> + * If there is an overlap discovered, it returns the (logical) block
> + * number of the first block in the next extent (the existing extent
> + * which covers few of the new requested blocks)
> + * If there is no overlap found, it returns 0.
> + */
What if the start logical block of the exisitng extent is 0 and there is
overlap? I think that is possible. For example, the exisitng extent is
(0,100) and you want to insert new extent (0,500), this will certainly
fail to report the overlap.
> +unsigned int ext4_ext_check_overlap(struct inode *inode,
We shall be consistant with other data type used for logical block,
right now is unsigned long. Probably replace that with ext4_fsblk_t type
when that cleanup is introduced.
> + struct ext4_extent *newext,
> + struct ext4_ext_path *path)
> +{
> + unsigned int depth, b1, len1, b2;
> +
unsigned long type for b1 and b2.
> + b1 = le32_to_cpu(newext->ee_block);
> + len1 = le16_to_cpu(newext->ee_len);
> + depth = ext_depth(inode);
> + if (!path[depth].p_ext)
> + goto out;
> + b2 = le32_to_cpu(path[depth].p_ext->ee_block);
> +
> + /* get the next allocated block if the extent in the path
> + * is before the requested block(s) */
> + if (b2 < b1) {
> + b2 = ext4_ext_next_allocated_block(path);
> + if (b2 == EXT_MAX_BLOCK)
> + goto out;
> + }
> +
> + if (b1 + len1 > b2)
> + return b2;
> +out:
> + return 0;
> +}
> +
Since this overlap check function is called inside
ext4_ext_insert_extent(), I think this function should check for all
kinds of overlaps. Here you only check if the new extent is overlap with
the next extent. Looking at ext4_ext_walk_space(), there are total three
kinds of overlaps:
1) righ port of new extent overlap with path->p_ext,
2) left port of new extent overlap with path->p_ext
2) right port of new extent overlap with next extent
I think we are almost repeating the same logic in ext4_ext_walk_space()
here.
> +/*
> * ext4_ext_insert_extent:
> * tries to merge requsted extent into the existing extent or
> * inserts requested extent as new one into the tree,
> @@ -1133,12 +1170,25 @@ int ext4_ext_insert_extent(handle_t *han
> struct ext4_extent *nearex; /* nearest extent */
> struct ext4_ext_path *npath = NULL;
> int depth, len, err, next;
> + unsigned int oblock;
>
unsigned long type for oblock
> BUG_ON(newext->ee_len == 0);
> depth = ext_depth(inode);
> ex = path[depth].p_ext;
> BUG_ON(path[depth].p_hdr == NULL);
>
> + /* check for overlap */
> + oblock = ext4_ext_check_overlap(inode, newext, path);
> + if (oblock) {
> + printk(KERN_ERR "ERROR: newext=%u/%u overlaps with an "
> + "existing extent, which starts with %u\n",
> + le32_to_cpu(newext->ee_block),
> + le16_to_cpu(newext->ee_len),
> + oblock);
> + ext4_ext_show_leaf(inode, path);
> + BUG();
> + }
How about return true or false from ext4_ext_check_overlap()? Inside
that function put the correct new extent logical block number and extent
length that safe to insert? Afterall the returning oblock is used in
ext4_ext_get_blocks() to calculate the safe extent to allocate.
> +
> /* try to insert block into found extent and return */
> if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
> ext_debug("append %d block to %d:%d (from %llu)\n",
> @@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
> */
> if (ee_len > EXT_MAX_LEN)
> goto out2;
> +
> + if (iblock < ee_block && iblock + max_blocks >= ee_block)
> + allocated = ee_block - iblock;
> +
> /* if found extent covers block, simply return it */
> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> newblock = iblock - ee_block + ee_start;
Here I realize that the way that ext4_ext_get_blocks() handles the
requested extent has hole on the right side is: simply returns the left
port of the extent which already has blocks allocated. This is actually
what non_extent get_blocks does also. caller of get_blocks() including
preallocation code in ioctl will continue calling get_blocks to allocate
blocks for the hole.
Probably we shall make this clear in the comment.
> @@ -2016,7 +2070,17 @@ int ext4_ext_get_blocks(handle_t *handle
>
> /* allocate new block */
> goal = ext4_ext_find_goal(inode, path, iblock);
> - allocated = max_blocks;
> +
> + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
> + newex.ee_block = cpu_to_le32(iblock);
> + if (!allocated) {
> + newex.ee_len = cpu_to_le16(max_blocks);
> + allocated = ext4_ext_check_overlap(inode, &newex, path);
> + if (allocated)
> + allocated = allocated - iblock;
> + else
> + allocated = max_blocks;
> + }
> newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
> if (!newblock)
> goto out2;
> @@ -2024,7 +2088,6 @@ int ext4_ext_get_blocks(handle_t *handle
> goal, newblock, allocated);
>
> /* try to insert new extent into found leaf and return */
> - newex.ee_block = cpu_to_le32(iblock);
> ext4_ext_store_pblock(&newex, newblock);
> newex.ee_len = cpu_to_le16(allocated);
> err = ext4_ext_insert_extent(handle, inode, path, &newex);
> Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
> ===================================================================
> --- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h
> +++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
> @@ -190,6 +190,7 @@ ext4_ext_invalidate_cache(struct inode *
>
> extern int ext4_extent_tree_init(handle_t *, struct inode *);
> extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
> +extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
> extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
> extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
> extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
>
>>>>> Amit K Arora (AKA) writes:
AKA> @@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
AKA> */
AKA> if (ee_len > EXT_MAX_LEN)
AKA> goto out2;
AKA> +
AKA> + if (iblock < ee_block && iblock + max_blocks >= ee_block)
AKA> + allocated = ee_block - iblock;
AKA> +
AKA> /* if found extent covers block, simply return it */
AKA> if (iblock >= ee_block && iblock < ee_block + ee_len) {
AKA> newblock = iblock - ee_block + ee_start;
I thought existing code already does this:
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
thanks, Alex
>>>>> Mingming Cao (MC) writes:
MC> Hi, Amit,
MC> Have you looked at ext4_ext_walk_space()? It calculate the right
MC> extent length to allocate to avoid overlap before calling block
MC> allocation callback function is called.
well, it doesn't use cache.
MC> What if the start logical block of the exisitng extent is 0 and there
MC> is overlap? I think that is possible. For example, the exisitng extent
MC> is (0,100) and you want to insert new extent (0,500), this will
MC> certainly fail to report the overlap.
I think this situation must not happen in the first place.
get_blocks() should first find existing blocks and return them
(0,100), subsequent get_blocks() should be called for the
following blocks (100,500) and handle them properly.
MC> Since this overlap check function is called inside
MC> ext4_ext_insert_extent(), I think this function should check for all
MC> kinds of overlaps. Here you only check if the new extent is overlap
MC> with the next extent. Looking at ext4_ext_walk_space(), there are
MC> total three kinds of overlaps:
MC> 1) righ port of new extent overlap with path->p_ext,
MC> 2) left port of new extent overlap with path->p_ext
MC> 2) right port of new extent overlap with next extent
MC> I think we are almost repeating the same logic in
MC> ext4_ext_walk_space() here.
I tend to agree.
thanks, Alex
Alex Tomas wrote:
>>>>>>Amit K Arora (AKA) writes:
>
>
> AKA> @@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
> AKA> */
> AKA> if (ee_len > EXT_MAX_LEN)
> AKA> goto out2;
> AKA> +
> AKA> + if (iblock < ee_block && iblock + max_blocks >= ee_block)
> AKA> + allocated = ee_block - iblock;
> AKA> +
> AKA> /* if found extent covers block, simply return it */
> AKA> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> AKA> newblock = iblock - ee_block + ee_start;
>
> I thought existing code already does this:
>
> /* if found extent covers block, simply return it */
> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> newblock = iblock - ee_block + ee_start;
> /* number of remaining blocks in the extent */
> allocated = ee_len - (iblock - ee_block);
>
>
> thanks, Alex
That's different: the existing code address the case when the left part
of the new extent overlaps with an exisitng extent, in that case I
understand it just returns the allocated part of extent, and continue
the block allocation in the next call of get_blocks().
Well Amit's new code here trying to address the case when the right part
of the new extent overlap with an exisitng extent. He was trying to
update the new extent length to prevent that. As I mentioned ealier we
could put this code into ext4_ext_check_overlap,let it judge whether
there is overlap, and if so, what's the right start block number and length
Thanks,
Mingming
On Thu, Jan 04, 2007 at 11:47:36AM -0800, Mingming Cao (MC) wrote:
> Alex Tomas (AT) wrote:
>
> >>>>>>Amit K Arora (AKA) writes:
> >
> >
> > > AKA> @@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
> > > AKA> */
> > > AKA> if (ee_len > EXT_MAX_LEN)
> > > AKA> goto out2;
> > > AKA> +
> > > AKA> + if (iblock < ee_block && iblock + max_blocks >=
> > > ee_block)
> > > AKA> + allocated = ee_block - iblock;
> > > AKA> +
> > > AKA> /* if found extent covers block, simply return it */
> > > AKA> if (iblock >= ee_block && iblock < ee_block +
> > > ee_len) {
> > > AKA> newblock = iblock - ee_block + ee_start;
> > >
> > AT> I thought existing code already does this:
> >
> > AT> /* if found extent covers block, simply return it */
> > AT> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> > AT> newblock = iblock - ee_block + ee_start;
> > AT> /* number of remaining blocks in the extent */
> > AT> allocated = ee_len - (iblock - ee_block);
> MC> That's different: the existing code address the case when the left part
> MC> of the new extent overlaps with an exisitng extent, in that case I
> MC> understand it just returns the allocated part of extent, and continue
> MC> the block allocation in the next call of get_blocks().
Right.
> MC> Well Amit's new code here trying to address the case when the right part
> MC> of the new extent overlap with an exisitng extent. He was trying to
> MC> update the new extent length to prevent that. As I mentioned ealier we
> MC> could put this code into ext4_ext_check_overlap,let it judge whether
> MC> there is overlap, and if so, what's the right start block number and length
Yes, this check will no longer be required with the modified
ext4_ext_check_overlap, which will check for this condition as well.
--
Regards,
Amit Arora
>
> Thanks,
> Mingming
On Thu, Jan 04, 2007 at 10:50:00AM -0800, Mingming Cao wrote:
> Hi, Amit,
Hi Mingming,
> Have you looked at ext4_ext_walk_space()? It calculate the right extent
> length to allocate to avoid overlap before calling block allocation
> callback function is called.
Yes. More on this below...
>
> Amit K. Arora wrote:
> > /*
> >+ * ext4_ext_check_overlap:
> >+ * check if a portion of the "newext" extent overlaps with an
> >+ * existing extent.
> >+ *
> >+ * If there is an overlap discovered, it returns the (logical) block
> >+ * number of the first block in the next extent (the existing extent
> >+ * which covers few of the new requested blocks)
> >+ * If there is no overlap found, it returns 0.
> >+ */
>
> What if the start logical block of the exisitng extent is 0 and there is
> overlap? I think that is possible. For example, the exisitng extent is
> (0,100) and you want to insert new extent (0,500), this will certainly
> fail to report the overlap.
As Alex mentioned, this case is taken care of by ext4_ext_get_blocks().
> >+unsigned int ext4_ext_check_overlap(struct inode *inode,
>
> We shall be consistant with other data type used for logical block,
> right now is unsigned long. Probably replace that with ext4_fsblk_t type
> when that cleanup is introduced.
Ok, will use "unsigned long".
>
> >+ struct ext4_extent *newext,
> >+ struct ext4_ext_path *path)
> >+{
> >+ unsigned int depth, b1, len1, b2;
> >+
> unsigned long type for b1 and b2.
Ok.
>
> >+ b1 = le32_to_cpu(newext->ee_block);
> >+ len1 = le16_to_cpu(newext->ee_len);
> >+ depth = ext_depth(inode);
> >+ if (!path[depth].p_ext)
> >+ goto out;
> >+ b2 = le32_to_cpu(path[depth].p_ext->ee_block);
> >+
> >+ /* get the next allocated block if the extent in the path
> >+ * is before the requested block(s) */
> >+ if (b2 < b1) {
> >+ b2 = ext4_ext_next_allocated_block(path);
> >+ if (b2 == EXT_MAX_BLOCK)
> >+ goto out;
> >+ }
> >+
> >+ if (b1 + len1 > b2)
> >+ return b2;
> >+out:
> >+ return 0;
> >+}
> >+
>
> Since this overlap check function is called inside
> ext4_ext_insert_extent(), I think this function should check for all
> kinds of overlaps. Here you only check if the new extent is overlap with
> the next extent. Looking at ext4_ext_walk_space(), there are total three
> kinds of overlaps:
> 1) righ port of new extent overlap with path->p_ext,
> 2) left port of new extent overlap with path->p_ext
> 3) right port of new extent overlap with next extent
I think all the three conditions above are being checked. The second
condition is taken care of by the ext4_ext_get_blocks(). And the rest
two checks are being made in the ext4_ext_check_overlap().
check_overlap() first checks if the right portion of the new extent
overlaps with the path->p_ext. If not, then only it checks for an
overlap with the next extent.
>
> I think we are almost repeating the same logic in ext4_ext_walk_space()
> here.
I understand that some portion of the logic in ext4_ext_walk_space() is
being duplicated here in check_overlap(). But, if we have to use
walk_space(), we will need to write a new helper function which will
result in some duplicate code in get_blocks() and
ext4_wb_handle_extent() (like, calling ext4_new_blocks and then
insert_extent()) as well. Unless, ext4_wb_handle_extent() is modified to match
our requirement of persistent preallocation. I am not sure how
complicated and worth that may be.
>
> >+/*
> > * ext4_ext_insert_extent:
> > * tries to merge requsted extent into the existing extent or
> > * inserts requested extent as new one into the tree,
> >@@ -1133,12 +1170,25 @@ int ext4_ext_insert_extent(handle_t *han
> > struct ext4_extent *nearex; /* nearest extent */
> > struct ext4_ext_path *npath = NULL;
> > int depth, len, err, next;
> >+ unsigned int oblock;
> >
> unsigned long type for oblock
Ok.
>
> > BUG_ON(newext->ee_len == 0);
> > depth = ext_depth(inode);
> > ex = path[depth].p_ext;
> > BUG_ON(path[depth].p_hdr == NULL);
> >
> >+ /* check for overlap */
> >+ oblock = ext4_ext_check_overlap(inode, newext, path);
> >+ if (oblock) {
> >+ printk(KERN_ERR "ERROR: newext=%u/%u overlaps with an "
> >+ "existing extent, which starts with %u\n",
> >+ le32_to_cpu(newext->ee_block),
> >+ le16_to_cpu(newext->ee_len),
> >+ oblock);
> >+ ext4_ext_show_leaf(inode, path);
> >+ BUG();
> >+ }
>
> How about return true or false from ext4_ext_check_overlap()? Inside
> that function put the correct new extent logical block number and extent
> length that safe to insert? Afterall the returning oblock is used in
> ext4_ext_get_blocks() to calculate the safe extent to allocate.
Ok.
>
> >+
> > /* try to insert block into found extent and return */
> > if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
> > ext_debug("append %d block to %d:%d (from %llu)\n",
> >@@ -1984,6 +2034,10 @@ int ext4_ext_get_blocks(handle_t *handle
> > */
> > if (ee_len > EXT_MAX_LEN)
> > goto out2;
> >+
> >+ if (iblock < ee_block && iblock + max_blocks >= ee_block)
> >+ allocated = ee_block - iblock;
> >+
> > /* if found extent covers block, simply return it */
> > if (iblock >= ee_block && iblock < ee_block + ee_len) {
> > newblock = iblock - ee_block + ee_start;
>
> Here I realize that the way that ext4_ext_get_blocks() handles the
> requested extent has hole on the right side is: simply returns the left
> port of the extent which already has blocks allocated. This is actually
> what non_extent get_blocks does also. caller of get_blocks() including
> preallocation code in ioctl will continue calling get_blocks to allocate
> blocks for the hole.
>
> Probably we shall make this clear in the comment.
Yes, a comment will be nice here. Not sure if that should be added as part
of this patch, since the required comment is generic and not related to
this bug. What do you think ?
Thanks!
--
Regards,
Amit Arora
This is the revised version of the patch. This takes care of following
review comments from Mingming and Alex:
a> Not to use ext4_ext_find_extent() in check_overlap(), since it is an
expensive operation.
b> Use "unsigned long" for (logical) block numbers everywhere.
c> Return true/false by check_overlap(), rather than extent pointer or
the block number.
d> Update the extent length of nexext in check_overlap(), if there is
an overlap detected.
Signed-off-by: Amit Arora <[email protected]>
---
fs/ext4/extents.c | 50 ++++++++++++++++++++++++++++++++++++++--
include/linux/ext4_fs_extents.h | 1
2 files changed, 49 insertions(+), 2 deletions(-)
Index: linux-2.6.19.prealloc/fs/ext4/extents.c
===================================================================
--- linux-2.6.19.prealloc.orig/fs/ext4/extents.c
+++ linux-2.6.19.prealloc/fs/ext4/extents.c
@@ -1119,6 +1119,45 @@ ext4_can_extents_be_merged(struct inode
}
/*
+ * ext4_ext_check_overlap:
+ * check if a portion of the "newext" extent overlaps with an
+ * existing extent.
+ *
+ * If there is an overlap discovered, it updates the length of the newext
+ * such that there will be no overlap, and then returns 1.
+ * If there is no overlap found, it returns 0.
+ */
+unsigned int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ struct ext4_ext_path *path)
+{
+ unsigned long b1, b2;
+ unsigned int depth, len1;
+
+ b1 = le32_to_cpu(newext->ee_block);
+ len1 = le16_to_cpu(newext->ee_len);
+ depth = ext_depth(inode);
+ if (!path[depth].p_ext)
+ goto out;
+ b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+
+ /* get the next allocated block if the extent in the path
+ * is before the requested block(s) */
+ if (b2 < b1) {
+ b2 = ext4_ext_next_allocated_block(path);
+ if (b2 == EXT_MAX_BLOCK)
+ goto out;
+ }
+
+ if (b1 + len1 > b2) {
+ newext->ee_len = cpu_to_le16(b2 - b1);
+ return 1;
+ }
+out:
+ return 0;
+}
+
+/*
* ext4_ext_insert_extent:
* tries to merge requsted extent into the existing extent or
* inserts requested extent as new one into the tree,
@@ -2016,7 +2055,15 @@ int ext4_ext_get_blocks(handle_t *handle
/* allocate new block */
goal = ext4_ext_find_goal(inode, path, iblock);
- allocated = max_blocks;
+
+ /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
+ newex.ee_block = cpu_to_le32(iblock);
+ newex.ee_len = cpu_to_le16(max_blocks);
+ err = ext4_ext_check_overlap(inode, &newex, path);
+ if (err)
+ allocated = le16_to_cpu(newex.ee_len);
+ else
+ allocated = max_blocks;
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
if (!newblock)
goto out2;
@@ -2024,7 +2071,6 @@ int ext4_ext_get_blocks(handle_t *handle
goal, newblock, allocated);
/* try to insert new extent into found leaf and return */
- newex.ee_block = cpu_to_le32(iblock);
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
Index: linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
===================================================================
--- linux-2.6.19.prealloc.orig/include/linux/ext4_fs_extents.h
+++ linux-2.6.19.prealloc/include/linux/ext4_fs_extents.h
@@ -190,6 +190,7 @@ ext4_ext_invalidate_cache(struct inode *
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
--
Regards,
Amit Arora