From: Zhang Yi <[email protected]>
In ext4_map_blocks(), if we can't find a range of mapping in the
extents cache, we are calling ext4_ext_map_blocks() to search the real
path and ext4_ext_determine_hole() to determine the hole range. But if
the querying range was partially or completely overlaped by a delalloc
extent, we can't find it in the real extent path, so the returned hole
length could be incorrect.
Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc
extent, but it searches start from the expanded hole_start, doesn't
start from the querying range, so the delalloc extent found could not be
the one that overlaped the querying range, plus, it also didn't adjust
the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle
delalloc and insert adjusted hole extent in ext4_ext_determine_hole().
Signed-off-by: Zhang Yi <[email protected]>
Suggested-by: Jan Kara <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++-----------------
1 file changed, 70 insertions(+), 41 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d5efe076d3d3..e0b7e48c4c67 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode,
/*
- * ext4_ext_determine_hole - determine hole around given block
+ * ext4_ext_find_hole - find hole around given block according to the given path
* @inode: inode we lookup in
* @path: path in extent tree to @lblk
* @lblk: pointer to logical block around which we want to determine hole
@@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode,
* The function returns the length of a hole starting at @lblk. We update @lblk
* to the beginning of the hole if we managed to find it.
*/
-static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t *lblk)
+static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
struct ext4_extent *ex;
@@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
return len;
}
-/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
- ext4_lblk_t hole_len)
-{
- struct extent_status es;
-
- ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
- hole_start + hole_len - 1, &es);
- if (es.es_len) {
- /* There's delayed extent containing lblock? */
- if (es.es_lblk <= hole_start)
- return;
- hole_len = min(es.es_lblk - hole_start, hole_len);
- }
- ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
- ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
- EXTENT_STATUS_HOLE);
-}
-
/*
* ext4_ext_rm_idx:
* removes index from the index block.
@@ -4062,6 +4038,69 @@ static int get_implied_cluster_alloc(struct super_block *sb,
return 0;
}
+/*
+ * Determine hole length around the given logical block, first try to
+ * locate and expand the hole from the given @path, and then adjust it
+ * if it's partially or completely converted to delayed extents, insert
+ * it into the extent cache tree if it's indeed a hole, finally return
+ * the length of the determined extent.
+ */
+static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t lblk)
+{
+ ext4_lblk_t hole_start, len;
+ struct extent_status es;
+
+ hole_start = lblk;
+ len = ext4_ext_find_hole(inode, path, &hole_start);
+again:
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+ hole_start + len - 1, &es);
+ if (!es.es_len)
+ goto insert_hole;
+
+ /*
+ * There's a delalloc extent in the hole, handle it if the delalloc
+ * extent is in front of, behind and straddle the queried range.
+ */
+ if (lblk >= es.es_lblk + es.es_len) {
+ /*
+ * The delalloc extent is in front of the queried range,
+ * find again from the queried start block.
+ */
+ len -= lblk - hole_start;
+ hole_start = lblk;
+ goto again;
+ } else if (in_range(lblk, es.es_lblk, es.es_len)) {
+ /*
+ * The delalloc extent containing lblk, it must have been
+ * added after ext4_map_blocks() checked the extent status
+ * tree, adjust the length to the delalloc extent's after
+ * lblk.
+ */
+ len = es.es_lblk + es.es_len - lblk;
+ return len;
+ } else {
+ /*
+ * The delalloc extent is partially or completely behind
+ * the queried range, update hole length until the
+ * beginning of the delalloc extent.
+ */
+ len = min(es.es_lblk - hole_start, len);
+ }
+
+insert_hole:
+ /* Put just found gap into cache to speed up subsequent requests */
+ ext_debug(inode, " -> %u:%u\n", hole_start, len);
+ ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+
+ /* Update hole_len to reflect hole size after lblk */
+ if (hole_start != lblk)
+ len -= lblk - hole_start;
+
+ return len;
+}
/*
* Block allocation/map/preallocation routine for extents based files
@@ -4179,22 +4218,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- ext4_lblk_t hole_start, hole_len;
+ ext4_lblk_t len;
- hole_start = map->m_lblk;
- hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
- /*
- * put just found gap into cache to speed up
- * subsequent requests
- */
- ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+ len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
- /* Update hole_len to reflect hole size after map->m_lblk */
- if (hole_start != map->m_lblk)
- hole_len -= map->m_lblk - hole_start;
map->m_pblk = 0;
- map->m_len = min_t(unsigned int, map->m_len, hole_len);
-
+ map->m_len = min_t(unsigned int, map->m_len, len);
goto out;
}
--
2.39.2
On 2024/5/10 17:41, Luis Henriques wrote:
> On Fri 10 May 2024 11:39:48 AM +08, Zhang Yi wrote;
>
>> On 2024/5/10 1:23, Luis Henriques wrote:
>>> On Thu 09 May 2024 12:39:53 PM -04, Theodore Ts'o wrote;
>>>
>>>> On Thu, May 09, 2024 at 04:16:34PM +0100, Luis Henriques wrote:
>>>>>
>>>>> It's looks like it's easy to trigger an infinite loop here using fstest
>>>>> generic/039. If I understand it correctly (which doesn't happen as often
>>>>> as I'd like), this is due to an integer overflow in the 'if' condition,
>>>>> and should be fixed with the patch below.
>>>>
>>>> Thanks for the report. However, I can't reproduce the failure, and
>>>> looking at generic/039, I don't see how it could be relevant to the
>>>> code path in question. Generic/039 creates a test symlink with two
>>>> hard links in the same directory, syncs the file system, and then
>>>> removes one of the hard links, and then drops access to the block
>>>> device using dmflakey. So I don't see how the extent code would be
>>>> involved at all. Are you sure that you have the correct test listed?
>>>
>>> Yep, I just retested and it's definitely generic/039. I'm using a simple
>>> test environment, with virtme-ng.
>>>
>>>> Looking at the code in question in fs/ext4/extents.c:
>>>>
>>>> again:
>>>> ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
>>>> hole_start + len - 1, &es);
>>>> if (!es.es_len)
>>>> goto insert_hole;
>>>>
>>>> * There's a delalloc extent in the hole, handle it if the delalloc
>>>> * extent is in front of, behind and straddle the queried range.
>>>> */
>>>> - if (lblk >= es.es_lblk + es.es_len) {
>>>> + if (lblk >= ((__u64) es.es_lblk) + es.es_len) {
>>>> /*
>>>> * The delalloc extent is in front of the queried range,
>>>> * find again from the queried start block.
>>>> len -= lblk - hole_start;
>>>> hole_start = lblk;
>>>> goto again;
>>>>
>>>> lblk and es.es_lblk are both __u32. So the infinite loop is
>>>> presumably because es.es_lblk + es.es_len has overflowed. This should
>>>> never happen(tm), and in fact we have a test for this case which
>>>
>>> If I instrument the code, I can see that es.es_len is definitely set to
>>> EXT_MAX_BLOCKS, which will overflow.
>>>
>>
>> Thanks for the report. After looking at the code, I think the root
>> cause of this issue is the variable es was not initialized on replaying
>> fast commit. ext4_es_find_extent_range() will return directly when
>> EXT4_FC_REPLAY flag is set, and then the es.len becomes stall.
>>
>> I can always reproduce this issue on generic/039 with
>> MKFS_OPTIONS="-O fast_commit".
>>
>> This uninitialization problem originally existed in the old
>> ext4_ext_put_gap_in_cache(), but it didn't trigger any real problem
>> since we never check and use extent cache when replaying fast commit.
>> So I suppose the correct fix would be to unconditionally initialize
>> the es variable.
>
> Oh, you're absolutely right -- the extent_status 'es' struct isn't being
> initialized in that case. I totally failed to see that. And yes, I also
> failed to mention I had 'fast_commit' feature enabled, sorry!
>
> Thanks a lot for figuring this out, Yi. I'm looking at this code and
> trying to understand if it would be safe to call __es_find_extent_range()
> when EXT4_FC_REPLAY is in progress. Probably not, and probably better to
> simply do:
>
> es->es_lblk = es->es_len = es->es_pblk = 0;
>
> in that case. I'll send out a patch later today.
>
Yeah, I'm glad it could help.
Thanks,
Yi.