2021-09-03 06:18:33

by yangerkun

[permalink] [raw]
Subject: [PATCH 0/3] bugfix for insert/collapse fallocate

yangerkun (3):
ext4: correct the left/middle/right debug message for binsearch
ext4: ensure enough credits in ext4_ext_shift_path_extents
ext4: stop use path once restart journal in
ext4_ext_shift_path_extents

fs/ext4/extents.c | 77 ++++++++++++++++++++++-------------------------
1 file changed, 36 insertions(+), 41 deletions(-)

--
2.31.1


2021-09-03 06:18:33

by yangerkun

[permalink] [raw]
Subject: [PATCH 2/3] ext4: ensure enough credits in ext4_ext_shift_path_extents

Like ext4_ext_rm_leaf, we can ensure enough credits before every call
that will consume credits. This can remove ext4_access_path which only
used in ext4_ext_shift_path_extents. It's a prepare patch for the next
bugfix patch.

Signed-off-by: yangerkun <[email protected]>
---
fs/ext4/extents.c | 49 +++++++++++++++--------------------------------
1 file changed, 15 insertions(+), 34 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7ae32078b48f..a6fb0350f062 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4974,36 +4974,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
}

-/*
- * ext4_access_path:
- * Function to access the path buffer for marking it dirty.
- * It also checks if there are sufficient credits left in the journal handle
- * to update path.
- */
-static int
-ext4_access_path(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
-{
- int credits, err;
-
- if (!ext4_handle_valid(handle))
- return 0;
-
- /*
- * Check if need to extend journal credits
- * 3 for leaf, sb, and inode plus 2 (bmap and group
- * descriptor) for each block group; assume two block
- * groups
- */
- credits = ext4_writepage_trans_blocks(inode);
- err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
- if (err < 0)
- return err;
-
- err = ext4_ext_get_access(handle, inode, path);
- return err;
-}
-
/*
* ext4_ext_shift_path_extents:
* Shift the extents of a path structure lying between path[depth].p_ext
@@ -5018,6 +4988,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
int depth, err = 0;
struct ext4_extent *ex_start, *ex_last;
bool update = false;
+ int credits, restart_credits;
depth = path->p_depth;

while (depth >= 0) {
@@ -5027,13 +4998,23 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
return -EFSCORRUPTED;

ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
+ /* leaf + sb + inode */
+ credits = 3;
+ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
+ update = true;
+ /* extent tree + sb + inode */
+ credits = depth + 2;
+ }

- err = ext4_access_path(handle, inode, path + depth);
+ restart_credits = ext4_writepage_trans_blocks(inode);
+ err = ext4_datasem_ensure_credits(handle, inode, credits,
+ restart_credits, 0);
if (err)
goto out;

- if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
- update = true;
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;

while (ex_start <= ex_last) {
if (SHIFT == SHIFT_LEFT) {
@@ -5064,7 +5045,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
}

/* Update index too */
- err = ext4_access_path(handle, inode, path + depth);
+ err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;

--
2.31.1

2021-09-03 06:19:48

by yangerkun

[permalink] [raw]
Subject: [PATCH 1/3] ext4: correct the left/middle/right debug message for binsearch

The debuginfo for binsearch want to show the left/middle/right extent
while the process search for the goal block. However we show this info
after we change right or left.

Signed-off-by: yangerkun <[email protected]>
---
fs/ext4/extents.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c33e0a2cb6c3..7ae32078b48f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -713,13 +713,14 @@ ext4_ext_binsearch_idx(struct inode *inode,
r = EXT_LAST_INDEX(eh);
while (l <= r) {
m = l + (r - l) / 2;
+ ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+ le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
+ r, le32_to_cpu(r->ei_block));
+
if (block < le32_to_cpu(m->ei_block))
r = m - 1;
else
l = m + 1;
- ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
- le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
- r, le32_to_cpu(r->ei_block));
}

path->p_idx = l - 1;
@@ -781,13 +782,14 @@ ext4_ext_binsearch(struct inode *inode,

while (l <= r) {
m = l + (r - l) / 2;
+ ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+ le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
+ r, le32_to_cpu(r->ee_block));
+
if (block < le32_to_cpu(m->ee_block))
r = m - 1;
else
l = m + 1;
- ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
- le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
- r, le32_to_cpu(r->ee_block));
}

path->p_ext = l - 1;
--
2.31.1

2021-09-03 06:19:49

by yangerkun

[permalink] [raw]
Subject: [PATCH 3/3] ext4: stop use path once restart journal in ext4_ext_shift_path_extents

We get a BUG as follow:

[52117.465187] ------------[ cut here ]------------
[52117.465686] kernel BUG at fs/ext4/extents.c:1756!
...
[52117.478306] Call Trace:
[52117.478565] ext4_ext_shift_extents+0x3ee/0x710
[52117.479020] ext4_fallocate+0x139c/0x1b40
[52117.479405] ? __do_sys_newfstat+0x6b/0x80
[52117.479805] vfs_fallocate+0x151/0x4b0
[52117.480177] ksys_fallocate+0x4a/0xa0
[52117.480533] __x64_sys_fallocate+0x22/0x30
[52117.480930] do_syscall_64+0x35/0x80
[52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae
[52117.481769] RIP: 0033:0x7fa062f855ca

static int ext4_ext_try_to_merge_right(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
int merge_done = 0, unwritten;

depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL); <=== trigger here
eh = path[depth].p_hdr;

Normally, we protect extent tree with i_data_sem, and once we really
need drop i_data_sem, we should reload the ext4_ext_path array after we
recatch i_data_sem since extent tree may has changed, the 'again' in
ext4_ext_remove_space give us a sample. But the other case
ext4_ext_shift_path_extents seems forget to do this(ext4_access_path may
drop i_data_sem and recatch it with not enough credits), and will lead
the upper BUG when there is a parallel extents split which will grow the
extent tree.

Fix it by introduce the again in ext4_ext_shift_extents.

Signed-off-by: yangerkun <[email protected]>
---
fs/ext4/extents.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a6fb0350f062..0aa14f6ca914 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5009,8 +5009,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
restart_credits = ext4_writepage_trans_blocks(inode);
err = ext4_datasem_ensure_credits(handle, inode, credits,
restart_credits, 0);
- if (err)
+ if (err) {
+ if (err > 0)
+ err = -EAGAIN;
goto out;
+ }

err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -5084,6 +5087,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
int ret = 0, depth;
struct ext4_extent *extent;
ext4_lblk_t stop, *iterator, ex_start, ex_end;
+ ext4_lblk_t tmp = EXT_MAX_BLOCKS;

/* Let path point to the last extent */
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
@@ -5137,11 +5141,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* till we reach stop. In case of right shift, iterator points to stop
* and it is decreased till we reach start.
*/
+again:
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
iterator = &stop;

+ if (tmp != EXT_MAX_BLOCKS)
+ *iterator = tmp;
+
/*
* Its safe to start updating extents. Start and stop are unsigned, so
* in case of right shift if extent with 0 block is reached, iterator
@@ -5170,6 +5178,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
}
}

+ tmp = *iterator;
if (SHIFT == SHIFT_LEFT) {
extent = EXT_LAST_EXTENT(path[depth].p_hdr);
*iterator = le32_to_cpu(extent->ee_block) +
@@ -5188,6 +5197,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
handle, SHIFT);
+ /* iterator can be NULL which means we should break */
+ if (ret == -EAGAIN)
+ goto again;
if (ret)
break;
}
--
2.31.1

2021-09-24 09:32:48

by yangerkun

[permalink] [raw]
Subject: Re: [PATCH 0/3] bugfix for insert/collapse fallocate

gently ping...

在 2021/9/3 14:27, yangerkun 写道:
> yangerkun (3):
> ext4: correct the left/middle/right debug message for binsearch
> ext4: ensure enough credits in ext4_ext_shift_path_extents
> ext4: stop use path once restart journal in
> ext4_ext_shift_path_extents
>
> fs/ext4/extents.c | 77 ++++++++++++++++++++++-------------------------
> 1 file changed, 36 insertions(+), 41 deletions(-)
>

2021-09-30 16:13:29

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/3] ext4: correct the left/middle/right debug message for binsearch

On Fri 03-09-21 14:27:46, yangerkun wrote:
> The debuginfo for binsearch want to show the left/middle/right extent
> while the process search for the goal block. However we show this info
> after we change right or left.
>
> Signed-off-by: yangerkun <[email protected]>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza

> ---
> fs/ext4/extents.c | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index c33e0a2cb6c3..7ae32078b48f 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -713,13 +713,14 @@ ext4_ext_binsearch_idx(struct inode *inode,
> r = EXT_LAST_INDEX(eh);
> while (l <= r) {
> m = l + (r - l) / 2;
> + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
> + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
> + r, le32_to_cpu(r->ei_block));
> +
> if (block < le32_to_cpu(m->ei_block))
> r = m - 1;
> else
> l = m + 1;
> - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
> - le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
> - r, le32_to_cpu(r->ei_block));
> }
>
> path->p_idx = l - 1;
> @@ -781,13 +782,14 @@ ext4_ext_binsearch(struct inode *inode,
>
> while (l <= r) {
> m = l + (r - l) / 2;
> + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
> + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
> + r, le32_to_cpu(r->ee_block));
> +
> if (block < le32_to_cpu(m->ee_block))
> r = m - 1;
> else
> l = m + 1;
> - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
> - le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
> - r, le32_to_cpu(r->ee_block));
> }
>
> path->p_ext = l - 1;
> --
> 2.31.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2021-09-30 16:22:03

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/3] ext4: ensure enough credits in ext4_ext_shift_path_extents

On Fri 03-09-21 14:27:47, yangerkun wrote:
> Like ext4_ext_rm_leaf, we can ensure enough credits before every call
> that will consume credits. This can remove ext4_access_path which only
> used in ext4_ext_shift_path_extents. It's a prepare patch for the next
> bugfix patch.
>
> Signed-off-by: yangerkun <[email protected]>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza

> ---
> fs/ext4/extents.c | 49 +++++++++++++++--------------------------------
> 1 file changed, 15 insertions(+), 34 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 7ae32078b48f..a6fb0350f062 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4974,36 +4974,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
> return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
> }
>
> -/*
> - * ext4_access_path:
> - * Function to access the path buffer for marking it dirty.
> - * It also checks if there are sufficient credits left in the journal handle
> - * to update path.
> - */
> -static int
> -ext4_access_path(handle_t *handle, struct inode *inode,
> - struct ext4_ext_path *path)
> -{
> - int credits, err;
> -
> - if (!ext4_handle_valid(handle))
> - return 0;
> -
> - /*
> - * Check if need to extend journal credits
> - * 3 for leaf, sb, and inode plus 2 (bmap and group
> - * descriptor) for each block group; assume two block
> - * groups
> - */
> - credits = ext4_writepage_trans_blocks(inode);
> - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
> - if (err < 0)
> - return err;
> -
> - err = ext4_ext_get_access(handle, inode, path);
> - return err;
> -}
> -
> /*
> * ext4_ext_shift_path_extents:
> * Shift the extents of a path structure lying between path[depth].p_ext
> @@ -5018,6 +4988,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
> int depth, err = 0;
> struct ext4_extent *ex_start, *ex_last;
> bool update = false;
> + int credits, restart_credits;
> depth = path->p_depth;
>
> while (depth >= 0) {
> @@ -5027,13 +4998,23 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
> return -EFSCORRUPTED;
>
> ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
> + /* leaf + sb + inode */
> + credits = 3;
> + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
> + update = true;
> + /* extent tree + sb + inode */
> + credits = depth + 2;
> + }
>
> - err = ext4_access_path(handle, inode, path + depth);
> + restart_credits = ext4_writepage_trans_blocks(inode);
> + err = ext4_datasem_ensure_credits(handle, inode, credits,
> + restart_credits, 0);
> if (err)
> goto out;
>
> - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
> - update = true;
> + err = ext4_ext_get_access(handle, inode, path + depth);
> + if (err)
> + goto out;
>
> while (ex_start <= ex_last) {
> if (SHIFT == SHIFT_LEFT) {
> @@ -5064,7 +5045,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
> }
>
> /* Update index too */
> - err = ext4_access_path(handle, inode, path + depth);
> + err = ext4_ext_get_access(handle, inode, path + depth);
> if (err)
> goto out;
>
> --
> 2.31.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2021-09-30 16:43:39

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 3/3] ext4: stop use path once restart journal in ext4_ext_shift_path_extents

Let me improve English a bit:

On Fri 03-09-21 14:27:48, yangerkun wrote:
> We get a BUG as follow:

We hit the following bug:

>
> [52117.465187] ------------[ cut here ]------------
> [52117.465686] kernel BUG at fs/ext4/extents.c:1756!
> ...
> [52117.478306] Call Trace:
> [52117.478565] ext4_ext_shift_extents+0x3ee/0x710
> [52117.479020] ext4_fallocate+0x139c/0x1b40
> [52117.479405] ? __do_sys_newfstat+0x6b/0x80
> [52117.479805] vfs_fallocate+0x151/0x4b0
> [52117.480177] ksys_fallocate+0x4a/0xa0
> [52117.480533] __x64_sys_fallocate+0x22/0x30
> [52117.480930] do_syscall_64+0x35/0x80
> [52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae
> [52117.481769] RIP: 0033:0x7fa062f855ca
>
> static int ext4_ext_try_to_merge_right(struct inode *inode,
> struct ext4_ext_path *path,
> struct ext4_extent *ex)
> {
> struct ext4_extent_header *eh;
> unsigned int depth, len;
> int merge_done = 0, unwritten;
>
> depth = ext_depth(inode);
> BUG_ON(path[depth].p_hdr == NULL); <=== trigger here
> eh = path[depth].p_hdr;
>
> Normally, we protect extent tree with i_data_sem, and once we really
> need drop i_data_sem, we should reload the ext4_ext_path array after we
> recatch i_data_sem since extent tree may has changed, the 'again' in
> ext4_ext_remove_space give us a sample. But the other case
> ext4_ext_shift_path_extents seems forget to do this(ext4_access_path may
> drop i_data_sem and recatch it with not enough credits), and will lead
> the upper BUG when there is a parallel extents split which will grow the
> extent tree.

Normally, the extent tree is protected by i_data_sem and if we drop
i_data_sem in ext4_datasem_ensure_credits(), we need to reload
ext4_ext_path array after reacquiring i_data_sem since the extent tree may
have changed. The 'again' label in ext4_ext_remove_space() is an example of
this. But ext4_ext_shift_path_extents() forgets to reload ext4_ext_path and
thus can cause the above mentioned BUG when there is a parallel extents
split which will grow the extent tree.

>
> Fix it by introduce the again in ext4_ext_shift_extents.
>
> Signed-off-by: yangerkun <[email protected]>
> ---
> fs/ext4/extents.c | 14 +++++++++++++-
> 1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index a6fb0350f062..0aa14f6ca914 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -5009,8 +5009,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
> restart_credits = ext4_writepage_trans_blocks(inode);
> err = ext4_datasem_ensure_credits(handle, inode, credits,
> restart_credits, 0);
> - if (err)
> + if (err) {
> + if (err > 0)
> + err = -EAGAIN;
> goto out;
> + }

Hum, I'd note that the previous patch actually broke
ext4_ext_shift_path_extents() which could return 1 after patch 2/3 and
probably confuse code upwards in the stack and now you fix it up in this
patch. Can you perhaps fixup the previous patch by changing the condition
to:
if (err < 0)

and then change it here?

>
> err = ext4_ext_get_access(handle, inode, path + depth);
> if (err)
> @@ -5084,6 +5087,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
> int ret = 0, depth;
> struct ext4_extent *extent;
> ext4_lblk_t stop, *iterator, ex_start, ex_end;
> + ext4_lblk_t tmp = EXT_MAX_BLOCKS;

Can you perhaps name this more descriptively than 'tmp'? Something like
restart_lblk or something like that?

> /* Let path point to the last extent */
> path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
> @@ -5137,11 +5141,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
> * till we reach stop. In case of right shift, iterator points to stop
> * and it is decreased till we reach start.
> */
> +again:
> if (SHIFT == SHIFT_LEFT)
> iterator = &start;
> else
> iterator = &stop;
>
> + if (tmp != EXT_MAX_BLOCKS)
> + *iterator = tmp;
> +
> /*
> * Its safe to start updating extents. Start and stop are unsigned, so
> * in case of right shift if extent with 0 block is reached, iterator
> @@ -5170,6 +5178,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
> }
> }
>
> + tmp = *iterator;
> if (SHIFT == SHIFT_LEFT) {
> extent = EXT_LAST_EXTENT(path[depth].p_hdr);
> *iterator = le32_to_cpu(extent->ee_block) +
> @@ -5188,6 +5197,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
> }
> ret = ext4_ext_shift_path_extents(path, shift, inode,
> handle, SHIFT);
> + /* iterator can be NULL which means we should break */
> + if (ret == -EAGAIN)
> + goto again;

Hum, but while we dropped i_data_sem, the extent depth may have increased
so we may need larger 'path' now?

Otherwise the patch looks good.

Honza

--
Jan Kara <[email protected]>
SUSE Labs, CR

2021-10-07 16:08:28

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 0/3] bugfix for insert/collapse fallocate

On Fri, 3 Sep 2021 14:27:45 +0800, yangerkun wrote:
> yangerkun (3):
> ext4: correct the left/middle/right debug message for binsearch
> ext4: ensure enough credits in ext4_ext_shift_path_extents
> ext4: stop use path once restart journal in
> ext4_ext_shift_path_extents
>
> fs/ext4/extents.c | 77 ++++++++++++++++++++++-------------------------
> 1 file changed, 36 insertions(+), 41 deletions(-)
>
> [...]

Applied, thanks!

[1/3] ext4: correct the left/middle/right debug message for binsearch
commit: 189487c02b3865c35be3ced27ebc33f7bfe86220
[2/3] ext4: ensure enough credits in ext4_ext_shift_path_extents
commit: 42c018ecf2bcf37c21476942bb96662fad7133c0
[3/3] ext4: stop use path once restart journal in ext4_ext_shift_path_extents
commit: d56aaa1fa17ff4b45383c8beb36bb6a1cf835e22

Best regards,
--
Theodore Ts'o <[email protected]>

2021-10-08 02:10:45

by yangerkun

[permalink] [raw]
Subject: Re: [PATCH 3/3] ext4: stop use path once restart journal in ext4_ext_shift_path_extents



在 2021/10/1 0:43, Jan Kara 写道:
> Let me improve English a bit:
>
> On Fri 03-09-21 14:27:48, yangerkun wrote:
>> We get a BUG as follow:
>
> We hit the following bug:
>
>>
>> [52117.465187] ------------[ cut here ]------------
>> [52117.465686] kernel BUG at fs/ext4/extents.c:1756!
>> ...
>> [52117.478306] Call Trace:
>> [52117.478565] ext4_ext_shift_extents+0x3ee/0x710
>> [52117.479020] ext4_fallocate+0x139c/0x1b40
>> [52117.479405] ? __do_sys_newfstat+0x6b/0x80
>> [52117.479805] vfs_fallocate+0x151/0x4b0
>> [52117.480177] ksys_fallocate+0x4a/0xa0
>> [52117.480533] __x64_sys_fallocate+0x22/0x30
>> [52117.480930] do_syscall_64+0x35/0x80
>> [52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae
>> [52117.481769] RIP: 0033:0x7fa062f855ca
>>
>> static int ext4_ext_try_to_merge_right(struct inode *inode,
>> struct ext4_ext_path *path,
>> struct ext4_extent *ex)
>> {
>> struct ext4_extent_header *eh;
>> unsigned int depth, len;
>> int merge_done = 0, unwritten;
>>
>> depth = ext_depth(inode);
>> BUG_ON(path[depth].p_hdr == NULL); <=== trigger here
>> eh = path[depth].p_hdr;
>>
>> Normally, we protect extent tree with i_data_sem, and once we really
>> need drop i_data_sem, we should reload the ext4_ext_path array after we
>> recatch i_data_sem since extent tree may has changed, the 'again' in
>> ext4_ext_remove_space give us a sample. But the other case
>> ext4_ext_shift_path_extents seems forget to do this(ext4_access_path may
>> drop i_data_sem and recatch it with not enough credits), and will lead
>> the upper BUG when there is a parallel extents split which will grow the
>> extent tree.
>
> Normally, the extent tree is protected by i_data_sem and if we drop
> i_data_sem in ext4_datasem_ensure_credits(), we need to reload
> ext4_ext_path array after reacquiring i_data_sem since the extent tree may
> have changed. The 'again' label in ext4_ext_remove_space() is an example of
> this. But ext4_ext_shift_path_extents() forgets to reload ext4_ext_path and
> thus can cause the above mentioned BUG when there is a parallel extents
> split which will grow the extent tree.
>
>>
>> Fix it by introduce the again in ext4_ext_shift_extents.
>>
>> Signed-off-by: yangerkun <[email protected]>
>> ---
>> fs/ext4/extents.c | 14 +++++++++++++-
>> 1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index a6fb0350f062..0aa14f6ca914 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -5009,8 +5009,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
>> restart_credits = ext4_writepage_trans_blocks(inode);
>> err = ext4_datasem_ensure_credits(handle, inode, credits,
>> restart_credits, 0);
>> - if (err)
>> + if (err) {
>> + if (err > 0)
>> + err = -EAGAIN;
>> goto out;
>> + }
>
> Hum, I'd note that the previous patch actually broke
> ext4_ext_shift_path_extents() which could return 1 after patch 2/3 and
> probably confuse code upwards in the stack and now you fix it up in this
> patch. Can you perhaps fixup the previous patch by changing the condition
> to:
> if (err < 0)
>
> and then change it here?

Thanks for your review! Ted has fix this by add some comments in patch 2/3!


>
>>
>> err = ext4_ext_get_access(handle, inode, path + depth);
>> if (err)
>> @@ -5084,6 +5087,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
>> int ret = 0, depth;
>> struct ext4_extent *extent;
>> ext4_lblk_t stop, *iterator, ex_start, ex_end;
>> + ext4_lblk_t tmp = EXT_MAX_BLOCKS;
>
> Can you perhaps name this more descriptively than 'tmp'? Something like
> restart_lblk or something like that?

Agree. I'll pay attention next time!

>
>> /* Let path point to the last extent */
>> path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
>> @@ -5137,11 +5141,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
>> * till we reach stop. In case of right shift, iterator points to stop
>> * and it is decreased till we reach start.
>> */
>> +again:
>> if (SHIFT == SHIFT_LEFT)
>> iterator = &start;
>> else
>> iterator = &stop;
>>
>> + if (tmp != EXT_MAX_BLOCKS)
>> + *iterator = tmp;
>> +
>> /*
>> * Its safe to start updating extents. Start and stop are unsigned, so
>> * in case of right shift if extent with 0 block is reached, iterator
>> @@ -5170,6 +5178,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
>> }
>> }
>>
>> + tmp = *iterator;
>> if (SHIFT == SHIFT_LEFT) {
>> extent = EXT_LAST_EXTENT(path[depth].p_hdr);
>> *iterator = le32_to_cpu(extent->ee_block) +
>> @@ -5188,6 +5197,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
>> }
>> ret = ext4_ext_shift_path_extents(path, shift, inode,
>> handle, SHIFT);
>> + /* iterator can be NULL which means we should break */
>> + if (ret == -EAGAIN)
>> + goto again;
>
> Hum, but while we dropped i_data_sem, the extent depth may have increased
> so we may need larger 'path' now?
>
> Otherwise the patch looks good.

The ext4_find_extent in ext4_ext_shift_extents can handle this case. It
seems OK.


>
> Honza
>