2021-03-06 03:35:57

by Gao Xiang

[permalink] [raw]
Subject: [PATCH] erofs: fix bio->bi_max_vecs behavior change

From: Gao Xiang <[email protected]>

Martin reported an issue that directory read could be hung on the
latest -rc kernel with some certain image. The root cause is that
commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
is set as actual allocated vector number rather than the requested
number now.
Let's avoid using .bi_max_vecs completely instead.

Reported-by: Martin DEVERA <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
Hi Chao,

Could you take some time to review this patchset in advance?
I'd like to upstream this regression fix asap since it has a noticeable
impact on the 5.12-rc kernel.

Thanks,
Gao Xiang

fs/erofs/data.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index f88851c5c250..fa25d0eab5de 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -231,14 +231,6 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
goto submit_bio_retry;

*last_block = current_block;
-
- /* shift in advance in case of it followed by too many gaps */
- if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
- /* err should reassign to 0 after submitting */
- err = 0;
- goto submit_bio_out;
- }
-
return bio;

err_out:
@@ -252,7 +244,6 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,

/* if updated manually, continuous pages has a gap */
if (bio)
-submit_bio_out:
submit_bio(bio);
return err ? ERR_PTR(err) : NULL;
}
@@ -274,7 +265,8 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page)
if (IS_ERR(bio))
return PTR_ERR(bio);

- DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
+ if (bio)
+ submit_bio(bio);
return 0;
}

@@ -305,7 +297,6 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
put_page(page);
}

- /* the rare case (end in gaps) */
if (bio)
submit_bio(bio);
}
--
2.20.1


2021-03-06 04:30:58

by Gao Xiang

[permalink] [raw]
Subject: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

From: Gao Xiang <[email protected]>

Martin reported an issue that directory read could be hung on the
latest -rc kernel with some certain image. The root cause is that
commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
is set as actual allocated vector number rather than the requested
number now.

Let's avoid using .bi_max_vecs completely instead.

Reported-by: Martin DEVERA <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
change since v1:
- since bio->bi_max_vecs doesn't record extent blocks anymore,
introduce a counter of remaining extent blocks to avoid reading
beyond the extent.

fs/erofs/data.c | 28 +++++++++++-----------------
1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index f88851c5c250..1249e74b3bf0 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -129,6 +129,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
struct page *page,
erofs_off_t *last_block,
unsigned int nblocks,
+ unsigned int *eblks,
bool ra)
{
struct inode *const inode = mapping->host;
@@ -145,8 +146,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,

/* note that for readpage case, bio also equals to NULL */
if (bio &&
- /* not continuous */
- *last_block + 1 != current_block) {
+ (*last_block + 1 != current_block || !*eblks)) {
submit_bio_retry:
submit_bio(bio);
bio = NULL;
@@ -216,7 +216,8 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);

- bio = bio_alloc(GFP_NOIO, bio_max_segs(nblocks));
+ *eblks = bio_max_segs(nblocks);
+ bio = bio_alloc(GFP_NOIO, *eblks);

bio->bi_end_io = erofs_readendio;
bio_set_dev(bio, sb->s_bdev);
@@ -229,16 +230,8 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
/* out of the extent or bio is full */
if (err < PAGE_SIZE)
goto submit_bio_retry;
-
+ --*eblks;
*last_block = current_block;
-
- /* shift in advance in case of it followed by too many gaps */
- if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
- /* err should reassign to 0 after submitting */
- err = 0;
- goto submit_bio_out;
- }
-
return bio;

err_out:
@@ -252,7 +245,6 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,

/* if updated manually, continuous pages has a gap */
if (bio)
-submit_bio_out:
submit_bio(bio);
return err ? ERR_PTR(err) : NULL;
}
@@ -264,23 +256,26 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
static int erofs_raw_access_readpage(struct file *file, struct page *page)
{
erofs_off_t last_block;
+ unsigned int eblks;
struct bio *bio;

trace_erofs_readpage(page, true);

bio = erofs_read_raw_page(NULL, page->mapping,
- page, &last_block, 1, false);
+ page, &last_block, 1, &eblks, false);

if (IS_ERR(bio))
return PTR_ERR(bio);

- DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
+ if (bio)
+ submit_bio(bio);
return 0;
}

static void erofs_raw_access_readahead(struct readahead_control *rac)
{
erofs_off_t last_block;
+ unsigned int eblks;
struct bio *bio = NULL;
struct page *page;

@@ -291,7 +286,7 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
prefetchw(&page->flags);

bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
- readahead_count(rac), true);
+ readahead_count(rac), &eblks, true);

/* all the page errors are ignored when readahead */
if (IS_ERR(bio)) {
@@ -305,7 +300,6 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
put_page(page);
}

- /* the rare case (end in gaps) */
if (bio)
submit_bio(bio);
}
--
2.20.1

2021-03-08 08:34:52

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

On 2021/3/6 12:04, Gao Xiang wrote:
> From: Gao Xiang <[email protected]>
>
> Martin reported an issue that directory read could be hung on the
> latest -rc kernel with some certain image. The root cause is that
> commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
> vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
> is set as actual allocated vector number rather than the requested
> number now.
>
> Let's avoid using .bi_max_vecs completely instead.
>
> Reported-by: Martin DEVERA <[email protected]>
> Signed-off-by: Gao Xiang <[email protected]>

Looks good to me. Btw, does it need to Cc the stable mailing list?

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2021-03-08 08:36:53

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

Hi Chao,

On Mon, Mar 08, 2021 at 09:29:30AM +0800, Chao Yu wrote:
> On 2021/3/6 12:04, Gao Xiang wrote:
> > From: Gao Xiang <[email protected]>
> >
> > Martin reported an issue that directory read could be hung on the
> > latest -rc kernel with some certain image. The root cause is that
> > commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
> > vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
> > is set as actual allocated vector number rather than the requested
> > number now.
> >
> > Let's avoid using .bi_max_vecs completely instead.
> >
> > Reported-by: Martin DEVERA <[email protected]>
> > Signed-off-by: Gao Xiang <[email protected]>
>
> Looks good to me, btw, it needs to Cc stable mailing list?
>
> Reviewed-by: Chao Yu <[email protected]>
>

Thanks for your review. Kernels <= 5.11 are not impacted.
For now, this only impacts 5.12-rc due to a bio behavior
change (see commit baa2c7c97153). So personally I think
just leaving it as is is fine.

Thanks,
Gao Xiang

> Thanks,
>

2021-03-08 08:39:37

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

On 2021/3/8 10:36, Gao Xiang wrote:
> Hi Chao,
>
> On Mon, Mar 08, 2021 at 09:29:30AM +0800, Chao Yu wrote:
>> On 2021/3/6 12:04, Gao Xiang wrote:
>>> From: Gao Xiang <[email protected]>
>>>
>>> Martin reported an issue that directory read could be hung on the
>>> latest -rc kernel with some certain image. The root cause is that
>>> commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
>>> vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
>>> is set as actual allocated vector number rather than the requested
>>> number now.
>>>
>>> Let's avoid using .bi_max_vecs completely instead.
>>>
>>> Reported-by: Martin DEVERA <[email protected]>
>>> Signed-off-by: Gao Xiang <[email protected]>
>>
>> Looks good to me, btw, it needs to Cc stable mailing list?
>>
>> Reviewed-by: Chao Yu <[email protected]>
>>
>
> Thanks for your review. <= 5.11 kernels are not impacted.
> For now, this only impacts 5.12-rc due to a bio behavior
> change (see commit baa2c7c97153). So personally I think
> just leave as it is is fine.

Okay, so that's fine if you send the pull request before the 5.12 formal release. ;)

Thanks,

>
> Thanks,
> Gao Xiang
>
>> Thanks,
>>
>
> .
>

2021-03-08 08:40:41

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

On Mon, Mar 08, 2021 at 10:52:19AM +0800, Chao Yu wrote:
> On 2021/3/8 10:36, Gao Xiang wrote:
> > Hi Chao,
> >
> > On Mon, Mar 08, 2021 at 09:29:30AM +0800, Chao Yu wrote:
> > > On 2021/3/6 12:04, Gao Xiang wrote:
> > > > From: Gao Xiang <[email protected]>
> > > >
> > > > Martin reported an issue that directory read could be hung on the
> > > > latest -rc kernel with some certain image. The root cause is that
> > > > commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
> > > > vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
> > > > is set as actual allocated vector number rather than the requested
> > > > number now.
> > > >
> > > > Let's avoid using .bi_max_vecs completely instead.
> > > >
> > > > Reported-by: Martin DEVERA <[email protected]>
> > > > Signed-off-by: Gao Xiang <[email protected]>
> > >
> > > Looks good to me, btw, it needs to Cc stable mailing list?
> > >
> > > Reviewed-by: Chao Yu <[email protected]>
> > >
> >
> > Thanks for your review. <= 5.11 kernels are not impacted.
> > For now, this only impacts 5.12-rc due to a bio behavior
> > change (see commit baa2c7c97153). So personally I think
> > just leave as it is is fine.
>
> Okay, so that's fine if you send pull request before 5.12 formal release. ;)

Yeah, it's an urgent commit, as the regression has a very negative impact.
I pushed out the -fixes branch just now. After it has been in linux-next
for a few days, I will upstream it asap... :-(

Thanks,
Gao Xiang

>
> Thanks,
>
> >
> > Thanks,
> > Gao Xiang
> >
> > > Thanks,
> > >
> >
> > .
> >
>

2021-03-19 02:17:01

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

On 2021/3/6 12:04, Gao Xiang wrote:
> From: Gao Xiang <[email protected]>
>
> Martin reported an issue that directory read could be hung on the
> latest -rc kernel with some certain image. The root cause is that
> commit baa2c7c97153 ("block: set .bi_max_vecs as actual allocated
> vector number") changes .bi_max_vecs behavior. bio->bi_max_vecs
> is set as actual allocated vector number rather than the requested
> number now.
>
> Let's avoid using .bi_max_vecs completely instead.
>
> Reported-by: Martin DEVERA <[email protected]>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> change since v1:
> - since bio->bi_max_vecs doesn't record extent blocks anymore,
> introduce a remaining extent block to avoid extent excess.
>
> fs/erofs/data.c | 28 +++++++++++-----------------
> 1 file changed, 11 insertions(+), 17 deletions(-)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index f88851c5c250..1249e74b3bf0 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -129,6 +129,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
> struct page *page,
> erofs_off_t *last_block,
> unsigned int nblocks,
> + unsigned int *eblks,
> bool ra)
> {
> struct inode *const inode = mapping->host;
> @@ -145,8 +146,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
>
> /* note that for readpage case, bio also equals to NULL */
> if (bio &&
> - /* not continuous */
> - *last_block + 1 != current_block) {
> + (*last_block + 1 != current_block || !*eblks)) {

Xiang,

I found below function during checking bi_max_vecs usage in f2fs:

/**
* bio_full - check if the bio is full
* @bio: bio to check
* @len: length of one segment to be added
*
* Return true if @bio is full and one segment with @len bytes can't be
* added to the bio, otherwise return false
*/
static inline bool bio_full(struct bio *bio, unsigned len)
{
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;

if (bio->bi_iter.bi_size > UINT_MAX - len)
return true;

return false;
}

Could you please check whether it would be better to use bio_full()
rather than the remaining-space-in-bio counter maintained by erofs itself?
Something like:

if (bio && (bio_full(bio, PAGE_SIZE) ||
/* not continuous */
(*last_block + 1 != current_block))

I'm thinking we need to decouple from the bio implementation details as much
as possible, to avoid regressions whenever the definition of the bio used/max
size changes, though I've no idea how to fix the f2fs case.

Let me know if you have any other concerns.

Thanks,

> submit_bio_retry:
> submit_bio(bio);
> bio = NULL;
> @@ -216,7 +216,8 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
> if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
> nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
>
> - bio = bio_alloc(GFP_NOIO, bio_max_segs(nblocks));
> + *eblks = bio_max_segs(nblocks);
> + bio = bio_alloc(GFP_NOIO, *eblks);
>
> bio->bi_end_io = erofs_readendio;
> bio_set_dev(bio, sb->s_bdev);
> @@ -229,16 +230,8 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
> /* out of the extent or bio is full */
> if (err < PAGE_SIZE)
> goto submit_bio_retry;
> -
> + --*eblks;
> *last_block = current_block;
> -
> - /* shift in advance in case of it followed by too many gaps */
> - if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
> - /* err should reassign to 0 after submitting */
> - err = 0;
> - goto submit_bio_out;
> - }
> -
> return bio;
>
> err_out:
> @@ -252,7 +245,6 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
>
> /* if updated manually, continuous pages has a gap */
> if (bio)
> -submit_bio_out:
> submit_bio(bio);
> return err ? ERR_PTR(err) : NULL;
> }
> @@ -264,23 +256,26 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
> static int erofs_raw_access_readpage(struct file *file, struct page *page)
> {
> erofs_off_t last_block;
> + unsigned int eblks;
> struct bio *bio;
>
> trace_erofs_readpage(page, true);
>
> bio = erofs_read_raw_page(NULL, page->mapping,
> - page, &last_block, 1, false);
> + page, &last_block, 1, &eblks, false);
>
> if (IS_ERR(bio))
> return PTR_ERR(bio);
>
> - DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
> + if (bio)
> + submit_bio(bio);
> return 0;
> }
>
> static void erofs_raw_access_readahead(struct readahead_control *rac)
> {
> erofs_off_t last_block;
> + unsigned int eblks;
> struct bio *bio = NULL;
> struct page *page;
>
> @@ -291,7 +286,7 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
> prefetchw(&page->flags);
>
> bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
> - readahead_count(rac), true);
> + readahead_count(rac), &eblks, true);
>
> /* all the page errors are ignored when readahead */
> if (IS_ERR(bio)) {
> @@ -305,7 +300,6 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
> put_page(page);
> }
>
> - /* the rare case (end in gaps) */
> if (bio)
> submit_bio(bio);
> }
>

2021-03-19 04:20:19

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2] erofs: fix bio->bi_max_vecs behavior change

Hi Chao,

On Fri, Mar 19, 2021 at 10:15:18AM +0800, Chao Yu wrote:
> On 2021/3/6 12:04, Gao Xiang wrote:

...

> > + (*last_block + 1 != current_block || !*eblks)) {
>
> Xiang,
>
> I found below function during checking bi_max_vecs usage in f2fs:
>
> /**
> * bio_full - check if the bio is full
> * @bio: bio to check
> * @len: length of one segment to be added
> *
> * Return true if @bio is full and one segment with @len bytes can't be
> * added to the bio, otherwise return false
> */
> static inline bool bio_full(struct bio *bio, unsigned len)
> {
> if (bio->bi_vcnt >= bio->bi_max_vecs)
> return true;
>
> if (bio->bi_iter.bi_size > UINT_MAX - len)
> return true;
>
> return false;
> }
>
> Could you please check that whether it will be better to use bio_full()
> rather than using left-space-in-bio maintained by erofs itself? something
> like:
>
> if (bio && (bio_full(bio, PAGE_SIZE) ||
> /* not continuous */
> (*last_block + 1 != current_block))
>
> I'm thinking we need to decouple bio detail implementation as much as
> possible, to avoid regression whenever bio used/max size definition
> updates, though I've no idea how to fix f2fs case.

Thanks for your suggestion.

Not quite sure I understand the idea... The original problem was that
when EROFS called bio_alloc(), the number of requested bvecs also partially
stood for the remaining blocks of the current on-disk extent, in order to
limit the read length. But after that bio behavior change, bi_max_vecs could
be increased internally by the block layer (e.g. 1 --> 4), so bi_max_vecs is
no longer what we expect (I mean the passed-in value). That could cause
out-of-bound read requests or hangs. That's why I decided to record it
manually (never rely on bio statistics anymore...)

Also btw, AFAIK, Jianan is still investigating using iomap instead
(mainly to resolve the tail-packing inline path). And I'm also busy with
the new big pcluster and LZMA features for the next cycle. So I think we
might leave it just as is, and it would be replaced with iomap in the future.

Thanks,
Gao Xiang

>
> Let me know if you have other concern.
>
> Thanks,