2021-04-27 08:22:57

by 李扬韬

[permalink] [raw]
Subject: [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

When do_checkpoint() fails, the prefree bitmap is not cleared,
but these segments are already in the free state. If these segments
are used, the segments in use will be reset to the free state when
f2fs_clear_prefree_segments is called next time.

So reset free segments to prefree status when do_checkpoint() fails
to avoid this situation.

Signed-off-by: Yangtao Li <[email protected]>
---
fs/f2fs/checkpoint.c | 6 ++++--
fs/f2fs/f2fs.h | 1 +
fs/f2fs/segment.c | 13 +++++++++++++
fs/f2fs/segment.h | 7 ++++++-
4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index be5415a0dbbc..0200af4d02ef 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1647,10 +1647,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_save_inmem_curseg(sbi);

err = do_checkpoint(sbi, cpc);
- if (err)
+ if (err) {
f2fs_release_discard_addrs(sbi);
- else
+ f2fs_set_free_as_prefree_segments(sbi);
+ } else {
f2fs_clear_prefree_segments(sbi, cpc);
+ }

f2fs_restore_inmem_curseg(sbi);
stop:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e2d302ae3a46..1618e9a74e89 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3369,6 +3369,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
+void f2fs_set_free_as_prefree_segments(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c2866561263e..334e499a0f43 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1959,6 +1959,19 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}

+void f2fs_set_free_as_prefree_segments(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) {
+ if (__set_test_and_inuse(sbi, segno))
+ test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc)
{
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e9a7a637d688..5da8d1100b87 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -487,19 +487,24 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
spin_unlock(&free_i->segmap_lock);
}

-static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
+static inline bool __set_test_and_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ bool is_inuse = false;

spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
free_i->free_sections--;
+ } else {
+ is_inuse = true;
}
spin_unlock(&free_i->segmap_lock);
+
+ return is_inuse;
}

static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
--
2.25.1


2021-04-27 12:38:43

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/4/27 16:21, Yangtao Li wrote:
> When do_checkpoint() fails, the prefree bitmap is not cleared,
> but these segments are already in the free state. If these segments
> are used, the segments in use will be reset to the free state when
> f2fs_clear_prefree_segments is called next time.
>
> So reset free segments to prefree status when do_checkpoint() fail
> to avoid this situation.
>
> Signed-off-by: Yangtao Li <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 6 ++++--
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/segment.c | 13 +++++++++++++
> fs/f2fs/segment.h | 7 ++++++-
> 4 files changed, 24 insertions(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index be5415a0dbbc..0200af4d02ef 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1647,10 +1647,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> f2fs_save_inmem_curseg(sbi);
>
> err = do_checkpoint(sbi, cpc);
> - if (err)
> + if (err) {
> f2fs_release_discard_addrs(sbi);
> - else
> + f2fs_set_free_as_prefree_segments(sbi);
> + } else {
> f2fs_clear_prefree_segments(sbi, cpc);
> + }
>
> f2fs_restore_inmem_curseg(sbi);
> stop:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index e2d302ae3a46..1618e9a74e89 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3369,6 +3369,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
> void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
> void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
> bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
> +void f2fs_set_free_as_prefree_segments(struct f2fs_sb_info *sbi);
> void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
> struct cp_control *cpc);
> void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index c2866561263e..334e499a0f43 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1959,6 +1959,19 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
> mutex_unlock(&dirty_i->seglist_lock);
> }
>
> +void f2fs_set_free_as_prefree_segments(struct f2fs_sb_info *sbi)
> +{
> + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> + unsigned int segno;
> +
> + mutex_lock(&dirty_i->seglist_lock);
> + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) {
> + if (__set_test_and_inuse(sbi, segno))
> + test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]);
> + }
> + mutex_unlock(&dirty_i->seglist_lock);
> +}
> +
> void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
> struct cp_control *cpc)
> {
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e9a7a637d688..5da8d1100b87 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -487,19 +487,24 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> spin_unlock(&free_i->segmap_lock);
> }
>
> -static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
> +static inline bool __set_test_and_inuse(struct f2fs_sb_info *sbi,
> unsigned int segno)
> {
> struct free_segmap_info *free_i = FREE_I(sbi);
> unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> + bool is_inuse = false;
>
> spin_lock(&free_i->segmap_lock);
> if (!test_and_set_bit(segno, free_i->free_segmap)) {
> free_i->free_segments--;
> if (!test_and_set_bit(secno, free_i->free_secmap))

if (!IS_CURSEC(sbi, secno) &&
!test_and_set_bit(secno, free_i->free_secmap))

I think just reverting the dirty/free bitmap is not enough if checkpoint fails,
because we have already updated sbi->cur_cp_pack and the nat/sit bitmap; the next
CP will try to overwrite the last valid meta/node/data, and then the filesystem
will be corrupted.

So I suggest setting cp_error if do_checkpoint() fails, until we can handle
all cases, which is not so easy.

What do you think?

Thanks,

> free_i->free_sections--;
> + } else {
> + is_inuse = true;
> }
> spin_unlock(&free_i->segmap_lock);
> +
> + return is_inuse;
> }
>
> static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
>

2021-07-19 09:05:16

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/4/27 20:37, Chao Yu wrote:
> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>
> So I suggest to set cp_error if do_checkpoint() fails until we can handle
> all cases, which is not so easy.
>
> How do you think?

Let's add the patch below first, until you figure out a patch which covers
everything.

From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
From: Chao Yu <[email protected]>
Date: Mon, 19 Jul 2021 16:37:44 +0800
Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed

During f2fs_write_checkpoint(), once we fail in
f2fs_flush_nat_entries() or do_checkpoint(), filesystem metadata
such as the prefree bitmap and the nat/sit version bitmap won't be
recovered, which may cause the f2fs image to become inconsistent. Let's
just set the CP error flag to avoid further updates until we figure out
a scheme to roll back all metadata in such a condition.

Reported-by: Yangtao Li <[email protected]>
Signed-off-by: Yangtao Li <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/checkpoint.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6c208108d69c..096c85022f62 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

/* write cached NAT/SIT entries to NAT/SIT area */
err = f2fs_flush_nat_entries(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_stop_checkpoint(sbi, false);
goto stop;
+ }

f2fs_flush_sit_entries(sbi, cpc);

@@ -1648,10 +1650,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_save_inmem_curseg(sbi);

err = do_checkpoint(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_stop_checkpoint(sbi, false);
f2fs_release_discard_addrs(sbi);
- else
+ } else {
f2fs_clear_prefree_segments(sbi, cpc);
+ }

f2fs_restore_inmem_curseg(sbi);
stop:
--
2.22.1

2021-07-20 02:06:58

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 07/19, Chao Yu wrote:
> On 2021/4/27 20:37, Chao Yu wrote:
> > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> >
> > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > all cases, which is not so easy.
> >
> > How do you think?
>
> Let's add below patch first before you figure out the patch which covers all
> things.
>
> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> From: Chao Yu <[email protected]>
> Date: Mon, 19 Jul 2021 16:37:44 +0800
> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>
> During f2fs_write_checkpoint(), once we failed in
> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> such as prefree bitmap, nat/sit version bitmap won't be recovered,
> it may cause f2fs image to be inconsistent, let's just set CP error
> flag to avoid further updates until we figure out a scheme to rollback
> all metadatas in such condition.
>
> Reported-by: Yangtao Li <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 6c208108d69c..096c85022f62 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> /* write cached NAT/SIT entries to NAT/SIT area */
> err = f2fs_flush_nat_entries(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_stop_checkpoint(sbi, false);

I think we shouldn't abuse this, since we can get a known ENOMEM here as well.

> goto stop;
> + }
>
> f2fs_flush_sit_entries(sbi, cpc);
>
> @@ -1648,10 +1650,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> f2fs_save_inmem_curseg(sbi);
>
> err = do_checkpoint(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_stop_checkpoint(sbi, false);
> f2fs_release_discard_addrs(sbi);
> - else
> + } else {
> f2fs_clear_prefree_segments(sbi, cpc);
> + }
>
> f2fs_restore_inmem_curseg(sbi);
> stop:
> --
> 2.22.1

2021-07-20 02:35:43

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/7/20 2:25, Jaegeuk Kim wrote:
> On 07/19, Chao Yu wrote:
>> On 2021/4/27 20:37, Chao Yu wrote:
>>> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
>>> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
>>> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>>>
>>> So I suggest to set cp_error if do_checkpoint() fails until we can handle
>>> all cases, which is not so easy.
>>>
>>> How do you think?
>>
>> Let's add below patch first before you figure out the patch which covers all
>> things.
>>
>> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
>> From: Chao Yu <[email protected]>
>> Date: Mon, 19 Jul 2021 16:37:44 +0800
>> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>>
>> During f2fs_write_checkpoint(), once we failed in
>> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
>> such as prefree bitmap, nat/sit version bitmap won't be recovered,
>> it may cause f2fs image to be inconsistent, let's just set CP error
>> flag to avoid further updates until we figure out a scheme to rollback
>> all metadatas in such condition.
>>
>> Reported-by: Yangtao Li <[email protected]>
>> Signed-off-by: Yangtao Li <[email protected]>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/checkpoint.c | 10 +++++++---
>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 6c208108d69c..096c85022f62 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>
>> /* write cached NAT/SIT entries to NAT/SIT area */
>> err = f2fs_flush_nat_entries(sbi, cpc);
>> - if (err)
>> + if (err) {
>> + f2fs_stop_checkpoint(sbi, false);
>
> I think we should abuse this, since we can get any known ENOMEM as well.

Yup, but one critical issue here is that it can break the A/B update of the
NAT area, so, in order to fix this hole, how about using NOFAIL memory allocation
in f2fs_flush_nat_entries() first, until we figure out the final scheme?

Thanks,

>
>> goto stop;
>> + }
>>
>> f2fs_flush_sit_entries(sbi, cpc);
>>
>> @@ -1648,10 +1650,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>> f2fs_save_inmem_curseg(sbi);
>>
>> err = do_checkpoint(sbi, cpc);
>> - if (err)
>> + if (err) {
>> + f2fs_stop_checkpoint(sbi, false);
>> f2fs_release_discard_addrs(sbi);
>> - else
>> + } else {
>> f2fs_clear_prefree_segments(sbi, cpc);
>> + }
>>
>> f2fs_restore_inmem_curseg(sbi);
>> stop:
>> --
>> 2.22.1

2021-07-29 01:45:48

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

Ping,

On 2021/7/20 8:04, Chao Yu wrote:
> On 2021/7/20 2:25, Jaegeuk Kim wrote:
>> On 07/19, Chao Yu wrote:
>>> On 2021/4/27 20:37, Chao Yu wrote:
>>>> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
>>>> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
>>>> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>>>>
>>>> So I suggest to set cp_error if do_checkpoint() fails until we can handle
>>>> all cases, which is not so easy.
>>>>
>>>> How do you think?
>>>
>>> Let's add below patch first before you figure out the patch which covers all
>>> things.
>>>
>>> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
>>> From: Chao Yu <[email protected]>
>>> Date: Mon, 19 Jul 2021 16:37:44 +0800
>>> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>>>
>>> During f2fs_write_checkpoint(), once we failed in
>>> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
>>> such as prefree bitmap, nat/sit version bitmap won't be recovered,
>>> it may cause f2fs image to be inconsistent, let's just set CP error
>>> flag to avoid further updates until we figure out a scheme to rollback
>>> all metadatas in such condition.
>>>
>>> Reported-by: Yangtao Li <[email protected]>
>>> Signed-off-by: Yangtao Li <[email protected]>
>>> Signed-off-by: Chao Yu <[email protected]>
>>> ---
>>> fs/f2fs/checkpoint.c | 10 +++++++---
>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 6c208108d69c..096c85022f62 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>
>>> /* write cached NAT/SIT entries to NAT/SIT area */
>>> err = f2fs_flush_nat_entries(sbi, cpc);
>>> - if (err)
>>> + if (err) {
>>> + f2fs_stop_checkpoint(sbi, false);
>>
>> I think we should abuse this, since we can get any known ENOMEM as well.
>
> Yup, but one critical issue here is it can break A/B update of NAT area,
> so, in order to fix this hole, how about using NOFAIL memory allocation
> in f2fs_flush_nat_entries() first until we figure out the finial scheme?
>
> Thanks,
>
>>
>>> goto stop;
>>> + }
>>>
>>> f2fs_flush_sit_entries(sbi, cpc);
>>>
>>> @@ -1648,10 +1650,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>> f2fs_save_inmem_curseg(sbi);
>>>
>>> err = do_checkpoint(sbi, cpc);
>>> - if (err)
>>> + if (err) {
>>> + f2fs_stop_checkpoint(sbi, false);
>>> f2fs_release_discard_addrs(sbi);
>>> - else
>>> + } else {
>>> f2fs_clear_prefree_segments(sbi, cpc);
>>> + }
>>>
>>> f2fs_restore_inmem_curseg(sbi);
>>> stop:
>>> --
>>> 2.22.1
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>

2021-07-30 22:21:06

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 07/20, Chao Yu wrote:
> On 2021/7/20 2:25, Jaegeuk Kim wrote:
> > On 07/19, Chao Yu wrote:
> > > On 2021/4/27 20:37, Chao Yu wrote:
> > > > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > > > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > > > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> > > >
> > > > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > > > all cases, which is not so easy.
> > > >
> > > > How do you think?
> > >
> > > Let's add below patch first before you figure out the patch which covers all
> > > things.
> > >
> > > From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> > > From: Chao Yu <[email protected]>
> > > Date: Mon, 19 Jul 2021 16:37:44 +0800
> > > Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
> > >
> > > During f2fs_write_checkpoint(), once we failed in
> > > f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> > > such as prefree bitmap, nat/sit version bitmap won't be recovered,
> > > it may cause f2fs image to be inconsistent, let's just set CP error
> > > flag to avoid further updates until we figure out a scheme to rollback
> > > all metadatas in such condition.
> > >
> > > Reported-by: Yangtao Li <[email protected]>
> > > Signed-off-by: Yangtao Li <[email protected]>
> > > Signed-off-by: Chao Yu <[email protected]>
> > > ---
> > > fs/f2fs/checkpoint.c | 10 +++++++---
> > > 1 file changed, 7 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > index 6c208108d69c..096c85022f62 100644
> > > --- a/fs/f2fs/checkpoint.c
> > > +++ b/fs/f2fs/checkpoint.c
> > > @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > >
> > > /* write cached NAT/SIT entries to NAT/SIT area */
> > > err = f2fs_flush_nat_entries(sbi, cpc);
> > > - if (err)
> > > + if (err) {
> > > + f2fs_stop_checkpoint(sbi, false);
> >
> > I think we should abuse this, since we can get any known ENOMEM as well.
>
> Yup, but one critical issue here is it can break A/B update of NAT area,
> so, in order to fix this hole, how about using NOFAIL memory allocation
> in f2fs_flush_nat_entries() first until we figure out the finial scheme?

NOFAIL is risky, so how about adding retry logic on ENOMEM with a message,
and then giving up if we can't get the memory? BTW, what about EIO and other
errors of that kind?

>
> Thanks,
>
> >
> > > goto stop;
> > > + }
> > >
> > > f2fs_flush_sit_entries(sbi, cpc);
> > >
> > > @@ -1648,10 +1650,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > f2fs_save_inmem_curseg(sbi);
> > >
> > > err = do_checkpoint(sbi, cpc);
> > > - if (err)
> > > + if (err) {
> > > + f2fs_stop_checkpoint(sbi, false);
> > > f2fs_release_discard_addrs(sbi);
> > > - else
> > > + } else {
> > > f2fs_clear_prefree_segments(sbi, cpc);
> > > + }
> > >
> > > f2fs_restore_inmem_curseg(sbi);
> > > stop:
> > > --
> > > 2.22.1

2021-08-01 10:01:08

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/7/31 6:18, Jaegeuk Kim wrote:
> On 07/20, Chao Yu wrote:
>> On 2021/7/20 2:25, Jaegeuk Kim wrote:
>>> On 07/19, Chao Yu wrote:
>>>> On 2021/4/27 20:37, Chao Yu wrote:
>>>>> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
>>>>> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
>>>>> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>>>>>
>>>>> So I suggest to set cp_error if do_checkpoint() fails until we can handle
>>>>> all cases, which is not so easy.
>>>>>
>>>>> How do you think?
>>>>
>>>> Let's add below patch first before you figure out the patch which covers all
>>>> things.
>>>>
>>>> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
>>>> From: Chao Yu <[email protected]>
>>>> Date: Mon, 19 Jul 2021 16:37:44 +0800
>>>> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>>>>
>>>> During f2fs_write_checkpoint(), once we failed in
>>>> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
>>>> such as prefree bitmap, nat/sit version bitmap won't be recovered,
>>>> it may cause f2fs image to be inconsistent, let's just set CP error
>>>> flag to avoid further updates until we figure out a scheme to rollback
>>>> all metadatas in such condition.
>>>>
>>>> Reported-by: Yangtao Li <[email protected]>
>>>> Signed-off-by: Yangtao Li <[email protected]>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> ---
>>>> fs/f2fs/checkpoint.c | 10 +++++++---
>>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 6c208108d69c..096c85022f62 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>
>>>> /* write cached NAT/SIT entries to NAT/SIT area */
>>>> err = f2fs_flush_nat_entries(sbi, cpc);
>>>> - if (err)
>>>> + if (err) {
>>>> + f2fs_stop_checkpoint(sbi, false);
>>>
>>> I think we should abuse this, since we can get any known ENOMEM as well.
>>
>> Yup, but one critical issue here is it can break A/B update of NAT area,
>> so, in order to fix this hole, how about using NOFAIL memory allocation
>> in f2fs_flush_nat_entries() first until we figure out the finial scheme?
>
> NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
> and then giving up if we can't get the memory? BTW, what about EIO or other
> family?

How about this?

From ffb50d9a8220be7d9e159b8555533adcf11957a8 Mon Sep 17 00:00:00 2001
From: Chao Yu <[email protected]>
Date: Mon, 19 Jul 2021 16:37:44 +0800
Subject: [PATCH v2] f2fs: fix to stop filesystem update once CP failed

During f2fs_write_checkpoint(), once we fail in
f2fs_flush_nat_entries() or do_checkpoint(), filesystem metadata
such as the prefree bitmap and the nat/sit version bitmap won't be
recovered, which may cause the f2fs image to become inconsistent. Let's
just set the CP error flag to avoid further updates until we figure out
a scheme to roll back all metadata in such a condition.

Reported-by: Yangtao Li <[email protected]>
Signed-off-by: Yangtao Li <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/checkpoint.c | 12 +++++++++---
fs/f2fs/node.c | 9 ++++++++-
2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6c208108d69c..f3f66871ae42 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

/* write cached NAT/SIT entries to NAT/SIT area */
err = f2fs_flush_nat_entries(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
+ f2fs_stop_checkpoint(sbi, false);
goto stop;
+ }

f2fs_flush_sit_entries(sbi, cpc);

@@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_save_inmem_curseg(sbi);

err = do_checkpoint(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
+ f2fs_stop_checkpoint(sbi, false);
f2fs_release_discard_addrs(sbi);
- else
+ } else {
f2fs_clear_prefree_segments(sbi, cpc);
+ }

f2fs_restore_inmem_curseg(sbi);
stop:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 5840b82ce311..7162836d71c1 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -141,13 +141,20 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ bool retried = false;

dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));

+retry:
/* get current nat block page with lock */
src_page = get_current_nat_page(sbi, nid);
- if (IS_ERR(src_page))
+ if (IS_ERR(src_page)) {
+ if (PTR_ERR(src_page) == -ENOMEM && !retried) {
+ retried = true;
+ goto retry;
+ }
return src_page;
+ }
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));

--
2.22.1



2021-08-02 18:03:26

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 08/01, Chao Yu wrote:
> On 2021/7/31 6:18, Jaegeuk Kim wrote:
> > On 07/20, Chao Yu wrote:
> > > On 2021/7/20 2:25, Jaegeuk Kim wrote:
> > > > On 07/19, Chao Yu wrote:
> > > > > On 2021/4/27 20:37, Chao Yu wrote:
> > > > > > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > > > > > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > > > > > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> > > > > >
> > > > > > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > > > > > all cases, which is not so easy.
> > > > > >
> > > > > > How do you think?
> > > > >
> > > > > Let's add below patch first before you figure out the patch which covers all
> > > > > things.
> > > > >
> > > > > From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> > > > > From: Chao Yu <[email protected]>
> > > > > Date: Mon, 19 Jul 2021 16:37:44 +0800
> > > > > Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
> > > > >
> > > > > During f2fs_write_checkpoint(), once we failed in
> > > > > f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> > > > > such as prefree bitmap, nat/sit version bitmap won't be recovered,
> > > > > it may cause f2fs image to be inconsistent, let's just set CP error
> > > > > flag to avoid further updates until we figure out a scheme to rollback
> > > > > all metadatas in such condition.
> > > > >
> > > > > Reported-by: Yangtao Li <[email protected]>
> > > > > Signed-off-by: Yangtao Li <[email protected]>
> > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > ---
> > > > > fs/f2fs/checkpoint.c | 10 +++++++---
> > > > > 1 file changed, 7 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > index 6c208108d69c..096c85022f62 100644
> > > > > --- a/fs/f2fs/checkpoint.c
> > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > >
> > > > > /* write cached NAT/SIT entries to NAT/SIT area */
> > > > > err = f2fs_flush_nat_entries(sbi, cpc);
> > > > > - if (err)
> > > > > + if (err) {
> > > > > + f2fs_stop_checkpoint(sbi, false);
> > > >
> > > > > I think we shouldn't abuse this, since we can get any known ENOMEM as well.
> > >
> > > Yup, but one critical issue here is it can break A/B update of NAT area,
> > > so, in order to fix this hole, how about using NOFAIL memory allocation
> > > > in f2fs_flush_nat_entries() first until we figure out the final scheme?
> >
> > NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
> > and then giving up if we can't get the memory? BTW, what about EIO or other
> > family?
>
> How about this?

Hmm, it seems we won't get ENOMEM.

__flush_nat_entry_set
-> get_next_nat_page
-> ...
-> __get_meta_page
-> repeat on ENOMEM, but stop_checkpoint on EIO

If we have an error here, we should already have stopped checkpoint. Have you
seen any other issue?

>
> From ffb50d9a8220be7d9e159b8555533adcf11957a8 Mon Sep 17 00:00:00 2001
> From: Chao Yu <[email protected]>
> Date: Mon, 19 Jul 2021 16:37:44 +0800
> Subject: [PATCH v2] f2fs: fix to stop filesystem update once CP failed
>
> During f2fs_write_checkpoint(), once we failed in
> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> such as prefree bitmap, nat/sit version bitmap won't be recovered,
> it may cause f2fs image to be inconsistent, let's just set CP error
> flag to avoid further updates until we figure out a scheme to rollback
> all metadatas in such condition.
>
> Reported-by: Yangtao Li <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 12 +++++++++---
> fs/f2fs/node.c | 9 ++++++++-
> 2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 6c208108d69c..f3f66871ae42 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> /* write cached NAT/SIT entries to NAT/SIT area */
> err = f2fs_flush_nat_entries(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
> + f2fs_stop_checkpoint(sbi, false);
> goto stop;
> + }
>
> f2fs_flush_sit_entries(sbi, cpc);
>
> @@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> f2fs_save_inmem_curseg(sbi);
>
> err = do_checkpoint(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
> + f2fs_stop_checkpoint(sbi, false);
> f2fs_release_discard_addrs(sbi);
> - else
> + } else {
> f2fs_clear_prefree_segments(sbi, cpc);
> + }
>
> f2fs_restore_inmem_curseg(sbi);
> stop:
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 5840b82ce311..7162836d71c1 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -141,13 +141,20 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
> void *src_addr;
> void *dst_addr;
> struct f2fs_nm_info *nm_i = NM_I(sbi);
> + bool retried = false;
>
> dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
>
> +retry:
> /* get current nat block page with lock */
> src_page = get_current_nat_page(sbi, nid);
> - if (IS_ERR(src_page))
> + if (IS_ERR(src_page)) {
> + if (PTR_ERR(src_page) == -ENOMEM && !retried) {
> + retried = true;
> + goto retry;
> + }
> return src_page;
> + }
> dst_page = f2fs_grab_meta_page(sbi, dst_off);
> f2fs_bug_on(sbi, PageDirty(src_page));
>
> --
> 2.22.1
>

2021-08-03 01:02:40

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/8/3 1:59, Jaegeuk Kim wrote:
> On 08/01, Chao Yu wrote:
>> On 2021/7/31 6:18, Jaegeuk Kim wrote:
>>> On 07/20, Chao Yu wrote:
>>>> On 2021/7/20 2:25, Jaegeuk Kim wrote:
>>>>> On 07/19, Chao Yu wrote:
>>>>>> On 2021/4/27 20:37, Chao Yu wrote:
>>>>>>> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
>>>>>>> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
>>>>>>> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>>>>>>>
>>>>>>> So I suggest to set cp_error if do_checkpoint() fails until we can handle
>>>>>>> all cases, which is not so easy.
>>>>>>>
>>>>>>> How do you think?
>>>>>>
>>>>>> Let's add below patch first before you figure out the patch which covers all
>>>>>> things.
>>>>>>
>>>>>> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
>>>>>> From: Chao Yu <[email protected]>
>>>>>> Date: Mon, 19 Jul 2021 16:37:44 +0800
>>>>>> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>>>>>>
>>>>>> During f2fs_write_checkpoint(), once we failed in
>>>>>> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
>>>>>> such as prefree bitmap, nat/sit version bitmap won't be recovered,
>>>>>> it may cause f2fs image to be inconsistent, let's just set CP error
>>>>>> flag to avoid further updates until we figure out a scheme to rollback
>>>>>> all metadatas in such condition.
>>>>>>
>>>>>> Reported-by: Yangtao Li <[email protected]>
>>>>>> Signed-off-by: Yangtao Li <[email protected]>
>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>> ---
>>>>>> fs/f2fs/checkpoint.c | 10 +++++++---
>>>>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index 6c208108d69c..096c85022f62 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>
>>>>>> /* write cached NAT/SIT entries to NAT/SIT area */
>>>>>> err = f2fs_flush_nat_entries(sbi, cpc);
>>>>>> - if (err)
>>>>>> + if (err) {
>>>>>> + f2fs_stop_checkpoint(sbi, false);
>>>>>
>>>>> I think we shouldn't abuse this, since we can get any known ENOMEM as well.
>>>>
>>>> Yup, but one critical issue here is it can break A/B update of NAT area,
>>>> so, in order to fix this hole, how about using NOFAIL memory allocation
>>>> in f2fs_flush_nat_entries() first until we figure out the final scheme?
>>>
>>> NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
>>> and then giving up if we can't get the memory? BTW, what about EIO or other
>>> family?
>>
>> How about this?
>
> Hmm, it seems we won't get ENOMEM.
>
> __flush_nat_entry_set
> -> get_next_nat_page
> -> ...
> -> __get_meta_page
> -> repeat on ENOMEM, but stop_checkpoint on EIO

Correct, I missed checking __get_meta_page() and f2fs_get_meta_page_retry().

>
> If we have an error here, we should have stopped checkpoint. Have you seen other
> issue?

Shouldn't we still fix the case from the path below?

- f2fs_write_checkpoint
- do_checkpoint
- f2fs_flush_device_cache failed

Thanks,

2021-08-03 01:45:50

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 08/03, Chao Yu wrote:
> On 2021/8/3 1:59, Jaegeuk Kim wrote:
> > On 08/01, Chao Yu wrote:
> > > On 2021/7/31 6:18, Jaegeuk Kim wrote:
> > > > On 07/20, Chao Yu wrote:
> > > > > On 2021/7/20 2:25, Jaegeuk Kim wrote:
> > > > > > On 07/19, Chao Yu wrote:
> > > > > > > On 2021/4/27 20:37, Chao Yu wrote:
> > > > > > > > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > > > > > > > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > > > > > > > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> > > > > > > >
> > > > > > > > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > > > > > > > all cases, which is not so easy.
> > > > > > > >
> > > > > > > > How do you think?
> > > > > > >
> > > > > > > Let's add below patch first before you figure out the patch which covers all
> > > > > > > things.
> > > > > > >
> > > > > > > From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> > > > > > > From: Chao Yu <[email protected]>
> > > > > > > Date: Mon, 19 Jul 2021 16:37:44 +0800
> > > > > > > Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
> > > > > > >
> > > > > > > During f2fs_write_checkpoint(), once we failed in
> > > > > > > f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> > > > > > > such as prefree bitmap, nat/sit version bitmap won't be recovered,
> > > > > > > it may cause f2fs image to be inconsistent, let's just set CP error
> > > > > > > flag to avoid further updates until we figure out a scheme to rollback
> > > > > > > all metadatas in such condition.
> > > > > > >
> > > > > > > Reported-by: Yangtao Li <[email protected]>
> > > > > > > Signed-off-by: Yangtao Li <[email protected]>
> > > > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > > > ---
> > > > > > > fs/f2fs/checkpoint.c | 10 +++++++---
> > > > > > > 1 file changed, 7 insertions(+), 3 deletions(-)
> > > > > > >
> > > > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > > > index 6c208108d69c..096c85022f62 100644
> > > > > > > --- a/fs/f2fs/checkpoint.c
> > > > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > > > @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > > > >
> > > > > > > /* write cached NAT/SIT entries to NAT/SIT area */
> > > > > > > err = f2fs_flush_nat_entries(sbi, cpc);
> > > > > > > - if (err)
> > > > > > > + if (err) {
> > > > > > > + f2fs_stop_checkpoint(sbi, false);
> > > > > >
> > > > > > I think we shouldn't abuse this, since we can get any known ENOMEM as well.
> > > > >
> > > > > Yup, but one critical issue here is it can break A/B update of NAT area,
> > > > > so, in order to fix this hole, how about using NOFAIL memory allocation
> > > > > in f2fs_flush_nat_entries() first until we figure out the final scheme?
> > > >
> > > > NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
> > > > and then giving up if we can't get the memory? BTW, what about EIO or other
> > > > family?
> > >
> > > How about this?
> >
> > Hmm, it seems we won't get ENOMEM.
> >
> > __flush_nat_entry_set
> > -> get_next_nat_page
> > -> ...
> > -> __get_meta_page
> > -> repeat on ENOMEM, but stop_checkpoint on EIO
>
> Correct, I missed to check __get_meta_page() and f2fs_get_meta_page_retry().
>
> >
> > If we have an error here, we should have stopped checkpoint. Have you seen other
> > issue?
>
> Still we should fix the case from below path?
>
> - f2fs_write_checkpoint
> - do_checkpoint
> - f2fs_flush_device_cache failed

What about adding retry logic to deal with EIO in __submit_flush_wait()?
We probably need to retry submitting FLUSH commands, and then give up
with f2fs_stop_checkpoint(). And then, how about adding f2fs_bug_on() if
f2fs_flush_nat_entries() returns an error without f2fs_cp_error()?

>
> Thanks,

2021-08-03 02:59:50

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 2021/8/3 9:44, Jaegeuk Kim wrote:
> On 08/03, Chao Yu wrote:
>> On 2021/8/3 1:59, Jaegeuk Kim wrote:
>>> On 08/01, Chao Yu wrote:
>>>> On 2021/7/31 6:18, Jaegeuk Kim wrote:
>>>>> On 07/20, Chao Yu wrote:
>>>>>> On 2021/7/20 2:25, Jaegeuk Kim wrote:
>>>>>>> On 07/19, Chao Yu wrote:
>>>>>>>> On 2021/4/27 20:37, Chao Yu wrote:
>>>>>>>>> I think just reverting dirty/free bitmap is not enough if checkpoint fails,
>>>>>>>>> due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
>>>>>>>>> to overwrite last valid meta/node/data, then filesystem will be corrupted.
>>>>>>>>>
>>>>>>>>> So I suggest to set cp_error if do_checkpoint() fails until we can handle
>>>>>>>>> all cases, which is not so easy.
>>>>>>>>>
>>>>>>>>> How do you think?
>>>>>>>>
>>>>>>>> Let's add below patch first before you figure out the patch which covers all
>>>>>>>> things.
>>>>>>>>
>>>>>>>> From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
>>>>>>>> From: Chao Yu <[email protected]>
>>>>>>>> Date: Mon, 19 Jul 2021 16:37:44 +0800
>>>>>>>> Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
>>>>>>>>
>>>>>>>> During f2fs_write_checkpoint(), once we failed in
>>>>>>>> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
>>>>>>>> such as prefree bitmap, nat/sit version bitmap won't be recovered,
>>>>>>>> it may cause f2fs image to be inconsistent, let's just set CP error
>>>>>>>> flag to avoid further updates until we figure out a scheme to rollback
>>>>>>>> all metadatas in such condition.
>>>>>>>>
>>>>>>>> Reported-by: Yangtao Li <[email protected]>
>>>>>>>> Signed-off-by: Yangtao Li <[email protected]>
>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/f2fs/checkpoint.c | 10 +++++++---
>>>>>>>> 1 file changed, 7 insertions(+), 3 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>>>> index 6c208108d69c..096c85022f62 100644
>>>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>>>> @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>>>>>>>>
>>>>>>>> /* write cached NAT/SIT entries to NAT/SIT area */
>>>>>>>> err = f2fs_flush_nat_entries(sbi, cpc);
>>>>>>>> - if (err)
>>>>>>>> + if (err) {
>>>>>>>> + f2fs_stop_checkpoint(sbi, false);
>>>>>>>
>>>>>>> I think we shouldn't abuse this, since we can get any known ENOMEM as well.
>>>>>>
>>>>>> Yup, but one critical issue here is it can break A/B update of NAT area,
>>>>>> so, in order to fix this hole, how about using NOFAIL memory allocation
>>>>>> in f2fs_flush_nat_entries() first until we figure out the final scheme?
>>>>>
>>>>> NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
>>>>> and then giving up if we can't get the memory? BTW, what about EIO or other
>>>>> family?
>>>>
>>>> How about this?
>>>
>>> Hmm, it seems we won't get ENOMEM.
>>>
>>> __flush_nat_entry_set
>>> -> get_next_nat_page
>>> -> ...
>>> -> __get_meta_page
>>> -> repeat on ENOMEM, but stop_checkpoint on EIO
>>
>> Correct, I missed to check __get_meta_page() and f2fs_get_meta_page_retry().
>>
>>>
>>> If we have an error here, we should have stopped checkpoint. Have you seen other
>>> issue?
>>
>> Still we should fix the case from below path?
>>
>> - f2fs_write_checkpoint
>> - do_checkpoint
>> - f2fs_flush_device_cache failed
>
> What about adding a retry logic to deal with EIO in __submit_flush_wait()?
> We probably need to retry submitting FLUSH commands, and then give up
> with f2fs_stop_checkpoint(). And, then how about adding f2fs_bug_on() if
> f2fs_flush_nat_entries() returns error without f2fs_cp_error()?

Agreed, how about this?

From 357da99968f6a31375ea47423b691400c5f66547 Mon Sep 17 00:00:00 2001
From: Chao Yu <[email protected]>
Date: Mon, 19 Jul 2021 16:37:44 +0800
Subject: [PATCH v3] f2fs: fix to stop filesystem update once CP failed

During f2fs_write_checkpoint(), once we fail in
f2fs_flush_nat_entries() or do_checkpoint(), filesystem metadata
such as the prefree bitmap and the nat/sit version bitmaps won't be
recovered, which may cause the f2fs image to become inconsistent. Let's
just set the CP error flag to avoid further updates until we figure out
a scheme to roll back all metadata in such a condition.

Reported-by: Yangtao Li <[email protected]>
Signed-off-by: Yangtao Li <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
v3:
- add f2fs_bug_on in error path of f2fs_flush_nat_entries()
- add retry logic in f2fs_flush_device_cache()
fs/f2fs/checkpoint.c | 12 +++++++++---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/segment.c | 8 +++++++-
3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6c208108d69c..1d5a2a867282 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

/* write cached NAT/SIT entries to NAT/SIT area */
err = f2fs_flush_nat_entries(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
+ f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
goto stop;
+ }

f2fs_flush_sit_entries(sbi, cpc);

@@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_save_inmem_curseg(sbi);

err = do_checkpoint(sbi, cpc);
- if (err)
+ if (err) {
+ f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
+ f2fs_stop_checkpoint(sbi, false);
f2fs_release_discard_addrs(sbi);
- else
+ } else {
f2fs_clear_prefree_segments(sbi, cpc);
+ }

f2fs_restore_inmem_curseg(sbi);
stop:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1feef4cb78b6..97eae0e066ee 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -548,6 +548,7 @@ enum {
};

#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+#define DEFAULT_RETRY_FLUSH_COUNT 3 /* maximum retry flush count */

/* congestion wait timeout value, default: 20ms */
#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 80f26158e304..37c7a05659fe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -776,9 +776,15 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
return 0;

for (i = 1; i < sbi->s_ndevs; i++) {
+ int count = DEFAULT_RETRY_FLUSH_COUNT;
+
if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
continue;
- ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+
+ do {
+ ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+ } while (ret && --count);
+
if (ret)
break;

--
2.22.1



2021-08-03 18:22:48

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 08/03, Chao Yu wrote:
> On 2021/8/3 9:44, Jaegeuk Kim wrote:
> > On 08/03, Chao Yu wrote:
> > > On 2021/8/3 1:59, Jaegeuk Kim wrote:
> > > > On 08/01, Chao Yu wrote:
> > > > > On 2021/7/31 6:18, Jaegeuk Kim wrote:
> > > > > > On 07/20, Chao Yu wrote:
> > > > > > > On 2021/7/20 2:25, Jaegeuk Kim wrote:
> > > > > > > > On 07/19, Chao Yu wrote:
> > > > > > > > > On 2021/4/27 20:37, Chao Yu wrote:
> > > > > > > > > > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > > > > > > > > > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > > > > > > > > > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> > > > > > > > > >
> > > > > > > > > > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > > > > > > > > > all cases, which is not so easy.
> > > > > > > > > >
> > > > > > > > > > How do you think?
> > > > > > > > >
> > > > > > > > > Let's add below patch first before you figure out the patch which covers all
> > > > > > > > > things.
> > > > > > > > >
> > > > > > > > > From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> > > > > > > > > From: Chao Yu <[email protected]>
> > > > > > > > > Date: Mon, 19 Jul 2021 16:37:44 +0800
> > > > > > > > > Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
> > > > > > > > >
> > > > > > > > > During f2fs_write_checkpoint(), once we failed in
> > > > > > > > > f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> > > > > > > > > such as prefree bitmap, nat/sit version bitmap won't be recovered,
> > > > > > > > > it may cause f2fs image to be inconsistent, let's just set CP error
> > > > > > > > > flag to avoid further updates until we figure out a scheme to rollback
> > > > > > > > > all metadatas in such condition.
> > > > > > > > >
> > > > > > > > > Reported-by: Yangtao Li <[email protected]>
> > > > > > > > > Signed-off-by: Yangtao Li <[email protected]>
> > > > > > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > > > > > ---
> > > > > > > > > fs/f2fs/checkpoint.c | 10 +++++++---
> > > > > > > > > 1 file changed, 7 insertions(+), 3 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > > > > > index 6c208108d69c..096c85022f62 100644
> > > > > > > > > --- a/fs/f2fs/checkpoint.c
> > > > > > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > > > > > @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > > > > > >
> > > > > > > > > /* write cached NAT/SIT entries to NAT/SIT area */
> > > > > > > > > err = f2fs_flush_nat_entries(sbi, cpc);
> > > > > > > > > - if (err)
> > > > > > > > > + if (err) {
> > > > > > > > > + f2fs_stop_checkpoint(sbi, false);
> > > > > > > >
> > > > > > > > I think we shouldn't abuse this, since we can get any known ENOMEM as well.
> > > > > > >
> > > > > > > Yup, but one critical issue here is it can break A/B update of NAT area,
> > > > > > > so, in order to fix this hole, how about using NOFAIL memory allocation
> > > > > > > in f2fs_flush_nat_entries() first until we figure out the final scheme?
> > > > > >
> > > > > > NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
> > > > > > and then giving up if we can't get the memory? BTW, what about EIO or other
> > > > > > family?
> > > > >
> > > > > How about this?
> > > >
> > > > Hmm, it seems we won't get ENOMEM.
> > > >
> > > > __flush_nat_entry_set
> > > > -> get_next_nat_page
> > > > -> ...
> > > > -> __get_meta_page
> > > > -> repeat on ENOMEM, but stop_checkpoint on EIO
> > >
> > > Correct, I missed to check __get_meta_page() and f2fs_get_meta_page_retry().
> > >
> > > >
> > > > If we have an error here, we should have stopped checkpoint. Have you seen other
> > > > issue?
> > >
> > > Still we should fix the case from below path?
> > >
> > > - f2fs_write_checkpoint
> > > - do_checkpoint
> > > - f2fs_flush_device_cache failed
> >
> > What about adding a retry logic to deal with EIO in __submit_flush_wait()?
> > We probably need to retry submitting FLUSH commands, and then give up
> > with f2fs_stop_checkpoint(). And, then how about adding f2fs_bug_on() if
> > f2fs_flush_nat_entries() returns error without f2fs_cp_error()?
>
> Agreed, how about this?
>
> From 357da99968f6a31375ea47423b691400c5f66547 Mon Sep 17 00:00:00 2001
> From: Chao Yu <[email protected]>
> Date: Mon, 19 Jul 2021 16:37:44 +0800
> Subject: [PATCH v3] f2fs: fix to stop filesystem update once CP failed
>
> During f2fs_write_checkpoint(), once we failed in
> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> such as prefree bitmap, nat/sit version bitmap won't be recovered,
> it may cause f2fs image to be inconsistent, let's just set CP error
> flag to avoid further updates until we figure out a scheme to rollback
> all metadatas in such condition.
>
> Reported-by: Yangtao Li <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - add f2fs_bug_on in error path of f2fs_flush_nat_entries()
> - add retry logic in f2fs_flush_device_cache()
> fs/f2fs/checkpoint.c | 12 +++++++++---
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/segment.c | 8 +++++++-
> 3 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 6c208108d69c..1d5a2a867282 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> /* write cached NAT/SIT entries to NAT/SIT area */
> err = f2fs_flush_nat_entries(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
> + f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
> goto stop;
> + }
>
> f2fs_flush_sit_entries(sbi, cpc);
>
> @@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> f2fs_save_inmem_curseg(sbi);
>
> err = do_checkpoint(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
> + f2fs_stop_checkpoint(sbi, false);
> f2fs_release_discard_addrs(sbi);
> - else
> + } else {
> f2fs_clear_prefree_segments(sbi, cpc);
> + }
>
> f2fs_restore_inmem_curseg(sbi);
> stop:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1feef4cb78b6..97eae0e066ee 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -548,6 +548,7 @@ enum {
> };
>
> #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
> +#define DEFAULT_RETRY_FLUSH_COUNT 3 /* maximum retry flush count */
>
> /* congestion wait timeout value, default: 20ms */
> #define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 80f26158e304..37c7a05659fe 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -776,9 +776,15 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
> return 0;
>
> for (i = 1; i < sbi->s_ndevs; i++) {
> + int count = DEFAULT_RETRY_FLUSH_COUNT;
> +
> if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
> continue;
> - ret = __submit_flush_wait(sbi, FDEV(i).bdev);
> +
> + do {
> + ret = __submit_flush_wait(sbi, FDEV(i).bdev);
> + } while (ret && --count);
> +
> if (ret)

Actually, don't we need to stop checkpoint here?

> break;
>
> --
> 2.22.1
>

2021-08-03 18:57:19

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: reset free segment to prefree status when do_checkpoint() fail

On 08/03, Chao Yu wrote:
> On 2021/8/3 9:44, Jaegeuk Kim wrote:
> > On 08/03, Chao Yu wrote:
> > > On 2021/8/3 1:59, Jaegeuk Kim wrote:
> > > > On 08/01, Chao Yu wrote:
> > > > > On 2021/7/31 6:18, Jaegeuk Kim wrote:
> > > > > > On 07/20, Chao Yu wrote:
> > > > > > > On 2021/7/20 2:25, Jaegeuk Kim wrote:
> > > > > > > > On 07/19, Chao Yu wrote:
> > > > > > > > > On 2021/4/27 20:37, Chao Yu wrote:
> > > > > > > > > > I think just reverting dirty/free bitmap is not enough if checkpoint fails,
> > > > > > > > > > due to we have updated sbi->cur_cp_pack and nat/sit bitmap, next CP tries
> > > > > > > > > > to overwrite last valid meta/node/data, then filesystem will be corrupted.
> > > > > > > > > >
> > > > > > > > > > So I suggest to set cp_error if do_checkpoint() fails until we can handle
> > > > > > > > > > all cases, which is not so easy.
> > > > > > > > > >
> > > > > > > > > > How do you think?
> > > > > > > > >
> > > > > > > > > Let's add below patch first before you figure out the patch which covers all
> > > > > > > > > things.
> > > > > > > > >
> > > > > > > > > From 3af957c98e9e04259f8bb93ca0b74ba164f3f27e Mon Sep 17 00:00:00 2001
> > > > > > > > > From: Chao Yu <[email protected]>
> > > > > > > > > Date: Mon, 19 Jul 2021 16:37:44 +0800
> > > > > > > > > Subject: [PATCH] f2fs: fix to stop filesystem update once CP failed
> > > > > > > > >
> > > > > > > > > During f2fs_write_checkpoint(), once we failed in
> > > > > > > > > f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> > > > > > > > > such as prefree bitmap, nat/sit version bitmap won't be recovered,
> > > > > > > > > it may cause f2fs image to be inconsistent, let's just set CP error
> > > > > > > > > flag to avoid further updates until we figure out a scheme to rollback
> > > > > > > > > all metadatas in such condition.
> > > > > > > > >
> > > > > > > > > Reported-by: Yangtao Li <[email protected]>
> > > > > > > > > Signed-off-by: Yangtao Li <[email protected]>
> > > > > > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > > > > > ---
> > > > > > > > > fs/f2fs/checkpoint.c | 10 +++++++---
> > > > > > > > > 1 file changed, 7 insertions(+), 3 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > > > > > index 6c208108d69c..096c85022f62 100644
> > > > > > > > > --- a/fs/f2fs/checkpoint.c
> > > > > > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > > > > > @@ -1639,8 +1639,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> > > > > > > > >
> > > > > > > > > /* write cached NAT/SIT entries to NAT/SIT area */
> > > > > > > > > err = f2fs_flush_nat_entries(sbi, cpc);
> > > > > > > > > - if (err)
> > > > > > > > > + if (err) {
> > > > > > > > > + f2fs_stop_checkpoint(sbi, false);
> > > > > > > >
> > > > > > > > I think we shouldn't abuse this, since we can get any known ENOMEM as well.
> > > > > > >
> > > > > > > Yup, but one critical issue here is it can break A/B update of NAT area,
> > > > > > > so, in order to fix this hole, how about using NOFAIL memory allocation
> > > > > > > in f2fs_flush_nat_entries() first until we figure out the final scheme?
> > > > > >
> > > > > > NOFAIL is risky, so how about adding a retry logic on ENOMEM with a message
> > > > > > and then giving up if we can't get the memory? BTW, what about EIO or other
> > > > > > family?
> > > > >
> > > > > How about this?
> > > >
> > > > Hmm, it seems we won't get ENOMEM.
> > > >
> > > > __flush_nat_entry_set
> > > > -> get_next_nat_page
> > > > -> ...
> > > > -> __get_meta_page
> > > > -> repeat on ENOMEM, but stop_checkpoint on EIO
> > >
> > > Correct, I missed to check __get_meta_page() and f2fs_get_meta_page_retry().
> > >
> > > >
> > > > If we have an error here, we should have stopped checkpoint. Have you seen other
> > > > issue?
> > >
> > > Should we still fix the case from the path below?
> > >
> > > - f2fs_write_checkpoint
> > > - do_checkpoint
> > > - f2fs_flush_device_cache failed
> >
> > What about adding a retry logic to deal with EIO in __submit_flush_wait()?
> > We probably need to retry submitting FLUSH commands, and then give up
> > with f2fs_stop_checkpoint(). And, then how about adding f2fs_bug_on() if
> > f2fs_flush_nat_entries() returns error without f2fs_cp_error()?
>
> Agreed, how about this?
>
> From 357da99968f6a31375ea47423b691400c5f66547 Mon Sep 17 00:00:00 2001
> From: Chao Yu <[email protected]>
> Date: Mon, 19 Jul 2021 16:37:44 +0800
> Subject: [PATCH v3] f2fs: fix to stop filesystem update once CP failed
>
> During f2fs_write_checkpoint(), once we failed in
> f2fs_flush_nat_entries() or do_checkpoint(), metadata of filesystem
> such as prefree bitmap, nat/sit version bitmap won't be recovered,
> it may cause f2fs image to be inconsistent, let's just set CP error
> flag to avoid further updates until we figure out a scheme to rollback
> all metadata in such a condition.
>
> Reported-by: Yangtao Li <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - add f2fs_bug_on in error path of f2fs_flush_nat_entries()
> - add retry logic in f2fs_flush_device_cache()
> fs/f2fs/checkpoint.c | 12 +++++++++---
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/segment.c | 8 +++++++-
> 3 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 6c208108d69c..1d5a2a867282 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1639,8 +1639,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>
> /* write cached NAT/SIT entries to NAT/SIT area */
> err = f2fs_flush_nat_entries(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
> + f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
> goto stop;
> + }
>
> f2fs_flush_sit_entries(sbi, cpc);
>
> @@ -1648,10 +1651,13 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> f2fs_save_inmem_curseg(sbi);
>
> err = do_checkpoint(sbi, cpc);
> - if (err)
> + if (err) {
> + f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);

f2fs_bug_on(sbi, !f2fs_cp_error(sbi)); ?

> + f2fs_stop_checkpoint(sbi, false);
> f2fs_release_discard_addrs(sbi);
> - else
> + } else {
> f2fs_clear_prefree_segments(sbi, cpc);
> + }
>
> f2fs_restore_inmem_curseg(sbi);
> stop:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1feef4cb78b6..97eae0e066ee 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -548,6 +548,7 @@ enum {
> };
>
> #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
> +#define DEFAULT_RETRY_FLUSH_COUNT 3 /* maximum retry flush count */
>
> /* congestion wait timeout value, default: 20ms */
> #define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20))
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 80f26158e304..37c7a05659fe 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -776,9 +776,15 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
> return 0;
>
> for (i = 1; i < sbi->s_ndevs; i++) {
> + int count = DEFAULT_RETRY_FLUSH_COUNT;

Just use DEFAULT_RETRY_IO_COUNT?

> +
> if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
> continue;
> - ret = __submit_flush_wait(sbi, FDEV(i).bdev);
> +
> + do {
> + ret = __submit_flush_wait(sbi, FDEV(i).bdev);

congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);

> + } while (ret && --count);
> +
> if (ret)
> break;
>
> --
> 2.22.1
>