2024-03-07 15:12:47

by Chao Yu

[permalink] [raw]
Subject: [PATCH] f2fs: fix to truncate meta inode pages forcely

Below race case can cause data corruption:

Thread A GC thread
- f2fs_inplace_write_data
- gc_data_segment
- ra_data_block
- locked meta_inode page
- invalidate_mapping_pages
: fail to invalidate meta_inode page
due to lock failure or dirty|writeback
status
- f2fs_submit_page_bio
: write last dirty data to old blkaddr
- move_data_block
- load old data from meta_inode page
- f2fs_submit_page_write
: write old data to new blkaddr

Because invalidate_mapping_pages() will skip invalidating page when the
page has unclear status including locked, dirty, writeback and so on, so
we need to use truncate_inode_pages_range() instead of
invalidate_mapping_pages() to make sure meta_inode page will be dropped.

Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/checkpoint.c | 5 +++--
fs/f2fs/f2fs.h | 28 +++++++++++++++++++++++++++-
fs/f2fs/segment.c | 5 ++---
include/linux/f2fs_fs.h | 1 +
4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a09a9609e228..55b7d2cf030f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
*/
if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
f2fs_sb_has_compression(sbi))
- invalidate_mapping_pages(META_MAPPING(sbi),
- MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
+ f2fs_bug_on(sbi,
+ invalidate_inode_pages2_range(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));

f2fs_release_ino_entry(sbi, false);

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4836e7cb0efe..9814e5981a6a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
}

+static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
+ block_t blkaddr, unsigned int cnt)
+{
+ bool need_submit = false;
+ int i = 0;
+
+ do {
+ struct page *page;
+
+ page = find_get_page(META_MAPPING(sbi), blkaddr + i);
+ if (page) {
+ if (PageWriteback(page))
+ need_submit = true;
+ f2fs_put_page(page, 0);
+ }
+ } while (++i < cnt && !need_submit);
+
+ if (need_submit)
+ f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
+ NULL, 0, DATA);
+
+ truncate_inode_pages_range(META_MAPPING(sbi),
+ F2FS_BLK_TO_BYTES((loff_t)blkaddr),
+ F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
+}
+
static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
- invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
+ f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
f2fs_invalidate_compress_page(sbi, blkaddr);
}

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4ff3b2d14ddf..20af48d7f784 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
}

if (fio->post_read)
- invalidate_mapping_pages(META_MAPPING(sbi),
- fio->new_blkaddr, fio->new_blkaddr);
+ f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);

stat_inc_inplace_blocks(fio->sbi);

@@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
for (i = 0; i < len; i++)
f2fs_wait_on_block_writeback(inode, blkaddr + i);

- invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
+ f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
}

static int read_compacted_summaries(struct f2fs_sb_info *sbi)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 755e9a41b196..a357287eac1e 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -27,6 +27,7 @@

#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1)

/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
--
2.40.1



2024-03-07 18:38:00

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH] f2fs: fix to truncate meta inode pages forcely

On 03/07, Chao Yu wrote:
> Below race case can cause data corruption:
>
> Thread A GC thread
> - f2fs_inplace_write_data
> - gc_data_segment
> - ra_data_block
> - locked meta_inode page
> - invalidate_mapping_pages
> : fail to invalidate meta_inode page
> due to lock failure or dirty|writeback
> status

Wasn't the original data page locked in both cases?

> - f2fs_submit_page_bio
> : write last dirty data to old blkaddr
> - move_data_block
> - load old data from meta_inode page
> - f2fs_submit_page_write
> : write old data to new blkaddr
>
> Because invalidate_mapping_pages() will skip invalidating page when the
> page has unclear status including locked, dirty, writeback and so on, so
> we need to use truncate_inode_pages_range() instead of
> invalidate_mapping_pages() to make sure meta_inode page will be dropped.
>
> Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
> Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 5 +++--
> fs/f2fs/f2fs.h | 28 +++++++++++++++++++++++++++-
> fs/f2fs/segment.c | 5 ++---
> include/linux/f2fs_fs.h | 1 +
> 4 files changed, 33 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index a09a9609e228..55b7d2cf030f 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
> */
> if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
> f2fs_sb_has_compression(sbi))
> - invalidate_mapping_pages(META_MAPPING(sbi),
> - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
> + f2fs_bug_on(sbi,
> + invalidate_inode_pages2_range(META_MAPPING(sbi),
> + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
>
> f2fs_release_ino_entry(sbi, false);
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4836e7cb0efe..9814e5981a6a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
> return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
> }
>
> +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
> + block_t blkaddr, unsigned int cnt)
> +{
> + bool need_submit = false;
> + int i = 0;
> +
> + do {
> + struct page *page;
> +
> + page = find_get_page(META_MAPPING(sbi), blkaddr + i);
> + if (page) {
> + if (PageWriteback(page))
> + need_submit = true;
> + f2fs_put_page(page, 0);
> + }
> + } while (++i < cnt && !need_submit);
> +
> + if (need_submit)
> + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
> + NULL, 0, DATA);
> +
> + truncate_inode_pages_range(META_MAPPING(sbi),
> + F2FS_BLK_TO_BYTES((loff_t)blkaddr),
> + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
> +}
> +
> static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
> block_t blkaddr)
> {
> - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
> + f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
> f2fs_invalidate_compress_page(sbi, blkaddr);
> }
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4ff3b2d14ddf..20af48d7f784 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
> }
>
> if (fio->post_read)
> - invalidate_mapping_pages(META_MAPPING(sbi),
> - fio->new_blkaddr, fio->new_blkaddr);
> + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
>
> stat_inc_inplace_blocks(fio->sbi);
>
> @@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
> for (i = 0; i < len; i++)
> f2fs_wait_on_block_writeback(inode, blkaddr + i);
>
> - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
> + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
> }
>
> static int read_compacted_summaries(struct f2fs_sb_info *sbi)
> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> index 755e9a41b196..a357287eac1e 100644
> --- a/include/linux/f2fs_fs.h
> +++ b/include/linux/f2fs_fs.h
> @@ -27,6 +27,7 @@
>
> #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
> #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
> +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1)
>
> /* 0, 1(node nid), 2(meta nid) are reserved node id */
> #define F2FS_RESERVED_NODE_NUM 3
> --
> 2.40.1

2024-03-08 00:55:44

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH] f2fs: fix to truncate meta inode pages forcely

On 2024/3/8 2:37, Jaegeuk Kim wrote:
> On 03/07, Chao Yu wrote:
>> Below race case can cause data corruption:
>>
>> Thread A GC thread
>> - f2fs_inplace_write_data
>> - gc_data_segment
>> - ra_data_block
>> - locked meta_inode page
>> - invalidate_mapping_pages
>> : fail to invalidate meta_inode page
>> due to lock failure or dirty|writeback
>> status
>
> Wasn't the original data page locked in both cases?

Oh, the race case needs to fixed as below:

Thread A GC thread
- gc_data_segment
- ra_data_block
- locked meta_inode page
- f2fs_inplace_write_data
- invalidate_mapping_pages
: fail to invalidate meta_inode page
due to lock failure or dirty|writeback
status
- f2fs_submit_page_bio
: write last dirty data to old blkaddr
- move_data_block
- load old data from meta_inode page
- f2fs_submit_page_write
: write old data to new blkaddr

There is a hole in between ra_data_block() and move_data_block(),
in where the data page is unlocked.

Thanks,

>
>> - f2fs_submit_page_bio
>> : write last dirty data to old blkaddr
>> - move_data_block
>> - load old data from meta_inode page
>> - f2fs_submit_page_write
>> : write old data to new blkaddr
>>
>> Because invalidate_mapping_pages() will skip invalidating page when the
>> page has unclear status including locked, dirty, writeback and so on, so
>> we need to use truncate_inode_pages_range() instead of
>> invalidate_mapping_pages() to make sure meta_inode page will be dropped.
>>
>> Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC")
>> Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write")
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/checkpoint.c | 5 +++--
>> fs/f2fs/f2fs.h | 28 +++++++++++++++++++++++++++-
>> fs/f2fs/segment.c | 5 ++---
>> include/linux/f2fs_fs.h | 1 +
>> 4 files changed, 33 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index a09a9609e228..55b7d2cf030f 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
>> */
>> if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
>> f2fs_sb_has_compression(sbi))
>> - invalidate_mapping_pages(META_MAPPING(sbi),
>> - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
>> + f2fs_bug_on(sbi,
>> + invalidate_inode_pages2_range(META_MAPPING(sbi),
>> + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
>>
>> f2fs_release_ino_entry(sbi, false);
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 4836e7cb0efe..9814e5981a6a 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
>> return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
>> }
>>
>> +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
>> + block_t blkaddr, unsigned int cnt)
>> +{
>> + bool need_submit = false;
>> + int i = 0;
>> +
>> + do {
>> + struct page *page;
>> +
>> + page = find_get_page(META_MAPPING(sbi), blkaddr + i);
>> + if (page) {
>> + if (PageWriteback(page))
>> + need_submit = true;
>> + f2fs_put_page(page, 0);
>> + }
>> + } while (++i < cnt && !need_submit);
>> +
>> + if (need_submit)
>> + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
>> + NULL, 0, DATA);
>> +
>> + truncate_inode_pages_range(META_MAPPING(sbi),
>> + F2FS_BLK_TO_BYTES((loff_t)blkaddr),
>> + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
>> +}
>> +
>> static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
>> block_t blkaddr)
>> {
>> - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
>> + f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
>> f2fs_invalidate_compress_page(sbi, blkaddr);
>> }
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 4ff3b2d14ddf..20af48d7f784 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
>> }
>>
>> if (fio->post_read)
>> - invalidate_mapping_pages(META_MAPPING(sbi),
>> - fio->new_blkaddr, fio->new_blkaddr);
>> + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
>>
>> stat_inc_inplace_blocks(fio->sbi);
>>
>> @@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
>> for (i = 0; i < len; i++)
>> f2fs_wait_on_block_writeback(inode, blkaddr + i);
>>
>> - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
>> + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
>> }
>>
>> static int read_compacted_summaries(struct f2fs_sb_info *sbi)
>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
>> index 755e9a41b196..a357287eac1e 100644
>> --- a/include/linux/f2fs_fs.h
>> +++ b/include/linux/f2fs_fs.h
>> @@ -27,6 +27,7 @@
>>
>> #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
>> #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
>> +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1)
>>
>> /* 0, 1(node nid), 2(meta nid) are reserved node id */
>> #define F2FS_RESERVED_NODE_NUM 3
>> --
>> 2.40.1