2024-01-11 06:42:28

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 1/6] f2fs: compress: fix to guarantee persisting compressed blocks by CP

If a data block in a compressed cluster is not persisted together with
its metadata during checkpoint, the data may be corrupted after SPOR
(sudden power-off recovery). Let's guarantee that compressed pages are
written by checkpoint.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 4 +++-
fs/f2fs/data.c | 17 +++++++++--------
fs/f2fs/f2fs.h | 4 +++-
3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index c5a4364c4482..9940b7886e5d 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
struct f2fs_sb_info *sbi = bio->bi_private;
struct compress_io_ctx *cic =
(struct compress_io_ctx *)page_private(page);
+ enum count_type type = WB_DATA_TYPE(page,
+ f2fs_is_compressed_page(page));
int i;

if (unlikely(bio->bi_status))
@@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)

f2fs_compress_free_page(page);

- dec_page_count(sbi, F2FS_WB_DATA);
+ dec_page_count(sbi, type);

if (atomic_dec_return(&cic->pending_pages))
return;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dce8defdf4c7..81f9e2cc49e2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}

-static bool __is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
@@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
S_ISDIR(inode->i_mode))
return true;

- if (f2fs_is_compressed_page(page))
- return false;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
page_private_gcing(page))
return true;
@@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)

bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
- enum count_type type = WB_DATA_TYPE(page);
+ enum count_type type = WB_DATA_TYPE(page, false);

if (page_private_dummy(page)) {
clear_page_private_dummy(page);
@@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(page) : WB_DATA_TYPE(fio->page));
+ __read_io_type(page) : WB_DATA_TYPE(fio->page, false));

if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

- inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));

*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
+ enum count_type type;

f2fs_bug_on(sbi, is_read_io(fio->op));

@@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
/* set submitted = true as a return value */
fio->submitted = 1;

- inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+ type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ inc_page_count(sbi, type);

if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
@@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
if (F2FS_IO_ALIGNED(sbi) &&
(fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page,
+ fio->compressed_page));
fio->retry = 1;
goto skip;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..50f3d546ded8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(p, f) \
+ (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
+bool f2fs_is_cp_guaranteed(struct page *page);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
--
2.40.1



2024-01-11 06:42:36

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 2/6] f2fs: compress: fix to cover normal cluster write with cp_rwsem

When we overwrite a compressed cluster w/ a normal cluster, we should
not unlock cp_rwsem during f2fs_write_raw_pages(); otherwise data
will be corrupted if only some of the blocks were persisted before CP & SPOR,
because the cluster metadata wasn't updated atomically.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 20 ++++++++++++++------
fs/f2fs/data.c | 3 ++-
2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 9940b7886e5d..bf4cfab67aec 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret, i;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ int _submitted, compr_blocks, ret = 0, i;

compr_blocks = f2fs_compressed_blocks(cc);

@@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (compr_blocks < 0)
return compr_blocks;

+ /* overwrite compressed cluster w/ normal cluster */
+ if (compr_blocks > 0)
+ f2fs_lock_op(sbi);
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
@@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
unlock_page(cc->rpages[i]);
ret = 0;
} else if (ret == -EAGAIN) {
+ ret = 0;
/*
* for quota file, just redirty left pages to
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
if (IS_NOQUOTA(cc->inode))
- return 0;
- ret = 0;
+ goto out;
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry_write;
}
- return ret;
+ goto out;
}

*submitted += _submitted;
}

- f2fs_balance_fs(F2FS_M_SB(mapping), true);
+out:
+ if (compr_blocks > 0)
+ f2fs_unlock_op(sbi);

- return 0;
+ f2fs_balance_fs(sbi, true);
+ return ret;
}

int f2fs_write_multi_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 81f9e2cc49e2..b171a9980f6a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
- .need_lock = LOCK_RETRY,
+ .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
.post_read = f2fs_post_read_required(inode) ? 1 : 0,
.io_type = io_type,
.io_wbc = wbc,
@@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (err == -EAGAIN) {
err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
+ f2fs_bug_on(sbi, compr_blocks);
fio.need_lock = LOCK_REQ;
err = f2fs_do_write_data_page(&fio);
}
--
2.40.1


2024-01-11 06:42:53

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 3/6] f2fs: compress: fix to check unreleased compressed cluster

From: Sheng Yong <[email protected]>

A compressed cluster may be left unreleased because
release_compress_blocks() can fail; fix reserve_compress_blocks() to
handle such a still-reserved compressed cluster correctly.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Sheng Yong <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
v4:
- merge the check conditions, as suggested by Daeho.
fs/f2fs/file.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 026d05a7edd8..80d9c4c096f0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3624,7 +3624,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
goto next;
}

- if (__is_valid_data_blkaddr(blkaddr)) {
+ /*
+ * compressed cluster was not released due to it
+ * fails in release_compress_blocks(), so NEW_ADDR
+ * is a possible case.
+ */
+ if (blkaddr == NEW_ADDR ||
+ __is_valid_data_blkaddr(blkaddr)) {
compr_blocks++;
continue;
}
@@ -3633,6 +3639,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
}

reserved = cluster_size - compr_blocks;
+ if (!reserved)
+ goto next;
+
ret = inc_valid_block_count(sbi, dn->inode, &reserved);
if (ret)
return ret;
--
2.40.1


2024-01-11 06:43:09

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 4/6] f2fs: compress: fix to avoid inconsistence between i_blocks and dnode

In reserve_compress_blocks(), we update the blkaddrs of the dnode prior to
calling inc_valid_block_count(), which may cause an inconsistent status between
i_blocks and blkaddrs once inc_valid_block_count() fails.

To fix this issue, reverse their invocation order.

Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/data.c | 5 +++--
fs/f2fs/f2fs.h | 7 ++++++-
fs/f2fs/file.c | 26 ++++++++++++++------------
fs/f2fs/segment.c | 2 +-
4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b171a9980f6a..8d2ace723310 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)

if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
+ if (unlikely(err))
return err;

trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
@@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)

dn->data_blkaddr = f2fs_data_blkaddr(dn);
if (dn->data_blkaddr == NULL_ADDR) {
- err = inc_valid_block_count(sbi, dn->inode, &count);
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
if (unlikely(err))
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 50f3d546ded8..69e71460a950 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,

static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
- struct inode *inode, blkcnt_t *count)
+ struct inode *inode, blkcnt_t *count, bool partial)
{
blkcnt_t diff = 0, release = 0;
block_t avail_user_block_count;
@@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = 0;
}
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+ if (!partial) {
+ spin_unlock(&sbi->stat_lock);
+ goto enospc;
+ }
+
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
diff = *count;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 80d9c4c096f0..53c495651789 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
blkcnt_t reserved;
int ret;

- for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
- blkaddr = f2fs_data_blkaddr(dn);
+ for (i = 0; i < cluster_size; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);

if (i == 0) {
- if (blkaddr == COMPRESS_ADDR)
- continue;
- dn->ofs_in_node += cluster_size;
- goto next;
+ if (blkaddr != COMPRESS_ADDR) {
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+ continue;
}

/*
@@ -3634,20 +3636,20 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
compr_blocks++;
continue;
}
-
- f2fs_set_data_blkaddr(dn, NEW_ADDR);
}

reserved = cluster_size - compr_blocks;
if (!reserved)
goto next;

- ret = inc_valid_block_count(sbi, dn->inode, &reserved);
- if (ret)
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+ if (unlikely(ret))
return ret;

- if (reserved != cluster_size - compr_blocks)
- return -ENOSPC;
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ if (f2fs_data_blkaddr(dn) == NULL_ADDR)
+ f2fs_set_data_blkaddr(dn, NEW_ADDR);
+ }

f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4c8836ded90f..ef5b3848426b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
} else {
blkcnt_t count = 1;

- err = inc_valid_block_count(sbi, inode, &count);
+ err = inc_valid_block_count(sbi, inode, &count, true);
if (err) {
f2fs_put_dnode(&dn);
return err;
--
2.40.1


2024-01-11 06:43:22

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 5/6] f2fs: fix to remove unnecessary f2fs_bug_on() to avoid panic

verify_blkaddr() will trigger a panic once we inject a fault into
f2fs_is_valid_blkaddr(); fix this by removing the unnecessary f2fs_bug_on().

Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/f2fs.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 69e71460a950..ab710bb6d8b3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
- f2fs_bug_on(sbi, 1);
- }
}

static inline bool __is_valid_data_blkaddr(block_t blkaddr)
--
2.40.1


2024-01-11 06:43:50

by Chao Yu

[permalink] [raw]
Subject: [PATCH v4 6/6] f2fs: introduce FAULT_BLKADDR_CONSISTENCE

We will encounter the inconsistent status below when FAULT_BLKADDR type
fault injection is on.

Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
[FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
[FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1

After we inject a fault into f2fs_is_valid_blkaddr() during truncation,
a) it fails to increase @nr_free or @valid_blocks
b) it can cause a blkaddr leak in the truncated dnode
either of which may cause inconsistent status.

This patch separates FAULT_BLKADDR_CONSISTENCE from FAULT_BLKADDR,
and renames FAULT_BLKADDR to FAULT_BLKADDR_VALIDITY,
so that:
a) we can use FAULT_BLKADDR_CONSISTENCE in f2fs_truncate_data_blocks_range()
to simulate the inconsistency issue independently, and then use it to verify
the fsck repair flow.
b) the FAULT_BLKADDR_VALIDITY fault will not cause any inconsistent status,
so we can use it just to check error path handling on the kernel side.

Signed-off-by: Chao Yu <[email protected]>
---
v4:
- rename the macros to FAULT_BLKADDR_CONSISTENCE and FAULT_BLKADDR_VALIDITY,
as suggested by Jaegeuk.
Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
fs/f2fs/checkpoint.c | 19 +++++++---
fs/f2fs/f2fs.h | 5 ++-
fs/f2fs/file.c | 8 +++--
fs/f2fs/super.c | 37 +++++++++----------
6 files changed, 92 insertions(+), 71 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 99fa87a43926..48c135e24eb5 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -701,29 +701,30 @@ Description: Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.

- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR_VALIDITY 0x000040000
+ FAULT_BLKADDR_CONSISTENCE 0x000080000
+ =========================== ===========

What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
Date: January 2023
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index d32c6209685d..32cbfa864f38 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.

- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR_VALIDITY 0x000040000
+ FAULT_BLKADDR_CONSISTENCE 0x000080000
+ =========================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b0597a539fc5..b85820e70f5e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
return exist;
}

-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (time_to_inject(sbi, FAULT_BLKADDR))
- return false;
-
switch (type) {
case META_NAT:
break;
@@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
return true;
}

+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
+ return false;
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
/*
* Readahead CP/NAT/SIT/SSA/POR pages
*/
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ab710bb6d8b3..4481f68d6418 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -60,7 +60,8 @@ enum {
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
FAULT_LOCK_OP,
- FAULT_BLKADDR,
+ FAULT_BLKADDR_VALIDITY,
+ FAULT_BLKADDR_CONSISTENCE,
FAULT_MAX,
};

@@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 53c495651789..0e4c871d6aed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn, NULL_ADDR);

if (__is_valid_data_blkaddr(blkaddr)) {
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))
+ if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
+ continue;
+ if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
continue;
+ }
if (compressed_cluster)
valid_blocks++;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 206d03c82d96..4de5478972b2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION

const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
};

void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
--
2.40.1


2024-01-12 22:15:57

by Daeho Jeong

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v4 2/6] f2fs: compress: fix to cover normal cluster write with cp_rwsem

Reviewed-by: Daeho Jeong <[email protected]>

On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> When we overwrite compressed cluster w/ normal cluster, we should
> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
> will be corrupted if partial blocks were persisted before CP & SPOR,
> due to cluster metadata wasn't updated atomically.
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/compress.c | 20 ++++++++++++++------
> fs/f2fs/data.c | 3 ++-
> 2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 9940b7886e5d..bf4cfab67aec 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> - int _submitted, compr_blocks, ret, i;
> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> + int _submitted, compr_blocks, ret = 0, i;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (compr_blocks < 0)
> return compr_blocks;
>
> + /* overwrite compressed cluster w/ normal cluster */
> + if (compr_blocks > 0)
> + f2fs_lock_op(sbi);
> +
> for (i = 0; i < cc->cluster_size; i++) {
> if (!cc->rpages[i])
> continue;
> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> unlock_page(cc->rpages[i]);
> ret = 0;
> } else if (ret == -EAGAIN) {
> + ret = 0;
> /*
> * for quota file, just redirty left pages to
> * avoid deadlock caused by cluster update race
> * from foreground operation.
> */
> if (IS_NOQUOTA(cc->inode))
> - return 0;
> - ret = 0;
> + goto out;
> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
> goto retry_write;
> }
> - return ret;
> + goto out;
> }
>
> *submitted += _submitted;
> }
>
> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
> +out:
> + if (compr_blocks > 0)
> + f2fs_unlock_op(sbi);
>
> - return 0;
> + f2fs_balance_fs(sbi, true);
> + return ret;
> }
>
> int f2fs_write_multi_pages(struct compress_ctx *cc,
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 81f9e2cc49e2..b171a9980f6a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> .encrypted_page = NULL,
> .submitted = 0,
> .compr_blocks = compr_blocks,
> - .need_lock = LOCK_RETRY,
> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
> .io_type = io_type,
> .io_wbc = wbc,
> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> if (err == -EAGAIN) {
> err = f2fs_do_write_data_page(&fio);
> if (err == -EAGAIN) {
> + f2fs_bug_on(sbi, compr_blocks);
> fio.need_lock = LOCK_REQ;
> err = f2fs_do_write_data_page(&fio);
> }
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-01-12 22:20:34

by Daeho Jeong

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v4 4/6] f2fs: compress: fix to avoid inconsistence between i_blocks and dnode

Reviewed-by: Daeho Jeong <[email protected]>

On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> In reserve_compress_blocks(), we update blkaddrs of dnode in prior to
> inc_valid_block_count(), it may cause inconsistent status between
> i_blocks and blkaddrs once inc_valid_block_count() fails.
>
> To fix this issue, it needs to reverse their invoking order.
>
> Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/data.c | 5 +++--
> fs/f2fs/f2fs.h | 7 ++++++-
> fs/f2fs/file.c | 26 ++++++++++++++------------
> fs/f2fs/segment.c | 2 +-
> 4 files changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index b171a9980f6a..8d2ace723310 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
>
> if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
> return -EPERM;
> - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
> + err = inc_valid_block_count(sbi, dn->inode, &count, true);
> + if (unlikely(err))
> return err;
>
> trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
> @@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
>
> dn->data_blkaddr = f2fs_data_blkaddr(dn);
> if (dn->data_blkaddr == NULL_ADDR) {
> - err = inc_valid_block_count(sbi, dn->inode, &count);
> + err = inc_valid_block_count(sbi, dn->inode, &count, true);
> if (unlikely(err))
> return err;
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 50f3d546ded8..69e71460a950 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
>
> static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
> static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> - struct inode *inode, blkcnt_t *count)
> + struct inode *inode, blkcnt_t *count, bool partial)
> {
> blkcnt_t diff = 0, release = 0;
> block_t avail_user_block_count;
> @@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> avail_user_block_count = 0;
> }
> if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
> + if (!partial) {
> + spin_unlock(&sbi->stat_lock);
> + goto enospc;
> + }
> +
> diff = sbi->total_valid_block_count - avail_user_block_count;
> if (diff > *count)
> diff = *count;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 80d9c4c096f0..53c495651789 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> blkcnt_t reserved;
> int ret;
>
> - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
> - blkaddr = f2fs_data_blkaddr(dn);
> + for (i = 0; i < cluster_size; i++) {
> + blkaddr = data_blkaddr(dn->inode, dn->node_page,
> + dn->ofs_in_node + i);
>
> if (i == 0) {
> - if (blkaddr == COMPRESS_ADDR)
> - continue;
> - dn->ofs_in_node += cluster_size;
> - goto next;
> + if (blkaddr != COMPRESS_ADDR) {
> + dn->ofs_in_node += cluster_size;
> + goto next;
> + }
> + continue;
> }
>
> /*
> @@ -3634,20 +3636,20 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> compr_blocks++;
> continue;
> }
> -
> - f2fs_set_data_blkaddr(dn, NEW_ADDR);
> }
>
> reserved = cluster_size - compr_blocks;
> if (!reserved)
> goto next;
>
> - ret = inc_valid_block_count(sbi, dn->inode, &reserved);
> - if (ret)
> + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
> + if (unlikely(ret))
> return ret;
>
> - if (reserved != cluster_size - compr_blocks)
> - return -ENOSPC;
> + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
> + if (f2fs_data_blkaddr(dn) == NULL_ADDR)
> + f2fs_set_data_blkaddr(dn, NEW_ADDR);
> + }
>
> f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4c8836ded90f..ef5b3848426b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -248,7 +248,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
> } else {
> blkcnt_t count = 1;
>
> - err = inc_valid_block_count(sbi, inode, &count);
> + err = inc_valid_block_count(sbi, inode, &count, true);
> if (err) {
> f2fs_put_dnode(&dn);
> return err;
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-01-12 22:21:12

by Daeho Jeong

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v4 5/6] f2fs: fix to remove unnecessary f2fs_bug_on() to avoid panic

Reviewed-by: Daeho Jeong <[email protected]>

On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> verify_blkaddr() will trigger a panic once we inject a fault into
> f2fs_is_valid_blkaddr(); remove this unnecessary f2fs_bug_on() to fix it.
>
> Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/f2fs.h | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 69e71460a950..ab710bb6d8b3 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type)
> {
> - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
> + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
> f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
> blkaddr, type);
> - f2fs_bug_on(sbi, 1);
> - }
> }
>
> static inline bool __is_valid_data_blkaddr(block_t blkaddr)
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-01-12 22:23:34

by Daeho Jeong

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v4 6/6] f2fs: introduce FAULT_BLKADDR_CONSISTENCE

Reviewed-by: Daeho Jeong <[email protected]>

On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> We will encounter the inconsistent status below when FAULT_BLKADDR type
> fault injection is on.
>
> Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
>
> After we inject a fault into f2fs_is_valid_blkaddr() during truncation,
> a) it fails to increase @nr_free or @valid_blocks
> b) it can cause a blkaddr leak in the truncated dnode
> which may cause an inconsistent status.
>
> This patch separates FAULT_BLKADDR_CONSISTENCE from FAULT_BLKADDR,
> and renames FAULT_BLKADDR to FAULT_BLKADDR_VALIDITY
> so that we can:
> a) use FAULT_BLKADDR_CONSISTENCE in f2fs_truncate_data_blocks_range()
> to simulate the inconsistency issue independently, so that it can verify
> the fsck repair flow.
> b) use FAULT_BLKADDR_VALIDITY, which will not cause any inconsistent status,
> just to check error path handling on the kernel side.
>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v4:
> - rename macro to FAULT_BLKADDR_CONSISTENCE and FAULT_BLKADDR_VALIDITY
> suggested by Jaegeuk.
> Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
> Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
> fs/f2fs/checkpoint.c | 19 +++++++---
> fs/f2fs/f2fs.h | 5 ++-
> fs/f2fs/file.c | 8 +++--
> fs/f2fs/super.c | 37 +++++++++----------
> 6 files changed, 92 insertions(+), 71 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 99fa87a43926..48c135e24eb5 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -701,29 +701,30 @@ Description: Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR_VALIDITY 0x000040000
> + FAULT_BLKADDR_CONSISTENCE 0x000080000
> + =========================== ===========
>
> What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
> Date: January 2023
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index d32c6209685d..32cbfa864f38 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR_VALIDITY 0x000040000
> + FAULT_BLKADDR_CONSISTENCE 0x000080000
> + =========================== ===========
> mode=%s Control block allocation mode which supports "adaptive"
> and "lfs". In "lfs" mode, there should be no random
> writes towards main area.
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index b0597a539fc5..b85820e70f5e 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
> return exist;
> }
>
> -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type)
> {
> - if (time_to_inject(sbi, FAULT_BLKADDR))
> - return false;
> -
> switch (type) {
> case META_NAT:
> break;
> @@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> return true;
> }
>
> +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
> + return false;
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> /*
> * Readahead CP/NAT/SIT/SSA/POR pages
> */
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ab710bb6d8b3..4481f68d6418 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -60,7 +60,8 @@ enum {
> FAULT_SLAB_ALLOC,
> FAULT_DQUOT_INIT,
> FAULT_LOCK_OP,
> - FAULT_BLKADDR,
> + FAULT_BLKADDR_VALIDITY,
> + FAULT_BLKADDR_CONSISTENCE,
> FAULT_MAX,
> };
>
> @@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
> struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
> bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type);
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type);
> int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
> int type, bool sync);
> void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 53c495651789..0e4c871d6aed 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
> f2fs_set_data_blkaddr(dn, NULL_ADDR);
>
> if (__is_valid_data_blkaddr(blkaddr)) {
> - if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
> - DATA_GENERIC_ENHANCE))
> + if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
> + continue;
> + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
> + DATA_GENERIC_ENHANCE)) {
> + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
> continue;
> + }
> if (compressed_cluster)
> valid_blocks++;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 206d03c82d96..4de5478972b2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
> #ifdef CONFIG_F2FS_FAULT_INJECTION
>
> const char *f2fs_fault_name[FAULT_MAX] = {
> - [FAULT_KMALLOC] = "kmalloc",
> - [FAULT_KVMALLOC] = "kvmalloc",
> - [FAULT_PAGE_ALLOC] = "page alloc",
> - [FAULT_PAGE_GET] = "page get",
> - [FAULT_ALLOC_NID] = "alloc nid",
> - [FAULT_ORPHAN] = "orphan",
> - [FAULT_BLOCK] = "no more block",
> - [FAULT_DIR_DEPTH] = "too big dir depth",
> - [FAULT_EVICT_INODE] = "evict_inode fail",
> - [FAULT_TRUNCATE] = "truncate fail",
> - [FAULT_READ_IO] = "read IO error",
> - [FAULT_CHECKPOINT] = "checkpoint error",
> - [FAULT_DISCARD] = "discard error",
> - [FAULT_WRITE_IO] = "write IO error",
> - [FAULT_SLAB_ALLOC] = "slab alloc",
> - [FAULT_DQUOT_INIT] = "dquot initialize",
> - [FAULT_LOCK_OP] = "lock_op",
> - [FAULT_BLKADDR] = "invalid blkaddr",
> + [FAULT_KMALLOC] = "kmalloc",
> + [FAULT_KVMALLOC] = "kvmalloc",
> + [FAULT_PAGE_ALLOC] = "page alloc",
> + [FAULT_PAGE_GET] = "page get",
> + [FAULT_ALLOC_NID] = "alloc nid",
> + [FAULT_ORPHAN] = "orphan",
> + [FAULT_BLOCK] = "no more block",
> + [FAULT_DIR_DEPTH] = "too big dir depth",
> + [FAULT_EVICT_INODE] = "evict_inode fail",
> + [FAULT_TRUNCATE] = "truncate fail",
> + [FAULT_READ_IO] = "read IO error",
> + [FAULT_CHECKPOINT] = "checkpoint error",
> + [FAULT_DISCARD] = "discard error",
> + [FAULT_WRITE_IO] = "write IO error",
> + [FAULT_SLAB_ALLOC] = "slab alloc",
> + [FAULT_DQUOT_INIT] = "dquot initialize",
> + [FAULT_LOCK_OP] = "lock_op",
> + [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
> + [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
> };
>
> void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-01-13 01:39:56

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v4 2/6] f2fs: compress: fix to cover normal cluster write with cp_rwsem

Cleaned up a bit:

--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1443,13 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
}

static int f2fs_write_raw_pages(struct compress_ctx *cc,
- int *submitted,
+ int *submitted_p,
struct writeback_control *wbc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
- int _submitted, compr_blocks, ret = 0, i;
+ int submitted, compr_blocks, i;
+ int ret = 0;

compr_blocks = f2fs_compressed_blocks(cc);

@@ -1492,7 +1493,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (!clear_page_dirty_for_io(cc->rpages[i]))
goto continue_unlock;

- ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+ ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
NULL, NULL, wbc, io_type,
compr_blocks, false);
if (ret) {
@@ -1514,7 +1515,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
goto out;
}

- *submitted += _submitted;
+ *submitted_p += submitted;
}

out:

On 01/11, Chao Yu wrote:
> When we overwrite a compressed cluster w/ a normal cluster, we should
> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
> will be corrupted if partial blocks were persisted before CP & SPOR,
> because cluster metadata wasn't updated atomically.
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/compress.c | 20 ++++++++++++++------
> fs/f2fs/data.c | 3 ++-
> 2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 9940b7886e5d..bf4cfab67aec 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> - int _submitted, compr_blocks, ret, i;
> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> + int _submitted, compr_blocks, ret = 0, i;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (compr_blocks < 0)
> return compr_blocks;
>
> + /* overwrite compressed cluster w/ normal cluster */
> + if (compr_blocks > 0)
> + f2fs_lock_op(sbi);
> +
> for (i = 0; i < cc->cluster_size; i++) {
> if (!cc->rpages[i])
> continue;
> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> unlock_page(cc->rpages[i]);
> ret = 0;
> } else if (ret == -EAGAIN) {
> + ret = 0;
> /*
> * for quota file, just redirty left pages to
> * avoid deadlock caused by cluster update race
> * from foreground operation.
> */
> if (IS_NOQUOTA(cc->inode))
> - return 0;
> - ret = 0;
> + goto out;
> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
> goto retry_write;
> }
> - return ret;
> + goto out;
> }
>
> *submitted += _submitted;
> }
>
> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
> +out:
> + if (compr_blocks > 0)
> + f2fs_unlock_op(sbi);
>
> - return 0;
> + f2fs_balance_fs(sbi, true);
> + return ret;
> }
>
> int f2fs_write_multi_pages(struct compress_ctx *cc,
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 81f9e2cc49e2..b171a9980f6a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> .encrypted_page = NULL,
> .submitted = 0,
> .compr_blocks = compr_blocks,
> - .need_lock = LOCK_RETRY,
> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
> .io_type = io_type,
> .io_wbc = wbc,
> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> if (err == -EAGAIN) {
> err = f2fs_do_write_data_page(&fio);
> if (err == -EAGAIN) {
> + f2fs_bug_on(sbi, compr_blocks);
> fio.need_lock = LOCK_REQ;
> err = f2fs_do_write_data_page(&fio);
> }
> --
> 2.40.1

2024-01-13 02:34:07

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v4 2/6] f2fs: compress: fix to cover normal cluster write with cp_rwsem

Thanks, let me resend v5 w/ below cleanups.

On 2024/1/13 9:39, Jaegeuk Kim wrote:
> Cleaned up a bit:
>
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1443,13 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> }
>
> static int f2fs_write_raw_pages(struct compress_ctx *cc,
> - int *submitted,
> + int *submitted_p,
> struct writeback_control *wbc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> - int _submitted, compr_blocks, ret = 0, i;
> + int submitted, compr_blocks, i;
> + int ret = 0;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1492,7 +1493,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (!clear_page_dirty_for_io(cc->rpages[i]))
> goto continue_unlock;
>
> - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
> + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
> NULL, NULL, wbc, io_type,
> compr_blocks, false);
> if (ret) {
> @@ -1514,7 +1515,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> goto out;
> }
>
> - *submitted += _submitted;
> + *submitted_p += submitted;
> }
>
> out:
>
> On 01/11, Chao Yu wrote:
>> When we overwrite a compressed cluster w/ a normal cluster, we should
>> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
>> will be corrupted if partial blocks were persisted before CP & SPOR,
>> because cluster metadata wasn't updated atomically.
>>
>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/compress.c | 20 ++++++++++++++------
>> fs/f2fs/data.c | 3 ++-
>> 2 files changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
>> index 9940b7886e5d..bf4cfab67aec 100644
>> --- a/fs/f2fs/compress.c
>> +++ b/fs/f2fs/compress.c
>> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> enum iostat_type io_type)
>> {
>> struct address_space *mapping = cc->inode->i_mapping;
>> - int _submitted, compr_blocks, ret, i;
>> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
>> + int _submitted, compr_blocks, ret = 0, i;
>>
>> compr_blocks = f2fs_compressed_blocks(cc);
>>
>> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> if (compr_blocks < 0)
>> return compr_blocks;
>>
>> + /* overwrite compressed cluster w/ normal cluster */
>> + if (compr_blocks > 0)
>> + f2fs_lock_op(sbi);
>> +
>> for (i = 0; i < cc->cluster_size; i++) {
>> if (!cc->rpages[i])
>> continue;
>> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> unlock_page(cc->rpages[i]);
>> ret = 0;
>> } else if (ret == -EAGAIN) {
>> + ret = 0;
>> /*
>> * for quota file, just redirty left pages to
>> * avoid deadlock caused by cluster update race
>> * from foreground operation.
>> */
>> if (IS_NOQUOTA(cc->inode))
>> - return 0;
>> - ret = 0;
>> + goto out;
>> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
>> goto retry_write;
>> }
>> - return ret;
>> + goto out;
>> }
>>
>> *submitted += _submitted;
>> }
>>
>> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
>> +out:
>> + if (compr_blocks > 0)
>> + f2fs_unlock_op(sbi);
>>
>> - return 0;
>> + f2fs_balance_fs(sbi, true);
>> + return ret;
>> }
>>
>> int f2fs_write_multi_pages(struct compress_ctx *cc,
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 81f9e2cc49e2..b171a9980f6a 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
>> .encrypted_page = NULL,
>> .submitted = 0,
>> .compr_blocks = compr_blocks,
>> - .need_lock = LOCK_RETRY,
>> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
>> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
>> .io_type = io_type,
>> .io_wbc = wbc,
>> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
>> if (err == -EAGAIN) {
>> err = f2fs_do_write_data_page(&fio);
>> if (err == -EAGAIN) {
>> + f2fs_bug_on(sbi, compr_blocks);
>> fio.need_lock = LOCK_REQ;
>> err = f2fs_do_write_data_page(&fio);
>> }
>> --
>> 2.40.1