If a data block in a compressed cluster is not persisted together with
its metadata during checkpoint, the data may be corrupted after SPOR
(sudden power-off recovery). Let's guarantee that compressed pages are
written by checkpoint.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 4 +++-
fs/f2fs/data.c | 17 +++++++++--------
fs/f2fs/f2fs.h | 4 +++-
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index c5a4364c4482..9940b7886e5d 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
struct f2fs_sb_info *sbi = bio->bi_private;
struct compress_io_ctx *cic =
(struct compress_io_ctx *)page_private(page);
+ enum count_type type = WB_DATA_TYPE(page,
+ f2fs_is_compressed_page(page));
int i;
if (unlikely(bio->bi_status))
@@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
f2fs_compress_free_page(page);
- dec_page_count(sbi, F2FS_WB_DATA);
+ dec_page_count(sbi, type);
if (atomic_dec_return(&cic->pending_pages))
return;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dce8defdf4c7..81f9e2cc49e2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}
-static bool __is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
@@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
S_ISDIR(inode->i_mode))
return true;
- if (f2fs_is_compressed_page(page))
- return false;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
page_private_gcing(page))
return true;
@@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
- enum count_type type = WB_DATA_TYPE(page);
+ enum count_type type = WB_DATA_TYPE(page, false);
if (page_private_dummy(page)) {
clear_page_private_dummy(page);
@@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(page) : WB_DATA_TYPE(fio->page));
+ __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
- inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
+ enum count_type type;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
/* set submitted = true as a return value */
fio->submitted = 1;
- inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+ type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ inc_page_count(sbi, type);
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
@@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
if (F2FS_IO_ALIGNED(sbi) &&
(fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page,
+ fio->compressed_page));
fio->retry = 1;
goto skip;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..50f3d546ded8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(p, f) \
+ (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
+bool f2fs_is_cp_guaranteed(struct page *page);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
--
2.40.1
When we overwrite a compressed cluster with a normal cluster, we should
not unlock cp_rwsem during f2fs_write_raw_pages(); otherwise data will
be corrupted if only part of the blocks were persisted before CP & SPOR,
because the cluster metadata wasn't updated atomically.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 20 ++++++++++++++------
fs/f2fs/data.c | 3 ++-
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 9940b7886e5d..bf4cfab67aec 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret, i;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ int _submitted, compr_blocks, ret = 0, i;
compr_blocks = f2fs_compressed_blocks(cc);
@@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (compr_blocks < 0)
return compr_blocks;
+ /* overwrite compressed cluster w/ normal cluster */
+ if (compr_blocks > 0)
+ f2fs_lock_op(sbi);
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
@@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
unlock_page(cc->rpages[i]);
ret = 0;
} else if (ret == -EAGAIN) {
+ ret = 0;
/*
* for quota file, just redirty left pages to
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
if (IS_NOQUOTA(cc->inode))
- return 0;
- ret = 0;
+ goto out;
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry_write;
}
- return ret;
+ goto out;
}
*submitted += _submitted;
}
- f2fs_balance_fs(F2FS_M_SB(mapping), true);
+out:
+ if (compr_blocks > 0)
+ f2fs_unlock_op(sbi);
- return 0;
+ f2fs_balance_fs(sbi, true);
+ return ret;
}
int f2fs_write_multi_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 81f9e2cc49e2..b171a9980f6a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
- .need_lock = LOCK_RETRY,
+ .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
.post_read = f2fs_post_read_required(inode) ? 1 : 0,
.io_type = io_type,
.io_wbc = wbc,
@@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (err == -EAGAIN) {
err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
+ f2fs_bug_on(sbi, compr_blocks);
fio.need_lock = LOCK_REQ;
err = f2fs_do_write_data_page(&fio);
}
--
2.40.1
From: Sheng Yong <[email protected]>
A compressed cluster may not have been released because
release_compress_blocks() can fail; fix reserve_compress_blocks() to
handle such still-reserved compressed clusters correctly.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Sheng Yong <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
v4:
- merge check condition suggested by Daeho.
fs/f2fs/file.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 026d05a7edd8..80d9c4c096f0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3624,7 +3624,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
goto next;
}
- if (__is_valid_data_blkaddr(blkaddr)) {
+ /*
+ * compressed cluster was not released due to it
+ * fails in release_compress_blocks(), so NEW_ADDR
+ * is a possible case.
+ */
+ if (blkaddr == NEW_ADDR ||
+ __is_valid_data_blkaddr(blkaddr)) {
compr_blocks++;
continue;
}
@@ -3633,6 +3639,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
}
reserved = cluster_size - compr_blocks;
+ if (!reserved)
+ goto next;
+
ret = inc_valid_block_count(sbi, dn->inode, &reserved);
if (ret)
return ret;
--
2.40.1
In reserve_compress_blocks(), we update the blkaddrs of the dnode prior to
calling inc_valid_block_count(), which may cause an inconsistency between
i_blocks and blkaddrs once inc_valid_block_count() fails.
To fix this issue, reverse their invocation order.
Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/data.c | 5 +++--
fs/f2fs/f2fs.h | 7 ++++++-
fs/f2fs/file.c | 26 ++++++++++++++------------
fs/f2fs/segment.c | 2 +-
4 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b171a9980f6a..8d2ace723310 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
+ if (unlikely(err))
return err;
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
@@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
dn->data_blkaddr = f2fs_data_blkaddr(dn);
if (dn->data_blkaddr == NULL_ADDR) {
- err = inc_valid_block_count(sbi, dn->inode, &count);
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
if (unlikely(err))
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 50f3d546ded8..69e71460a950 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
- struct inode *inode, blkcnt_t *count)
+ struct inode *inode, blkcnt_t *count, bool partial)
{
blkcnt_t diff = 0, release = 0;
block_t avail_user_block_count;
@@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = 0;
}
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+ if (!partial) {
+ spin_unlock(&sbi->stat_lock);
+ goto enospc;
+ }
+
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
diff = *count;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 80d9c4c096f0..53c495651789 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
blkcnt_t reserved;
int ret;
- for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
- blkaddr = f2fs_data_blkaddr(dn);
+ for (i = 0; i < cluster_size; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
if (i == 0) {
- if (blkaddr == COMPRESS_ADDR)
- continue;
- dn->ofs_in_node += cluster_size;
- goto next;
+ if (blkaddr != COMPRESS_ADDR) {
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+ continue;
}
/*
@@ -3634,20 +3636,20 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
compr_blocks++;
continue;
}
-
- f2fs_set_data_blkaddr(dn, NEW_ADDR);
}
reserved = cluster_size - compr_blocks;
if (!reserved)
goto next;
- ret = inc_valid_block_count(sbi, dn->inode, &reserved);
- if (ret)
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+ if (unlikely(ret))
return ret;
- if (reserved != cluster_size - compr_blocks)
- return -ENOSPC;
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ if (f2fs_data_blkaddr(dn) == NULL_ADDR)
+ f2fs_set_data_blkaddr(dn, NEW_ADDR);
+ }
f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4c8836ded90f..ef5b3848426b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
} else {
blkcnt_t count = 1;
- err = inc_valid_block_count(sbi, inode, &count);
+ err = inc_valid_block_count(sbi, inode, &count, true);
if (err) {
f2fs_put_dnode(&dn);
return err;
--
2.40.1
verify_blkaddr() will trigger a panic once we inject a fault into
f2fs_is_valid_blkaddr(); fix this by removing the unnecessary f2fs_bug_on().
Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/f2fs.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 69e71460a950..ab710bb6d8b3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
- f2fs_bug_on(sbi, 1);
- }
}
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
--
2.40.1
We will encounter the inconsistent status below when FAULT_BLKADDR type
fault injection is on.
Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
[FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
[FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
After we inject a fault into f2fs_is_valid_blkaddr() during truncation,
a) it fails to increase @nr_free or @valid_blocks
b) it can cause a blkaddr leak in the truncated dnode
which may cause an inconsistent status.
This patch separates FAULT_BLKADDR_CONSISTENCE from FAULT_BLKADDR,
and renames FAULT_BLKADDR to FAULT_BLKADDR_VALIDITY,
so that:
a) we can use FAULT_BLKADDR_CONSISTENCE in f2fs_truncate_data_blocks_range()
to simulate the inconsistency issue independently, which lets us verify the
fsck repair flow.
b) the FAULT_BLKADDR_VALIDITY fault will not cause any inconsistent status, so
we can just use it to check error path handling on the kernel side.
Signed-off-by: Chao Yu <[email protected]>
---
v4:
- rename macro to FAULT_BLKADDR_CONSISTENCE and FAULT_BLKADDR_VALIDITY
suggested by Jaegeuk.
Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
fs/f2fs/checkpoint.c | 19 +++++++---
fs/f2fs/f2fs.h | 5 ++-
fs/f2fs/file.c | 8 +++--
fs/f2fs/super.c | 37 +++++++++----------
6 files changed, 92 insertions(+), 71 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 99fa87a43926..48c135e24eb5 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -701,29 +701,30 @@ Description: Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR_VALIDITY 0x000040000
+ FAULT_BLKADDR_CONSISTENCE 0x000080000
+ =========================== ===========
What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
Date: January 2023
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index d32c6209685d..32cbfa864f38 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR_VALIDITY 0x000040000
+ FAULT_BLKADDR_CONSISTENCE 0x000080000
+ =========================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b0597a539fc5..b85820e70f5e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
return exist;
}
-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (time_to_inject(sbi, FAULT_BLKADDR))
- return false;
-
switch (type) {
case META_NAT:
break;
@@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
return true;
}
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
+ return false;
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
/*
* Readahead CP/NAT/SIT/SSA/POR pages
*/
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ab710bb6d8b3..4481f68d6418 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -60,7 +60,8 @@ enum {
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
FAULT_LOCK_OP,
- FAULT_BLKADDR,
+ FAULT_BLKADDR_VALIDITY,
+ FAULT_BLKADDR_CONSISTENCE,
FAULT_MAX,
};
@@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 53c495651789..0e4c871d6aed 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn, NULL_ADDR);
if (__is_valid_data_blkaddr(blkaddr)) {
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))
+ if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
+ continue;
+ if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
continue;
+ }
if (compressed_cluster)
valid_blocks++;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 206d03c82d96..4de5478972b2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
--
2.40.1
Reviewed-by: Daeho Jeong <[email protected]>
On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> When we overwrite compressed cluster w/ normal cluster, we should
> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
> will be corrupted if partial blocks were persisted before CP & SPOR,
> due to cluster metadata wasn't updated atomically.
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/compress.c | 20 ++++++++++++++------
> fs/f2fs/data.c | 3 ++-
> 2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 9940b7886e5d..bf4cfab67aec 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> - int _submitted, compr_blocks, ret, i;
> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> + int _submitted, compr_blocks, ret = 0, i;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (compr_blocks < 0)
> return compr_blocks;
>
> + /* overwrite compressed cluster w/ normal cluster */
> + if (compr_blocks > 0)
> + f2fs_lock_op(sbi);
> +
> for (i = 0; i < cc->cluster_size; i++) {
> if (!cc->rpages[i])
> continue;
> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> unlock_page(cc->rpages[i]);
> ret = 0;
> } else if (ret == -EAGAIN) {
> + ret = 0;
> /*
> * for quota file, just redirty left pages to
> * avoid deadlock caused by cluster update race
> * from foreground operation.
> */
> if (IS_NOQUOTA(cc->inode))
> - return 0;
> - ret = 0;
> + goto out;
> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
> goto retry_write;
> }
> - return ret;
> + goto out;
> }
>
> *submitted += _submitted;
> }
>
> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
> +out:
> + if (compr_blocks > 0)
> + f2fs_unlock_op(sbi);
>
> - return 0;
> + f2fs_balance_fs(sbi, true);
> + return ret;
> }
>
> int f2fs_write_multi_pages(struct compress_ctx *cc,
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 81f9e2cc49e2..b171a9980f6a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> .encrypted_page = NULL,
> .submitted = 0,
> .compr_blocks = compr_blocks,
> - .need_lock = LOCK_RETRY,
> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
> .io_type = io_type,
> .io_wbc = wbc,
> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> if (err == -EAGAIN) {
> err = f2fs_do_write_data_page(&fio);
> if (err == -EAGAIN) {
> + f2fs_bug_on(sbi, compr_blocks);
> fio.need_lock = LOCK_REQ;
> err = f2fs_do_write_data_page(&fio);
> }
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Reviewed-by: Daeho Jeong <[email protected]>
On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> In reserve_compress_blocks(), we update blkaddrs of dnode in prior to
> inc_valid_block_count(), it may cause inconsistent status bewteen
> i_blocks and blkaddrs once inc_valid_block_count() fails.
>
> To fix this issue, it needs to reverse their invoking order.
>
> Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/data.c | 5 +++--
> fs/f2fs/f2fs.h | 7 ++++++-
> fs/f2fs/file.c | 26 ++++++++++++++------------
> fs/f2fs/segment.c | 2 +-
> 4 files changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index b171a9980f6a..8d2ace723310 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
>
> if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
> return -EPERM;
> - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
> + err = inc_valid_block_count(sbi, dn->inode, &count, true);
> + if (unlikely(err))
> return err;
>
> trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
> @@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
>
> dn->data_blkaddr = f2fs_data_blkaddr(dn);
> if (dn->data_blkaddr == NULL_ADDR) {
> - err = inc_valid_block_count(sbi, dn->inode, &count);
> + err = inc_valid_block_count(sbi, dn->inode, &count, true);
> if (unlikely(err))
> return err;
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 50f3d546ded8..69e71460a950 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
>
> static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
> static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> - struct inode *inode, blkcnt_t *count)
> + struct inode *inode, blkcnt_t *count, bool partial)
> {
> blkcnt_t diff = 0, release = 0;
> block_t avail_user_block_count;
> @@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> avail_user_block_count = 0;
> }
> if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
> + if (!partial) {
> + spin_unlock(&sbi->stat_lock);
> + goto enospc;
> + }
> +
> diff = sbi->total_valid_block_count - avail_user_block_count;
> if (diff > *count)
> diff = *count;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 80d9c4c096f0..53c495651789 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> blkcnt_t reserved;
> int ret;
>
> - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
> - blkaddr = f2fs_data_blkaddr(dn);
> + for (i = 0; i < cluster_size; i++) {
> + blkaddr = data_blkaddr(dn->inode, dn->node_page,
> + dn->ofs_in_node + i);
>
> if (i == 0) {
> - if (blkaddr == COMPRESS_ADDR)
> - continue;
> - dn->ofs_in_node += cluster_size;
> - goto next;
> + if (blkaddr != COMPRESS_ADDR) {
> + dn->ofs_in_node += cluster_size;
> + goto next;
> + }
> + continue;
> }
>
> /*
> @@ -3634,20 +3636,20 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> compr_blocks++;
> continue;
> }
> -
> - f2fs_set_data_blkaddr(dn, NEW_ADDR);
> }
>
> reserved = cluster_size - compr_blocks;
> if (!reserved)
> goto next;
>
> - ret = inc_valid_block_count(sbi, dn->inode, &reserved);
> - if (ret)
> + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
> + if (unlikely(ret))
> return ret;
>
> - if (reserved != cluster_size - compr_blocks)
> - return -ENOSPC;
> + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
> + if (f2fs_data_blkaddr(dn) == NULL_ADDR)
> + f2fs_set_data_blkaddr(dn, NEW_ADDR);
> + }
>
> f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4c8836ded90f..ef5b3848426b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -248,7 +248,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
> } else {
> blkcnt_t count = 1;
>
> - err = inc_valid_block_count(sbi, inode, &count);
> + err = inc_valid_block_count(sbi, inode, &count, true);
> if (err) {
> f2fs_put_dnode(&dn);
> return err;
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Reviewed-by: Daeho Jeong <[email protected]>
On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> verify_blkaddr() will trigger panic once we inject fault into
> f2fs_is_valid_blkaddr(), fix to remove this unnecessary f2fs_bug_on().
>
> Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/f2fs.h | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 69e71460a950..ab710bb6d8b3 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type)
> {
> - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
> + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
> f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
> blkaddr, type);
> - f2fs_bug_on(sbi, 1);
> - }
> }
>
> static inline bool __is_valid_data_blkaddr(block_t blkaddr)
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Reviewed-by: Daeho Jeong <[email protected]>
On Wed, Jan 10, 2024 at 10:43 PM Chao Yu <[email protected]> wrote:
>
> We will encounter the inconsistent status below when FAULT_BLKADDR type
> fault injection is on.
>
> Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
>
> After we inject a fault into f2fs_is_valid_blkaddr() during truncation,
> a) it fails to increase @nr_free or @valid_blocks
> b) it can cause a blkaddr leak in the truncated dnode
> which may cause inconsistent status.
>
> This patch separates FAULT_BLKADDR_CONSISTENCE from FAULT_BLKADDR,
> and renames FAULT_BLKADDR to FAULT_BLKADDR_VALIDITY
> so that we can:
> a) use FAULT_BLKADDR_CONSISTENCE in f2fs_truncate_data_blocks_range()
> to simulate an inconsistency issue independently, so that it can verify
> the fsck repair flow.
> b) rely on FAULT_BLKADDR_VALIDITY not causing any inconsistent status,
> so we can use it just to check error path handling on the kernel side.
>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v4:
> - rename macro to FAULT_BLKADDR_CONSISTENCE and FAULT_BLKADDR_VALIDITY
> suggested by Jaegeuk.
> Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
> Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
> fs/f2fs/checkpoint.c | 19 +++++++---
> fs/f2fs/f2fs.h | 5 ++-
> fs/f2fs/file.c | 8 +++--
> fs/f2fs/super.c | 37 +++++++++----------
> 6 files changed, 92 insertions(+), 71 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 99fa87a43926..48c135e24eb5 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -701,29 +701,30 @@ Description: Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR_VALIDITY 0x000040000
> + FAULT_BLKADDR_CONSISTENCE 0x000080000
> + =========================== ===========
>
> What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
> Date: January 2023
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index d32c6209685d..32cbfa864f38 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR_VALIDITY 0x000040000
> + FAULT_BLKADDR_CONSISTENCE 0x000080000
> + =========================== ===========
> mode=%s Control block allocation mode which supports "adaptive"
> and "lfs". In "lfs" mode, there should be no random
> writes towards main area.
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index b0597a539fc5..b85820e70f5e 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
> return exist;
> }
>
> -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type)
> {
> - if (time_to_inject(sbi, FAULT_BLKADDR))
> - return false;
> -
> switch (type) {
> case META_NAT:
> break;
> @@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> return true;
> }
>
> +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
> + return false;
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> /*
> * Readahead CP/NAT/SIT/SSA/POR pages
> */
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ab710bb6d8b3..4481f68d6418 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -60,7 +60,8 @@ enum {
> FAULT_SLAB_ALLOC,
> FAULT_DQUOT_INIT,
> FAULT_LOCK_OP,
> - FAULT_BLKADDR,
> + FAULT_BLKADDR_VALIDITY,
> + FAULT_BLKADDR_CONSISTENCE,
> FAULT_MAX,
> };
>
> @@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
> struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
> bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type);
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type);
> int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
> int type, bool sync);
> void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 53c495651789..0e4c871d6aed 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
> f2fs_set_data_blkaddr(dn, NULL_ADDR);
>
> if (__is_valid_data_blkaddr(blkaddr)) {
> - if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
> - DATA_GENERIC_ENHANCE))
> + if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
> + continue;
> + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
> + DATA_GENERIC_ENHANCE)) {
> + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
> continue;
> + }
> if (compressed_cluster)
> valid_blocks++;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 206d03c82d96..4de5478972b2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
> #ifdef CONFIG_F2FS_FAULT_INJECTION
>
> const char *f2fs_fault_name[FAULT_MAX] = {
> - [FAULT_KMALLOC] = "kmalloc",
> - [FAULT_KVMALLOC] = "kvmalloc",
> - [FAULT_PAGE_ALLOC] = "page alloc",
> - [FAULT_PAGE_GET] = "page get",
> - [FAULT_ALLOC_NID] = "alloc nid",
> - [FAULT_ORPHAN] = "orphan",
> - [FAULT_BLOCK] = "no more block",
> - [FAULT_DIR_DEPTH] = "too big dir depth",
> - [FAULT_EVICT_INODE] = "evict_inode fail",
> - [FAULT_TRUNCATE] = "truncate fail",
> - [FAULT_READ_IO] = "read IO error",
> - [FAULT_CHECKPOINT] = "checkpoint error",
> - [FAULT_DISCARD] = "discard error",
> - [FAULT_WRITE_IO] = "write IO error",
> - [FAULT_SLAB_ALLOC] = "slab alloc",
> - [FAULT_DQUOT_INIT] = "dquot initialize",
> - [FAULT_LOCK_OP] = "lock_op",
> - [FAULT_BLKADDR] = "invalid blkaddr",
> + [FAULT_KMALLOC] = "kmalloc",
> + [FAULT_KVMALLOC] = "kvmalloc",
> + [FAULT_PAGE_ALLOC] = "page alloc",
> + [FAULT_PAGE_GET] = "page get",
> + [FAULT_ALLOC_NID] = "alloc nid",
> + [FAULT_ORPHAN] = "orphan",
> + [FAULT_BLOCK] = "no more block",
> + [FAULT_DIR_DEPTH] = "too big dir depth",
> + [FAULT_EVICT_INODE] = "evict_inode fail",
> + [FAULT_TRUNCATE] = "truncate fail",
> + [FAULT_READ_IO] = "read IO error",
> + [FAULT_CHECKPOINT] = "checkpoint error",
> + [FAULT_DISCARD] = "discard error",
> + [FAULT_WRITE_IO] = "write IO error",
> + [FAULT_SLAB_ALLOC] = "slab alloc",
> + [FAULT_DQUOT_INIT] = "dquot initialize",
> + [FAULT_LOCK_OP] = "lock_op",
> + [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
> + [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
> };
>
> void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Cleaned up a bit:
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1443,13 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
- int *submitted,
+ int *submitted_p,
struct writeback_control *wbc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
- int _submitted, compr_blocks, ret = 0, i;
+ int submitted, compr_blocks, i;
+ int ret = 0;
compr_blocks = f2fs_compressed_blocks(cc);
@@ -1492,7 +1493,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (!clear_page_dirty_for_io(cc->rpages[i]))
goto continue_unlock;
- ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+ ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
NULL, NULL, wbc, io_type,
compr_blocks, false);
if (ret) {
@@ -1514,7 +1515,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
goto out;
}
- *submitted += _submitted;
+ *submitted_p += submitted;
}
out:
On 01/11, Chao Yu wrote:
> When we overwrite compressed cluster w/ normal cluster, we should
> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
> will be corrupted if partial blocks were persisted before CP & SPOR,
> because cluster metadata wasn't updated atomically.
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/compress.c | 20 ++++++++++++++------
> fs/f2fs/data.c | 3 ++-
> 2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 9940b7886e5d..bf4cfab67aec 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> - int _submitted, compr_blocks, ret, i;
> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> + int _submitted, compr_blocks, ret = 0, i;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (compr_blocks < 0)
> return compr_blocks;
>
> + /* overwrite compressed cluster w/ normal cluster */
> + if (compr_blocks > 0)
> + f2fs_lock_op(sbi);
> +
> for (i = 0; i < cc->cluster_size; i++) {
> if (!cc->rpages[i])
> continue;
> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> unlock_page(cc->rpages[i]);
> ret = 0;
> } else if (ret == -EAGAIN) {
> + ret = 0;
> /*
> * for quota file, just redirty left pages to
> * avoid deadlock caused by cluster update race
> * from foreground operation.
> */
> if (IS_NOQUOTA(cc->inode))
> - return 0;
> - ret = 0;
> + goto out;
> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
> goto retry_write;
> }
> - return ret;
> + goto out;
> }
>
> *submitted += _submitted;
> }
>
> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
> +out:
> + if (compr_blocks > 0)
> + f2fs_unlock_op(sbi);
>
> - return 0;
> + f2fs_balance_fs(sbi, true);
> + return ret;
> }
>
> int f2fs_write_multi_pages(struct compress_ctx *cc,
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 81f9e2cc49e2..b171a9980f6a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> .encrypted_page = NULL,
> .submitted = 0,
> .compr_blocks = compr_blocks,
> - .need_lock = LOCK_RETRY,
> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
> .io_type = io_type,
> .io_wbc = wbc,
> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> if (err == -EAGAIN) {
> err = f2fs_do_write_data_page(&fio);
> if (err == -EAGAIN) {
> + f2fs_bug_on(sbi, compr_blocks);
> fio.need_lock = LOCK_REQ;
> err = f2fs_do_write_data_page(&fio);
> }
> --
> 2.40.1
Thanks, let me resend v5 w/ below cleanups.
On 2024/1/13 9:39, Jaegeuk Kim wrote:
> Cleaned up a bit:
>
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1443,13 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> }
>
> static int f2fs_write_raw_pages(struct compress_ctx *cc,
> - int *submitted,
> + int *submitted_p,
> struct writeback_control *wbc,
> enum iostat_type io_type)
> {
> struct address_space *mapping = cc->inode->i_mapping;
> struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
> - int _submitted, compr_blocks, ret = 0, i;
> + int submitted, compr_blocks, i;
> + int ret = 0;
>
> compr_blocks = f2fs_compressed_blocks(cc);
>
> @@ -1492,7 +1493,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> if (!clear_page_dirty_for_io(cc->rpages[i]))
> goto continue_unlock;
>
> - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
> + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
> NULL, NULL, wbc, io_type,
> compr_blocks, false);
> if (ret) {
> @@ -1514,7 +1515,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
> goto out;
> }
>
> - *submitted += _submitted;
> + *submitted_p += submitted;
> }
>
> out:
>
> On 01/11, Chao Yu wrote:
>> When we overwrite compressed cluster w/ normal cluster, we should
>> not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data
>> will be corrupted if partial blocks were persisted before CP & SPOR,
>> because cluster metadata wasn't updated atomically.
>>
>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/compress.c | 20 ++++++++++++++------
>> fs/f2fs/data.c | 3 ++-
>> 2 files changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
>> index 9940b7886e5d..bf4cfab67aec 100644
>> --- a/fs/f2fs/compress.c
>> +++ b/fs/f2fs/compress.c
>> @@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> enum iostat_type io_type)
>> {
>> struct address_space *mapping = cc->inode->i_mapping;
>> - int _submitted, compr_blocks, ret, i;
>> + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
>> + int _submitted, compr_blocks, ret = 0, i;
>>
>> compr_blocks = f2fs_compressed_blocks(cc);
>>
>> @@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> if (compr_blocks < 0)
>> return compr_blocks;
>>
>> + /* overwrite compressed cluster w/ normal cluster */
>> + if (compr_blocks > 0)
>> + f2fs_lock_op(sbi);
>> +
>> for (i = 0; i < cc->cluster_size; i++) {
>> if (!cc->rpages[i])
>> continue;
>> @@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
>> unlock_page(cc->rpages[i]);
>> ret = 0;
>> } else if (ret == -EAGAIN) {
>> + ret = 0;
>> /*
>> * for quota file, just redirty left pages to
>> * avoid deadlock caused by cluster update race
>> * from foreground operation.
>> */
>> if (IS_NOQUOTA(cc->inode))
>> - return 0;
>> - ret = 0;
>> + goto out;
>> f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
>> goto retry_write;
>> }
>> - return ret;
>> + goto out;
>> }
>>
>> *submitted += _submitted;
>> }
>>
>> - f2fs_balance_fs(F2FS_M_SB(mapping), true);
>> +out:
>> + if (compr_blocks > 0)
>> + f2fs_unlock_op(sbi);
>>
>> - return 0;
>> + f2fs_balance_fs(sbi, true);
>> + return ret;
>> }
>>
>> int f2fs_write_multi_pages(struct compress_ctx *cc,
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 81f9e2cc49e2..b171a9980f6a 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
>> .encrypted_page = NULL,
>> .submitted = 0,
>> .compr_blocks = compr_blocks,
>> - .need_lock = LOCK_RETRY,
>> + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
>> .post_read = f2fs_post_read_required(inode) ? 1 : 0,
>> .io_type = io_type,
>> .io_wbc = wbc,
>> @@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
>> if (err == -EAGAIN) {
>> err = f2fs_do_write_data_page(&fio);
>> if (err == -EAGAIN) {
>> + f2fs_bug_on(sbi, compr_blocks);
>> fio.need_lock = LOCK_REQ;
>> err = f2fs_do_write_data_page(&fio);
>> }
>> --
>> 2.40.1