If a data block in a compressed cluster is not persisted together with
its metadata during checkpoint, the data may be corrupted after SPOR
(sudden power-off recovery). Let's guarantee that compressed pages are
written back by checkpoint.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
v3:
- treat compressed page as CP guaranteed data explicitly.
fs/f2fs/compress.c | 4 +++-
fs/f2fs/data.c | 17 +++++++++--------
fs/f2fs/f2fs.h | 4 +++-
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index c5a4364c4482..9940b7886e5d 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
struct f2fs_sb_info *sbi = bio->bi_private;
struct compress_io_ctx *cic =
(struct compress_io_ctx *)page_private(page);
+ enum count_type type = WB_DATA_TYPE(page,
+ f2fs_is_compressed_page(page));
int i;
if (unlikely(bio->bi_status))
@@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
f2fs_compress_free_page(page);
- dec_page_count(sbi, F2FS_WB_DATA);
+ dec_page_count(sbi, type);
if (atomic_dec_return(&cic->pending_pages))
return;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dce8defdf4c7..81f9e2cc49e2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}
-static bool __is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
@@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
S_ISDIR(inode->i_mode))
return true;
- if (f2fs_is_compressed_page(page))
- return false;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
page_private_gcing(page))
return true;
@@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
- enum count_type type = WB_DATA_TYPE(page);
+ enum count_type type = WB_DATA_TYPE(page, false);
if (page_private_dummy(page)) {
clear_page_private_dummy(page);
@@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(page) : WB_DATA_TYPE(fio->page));
+ __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
- inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
+ enum count_type type;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
/* set submitted = true as a return value */
fio->submitted = 1;
- inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+ type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ inc_page_count(sbi, type);
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
@@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
if (F2FS_IO_ALIGNED(sbi) &&
(fio->type == DATA || fio->type == NODE) &&
fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page,
+ fio->compressed_page));
fio->retry = 1;
goto skip;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..50f3d546ded8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(p, f) \
+ (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
+bool f2fs_is_cp_guaranteed(struct page *page);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
--
2.40.1
When we overwrite a compressed cluster with a normal cluster, we should
not unlock cp_rwsem during f2fs_write_raw_pages(); otherwise data will
be corrupted if only some of the blocks were persisted before CP & SPOR,
because the cluster metadata wasn't updated atomically.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 20 ++++++++++++++------
fs/f2fs/data.c | 3 ++-
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 9940b7886e5d..bf4cfab67aec 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1448,7 +1448,8 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret, i;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ int _submitted, compr_blocks, ret = 0, i;
compr_blocks = f2fs_compressed_blocks(cc);
@@ -1463,6 +1464,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (compr_blocks < 0)
return compr_blocks;
+ /* overwrite compressed cluster w/ normal cluster */
+ if (compr_blocks > 0)
+ f2fs_lock_op(sbi);
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
@@ -1495,26 +1500,29 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
unlock_page(cc->rpages[i]);
ret = 0;
} else if (ret == -EAGAIN) {
+ ret = 0;
/*
* for quota file, just redirty left pages to
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
if (IS_NOQUOTA(cc->inode))
- return 0;
- ret = 0;
+ goto out;
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry_write;
}
- return ret;
+ goto out;
}
*submitted += _submitted;
}
- f2fs_balance_fs(F2FS_M_SB(mapping), true);
+out:
+ if (compr_blocks > 0)
+ f2fs_unlock_op(sbi);
- return 0;
+ f2fs_balance_fs(sbi, true);
+ return ret;
}
int f2fs_write_multi_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 81f9e2cc49e2..b171a9980f6a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
- .need_lock = LOCK_RETRY,
+ .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
.post_read = f2fs_post_read_required(inode) ? 1 : 0,
.io_type = io_type,
.io_wbc = wbc,
@@ -2920,6 +2920,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (err == -EAGAIN) {
err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
+ f2fs_bug_on(sbi, compr_blocks);
fio.need_lock = LOCK_REQ;
err = f2fs_do_write_data_page(&fio);
}
--
2.40.1
From: Sheng Yong <[email protected]>
A compressed cluster may not have been released because
release_compress_blocks() can fail; fix reserve_compress_blocks() to
handle such a still-reserved compressed cluster correctly.
Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Sheng Yong <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/file.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 026d05a7edd8..782ae3be48f6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
goto next;
}
+ /*
+ * compressed cluster was not released due to
+ * it fails in release_compress_blocks().
+ */
+ if (blkaddr == NEW_ADDR) {
+ compr_blocks++;
+ continue;
+ }
+
if (__is_valid_data_blkaddr(blkaddr)) {
compr_blocks++;
continue;
@@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
}
reserved = cluster_size - compr_blocks;
+ if (!reserved)
+ goto next;
+
ret = inc_valid_block_count(sbi, dn->inode, &reserved);
if (ret)
return ret;
--
2.40.1
In reserve_compress_blocks(), we update the blkaddrs of the dnode prior to
calling inc_valid_block_count(), which may cause an inconsistent status
between i_blocks and the blkaddrs once inc_valid_block_count() fails.
To fix this issue, reverse their invoking order.
Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/data.c | 5 +++--
fs/f2fs/f2fs.h | 7 ++++++-
fs/f2fs/file.c | 26 ++++++++++++++------------
fs/f2fs/segment.c | 2 +-
4 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b171a9980f6a..8d2ace723310 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
+ if (unlikely(err))
return err;
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
@@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
dn->data_blkaddr = f2fs_data_blkaddr(dn);
if (dn->data_blkaddr == NULL_ADDR) {
- err = inc_valid_block_count(sbi, dn->inode, &count);
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
if (unlikely(err))
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 50f3d546ded8..69e71460a950 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
- struct inode *inode, blkcnt_t *count)
+ struct inode *inode, blkcnt_t *count, bool partial)
{
blkcnt_t diff = 0, release = 0;
block_t avail_user_block_count;
@@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = 0;
}
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+ if (!partial) {
+ spin_unlock(&sbi->stat_lock);
+ goto enospc;
+ }
+
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
diff = *count;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 782ae3be48f6..9f4e21b5916c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
blkcnt_t reserved;
int ret;
- for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
- blkaddr = f2fs_data_blkaddr(dn);
+ for (i = 0; i < cluster_size; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
if (i == 0) {
- if (blkaddr == COMPRESS_ADDR)
- continue;
- dn->ofs_in_node += cluster_size;
- goto next;
+ if (blkaddr != COMPRESS_ADDR) {
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+ continue;
}
/*
@@ -3637,20 +3639,20 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
compr_blocks++;
continue;
}
-
- f2fs_set_data_blkaddr(dn, NEW_ADDR);
}
reserved = cluster_size - compr_blocks;
if (!reserved)
goto next;
- ret = inc_valid_block_count(sbi, dn->inode, &reserved);
- if (ret)
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+ if (unlikely(ret))
return ret;
- if (reserved != cluster_size - compr_blocks)
- return -ENOSPC;
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ if (f2fs_data_blkaddr(dn) == NULL_ADDR)
+ f2fs_set_data_blkaddr(dn, NEW_ADDR);
+ }
f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 61da26eb61cc..9632e9977c90 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -248,7 +248,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
} else {
blkcnt_t count = 1;
- err = inc_valid_block_count(sbi, inode, &count);
+ err = inc_valid_block_count(sbi, inode, &count, true);
if (err) {
f2fs_put_dnode(&dn);
return err;
--
2.40.1
verify_blkaddr() will trigger a panic once we inject a fault into
f2fs_is_valid_blkaddr(); fix this by removing the unnecessary f2fs_bug_on().
Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()")
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/f2fs.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 69e71460a950..ab710bb6d8b3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
- f2fs_bug_on(sbi, 1);
- }
}
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
--
2.40.1
We will encounter below inconsistent status when FAULT_BLKADDR type
fault injection is on.
Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
[FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
[FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
[ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
[FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
After we inject a fault into f2fs_is_valid_blkaddr() during truncation,
a) it fails to increase @nr_free or @valid_blocks
b) it can cause a blkaddr leak in the truncated dnode
which may cause inconsistent status.
This patch separates FAULT_BLKADDR_INCONSISTENCE from FAULT_BLKADDR,
so that:
a) we can use FAULT_BLKADDR_INCONSISTENCE in f2fs_truncate_data_blocks_range()
to simulate the inconsistency issue independently,
b) the FAULT_BLKADDR fault will not cause any inconsistent status, so we can
just use it to check error path handling on the kernel side.
Signed-off-by: Chao Yu <[email protected]>
---
v3:
- rename FAULT_INCONSISTENCE per Jaegeuk's suggestion.
Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
fs/f2fs/checkpoint.c | 19 +++++++---
fs/f2fs/f2fs.h | 3 ++
fs/f2fs/file.c | 8 +++--
fs/f2fs/super.c | 37 +++++++++----------
6 files changed, 91 insertions(+), 70 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 4f1d4e636d67..039a16ebaaaf 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -686,29 +686,30 @@ Description: Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR 0x000040000
+ FAULT_BLKADDR_INCONSISTENCE 0x000080000
+ =========================== ===========
What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
Date: January 2023
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index d32c6209685d..b7c5c3f6df1c 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ===========
+ Type_Name Type_Value
+ =========================== ===========
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010 (obsolete)
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_READ_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
+ FAULT_WRITE_IO 0x000004000
+ FAULT_SLAB_ALLOC 0x000008000
+ FAULT_DQUOT_INIT 0x000010000
+ FAULT_LOCK_OP 0x000020000
+ FAULT_BLKADDR 0x000040000
+ FAULT_BLKADDR_INCONSISTENCE 0x000080000
+ =========================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b0597a539fc5..84546f529cf0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
return exist;
}
-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (time_to_inject(sbi, FAULT_BLKADDR))
- return false;
-
switch (type) {
case META_NAT:
break;
@@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
return true;
}
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (time_to_inject(sbi, FAULT_BLKADDR))
+ return false;
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
/*
* Readahead CP/NAT/SIT/SSA/POR pages
*/
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ab710bb6d8b3..e0acfec0558d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -61,6 +61,7 @@ enum {
FAULT_DQUOT_INIT,
FAULT_LOCK_OP,
FAULT_BLKADDR,
+ FAULT_BLKADDR_INCONSISTENCE,
FAULT_MAX,
};
@@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9f4e21b5916c..32a7a413584b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn, NULL_ADDR);
if (__is_valid_data_blkaddr(blkaddr)) {
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))
+ if (time_to_inject(sbi, FAULT_BLKADDR_INCONSISTENCE))
+ continue;
+ if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
continue;
+ }
if (compressed_cluster)
valid_blocks++;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 206d03c82d96..87a803f36a50 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_BLKADDR_INCONSISTENCE] = "inconsistent blkaddr",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
--
2.40.1
On 12/28, Chao Yu wrote:
> We will encounter below inconsistent status when FAULT_BLKADDR type
> fault injection is on.
>
> Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
> [FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
>
> After we inject fault into f2fs_is_valid_blkaddr() during truncation,
> a) it missed to increase @nr_free or @valid_blocks
> b) it can cause in blkaddr leak in truncated dnode
> Which may cause inconsistent status.
>
> This patch separates FAULT_BLKADDR_INCONSISTENCE from FAULT_BLKADDR,
> so that we can:
> a) use FAULT_BLKADDR_INCONSISTENCE in f2fs_truncate_data_blocks_range()
> to simulate inconsistent issue independently,
> b) FAULT_BLKADDR fault will not cause any inconsistent status, we can
> just use it to check error path handling in kernel side.
How about defining FAULT_BLKADDR_VALIDITY and FAULT_BLKADDR_CONSISTENCY?
>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - rename FAULT_INCONSISTENCE as Jaegeuk's suggestion.
> Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------
> Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------
> fs/f2fs/checkpoint.c | 19 +++++++---
> fs/f2fs/f2fs.h | 3 ++
> fs/f2fs/file.c | 8 +++--
> fs/f2fs/super.c | 37 +++++++++----------
> 6 files changed, 91 insertions(+), 70 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index 4f1d4e636d67..039a16ebaaaf 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -686,29 +686,30 @@ Description: Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR 0x000040000
> + FAULT_BLKADDR_INCONSISTENCE 0x000080000
> + =========================== ===========
>
> What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
> Date: January 2023
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index d32c6209685d..b7c5c3f6df1c 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be
> enabled with fault_injection option, fault type value
> is shown below, it supports single or combined type.
>
> - =================== ===========
> - Type_Name Type_Value
> - =================== ===========
> - FAULT_KMALLOC 0x000000001
> - FAULT_KVMALLOC 0x000000002
> - FAULT_PAGE_ALLOC 0x000000004
> - FAULT_PAGE_GET 0x000000008
> - FAULT_ALLOC_BIO 0x000000010 (obsolete)
> - FAULT_ALLOC_NID 0x000000020
> - FAULT_ORPHAN 0x000000040
> - FAULT_BLOCK 0x000000080
> - FAULT_DIR_DEPTH 0x000000100
> - FAULT_EVICT_INODE 0x000000200
> - FAULT_TRUNCATE 0x000000400
> - FAULT_READ_IO 0x000000800
> - FAULT_CHECKPOINT 0x000001000
> - FAULT_DISCARD 0x000002000
> - FAULT_WRITE_IO 0x000004000
> - FAULT_SLAB_ALLOC 0x000008000
> - FAULT_DQUOT_INIT 0x000010000
> - FAULT_LOCK_OP 0x000020000
> - FAULT_BLKADDR 0x000040000
> - =================== ===========
> + =========================== ===========
> + Type_Name Type_Value
> + =========================== ===========
> + FAULT_KMALLOC 0x000000001
> + FAULT_KVMALLOC 0x000000002
> + FAULT_PAGE_ALLOC 0x000000004
> + FAULT_PAGE_GET 0x000000008
> + FAULT_ALLOC_BIO 0x000000010 (obsolete)
> + FAULT_ALLOC_NID 0x000000020
> + FAULT_ORPHAN 0x000000040
> + FAULT_BLOCK 0x000000080
> + FAULT_DIR_DEPTH 0x000000100
> + FAULT_EVICT_INODE 0x000000200
> + FAULT_TRUNCATE 0x000000400
> + FAULT_READ_IO 0x000000800
> + FAULT_CHECKPOINT 0x000001000
> + FAULT_DISCARD 0x000002000
> + FAULT_WRITE_IO 0x000004000
> + FAULT_SLAB_ALLOC 0x000008000
> + FAULT_DQUOT_INIT 0x000010000
> + FAULT_LOCK_OP 0x000020000
> + FAULT_BLKADDR 0x000040000
> + FAULT_BLKADDR_INCONSISTENCE 0x000080000
> + =========================== ===========
> mode=%s Control block allocation mode which supports "adaptive"
> and "lfs". In "lfs" mode, there should be no random
> writes towards main area.
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index b0597a539fc5..84546f529cf0 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
> return exist;
> }
>
> -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type)
> {
> - if (time_to_inject(sbi, FAULT_BLKADDR))
> - return false;
> -
> switch (type) {
> case META_NAT:
> break;
> @@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> return true;
> }
>
> +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + if (time_to_inject(sbi, FAULT_BLKADDR))
> + return false;
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type)
> +{
> + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
> +}
> +
> /*
> * Readahead CP/NAT/SIT/SSA/POR pages
> */
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ab710bb6d8b3..e0acfec0558d 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -61,6 +61,7 @@ enum {
> FAULT_DQUOT_INIT,
> FAULT_LOCK_OP,
> FAULT_BLKADDR,
> + FAULT_BLKADDR_INCONSISTENCE,
> FAULT_MAX,
> };
>
> @@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
> struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
> bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
> block_t blkaddr, int type);
> +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
> + block_t blkaddr, int type);
> int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
> int type, bool sync);
> void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 9f4e21b5916c..32a7a413584b 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
> f2fs_set_data_blkaddr(dn, NULL_ADDR);
>
> if (__is_valid_data_blkaddr(blkaddr)) {
> - if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
> - DATA_GENERIC_ENHANCE))
> + if (time_to_inject(sbi, FAULT_BLKADDR_INCONSISTENCE))
> + continue;
> + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
> + DATA_GENERIC_ENHANCE)) {
> + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
> continue;
> + }
> if (compressed_cluster)
> valid_blocks++;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 206d03c82d96..87a803f36a50 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep;
> #ifdef CONFIG_F2FS_FAULT_INJECTION
>
> const char *f2fs_fault_name[FAULT_MAX] = {
> - [FAULT_KMALLOC] = "kmalloc",
> - [FAULT_KVMALLOC] = "kvmalloc",
> - [FAULT_PAGE_ALLOC] = "page alloc",
> - [FAULT_PAGE_GET] = "page get",
> - [FAULT_ALLOC_NID] = "alloc nid",
> - [FAULT_ORPHAN] = "orphan",
> - [FAULT_BLOCK] = "no more block",
> - [FAULT_DIR_DEPTH] = "too big dir depth",
> - [FAULT_EVICT_INODE] = "evict_inode fail",
> - [FAULT_TRUNCATE] = "truncate fail",
> - [FAULT_READ_IO] = "read IO error",
> - [FAULT_CHECKPOINT] = "checkpoint error",
> - [FAULT_DISCARD] = "discard error",
> - [FAULT_WRITE_IO] = "write IO error",
> - [FAULT_SLAB_ALLOC] = "slab alloc",
> - [FAULT_DQUOT_INIT] = "dquot initialize",
> - [FAULT_LOCK_OP] = "lock_op",
> - [FAULT_BLKADDR] = "invalid blkaddr",
> + [FAULT_KMALLOC] = "kmalloc",
> + [FAULT_KVMALLOC] = "kvmalloc",
> + [FAULT_PAGE_ALLOC] = "page alloc",
> + [FAULT_PAGE_GET] = "page get",
> + [FAULT_ALLOC_NID] = "alloc nid",
> + [FAULT_ORPHAN] = "orphan",
> + [FAULT_BLOCK] = "no more block",
> + [FAULT_DIR_DEPTH] = "too big dir depth",
> + [FAULT_EVICT_INODE] = "evict_inode fail",
> + [FAULT_TRUNCATE] = "truncate fail",
> + [FAULT_READ_IO] = "read IO error",
> + [FAULT_CHECKPOINT] = "checkpoint error",
> + [FAULT_DISCARD] = "discard error",
> + [FAULT_WRITE_IO] = "write IO error",
> + [FAULT_SLAB_ALLOC] = "slab alloc",
> + [FAULT_DQUOT_INIT] = "dquot initialize",
> + [FAULT_LOCK_OP] = "lock_op",
> + [FAULT_BLKADDR] = "invalid blkaddr",
> + [FAULT_BLKADDR_INCONSISTENCE] = "inconsistent blkaddr",
> };
>
> void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
> --
> 2.40.1
On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
>
> If data block in compressed cluster is not persisted with metadata
> during checkpoint, after SPOR, the data may be corrupted, let's
> guarantee to write compressed page by checkpoint.
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - treat compressed page as CP guaranteed data explictly.
> fs/f2fs/compress.c | 4 +++-
> fs/f2fs/data.c | 17 +++++++++--------
> fs/f2fs/f2fs.h | 4 +++-
> 3 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index c5a4364c4482..9940b7886e5d 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> struct f2fs_sb_info *sbi = bio->bi_private;
> struct compress_io_ctx *cic =
> (struct compress_io_ctx *)page_private(page);
> + enum count_type type = WB_DATA_TYPE(page,
> + f2fs_is_compressed_page(page));
> int i;
>
> if (unlikely(bio->bi_status))
> @@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
>
> f2fs_compress_free_page(page);
>
> - dec_page_count(sbi, F2FS_WB_DATA);
> + dec_page_count(sbi, type);
>
> if (atomic_dec_return(&cic->pending_pages))
> return;
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index dce8defdf4c7..81f9e2cc49e2 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
> bioset_exit(&f2fs_bioset);
> }
>
> -static bool __is_cp_guaranteed(struct page *page)
> +bool f2fs_is_cp_guaranteed(struct page *page)
> {
> struct address_space *mapping = page->mapping;
> struct inode *inode;
> @@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
> S_ISDIR(inode->i_mode))
> return true;
>
> - if (f2fs_is_compressed_page(page))
> - return false;
Out of curiosity, why don't we simply change the above to "return true"?
> if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
> page_private_gcing(page))
> return true;
> @@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
>
> bio_for_each_segment_all(bvec, bio, iter_all) {
> struct page *page = bvec->bv_page;
> - enum count_type type = WB_DATA_TYPE(page);
> + enum count_type type = WB_DATA_TYPE(page, false);
>
> if (page_private_dummy(page)) {
> clear_page_private_dummy(page);
> @@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
>
> inc_page_count(fio->sbi, is_read_io(fio->op) ?
> - __read_io_type(page) : WB_DATA_TYPE(fio->page));
> + __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
>
> if (is_read_io(bio_op(bio)))
> f2fs_submit_read_bio(fio->sbi, bio, fio->type);
> @@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> if (fio->io_wbc)
> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
>
> - inc_page_count(fio->sbi, WB_DATA_TYPE(page));
> + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
>
> *fio->last_block = fio->new_blkaddr;
> *fio->bio = bio;
> @@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> struct page *bio_page;
> + enum count_type type;
>
> f2fs_bug_on(sbi, is_read_io(fio->op));
>
> @@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> /* set submitted = true as a return value */
> fio->submitted = 1;
>
> - inc_page_count(sbi, WB_DATA_TYPE(bio_page));
> + type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> + inc_page_count(sbi, type);
>
> if (io->bio &&
> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> @@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> if (F2FS_IO_ALIGNED(sbi) &&
> (fio->type == DATA || fio->type == NODE) &&
> fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
> - dec_page_count(sbi, WB_DATA_TYPE(bio_page));
> + dec_page_count(sbi, WB_DATA_TYPE(bio_page,
> + fio->compressed_page));
> fio->retry = 1;
> goto skip;
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 65294e3b0bef..50f3d546ded8 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
> * f2fs monitors the number of several block types such as on-writeback,
> * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
> */
> -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> +#define WB_DATA_TYPE(p, f) \
> + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> enum count_type {
> F2FS_DIRTY_DENTS,
> F2FS_DIRTY_DATA,
> @@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
> */
> int __init f2fs_init_bioset(void);
> void f2fs_destroy_bioset(void);
> +bool f2fs_is_cp_guaranteed(struct page *page);
> int f2fs_init_bio_entry_cache(void);
> void f2fs_destroy_bio_entry_cache(void);
> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
>
> From: Sheng Yong <[email protected]>
>
> Compressed cluster may not be released due to we can fail in
> release_compress_blocks(), fix to handle reserved compressed
> cluster correctly in reserve_compress_blocks().
>
> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> Signed-off-by: Sheng Yong <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/file.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 026d05a7edd8..782ae3be48f6 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> goto next;
> }
>
> + /*
> + * compressed cluster was not released due to
> + * it fails in release_compress_blocks().
> + */
> + if (blkaddr == NEW_ADDR) {
> + compr_blocks++;
> + continue;
> + }
> +
> if (__is_valid_data_blkaddr(blkaddr)) {
> compr_blocks++;
> continue;
How about merging two conditions like "blkaddr == NEW_ADDR ||
__is_valid_data_blkaddr(blkaddr)"?
> @@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> }
>
> reserved = cluster_size - compr_blocks;
> + if (!reserved)
> + goto next;
> +
How can the reserved variable be zero?
> ret = inc_valid_block_count(sbi, dn->inode, &reserved);
> if (ret)
> return ret;
> --
> 2.40.1
>
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2024/1/11 8:55, Daeho Jeong wrote:
> On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
>>
>> If data block in compressed cluster is not persisted with metadata
>> during checkpoint, after SPOR, the data may be corrupted, let's
>> guarantee to write compressed page by checkpoint.
>>
>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> v3:
>> - treat compressed page as CP guaranteed data explictly.
>> fs/f2fs/compress.c | 4 +++-
>> fs/f2fs/data.c | 17 +++++++++--------
>> fs/f2fs/f2fs.h | 4 +++-
>> 3 files changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
>> index c5a4364c4482..9940b7886e5d 100644
>> --- a/fs/f2fs/compress.c
>> +++ b/fs/f2fs/compress.c
>> @@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
>> struct f2fs_sb_info *sbi = bio->bi_private;
>> struct compress_io_ctx *cic =
>> (struct compress_io_ctx *)page_private(page);
>> + enum count_type type = WB_DATA_TYPE(page,
>> + f2fs_is_compressed_page(page));
>> int i;
>>
>> if (unlikely(bio->bi_status))
>> @@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
>>
>> f2fs_compress_free_page(page);
>>
>> - dec_page_count(sbi, F2FS_WB_DATA);
>> + dec_page_count(sbi, type);
>>
>> if (atomic_dec_return(&cic->pending_pages))
>> return;
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index dce8defdf4c7..81f9e2cc49e2 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
>> bioset_exit(&f2fs_bioset);
>> }
>>
>> -static bool __is_cp_guaranteed(struct page *page)
>> +bool f2fs_is_cp_guaranteed(struct page *page)
>> {
>> struct address_space *mapping = page->mapping;
>> struct inode *inode;
>> @@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
>> S_ISDIR(inode->i_mode))
>> return true;
>>
>> - if (f2fs_is_compressed_page(page))
>> - return false;
>
> Out of curiosity, why don't we simply change the above to "return true"?
Daeho,
I used that implementation before; please check v1 and the related comments
from Jaegeuk and me, and let me know if that is not clear enough. :)
https://lore.kernel.org/linux-f2fs-devel/[email protected]/
>
>> if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
>> page_private_gcing(page))
>> return true;
>> @@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
>>
>> bio_for_each_segment_all(bvec, bio, iter_all) {
>> struct page *page = bvec->bv_page;
>> - enum count_type type = WB_DATA_TYPE(page);
>> + enum count_type type = WB_DATA_TYPE(page, false);
>>
>> if (page_private_dummy(page)) {
>> clear_page_private_dummy(page);
>> @@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
>> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
>>
>> inc_page_count(fio->sbi, is_read_io(fio->op) ?
>> - __read_io_type(page) : WB_DATA_TYPE(fio->page));
>> + __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
>>
>> if (is_read_io(bio_op(bio)))
>> f2fs_submit_read_bio(fio->sbi, bio, fio->type);
>> @@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>> if (fio->io_wbc)
>> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
>>
>> - inc_page_count(fio->sbi, WB_DATA_TYPE(page));
>> + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
>>
>> *fio->last_block = fio->new_blkaddr;
>> *fio->bio = bio;
>> @@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
>> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>> struct page *bio_page;
>> + enum count_type type;
>>
>> f2fs_bug_on(sbi, is_read_io(fio->op));
>>
>> @@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> /* set submitted = true as a return value */
>> fio->submitted = 1;
>>
>> - inc_page_count(sbi, WB_DATA_TYPE(bio_page));
>> + type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>> + inc_page_count(sbi, type);
>>
>> if (io->bio &&
>> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>> @@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> if (F2FS_IO_ALIGNED(sbi) &&
>> (fio->type == DATA || fio->type == NODE) &&
>> fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
>> - dec_page_count(sbi, WB_DATA_TYPE(bio_page));
>> + dec_page_count(sbi, WB_DATA_TYPE(bio_page,
>> + fio->compressed_page));
>> fio->retry = 1;
>> goto skip;
>> }
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 65294e3b0bef..50f3d546ded8 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
>> * f2fs monitors the number of several block types such as on-writeback,
>> * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
>> */
>> -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
>> +#define WB_DATA_TYPE(p, f) \
>> + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
>> enum count_type {
>> F2FS_DIRTY_DENTS,
>> F2FS_DIRTY_DATA,
>> @@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
>> */
>> int __init f2fs_init_bioset(void);
>> void f2fs_destroy_bioset(void);
>> +bool f2fs_is_cp_guaranteed(struct page *page);
>> int f2fs_init_bio_entry_cache(void);
>> void f2fs_destroy_bio_entry_cache(void);
>> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>> --
>> 2.40.1
>>
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2024/1/11 9:18, Daeho Jeong wrote:
> On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
>>
>> From: Sheng Yong <[email protected]>
>>
>> Compressed cluster may not be released due to we can fail in
>> release_compress_blocks(), fix to handle reserved compressed
>> cluster correctly in reserve_compress_blocks().
>>
>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
>> Signed-off-by: Sheng Yong <[email protected]>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/file.c | 12 ++++++++++++
>> 1 file changed, 12 insertions(+)
>>
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 026d05a7edd8..782ae3be48f6 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
>> goto next;
>> }
>>
>> + /*
>> + * compressed cluster was not released due to
>> + * it fails in release_compress_blocks().
>> + */
>> + if (blkaddr == NEW_ADDR) {
>> + compr_blocks++;
>> + continue;
>> + }
>> +
>> if (__is_valid_data_blkaddr(blkaddr)) {
>> compr_blocks++;
>> continue;
>
> How about merging two conditions like "blkaddr == NEW_ADDR ||
> __is_valid_data_blkaddr(blkaddr)"?
Oh, sure.
>
>> @@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
>> }
>>
>> reserved = cluster_size - compr_blocks;
>> + if (!reserved)
>> + goto next;
>> +
>
> How can the reserved variable be zero?
I guess it can happen if a cluster was not released during
release_compress_blocks(): in that case all blocks in the cluster should
have been reserved already, so in this round of reserving it needs to
skip reserving blocks, right?
Thanks,
>
>> ret = inc_valid_block_count(sbi, dn->inode, &reserved);
>> if (ret)
>> return ret;
>> --
>> 2.40.1
>>
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2024/1/3 4:55, Jaegeuk Kim wrote:
> On 12/28, Chao Yu wrote:
>> We will encounter below inconsistent status when FAULT_BLKADDR type
>> fault injection is on.
>>
>> Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off
>> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks
>> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf
>> [FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27
>> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks
>> [FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e
>> [FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12
>> [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks
>> [FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1
>>
>> After we inject fault into f2fs_is_valid_blkaddr() during truncation,
>> a) it missed to increase @nr_free or @valid_blocks
>> b) it can cause in blkaddr leak in truncated dnode
>> Which may cause inconsistent status.
>>
>> This patch separates FAULT_BLKADDR_INCONSISTENCE from FAULT_BLKADDR,
>> so that we can:
>> a) use FAULT_BLKADDR_INCONSISTENCE in f2fs_truncate_data_blocks_range()
>> to simulate inconsistent issue independently,
>> b) FAULT_BLKADDR fault will not cause any inconsistent status, we can
>> just use it to check error path handling in kernel side.
>
> How about defining FAULT_BLKADDR_VALIDITY and FAULT_BLKADDR_CONSISTENCY?
Better, :)
Thanks,
On Wed, Jan 10, 2024 at 5:33 PM Chao Yu <[email protected]> wrote:
>
> On 2024/1/11 9:18, Daeho Jeong wrote:
> > On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
> >>
> >> From: Sheng Yong <[email protected]>
> >>
> >> Compressed cluster may not be released due to we can fail in
> >> release_compress_blocks(), fix to handle reserved compressed
> >> cluster correctly in reserve_compress_blocks().
> >>
> >> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> >> Signed-off-by: Sheng Yong <[email protected]>
> >> Signed-off-by: Chao Yu <[email protected]>
> >> ---
> >> fs/f2fs/file.c | 12 ++++++++++++
> >> 1 file changed, 12 insertions(+)
> >>
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index 026d05a7edd8..782ae3be48f6 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> >> goto next;
> >> }
> >>
> >> + /*
> >> + * compressed cluster was not released due to
> >> + * it fails in release_compress_blocks().
> >> + */
> >> + if (blkaddr == NEW_ADDR) {
> >> + compr_blocks++;
> >> + continue;
> >> + }
> >> +
> >> if (__is_valid_data_blkaddr(blkaddr)) {
> >> compr_blocks++;
> >> continue;
> >
> > How about merging two conditions like "blkaddr == NEW_ADDR ||
> > __is_valid_data_blkaddr(blkaddr)"?
>
> Oh, sure.
>
> >
> >> @@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> >> }
> >>
> >> reserved = cluster_size - compr_blocks;
> >> + if (!reserved)
> >> + goto next;
> >> +
> >
> > How can the reserved variable be zero?
>
> I guess it can happen if a cluster was not released during
> release_compress_blocks(), then all blocks in the cluster should
> has been reserved, so, in this round of reserving, it needs to skip
> reserve blocks, right?
Let's assume cluster_size is 4. How can compr_blocks be 4?
if (i == 0) {
if (blkaddr == COMPRESS_ADDR)
continue;
dn->ofs_in_node += cluster_size;
goto next;
}
We skip the block having COMPRESS_ADDR when counting compr_blocks.
So, the maximum value of compr_blocks should be 3, right?
>
> Thanks,
>
> >
> >> ret = inc_valid_block_count(sbi, dn->inode, &reserved);
> >> if (ret)
> >> return ret;
> >> --
> >> 2.40.1
> >>
> >>
> >>
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> [email protected]
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On Wed, Jan 10, 2024 at 5:26 PM Chao Yu <[email protected]> wrote:
>
> On 2024/1/11 8:55, Daeho Jeong wrote:
> > On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
> >>
> >> If data block in compressed cluster is not persisted with metadata
> >> during checkpoint, after SPOR, the data may be corrupted, let's
> >> guarantee to write compressed page by checkpoint.
> >>
> >> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> >> Signed-off-by: Chao Yu <[email protected]>
> >> ---
> >> v3:
> >> - treat compressed page as CP guaranteed data explictly.
> >> fs/f2fs/compress.c | 4 +++-
> >> fs/f2fs/data.c | 17 +++++++++--------
> >> fs/f2fs/f2fs.h | 4 +++-
> >> 3 files changed, 15 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> >> index c5a4364c4482..9940b7886e5d 100644
> >> --- a/fs/f2fs/compress.c
> >> +++ b/fs/f2fs/compress.c
> >> @@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> >> struct f2fs_sb_info *sbi = bio->bi_private;
> >> struct compress_io_ctx *cic =
> >> (struct compress_io_ctx *)page_private(page);
> >> + enum count_type type = WB_DATA_TYPE(page,
> >> + f2fs_is_compressed_page(page));
> >> int i;
> >>
> >> if (unlikely(bio->bi_status))
> >> @@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> >>
> >> f2fs_compress_free_page(page);
> >>
> >> - dec_page_count(sbi, F2FS_WB_DATA);
> >> + dec_page_count(sbi, type);
> >>
> >> if (atomic_dec_return(&cic->pending_pages))
> >> return;
> >> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >> index dce8defdf4c7..81f9e2cc49e2 100644
> >> --- a/fs/f2fs/data.c
> >> +++ b/fs/f2fs/data.c
> >> @@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
> >> bioset_exit(&f2fs_bioset);
> >> }
> >>
> >> -static bool __is_cp_guaranteed(struct page *page)
> >> +bool f2fs_is_cp_guaranteed(struct page *page)
> >> {
> >> struct address_space *mapping = page->mapping;
> >> struct inode *inode;
> >> @@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
> >> S_ISDIR(inode->i_mode))
> >> return true;
> >>
> >> - if (f2fs_is_compressed_page(page))
> >> - return false;
> >
> > Out of curiosity, why don't we simply change the above to "return true"?
>
> Daeho,
>
> I used the implementation, please check v1 and related comments
> from Jaegeuk and me, let me know if that was not clear enough. :)
>
> https://lore.kernel.org/linux-f2fs-devel/[email protected]/
Oh, I missed it. Sorry~
>
> >
> >> if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
> >> page_private_gcing(page))
> >> return true;
> >> @@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
> >>
> >> bio_for_each_segment_all(bvec, bio, iter_all) {
> >> struct page *page = bvec->bv_page;
> >> - enum count_type type = WB_DATA_TYPE(page);
> >> + enum count_type type = WB_DATA_TYPE(page, false);
> >>
> >> if (page_private_dummy(page)) {
> >> clear_page_private_dummy(page);
> >> @@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
> >> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
> >>
> >> inc_page_count(fio->sbi, is_read_io(fio->op) ?
> >> - __read_io_type(page) : WB_DATA_TYPE(fio->page));
> >> + __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
> >>
> >> if (is_read_io(bio_op(bio)))
> >> f2fs_submit_read_bio(fio->sbi, bio, fio->type);
> >> @@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> >> if (fio->io_wbc)
> >> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
> >>
> >> - inc_page_count(fio->sbi, WB_DATA_TYPE(page));
> >> + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
> >>
> >> *fio->last_block = fio->new_blkaddr;
> >> *fio->bio = bio;
> >> @@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> >> enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
> >> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> >> struct page *bio_page;
> >> + enum count_type type;
> >>
> >> f2fs_bug_on(sbi, is_read_io(fio->op));
> >>
> >> @@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> >> /* set submitted = true as a return value */
> >> fio->submitted = 1;
> >>
> >> - inc_page_count(sbi, WB_DATA_TYPE(bio_page));
> >> + type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> >> + inc_page_count(sbi, type);
> >>
> >> if (io->bio &&
> >> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> >> @@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> >> if (F2FS_IO_ALIGNED(sbi) &&
> >> (fio->type == DATA || fio->type == NODE) &&
> >> fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
> >> - dec_page_count(sbi, WB_DATA_TYPE(bio_page));
> >> + dec_page_count(sbi, WB_DATA_TYPE(bio_page,
> >> + fio->compressed_page));
> >> fio->retry = 1;
> >> goto skip;
> >> }
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index 65294e3b0bef..50f3d546ded8 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
> >> * f2fs monitors the number of several block types such as on-writeback,
> >> * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
> >> */
> >> -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> >> +#define WB_DATA_TYPE(p, f) \
> >> + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> >> enum count_type {
> >> F2FS_DIRTY_DENTS,
> >> F2FS_DIRTY_DATA,
> >> @@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
> >> */
> >> int __init f2fs_init_bioset(void);
> >> void f2fs_destroy_bioset(void);
> >> +bool f2fs_is_cp_guaranteed(struct page *page);
> >> int f2fs_init_bio_entry_cache(void);
> >> void f2fs_destroy_bio_entry_cache(void);
> >> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> >> --
> >> 2.40.1
> >>
> >>
> >>
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> [email protected]
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2024/1/12 1:15, Daeho Jeong wrote:
> On Wed, Jan 10, 2024 at 5:33 PM Chao Yu <[email protected]> wrote:
>>
>> On 2024/1/11 9:18, Daeho Jeong wrote:
>>> On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
>>>>
>>>> From: Sheng Yong <[email protected]>
>>>>
>>>> Compressed cluster may not be released due to we can fail in
>>>> release_compress_blocks(), fix to handle reserved compressed
>>>> cluster correctly in reserve_compress_blocks().
>>>>
>>>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
>>>> Signed-off-by: Sheng Yong <[email protected]>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> ---
>>>> fs/f2fs/file.c | 12 ++++++++++++
>>>> 1 file changed, 12 insertions(+)
>>>>
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 026d05a7edd8..782ae3be48f6 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
>>>> goto next;
>>>> }
>>>>
>>>> + /*
>>>> + * compressed cluster was not released due to
>>>> + * it fails in release_compress_blocks().
>>>> + */
>>>> + if (blkaddr == NEW_ADDR) {
>>>> + compr_blocks++;
>>>> + continue;
>>>> + }
>>>> +
>>>> if (__is_valid_data_blkaddr(blkaddr)) {
>>>> compr_blocks++;
>>>> continue;
>>>
>>> How about merging two conditions like "blkaddr == NEW_ADDR ||
>>> __is_valid_data_blkaddr(blkaddr)"?
>>
>> Oh, sure.
>>
>>>
>>>> @@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
>>>> }
>>>>
>>>> reserved = cluster_size - compr_blocks;
>>>> + if (!reserved)
>>>> + goto next;
>>>> +
>>>
>>> How can the reserved variable be zero?
>>
>> I guess it can happen if a cluster was not released during
>> release_compress_blocks(), then all blocks in the cluster should
>> has been reserved, so, in this round of reserving, it needs to skip
>> reserve blocks, right?
>
> Let's assume cluster_size is 4. How can compr_blocks be 4?
>
> if (i == 0) {
> if (blkaddr == COMPRESS_ADDR)
> continue;
> dn->ofs_in_node += cluster_size;
> goto next;
> }
>
> We skip the block having COMPRESS_ADDR when counting compr_blocks.
> So, the maximum value of compr_blocks should be 3, right?
Ah, got it, and I think you're right.
Should we fix the condition as below?
/* for the case all blocks in cluster were reserved */
if (reserved == 1)
goto next;
Thanks,
>
>>
>> Thanks,
>>
>>>
>>>> ret = inc_valid_block_count(sbi, dn->inode, &reserved);
>>>> if (ret)
>>>> return ret;
>>>> --
>>>> 2.40.1
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> [email protected]
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
Reviewed-by: Daeho Jeong <[email protected]>
On Thu, Jan 11, 2024 at 9:17 AM Daeho Jeong <[email protected]> wrote:
>
> On Wed, Jan 10, 2024 at 5:26 PM Chao Yu <[email protected]> wrote:
> >
> > On 2024/1/11 8:55, Daeho Jeong wrote:
> > > On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
> > >>
> > >> If data block in compressed cluster is not persisted with metadata
> > >> during checkpoint, after SPOR, the data may be corrupted, let's
> > >> guarantee to write compressed page by checkpoint.
> > >>
> > >> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> > >> Signed-off-by: Chao Yu <[email protected]>
> > >> ---
> > >> v3:
> > >> - treat compressed page as CP guaranteed data explictly.
> > >> fs/f2fs/compress.c | 4 +++-
> > >> fs/f2fs/data.c | 17 +++++++++--------
> > >> fs/f2fs/f2fs.h | 4 +++-
> > >> 3 files changed, 15 insertions(+), 10 deletions(-)
> > >>
> > >> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> > >> index c5a4364c4482..9940b7886e5d 100644
> > >> --- a/fs/f2fs/compress.c
> > >> +++ b/fs/f2fs/compress.c
> > >> @@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> > >> struct f2fs_sb_info *sbi = bio->bi_private;
> > >> struct compress_io_ctx *cic =
> > >> (struct compress_io_ctx *)page_private(page);
> > >> + enum count_type type = WB_DATA_TYPE(page,
> > >> + f2fs_is_compressed_page(page));
> > >> int i;
> > >>
> > >> if (unlikely(bio->bi_status))
> > >> @@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
> > >>
> > >> f2fs_compress_free_page(page);
> > >>
> > >> - dec_page_count(sbi, F2FS_WB_DATA);
> > >> + dec_page_count(sbi, type);
> > >>
> > >> if (atomic_dec_return(&cic->pending_pages))
> > >> return;
> > >> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > >> index dce8defdf4c7..81f9e2cc49e2 100644
> > >> --- a/fs/f2fs/data.c
> > >> +++ b/fs/f2fs/data.c
> > >> @@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
> > >> bioset_exit(&f2fs_bioset);
> > >> }
> > >>
> > >> -static bool __is_cp_guaranteed(struct page *page)
> > >> +bool f2fs_is_cp_guaranteed(struct page *page)
> > >> {
> > >> struct address_space *mapping = page->mapping;
> > >> struct inode *inode;
> > >> @@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
> > >> S_ISDIR(inode->i_mode))
> > >> return true;
> > >>
> > >> - if (f2fs_is_compressed_page(page))
> > >> - return false;
> > >
> > > Out of curiosity, why don't we simply change the above to "return true"?
> >
> > Daeho,
> >
> > I used the implementation, please check v1 and related comments
> > from Jaegeuk and me, let me know if that was not clear enough. :)
> >
> > https://lore.kernel.org/linux-f2fs-devel/[email protected]/
>
> Oh, I missed it. Sorry~
>
> >
> > >
> > >> if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
> > >> page_private_gcing(page))
> > >> return true;
> > >> @@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
> > >>
> > >> bio_for_each_segment_all(bvec, bio, iter_all) {
> > >> struct page *page = bvec->bv_page;
> > >> - enum count_type type = WB_DATA_TYPE(page);
> > >> + enum count_type type = WB_DATA_TYPE(page, false);
> > >>
> > >> if (page_private_dummy(page)) {
> > >> clear_page_private_dummy(page);
> > >> @@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
> > >> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
> > >>
> > >> inc_page_count(fio->sbi, is_read_io(fio->op) ?
> > >> - __read_io_type(page) : WB_DATA_TYPE(fio->page));
> > >> + __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
> > >>
> > >> if (is_read_io(bio_op(bio)))
> > >> f2fs_submit_read_bio(fio->sbi, bio, fio->type);
> > >> @@ -973,7 +971,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> > >> if (fio->io_wbc)
> > >> wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
> > >>
> > >> - inc_page_count(fio->sbi, WB_DATA_TYPE(page));
> > >> + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
> > >>
> > >> *fio->last_block = fio->new_blkaddr;
> > >> *fio->bio = bio;
> > >> @@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >> enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
> > >> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> > >> struct page *bio_page;
> > >> + enum count_type type;
> > >>
> > >> f2fs_bug_on(sbi, is_read_io(fio->op));
> > >>
> > >> @@ -1046,7 +1045,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >> /* set submitted = true as a return value */
> > >> fio->submitted = 1;
> > >>
> > >> - inc_page_count(sbi, WB_DATA_TYPE(bio_page));
> > >> + type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> > >> + inc_page_count(sbi, type);
> > >>
> > >> if (io->bio &&
> > >> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> > >> @@ -1059,7 +1059,8 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > >> if (F2FS_IO_ALIGNED(sbi) &&
> > >> (fio->type == DATA || fio->type == NODE) &&
> > >> fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
> > >> - dec_page_count(sbi, WB_DATA_TYPE(bio_page));
> > >> + dec_page_count(sbi, WB_DATA_TYPE(bio_page,
> > >> + fio->compressed_page));
> > >> fio->retry = 1;
> > >> goto skip;
> > >> }
> > >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > >> index 65294e3b0bef..50f3d546ded8 100644
> > >> --- a/fs/f2fs/f2fs.h
> > >> +++ b/fs/f2fs/f2fs.h
> > >> @@ -1080,7 +1080,8 @@ struct f2fs_sm_info {
> > >> * f2fs monitors the number of several block types such as on-writeback,
> > >> * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
> > >> */
> > >> -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> > >> +#define WB_DATA_TYPE(p, f) \
> > >> + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
> > >> enum count_type {
> > >> F2FS_DIRTY_DENTS,
> > >> F2FS_DIRTY_DATA,
> > >> @@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
> > >> */
> > >> int __init f2fs_init_bioset(void);
> > >> void f2fs_destroy_bioset(void);
> > >> +bool f2fs_is_cp_guaranteed(struct page *page);
> > >> int f2fs_init_bio_entry_cache(void);
> > >> void f2fs_destroy_bio_entry_cache(void);
> > >> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> > >> --
> > >> 2.40.1
> > >>
> > >>
> > >>
> > >> _______________________________________________
> > >> Linux-f2fs-devel mailing list
> > >> [email protected]
> > >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On Thu, Jan 11, 2024 at 5:06 PM Chao Yu <[email protected]> wrote:
>
> On 2024/1/12 1:15, Daeho Jeong wrote:
> > On Wed, Jan 10, 2024 at 5:33 PM Chao Yu <[email protected]> wrote:
> >>
> >> On 2024/1/11 9:18, Daeho Jeong wrote:
> >>> On Thu, Dec 28, 2023 at 6:33 AM Chao Yu <[email protected]> wrote:
> >>>>
> >>>> From: Sheng Yong <[email protected]>
> >>>>
> >>>> A compressed cluster may not be released because
> >>>> release_compress_blocks() can fail; fix this by handling the
> >>>> reserved compressed cluster correctly in reserve_compress_blocks().
> >>>>
> >>>> Fixes: 4c8ff7095bef ("f2fs: support data compression")
> >>>> Signed-off-by: Sheng Yong <[email protected]>
> >>>> Signed-off-by: Chao Yu <[email protected]>
> >>>> ---
> >>>> fs/f2fs/file.c | 12 ++++++++++++
> >>>> 1 file changed, 12 insertions(+)
> >>>>
> >>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>> index 026d05a7edd8..782ae3be48f6 100644
> >>>> --- a/fs/f2fs/file.c
> >>>> +++ b/fs/f2fs/file.c
> >>>> @@ -3624,6 +3624,15 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> >>>> goto next;
> >>>> }
> >>>>
> >>>> + /*
> >>>> + * compressed cluster was not released because
> >>>> + * release_compress_blocks() failed on it.
> >>>> + */
> >>>> + if (blkaddr == NEW_ADDR) {
> >>>> + compr_blocks++;
> >>>> + continue;
> >>>> + }
> >>>> +
> >>>> if (__is_valid_data_blkaddr(blkaddr)) {
> >>>> compr_blocks++;
> >>>> continue;
> >>>
> >>> How about merging two conditions like "blkaddr == NEW_ADDR ||
> >>> __is_valid_data_blkaddr(blkaddr)"?
> >>
> >> Oh, sure.
> >>
> >>>
> >>>> @@ -3633,6 +3642,9 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
> >>>> }
> >>>>
> >>>> reserved = cluster_size - compr_blocks;
> >>>> + if (!reserved)
> >>>> + goto next;
> >>>> +
> >>>
> >>> How can the reserved variable be zero?
> >>
> >> I guess it can happen if a cluster was not released during
> >> release_compress_blocks(): then all blocks in the cluster should
> >> have already been reserved, so in this round of reserving we need
> >> to skip reserving blocks for it, right?
> >
> > Let's assume cluster_size is 4. How can compr_blocks be 4?
> >
> > if (i == 0) {
> > if (blkaddr == COMPRESS_ADDR)
> > continue;
> > dn->ofs_in_node += cluster_size;
> > goto next;
> > }
> >
> > We skip the block having COMPRESS_ADDR when counting compr_blocks.
> > So, the maximum value of compr_blocks should be 3, right?
>
> Ah, got it, and I think you're right.
>
> Should we fix the condition as below?
>
> /* for the case all blocks in cluster were reserved */
> if (reserved == 1)
> goto next;
It looks good to me.
>
> Thanks,
>
> >
> >>
> >> Thanks,
> >>
> >>>
> >>>> ret = inc_valid_block_count(sbi, dn->inode, &reserved);
> >>>> if (ret)
> >>>> return ret;
> >>>> --
> >>>> 2.40.1
> >>>>
> >>>>
> >>>>
> >>>> _______________________________________________
> >>>> Linux-f2fs-devel mailing list
> >>>> [email protected]
> >>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel