2020-06-18 06:38:51

by Chao Yu

[permalink] [raw]
Subject: [PATCH 1/5] f2fs: fix to wait page writeback before update

to make page content stable for special device like raid.

Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/dir.c | 2 ++
fs/f2fs/extent_cache.c | 18 +++++++++---------
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/file.c | 1 +
fs/f2fs/inline.c | 2 ++
fs/f2fs/inode.c | 3 +--
6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d35976785e8c..91e86747a604 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
if (IS_ERR(dentry_page))
return PTR_ERR(dentry_page);

+ f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
+
dentry_blk = page_address(dentry_page);

make_dentry_ptr_block(NULL, &d, dentry_blk);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index e60078460ad1..686c68b98610 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
}

/* return true, if inode page is changed */
-static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
@@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
if (!f2fs_may_extent_tree(inode)) {
/* drop largest extent */
if (i_ext && i_ext->len) {
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
i_ext->len = 0;
- return true;
+ set_page_dirty(ipage);
+ return;
}
- return false;
+ return;
}

et = __grab_extent_tree(inode);

if (!i_ext || !i_ext->len)
- return false;
+ return;

get_extent_info(&ei, i_ext);

@@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
}
out:
write_unlock(&et->lock);
- return false;
}

-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
{
- bool ret = __f2fs_init_extent_tree(inode, i_ext);
+ __f2fs_init_extent_tree(inode, ipage);

if (!F2FS_I(inode)->extent_tree)
set_inode_flag(inode, FI_NO_EXTENT);
-
- return ret;
}

static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b35a50f4953c..326c12fa0da5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root_cached *root);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
void f2fs_drop_extent_tree(struct inode *inode);
unsigned int f2fs_destroy_extent_node(struct inode *inode);
void f2fs_destroy_extent_tree(struct inode *inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3268f8dd59bb..1862073b96d2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
f2fs_put_page(psrc, 1);
return PTR_ERR(pdst);
}
+ f2fs_wait_on_page_writeback(pdst, DATA, true, true);
f2fs_copy_page(psrc, pdst);
set_page_dirty(pdst);
f2fs_put_page(pdst, 1);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index dbade310dc79..4bcbc486c9e2 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr d;
void *inline_dentry;

+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
+
inline_dentry = inline_data_addr(inode, ipage);

make_dentry_ptr_inline(inode, &d, inline_dentry);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 44582a4db513..7c156eb26dd7 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;

- if (f2fs_init_extent_tree(inode, &ri->i_ext))
- set_page_dirty(node_page);
+ f2fs_init_extent_tree(inode, node_page);

get_inline_info(inode, ri);

--
2.18.0.rc1


2020-06-18 06:38:55

by Chao Yu

[permalink] [raw]
Subject: [PATCH 5/5] f2fs: show more debug info for per-temperature log

- Add to account and show per-log dirty_seg, full_seg and valid_blocks
in debugfs.
- reformat printed info.

TYPE segno secno zoneno dirty_seg full_seg valid_blk
- COLD data: 1523 1523 1523 1 0 399
- WARM data: 769 769 769 20 255 133098
- HOT data: 767 767 767 9 0 167
- Dir dnode: 22 22 22 3 0 70
- File dnode: 722 722 722 14 10 6505
- Indir nodes: 2 2 2 1 0 3

Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/debug.c | 67 ++++++++++++++++++++++++++++++++++++++++---------
fs/f2fs/f2fs.h | 3 +++
2 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0dbcb0f9c019..aa1fd2de11ba 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -174,6 +174,29 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = META_CP; i < META_MAX; i++)
si->meta_count[i] = atomic_read(&sbi->meta_count[i]);

+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ si->dirty_seg[i] = 0;
+ si->full_seg[i] = 0;
+ si->valid_blks[i] = 0;
+ }
+
+ for (i = 0; i < MAIN_SEGS(sbi); i++) {
+ int blks = get_seg_entry(sbi, i)->valid_blocks;
+ int type = get_seg_entry(sbi, i)->type;
+
+ if (!blks)
+ continue;
+
+ if (IS_CURSEG(sbi, i))
+ continue;
+
+ if (blks == sbi->blocks_per_seg)
+ si->full_seg[type]++;
+ else
+ si->dirty_seg[type]++;
+ si->valid_blks[type] += blks;
+ }
+
for (i = 0; i < 2; i++) {
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
@@ -329,30 +352,50 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
- seq_printf(s, " - COLD data: %d, %d, %d\n",
+ seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n",
+ "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
+ seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_DATA],
si->cursec[CURSEG_COLD_DATA],
- si->curzone[CURSEG_COLD_DATA]);
- seq_printf(s, " - WARM data: %d, %d, %d\n",
+ si->curzone[CURSEG_COLD_DATA],
+ si->dirty_seg[CURSEG_COLD_DATA],
+ si->full_seg[CURSEG_COLD_DATA],
+ si->valid_blks[CURSEG_COLD_DATA]);
+ seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_DATA],
si->cursec[CURSEG_WARM_DATA],
- si->curzone[CURSEG_WARM_DATA]);
- seq_printf(s, " - HOT data: %d, %d, %d\n",
+ si->curzone[CURSEG_WARM_DATA],
+ si->dirty_seg[CURSEG_WARM_DATA],
+ si->full_seg[CURSEG_WARM_DATA],
+ si->valid_blks[CURSEG_WARM_DATA]);
+ seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_DATA],
si->cursec[CURSEG_HOT_DATA],
- si->curzone[CURSEG_HOT_DATA]);
- seq_printf(s, " - Dir dnode: %d, %d, %d\n",
+ si->curzone[CURSEG_HOT_DATA],
+ si->dirty_seg[CURSEG_HOT_DATA],
+ si->full_seg[CURSEG_HOT_DATA],
+ si->valid_blks[CURSEG_HOT_DATA]);
+ seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_NODE],
si->cursec[CURSEG_HOT_NODE],
- si->curzone[CURSEG_HOT_NODE]);
- seq_printf(s, " - File dnode: %d, %d, %d\n",
+ si->curzone[CURSEG_HOT_NODE],
+ si->dirty_seg[CURSEG_HOT_NODE],
+ si->full_seg[CURSEG_HOT_NODE],
+ si->valid_blks[CURSEG_HOT_NODE]);
+ seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_NODE],
si->cursec[CURSEG_WARM_NODE],
- si->curzone[CURSEG_WARM_NODE]);
- seq_printf(s, " - Indir nodes: %d, %d, %d\n",
+ si->curzone[CURSEG_WARM_NODE],
+ si->dirty_seg[CURSEG_WARM_NODE],
+ si->full_seg[CURSEG_WARM_NODE],
+ si->valid_blks[CURSEG_WARM_NODE]);
+ seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_NODE],
si->cursec[CURSEG_COLD_NODE],
- si->curzone[CURSEG_COLD_NODE]);
+ si->curzone[CURSEG_COLD_NODE],
+ si->dirty_seg[CURSEG_COLD_NODE],
+ si->full_seg[CURSEG_COLD_NODE],
+ si->valid_blks[CURSEG_COLD_NODE]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 72a667f1d678..70565d81320b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3536,6 +3536,9 @@ struct f2fs_stat_info {
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
+ unsigned int dirty_seg[NR_CURSEG_TYPE];
+ unsigned int full_seg[NR_CURSEG_TYPE];
+ unsigned int valid_blks[NR_CURSEG_TYPE];

unsigned int meta_count[META_MAX];
unsigned int segment_count[2];
--
2.18.0.rc1

2020-06-18 06:38:55

by Chao Yu

[permalink] [raw]
Subject: [PATCH 4/5] f2fs: clean up parameter of f2fs_allocate_data_block()

Use validation of @fio to inidcate whether caller want to serialize IOs
in io.io_list or not, then @add_list will be redundant, remove it.

Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/data.c | 2 +-
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/gc.c | 2 +-
fs/f2fs/segment.c | 6 +++---
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cbdf062d3562..dfd322515357 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1366,7 +1366,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL, false);
+ &sum, seg_type, NULL);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b74f0f5fcf3a..72a667f1d678 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3350,7 +3350,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool add_list);
+ struct f2fs_io_info *fio);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3d27b939627e..bfc4eb2d8038 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -859,7 +859,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
}

f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA, NULL, false);
+ &sum, CURSEG_COLD_DATA, NULL);

fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index cb861ed98ee3..113114f98087 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3089,7 +3089,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool add_list)
+ struct f2fs_io_info *fio)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -3157,7 +3157,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (F2FS_IO_ALIGNED(sbi))
fio->retry = false;

- if (add_list) {
+ if (fio) {
struct f2fs_bio_info *io;

INIT_LIST_HEAD(&fio->list);
@@ -3206,7 +3206,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
down_read(&fio->sbi->io_order_lock);
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio, true);
+ &fio->new_blkaddr, sum, type, fio);
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(fio->sbi),
fio->old_blkaddr, fio->old_blkaddr);
--
2.18.0.rc1

2020-06-18 06:40:15

by Chao Yu

[permalink] [raw]
Subject: [PATCH 2/5] f2fs: add prefix for exported symbols

to avoid polluting global symbol namespace.

Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 4 ++--
fs/f2fs/data.c | 14 +++++++-------
fs/f2fs/f2fs.h | 4 ++--
fs/f2fs/file.c | 4 ++--
fs/f2fs/gc.c | 2 +-
fs/f2fs/segment.c | 2 +-
6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 1e02a8c106b0..36b51795b0c3 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -949,7 +949,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
}

if (prealloc) {
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);

set_new_dnode(&dn, cc->inode, NULL, NULL, 0);

@@ -964,7 +964,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
break;
}

- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
}

if (likely(!ret)) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 326c63879ddc..c78ce08f6400 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1426,7 +1426,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err;
}

-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (lock)
@@ -1491,7 +1491,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

next_dnode:
if (map->m_may_create)
- __do_map_lock(sbi, flag, true);
+ f2fs_do_map_lock(sbi, flag, true);

/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1640,7 +1640,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_put_dnode(&dn);

if (map->m_may_create) {
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
goto next_dnode;
@@ -1666,7 +1666,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_put_dnode(&dn);
unlock_out:
if (map->m_may_create) {
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
@@ -3217,7 +3217,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,

if (f2fs_has_inline_data(inode) ||
(pos & PAGE_MASK) >= i_size_read(inode)) {
- __do_map_lock(sbi, flag, true);
+ f2fs_do_map_lock(sbi, flag, true);
locked = true;
}

@@ -3254,7 +3254,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
true);
WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
locked = true;
@@ -3270,7 +3270,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
- __do_map_lock(sbi, flag, false);
+ f2fs_do_map_lock(sbi, flag, false);
return err;
}

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 326c12fa0da5..b74f0f5fcf3a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3325,7 +3325,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
@@ -3448,7 +3448,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
struct page *f2fs_get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size);
int f2fs_do_write_data_page(struct f2fs_io_info *fio);
-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1862073b96d2..9d95e3bd4dfe 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -105,11 +105,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)

if (need_alloc) {
/* block allocation */
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_block(&dn, page->index);
f2fs_put_dnode(&dn);
- __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+ f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5b95d5a146eb..3d27b939627e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1434,7 +1434,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,

/* Move out cursegs from the target range */
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
- allocate_segment_for_resize(sbi, type, start, end);
+ f2fs_allocate_segment_for_resize(sbi, type, start, end);

/* do GC to move out valid blocks in the range */
for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 196f31503511..5b2a6f865a6d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2674,7 +2674,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}

-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
--
2.18.0.rc1

2020-06-18 06:40:56

by Chao Yu

[permalink] [raw]
Subject: [PATCH 3/5] f2fs: shrink node_write lock coverage

- to avoid race between checkpoint and quota file writeback, it
just needs to hold read lock of node_write in writeback path.
- node_write lock has covered all LFS data write paths, it's not
necessary, we only need to hold node_write lock at write path of
quota file.

Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/compress.c | 18 +++++++++++++++---
fs/f2fs/data.c | 12 ++++++++++++
fs/f2fs/segment.c | 11 -----------
3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 36b51795b0c3..3ff6c0305ec6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1096,8 +1096,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
loff_t psize;
int i, err;

- if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi))
+ if (IS_NOQUOTA(inode)) {
+ /*
+ * We need to wait for node_write to avoid block allocation during
+ * checkpoint. This can only happen to quota writes which can cause
+ * the below discard race condition.
+ */
+ down_read(&sbi->node_write);
+ } else if (!f2fs_trylock_op(sbi)) {
return -EAGAIN;
+ }

set_new_dnode(&dn, cc->inode, NULL, NULL, 0);

@@ -1203,7 +1211,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);

f2fs_put_dnode(&dn);
- if (!IS_NOQUOTA(inode))
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+ else
f2fs_unlock_op(sbi);

spin_lock(&fi->i_size_lock);
@@ -1230,7 +1240,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
- if (!IS_NOQUOTA(inode))
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+ else
f2fs_unlock_op(sbi);
return -EAGAIN;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c78ce08f6400..cbdf062d3562 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2719,8 +2719,20 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,

/* Dentry/quota blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
+ /*
+ * We need to wait for node_write to avoid block allocation during
+ * checkpoint. This can only happen to quota writes which can cause
+ * the below discard race condition.
+ */
+ if (IS_NOQUOTA(inode))
+ down_read(&sbi->node_write);
+
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
+
+ if (IS_NOQUOTA(inode))
+ up_read(&sbi->node_write);
+
goto done;
}

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b2a6f865a6d..cb861ed98ee3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3107,14 +3107,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
type = CURSEG_COLD_DATA;
}

- /*
- * We need to wait for node_write to avoid block allocation during
- * checkpoint. This can only happen to quota writes which can cause
- * the below discard race condition.
- */
- if (IS_DATASEG(type))
- down_write(&sbi->node_write);
-
down_read(&SM_I(sbi)->curseg_lock);

mutex_lock(&curseg->curseg_mutex);
@@ -3180,9 +3172,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,

up_read(&SM_I(sbi)->curseg_lock);

- if (IS_DATASEG(type))
- up_write(&sbi->node_write);
-
if (put_pin_sem)
up_read(&sbi->pin_sem);
}
--
2.18.0.rc1

2020-06-19 00:03:33

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

Hi Chao,

On 06/18, Chao Yu wrote:
> to make page content stable for special device like raid.

Could you elaborate the problem a bit?

>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/dir.c | 2 ++
> fs/f2fs/extent_cache.c | 18 +++++++++---------
> fs/f2fs/f2fs.h | 2 +-
> fs/f2fs/file.c | 1 +
> fs/f2fs/inline.c | 2 ++
> fs/f2fs/inode.c | 3 +--
> 6 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> index d35976785e8c..91e86747a604 100644
> --- a/fs/f2fs/dir.c
> +++ b/fs/f2fs/dir.c
> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
> if (IS_ERR(dentry_page))
> return PTR_ERR(dentry_page);
>
> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
> +
> dentry_blk = page_address(dentry_page);
>
> make_dentry_ptr_block(NULL, &d, dentry_blk);
> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> index e60078460ad1..686c68b98610 100644
> --- a/fs/f2fs/extent_cache.c
> +++ b/fs/f2fs/extent_cache.c
> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
> }
>
> /* return true, if inode page is changed */
> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
> struct extent_tree *et;
> struct extent_node *en;
> struct extent_info ei;
> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> if (!f2fs_may_extent_tree(inode)) {
> /* drop largest extent */
> if (i_ext && i_ext->len) {
> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> i_ext->len = 0;
> - return true;
> + set_page_dirty(ipage);
> + return;
> }
> - return false;
> + return;
> }
>
> et = __grab_extent_tree(inode);
>
> if (!i_ext || !i_ext->len)
> - return false;
> + return;
>
> get_extent_info(&ei, i_ext);
>
> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> }
> out:
> write_unlock(&et->lock);
> - return false;
> }
>
> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> {
> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
> + __f2fs_init_extent_tree(inode, ipage);
>
> if (!F2FS_I(inode)->extent_tree)
> set_inode_flag(inode, FI_NO_EXTENT);
> -
> - return ret;
> }
>
> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b35a50f4953c..326c12fa0da5 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> struct rb_root_cached *root);
> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
> void f2fs_drop_extent_tree(struct inode *inode);
> unsigned int f2fs_destroy_extent_node(struct inode *inode);
> void f2fs_destroy_extent_tree(struct inode *inode);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 3268f8dd59bb..1862073b96d2 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
> f2fs_put_page(psrc, 1);
> return PTR_ERR(pdst);
> }
> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
> f2fs_copy_page(psrc, pdst);
> set_page_dirty(pdst);
> f2fs_put_page(pdst, 1);
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index dbade310dc79..4bcbc486c9e2 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> struct f2fs_dentry_ptr d;
> void *inline_dentry;
>
> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> +
> inline_dentry = inline_data_addr(inode, ipage);
>
> make_dentry_ptr_inline(inode, &d, inline_dentry);
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 44582a4db513..7c156eb26dd7 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
> fi->i_pino = le32_to_cpu(ri->i_pino);
> fi->i_dir_level = ri->i_dir_level;
>
> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
> - set_page_dirty(node_page);
> + f2fs_init_extent_tree(inode, node_page);
>
> get_inline_info(inode, ri);
>
> --
> 2.18.0.rc1

2020-06-19 04:13:52

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

Hi Jaegeuk,

On 2020/6/19 7:59, Jaegeuk Kim wrote:
> Hi Chao,
>
> On 06/18, Chao Yu wrote:
>> to make page content stable for special device like raid.
>
> Could you elaborate the problem a bit?

Some devices like raid5 wants page content to be stable, because
it will calculate parity info based page content, if page is not
stable, parity info could be corrupted, result in data inconsistency
in stripe.

Thanks,

>
>>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/dir.c | 2 ++
>> fs/f2fs/extent_cache.c | 18 +++++++++---------
>> fs/f2fs/f2fs.h | 2 +-
>> fs/f2fs/file.c | 1 +
>> fs/f2fs/inline.c | 2 ++
>> fs/f2fs/inode.c | 3 +--
>> 6 files changed, 16 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>> index d35976785e8c..91e86747a604 100644
>> --- a/fs/f2fs/dir.c
>> +++ b/fs/f2fs/dir.c
>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
>> if (IS_ERR(dentry_page))
>> return PTR_ERR(dentry_page);
>>
>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
>> +
>> dentry_blk = page_address(dentry_page);
>>
>> make_dentry_ptr_block(NULL, &d, dentry_blk);
>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
>> index e60078460ad1..686c68b98610 100644
>> --- a/fs/f2fs/extent_cache.c
>> +++ b/fs/f2fs/extent_cache.c
>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
>> }
>>
>> /* return true, if inode page is changed */
>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>> {
>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
>> struct extent_tree *et;
>> struct extent_node *en;
>> struct extent_info ei;
>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>> if (!f2fs_may_extent_tree(inode)) {
>> /* drop largest extent */
>> if (i_ext && i_ext->len) {
>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>> i_ext->len = 0;
>> - return true;
>> + set_page_dirty(ipage);
>> + return;
>> }
>> - return false;
>> + return;
>> }
>>
>> et = __grab_extent_tree(inode);
>>
>> if (!i_ext || !i_ext->len)
>> - return false;
>> + return;
>>
>> get_extent_info(&ei, i_ext);
>>
>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>> }
>> out:
>> write_unlock(&et->lock);
>> - return false;
>> }
>>
>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>> {
>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
>> + __f2fs_init_extent_tree(inode, ipage);
>>
>> if (!F2FS_I(inode)->extent_tree)
>> set_inode_flag(inode, FI_NO_EXTENT);
>> -
>> - return ret;
>> }
>>
>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index b35a50f4953c..326c12fa0da5 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
>> struct rb_root_cached *root);
>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
>> void f2fs_drop_extent_tree(struct inode *inode);
>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
>> void f2fs_destroy_extent_tree(struct inode *inode);
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 3268f8dd59bb..1862073b96d2 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>> f2fs_put_page(psrc, 1);
>> return PTR_ERR(pdst);
>> }
>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>> f2fs_copy_page(psrc, pdst);
>> set_page_dirty(pdst);
>> f2fs_put_page(pdst, 1);
>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>> index dbade310dc79..4bcbc486c9e2 100644
>> --- a/fs/f2fs/inline.c
>> +++ b/fs/f2fs/inline.c
>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>> struct f2fs_dentry_ptr d;
>> void *inline_dentry;
>>
>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>> +
>> inline_dentry = inline_data_addr(inode, ipage);
>>
>> make_dentry_ptr_inline(inode, &d, inline_dentry);
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index 44582a4db513..7c156eb26dd7 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
>> fi->i_pino = le32_to_cpu(ri->i_pino);
>> fi->i_dir_level = ri->i_dir_level;
>>
>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
>> - set_page_dirty(node_page);
>> + f2fs_init_extent_tree(inode, node_page);
>>
>> get_inline_info(inode, ri);
>>
>> --
>> 2.18.0.rc1
> .
>

2020-06-19 06:01:55

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 3/5] f2fs: shrink node_write lock coverage

On 06/18, Chao Yu wrote:
> - to avoid race between checkpoint and quota file writeback, it
> just needs to hold read lock of node_write in writeback path.
> - node_write lock has covered all LFS data write paths, it's not
> necessary, we only need to hold node_write lock at write path of
> quota file.

I've added this:

This refactors commit ca7f76e68074 ("f2fs: fix wrong discard space").

>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/compress.c | 18 +++++++++++++++---
> fs/f2fs/data.c | 12 ++++++++++++
> fs/f2fs/segment.c | 11 -----------
> 3 files changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
> index 36b51795b0c3..3ff6c0305ec6 100644
> --- a/fs/f2fs/compress.c
> +++ b/fs/f2fs/compress.c
> @@ -1096,8 +1096,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
> loff_t psize;
> int i, err;
>
> - if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi))
> + if (IS_NOQUOTA(inode)) {
> + /*
> + * We need to wait for node_write to avoid block allocation during
> + * checkpoint. This can only happen to quota writes which can cause
> + * the below discard race condition.
> + */
> + down_read(&sbi->node_write);
> + } else if (!f2fs_trylock_op(sbi)) {
> return -EAGAIN;
> + }
>
> set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
>
> @@ -1203,7 +1211,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
> set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
>
> f2fs_put_dnode(&dn);
> - if (!IS_NOQUOTA(inode))
> + if (IS_NOQUOTA(inode))
> + up_read(&sbi->node_write);
> + else
> f2fs_unlock_op(sbi);
>
> spin_lock(&fi->i_size_lock);
> @@ -1230,7 +1240,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
> out_put_dnode:
> f2fs_put_dnode(&dn);
> out_unlock_op:
> - if (!IS_NOQUOTA(inode))
> + if (IS_NOQUOTA(inode))
> + up_read(&sbi->node_write);
> + else
> f2fs_unlock_op(sbi);
> return -EAGAIN;
> }
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index c78ce08f6400..cbdf062d3562 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2719,8 +2719,20 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
>
> /* Dentry/quota blocks are controlled by checkpoint */
> if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
> + /*
> + * We need to wait for node_write to avoid block allocation during
> + * checkpoint. This can only happen to quota writes which can cause
> + * the below discard race condition.
> + */
> + if (IS_NOQUOTA(inode))
> + down_read(&sbi->node_write);
> +
> fio.need_lock = LOCK_DONE;
> err = f2fs_do_write_data_page(&fio);
> +
> + if (IS_NOQUOTA(inode))
> + up_read(&sbi->node_write);
> +
> goto done;
> }
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 5b2a6f865a6d..cb861ed98ee3 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3107,14 +3107,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> type = CURSEG_COLD_DATA;
> }
>
> - /*
> - * We need to wait for node_write to avoid block allocation during
> - * checkpoint. This can only happen to quota writes which can cause
> - * the below discard race condition.
> - */
> - if (IS_DATASEG(type))
> - down_write(&sbi->node_write);
> -
> down_read(&SM_I(sbi)->curseg_lock);
>
> mutex_lock(&curseg->curseg_mutex);
> @@ -3180,9 +3172,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>
> up_read(&SM_I(sbi)->curseg_lock);
>
> - if (IS_DATASEG(type))
> - up_write(&sbi->node_write);
> -
> if (put_pin_sem)
> up_read(&sbi->pin_sem);
> }
> --
> 2.18.0.rc1

2020-06-19 06:02:43

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 06/19, Chao Yu wrote:
> Hi Jaegeuk,
>
> On 2020/6/19 7:59, Jaegeuk Kim wrote:
> > Hi Chao,
> >
> > On 06/18, Chao Yu wrote:
> >> to make page content stable for special device like raid.
> >
> > Could you elaborate the problem a bit?
>
> Some devices like raid5 wants page content to be stable, because
> it will calculate parity info based page content, if page is not
> stable, parity info could be corrupted, result in data inconsistency
> in stripe.

I don't get the point, since those pages are brand new pages which were not
modified before. If it's on writeback, we should not modify them regardless
of whatever raid configuration. For example, f2fs_new_node_page() waits for
writeback. Am I missing something?

>
> Thanks,
>
> >
> >>
> >> Signed-off-by: Chao Yu <[email protected]>
> >> ---
> >> fs/f2fs/dir.c | 2 ++
> >> fs/f2fs/extent_cache.c | 18 +++++++++---------
> >> fs/f2fs/f2fs.h | 2 +-
> >> fs/f2fs/file.c | 1 +
> >> fs/f2fs/inline.c | 2 ++
> >> fs/f2fs/inode.c | 3 +--
> >> 6 files changed, 16 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >> index d35976785e8c..91e86747a604 100644
> >> --- a/fs/f2fs/dir.c
> >> +++ b/fs/f2fs/dir.c
> >> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
> >> if (IS_ERR(dentry_page))
> >> return PTR_ERR(dentry_page);
> >>
> >> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
> >> +
> >> dentry_blk = page_address(dentry_page);
> >>
> >> make_dentry_ptr_block(NULL, &d, dentry_blk);
> >> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> >> index e60078460ad1..686c68b98610 100644
> >> --- a/fs/f2fs/extent_cache.c
> >> +++ b/fs/f2fs/extent_cache.c
> >> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
> >> }
> >>
> >> /* return true, if inode page is changed */
> >> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >> {
> >> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
> >> struct extent_tree *et;
> >> struct extent_node *en;
> >> struct extent_info ei;
> >> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >> if (!f2fs_may_extent_tree(inode)) {
> >> /* drop largest extent */
> >> if (i_ext && i_ext->len) {
> >> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >> i_ext->len = 0;
> >> - return true;
> >> + set_page_dirty(ipage);
> >> + return;
> >> }
> >> - return false;
> >> + return;
> >> }
> >>
> >> et = __grab_extent_tree(inode);
> >>
> >> if (!i_ext || !i_ext->len)
> >> - return false;
> >> + return;
> >>
> >> get_extent_info(&ei, i_ext);
> >>
> >> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >> }
> >> out:
> >> write_unlock(&et->lock);
> >> - return false;
> >> }
> >>
> >> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >> {
> >> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
> >> + __f2fs_init_extent_tree(inode, ipage);
> >>
> >> if (!F2FS_I(inode)->extent_tree)
> >> set_inode_flag(inode, FI_NO_EXTENT);
> >> -
> >> - return ret;
> >> }
> >>
> >> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> >> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >> index b35a50f4953c..326c12fa0da5 100644
> >> --- a/fs/f2fs/f2fs.h
> >> +++ b/fs/f2fs/f2fs.h
> >> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
> >> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >> struct rb_root_cached *root);
> >> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
> >> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
> >> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
> >> void f2fs_drop_extent_tree(struct inode *inode);
> >> unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >> void f2fs_destroy_extent_tree(struct inode *inode);
> >> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >> index 3268f8dd59bb..1862073b96d2 100644
> >> --- a/fs/f2fs/file.c
> >> +++ b/fs/f2fs/file.c
> >> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
> >> f2fs_put_page(psrc, 1);
> >> return PTR_ERR(pdst);
> >> }
> >> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
> >> f2fs_copy_page(psrc, pdst);
> >> set_page_dirty(pdst);
> >> f2fs_put_page(pdst, 1);
> >> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >> index dbade310dc79..4bcbc486c9e2 100644
> >> --- a/fs/f2fs/inline.c
> >> +++ b/fs/f2fs/inline.c
> >> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >> struct f2fs_dentry_ptr d;
> >> void *inline_dentry;
> >>
> >> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >> +
> >> inline_dentry = inline_data_addr(inode, ipage);
> >>
> >> make_dentry_ptr_inline(inode, &d, inline_dentry);
> >> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >> index 44582a4db513..7c156eb26dd7 100644
> >> --- a/fs/f2fs/inode.c
> >> +++ b/fs/f2fs/inode.c
> >> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
> >> fi->i_pino = le32_to_cpu(ri->i_pino);
> >> fi->i_dir_level = ri->i_dir_level;
> >>
> >> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
> >> - set_page_dirty(node_page);
> >> + f2fs_init_extent_tree(inode, node_page);
> >>
> >> get_inline_info(inode, ri);
> >>
> >> --
> >> 2.18.0.rc1
> > .
> >

2020-06-19 11:14:14

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 2020/6/19 13:49, Jaegeuk Kim wrote:
> On 06/19, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> On 06/18, Chao Yu wrote:
>>>> to make page content stable for special device like raid.
>>>
>>> Could you elaborate the problem a bit?
>>
>> Some devices like raid5 wants page content to be stable, because
>> it will calculate parity info based page content, if page is not
>> stable, parity info could be corrupted, result in data inconsistency
>> in stripe.
>
> I don't get the point, since those pages are brand new pages which were not
> modified before. If it's on writeback, we should not modify them regardless
> of whatever raid configuration. For example, f2fs_new_node_page() waits for
> writeback. Am I missing something?

I think we should use f2fs_bug_on(, PageWriteback()) rather than
f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
For other paths, we can keep rule that waiting for writeback before updating.

How do you think?

Thanks,

>
>>
>> Thanks,
>>
>>>
>>>>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> ---
>>>> fs/f2fs/dir.c | 2 ++
>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
>>>> fs/f2fs/f2fs.h | 2 +-
>>>> fs/f2fs/file.c | 1 +
>>>> fs/f2fs/inline.c | 2 ++
>>>> fs/f2fs/inode.c | 3 +--
>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>>> index d35976785e8c..91e86747a604 100644
>>>> --- a/fs/f2fs/dir.c
>>>> +++ b/fs/f2fs/dir.c
>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
>>>> if (IS_ERR(dentry_page))
>>>> return PTR_ERR(dentry_page);
>>>>
>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
>>>> +
>>>> dentry_blk = page_address(dentry_page);
>>>>
>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
>>>> index e60078460ad1..686c68b98610 100644
>>>> --- a/fs/f2fs/extent_cache.c
>>>> +++ b/fs/f2fs/extent_cache.c
>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
>>>> }
>>>>
>>>> /* return true, if inode page is changed */
>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>> {
>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
>>>> struct extent_tree *et;
>>>> struct extent_node *en;
>>>> struct extent_info ei;
>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>> if (!f2fs_may_extent_tree(inode)) {
>>>> /* drop largest extent */
>>>> if (i_ext && i_ext->len) {
>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>> i_ext->len = 0;
>>>> - return true;
>>>> + set_page_dirty(ipage);
>>>> + return;
>>>> }
>>>> - return false;
>>>> + return;
>>>> }
>>>>
>>>> et = __grab_extent_tree(inode);
>>>>
>>>> if (!i_ext || !i_ext->len)
>>>> - return false;
>>>> + return;
>>>>
>>>> get_extent_info(&ei, i_ext);
>>>>
>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>> }
>>>> out:
>>>> write_unlock(&et->lock);
>>>> - return false;
>>>> }
>>>>
>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>> {
>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
>>>> + __f2fs_init_extent_tree(inode, ipage);
>>>>
>>>> if (!F2FS_I(inode)->extent_tree)
>>>> set_inode_flag(inode, FI_NO_EXTENT);
>>>> -
>>>> - return ret;
>>>> }
>>>>
>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index b35a50f4953c..326c12fa0da5 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
>>>> struct rb_root_cached *root);
>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
>>>> void f2fs_drop_extent_tree(struct inode *inode);
>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
>>>> void f2fs_destroy_extent_tree(struct inode *inode);
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 3268f8dd59bb..1862073b96d2 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>>>> f2fs_put_page(psrc, 1);
>>>> return PTR_ERR(pdst);
>>>> }
>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>>>> f2fs_copy_page(psrc, pdst);
>>>> set_page_dirty(pdst);
>>>> f2fs_put_page(pdst, 1);
>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>> index dbade310dc79..4bcbc486c9e2 100644
>>>> --- a/fs/f2fs/inline.c
>>>> +++ b/fs/f2fs/inline.c
>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>> struct f2fs_dentry_ptr d;
>>>> void *inline_dentry;
>>>>
>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>> +
>>>> inline_dentry = inline_data_addr(inode, ipage);
>>>>
>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>> index 44582a4db513..7c156eb26dd7 100644
>>>> --- a/fs/f2fs/inode.c
>>>> +++ b/fs/f2fs/inode.c
>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
>>>> fi->i_dir_level = ri->i_dir_level;
>>>>
>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
>>>> - set_page_dirty(node_page);
>>>> + f2fs_init_extent_tree(inode, node_page);
>>>>
>>>> get_inline_info(inode, ri);
>>>>
>>>> --
>>>> 2.18.0.rc1
>>> .
>>>
> .
>

2020-06-20 04:55:00

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 06/19, Chao Yu wrote:
> On 2020/6/19 13:49, Jaegeuk Kim wrote:
> > On 06/19, Chao Yu wrote:
> >> Hi Jaegeuk,
> >>
> >> On 2020/6/19 7:59, Jaegeuk Kim wrote:
> >>> Hi Chao,
> >>>
> >>> On 06/18, Chao Yu wrote:
> >>>> to make page content stable for special device like raid.
> >>>
> >>> Could you elaborate the problem a bit?
> >>
> >> Some devices like raid5 wants page content to be stable, because
> >> it will calculate parity info based page content, if page is not
> >> stable, parity info could be corrupted, result in data inconsistency
> >> in stripe.
> >
> > I don't get the point, since those pages are brand new pages which were not
> > modified before. If it's on writeback, we should not modify them regardless
> > of whatever raid configuration. For example, f2fs_new_node_page() waits for
> > writeback. Am I missing something?
>
> I think we should use f2fs_bug_on(, PageWriteback()) rather than
> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
> For other paths, we can keep rule that waiting for writeback before updating.
>
> How do you think?
>
> Thanks,
>
> >
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> Signed-off-by: Chao Yu <[email protected]>
> >>>> ---
> >>>> fs/f2fs/dir.c | 2 ++
> >>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
> >>>> fs/f2fs/f2fs.h | 2 +-
> >>>> fs/f2fs/file.c | 1 +
> >>>> fs/f2fs/inline.c | 2 ++
> >>>> fs/f2fs/inode.c | 3 +--
> >>>> 6 files changed, 16 insertions(+), 12 deletions(-)
> >>>>
> >>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >>>> index d35976785e8c..91e86747a604 100644
> >>>> --- a/fs/f2fs/dir.c
> >>>> +++ b/fs/f2fs/dir.c
> >>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
> >>>> if (IS_ERR(dentry_page))
> >>>> return PTR_ERR(dentry_page);
> >>>>
> >>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
> >>>> +
> >>>> dentry_blk = page_address(dentry_page);
> >>>>
> >>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
> >>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> >>>> index e60078460ad1..686c68b98610 100644
> >>>> --- a/fs/f2fs/extent_cache.c
> >>>> +++ b/fs/f2fs/extent_cache.c
> >>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
> >>>> }
> >>>>
> >>>> /* return true, if inode page is changed */
> >>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>> {
> >>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
> >>>> struct extent_tree *et;
> >>>> struct extent_node *en;
> >>>> struct extent_info ei;
> >>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>> if (!f2fs_may_extent_tree(inode)) {
> >>>> /* drop largest extent */
> >>>> if (i_ext && i_ext->len) {
> >>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >>>> i_ext->len = 0;
> >>>> - return true;
> >>>> + set_page_dirty(ipage);
> >>>> + return;
> >>>> }
> >>>> - return false;
> >>>> + return;
> >>>> }
> >>>>
> >>>> et = __grab_extent_tree(inode);
> >>>>
> >>>> if (!i_ext || !i_ext->len)
> >>>> - return false;
> >>>> + return;
> >>>>
> >>>> get_extent_info(&ei, i_ext);
> >>>>
> >>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>> }
> >>>> out:
> >>>> write_unlock(&et->lock);
> >>>> - return false;
> >>>> }
> >>>>
> >>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>> {
> >>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
> >>>> + __f2fs_init_extent_tree(inode, ipage);
> >>>>
> >>>> if (!F2FS_I(inode)->extent_tree)
> >>>> set_inode_flag(inode, FI_NO_EXTENT);
> >>>> -
> >>>> - return ret;
> >>>> }
> >>>>
> >>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> >>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>> index b35a50f4953c..326c12fa0da5 100644
> >>>> --- a/fs/f2fs/f2fs.h
> >>>> +++ b/fs/f2fs/f2fs.h
> >>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
> >>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>>> struct rb_root_cached *root);
> >>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
> >>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
> >>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
> >>>> void f2fs_drop_extent_tree(struct inode *inode);
> >>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >>>> void f2fs_destroy_extent_tree(struct inode *inode);
> >>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>> index 3268f8dd59bb..1862073b96d2 100644
> >>>> --- a/fs/f2fs/file.c
> >>>> +++ b/fs/f2fs/file.c
> >>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
> >>>> f2fs_put_page(psrc, 1);
> >>>> return PTR_ERR(pdst);
> >>>> }
> >>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);

Do you mean pdst can be under writeback?

> >>>> f2fs_copy_page(psrc, pdst);
> >>>> set_page_dirty(pdst);
> >>>> f2fs_put_page(pdst, 1);
> >>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>>> index dbade310dc79..4bcbc486c9e2 100644
> >>>> --- a/fs/f2fs/inline.c
> >>>> +++ b/fs/f2fs/inline.c
> >>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >>>> struct f2fs_dentry_ptr d;
> >>>> void *inline_dentry;
> >>>>
> >>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >>>> +
> >>>> inline_dentry = inline_data_addr(inode, ipage);
> >>>>
> >>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
> >>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >>>> index 44582a4db513..7c156eb26dd7 100644
> >>>> --- a/fs/f2fs/inode.c
> >>>> +++ b/fs/f2fs/inode.c
> >>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
> >>>> fi->i_pino = le32_to_cpu(ri->i_pino);
> >>>> fi->i_dir_level = ri->i_dir_level;
> >>>>
> >>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
> >>>> - set_page_dirty(node_page);
> >>>> + f2fs_init_extent_tree(inode, node_page);
> >>>>
> >>>> get_inline_info(inode, ri);
> >>>>
> >>>> --
> >>>> 2.18.0.rc1
> >>> .
> >>>
> > .
> >

2020-06-20 05:01:17

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 2020/6/20 6:47, Jaegeuk Kim wrote:
> On 06/19, Chao Yu wrote:
>> On 2020/6/19 13:49, Jaegeuk Kim wrote:
>>> On 06/19, Chao Yu wrote:
>>>> Hi Jaegeuk,
>>>>
>>>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
>>>>> Hi Chao,
>>>>>
>>>>> On 06/18, Chao Yu wrote:
>>>>>> to make page content stable for special device like raid.
>>>>>
>>>>> Could you elaborate the problem a bit?
>>>>
>>>> Some devices like raid5 wants page content to be stable, because
>>>> it will calculate parity info based page content, if page is not
>>>> stable, parity info could be corrupted, result in data inconsistency
>>>> in stripe.
>>>
>>> I don't get the point, since those pages are brand new pages which were not
>>> modified before. If it's on writeback, we should not modify them regardless
>>> of whatever raid configuration. For example, f2fs_new_node_page() waits for
>>> writeback. Am I missing something?
>>
>> I think we should use f2fs_bug_on(, PageWriteback()) rather than
>> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
>> For other paths, we can keep rule that waiting for writeback before updating.
>>
>> How do you think?
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>> ---
>>>>>> fs/f2fs/dir.c | 2 ++
>>>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
>>>>>> fs/f2fs/f2fs.h | 2 +-
>>>>>> fs/f2fs/file.c | 1 +
>>>>>> fs/f2fs/inline.c | 2 ++
>>>>>> fs/f2fs/inode.c | 3 +--
>>>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>>>>> index d35976785e8c..91e86747a604 100644
>>>>>> --- a/fs/f2fs/dir.c
>>>>>> +++ b/fs/f2fs/dir.c
>>>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
>>>>>> if (IS_ERR(dentry_page))
>>>>>> return PTR_ERR(dentry_page);
>>>>>>
>>>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
>>>>>> +
>>>>>> dentry_blk = page_address(dentry_page);
>>>>>>
>>>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
>>>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
>>>>>> index e60078460ad1..686c68b98610 100644
>>>>>> --- a/fs/f2fs/extent_cache.c
>>>>>> +++ b/fs/f2fs/extent_cache.c
>>>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
>>>>>> }
>>>>>>
>>>>>> /* return true, if inode page is changed */
>>>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>> {
>>>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
>>>>>> struct extent_tree *et;
>>>>>> struct extent_node *en;
>>>>>> struct extent_info ei;
>>>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>> if (!f2fs_may_extent_tree(inode)) {
>>>>>> /* drop largest extent */
>>>>>> if (i_ext && i_ext->len) {
>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>>>> i_ext->len = 0;
>>>>>> - return true;
>>>>>> + set_page_dirty(ipage);
>>>>>> + return;
>>>>>> }
>>>>>> - return false;
>>>>>> + return;
>>>>>> }
>>>>>>
>>>>>> et = __grab_extent_tree(inode);
>>>>>>
>>>>>> if (!i_ext || !i_ext->len)
>>>>>> - return false;
>>>>>> + return;
>>>>>>
>>>>>> get_extent_info(&ei, i_ext);
>>>>>>
>>>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>> }
>>>>>> out:
>>>>>> write_unlock(&et->lock);
>>>>>> - return false;
>>>>>> }
>>>>>>
>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>> {
>>>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
>>>>>> + __f2fs_init_extent_tree(inode, ipage);
>>>>>>
>>>>>> if (!F2FS_I(inode)->extent_tree)
>>>>>> set_inode_flag(inode, FI_NO_EXTENT);
>>>>>> -
>>>>>> - return ret;
>>>>>> }
>>>>>>
>>>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index b35a50f4953c..326c12fa0da5 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
>>>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
>>>>>> struct rb_root_cached *root);
>>>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
>>>>>> void f2fs_drop_extent_tree(struct inode *inode);
>>>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
>>>>>> void f2fs_destroy_extent_tree(struct inode *inode);
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 3268f8dd59bb..1862073b96d2 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>>>>>> f2fs_put_page(psrc, 1);
>>>>>> return PTR_ERR(pdst);
>>>>>> }
>>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>
> Do you mean pdst can be under writeback?

Use f2fs_bug_on(, dirty || writeback) here?

>
>>>>>> f2fs_copy_page(psrc, pdst);
>>>>>> set_page_dirty(pdst);
>>>>>> f2fs_put_page(pdst, 1);
>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>>>> index dbade310dc79..4bcbc486c9e2 100644
>>>>>> --- a/fs/f2fs/inline.c
>>>>>> +++ b/fs/f2fs/inline.c
>>>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>>>> struct f2fs_dentry_ptr d;
>>>>>> void *inline_dentry;
>>>>>>
>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);

f2fs_bug_on(, writeback)?

Thanks,

>>>>>> +
>>>>>> inline_dentry = inline_data_addr(inode, ipage);
>>>>>>
>>>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>>>> index 44582a4db513..7c156eb26dd7 100644
>>>>>> --- a/fs/f2fs/inode.c
>>>>>> +++ b/fs/f2fs/inode.c
>>>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
>>>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
>>>>>> fi->i_dir_level = ri->i_dir_level;
>>>>>>
>>>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
>>>>>> - set_page_dirty(node_page);
>>>>>> + f2fs_init_extent_tree(inode, node_page);
>>>>>>
>>>>>> get_inline_info(inode, ri);
>>>>>>
>>>>>> --
>>>>>> 2.18.0.rc1
>>>>> .
>>>>>
>>> .
>>>
> .
>

2020-06-21 16:45:58

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 06/20, Chao Yu wrote:
> On 2020/6/20 6:47, Jaegeuk Kim wrote:
> > On 06/19, Chao Yu wrote:
> >> On 2020/6/19 13:49, Jaegeuk Kim wrote:
> >>> On 06/19, Chao Yu wrote:
> >>>> Hi Jaegeuk,
> >>>>
> >>>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
> >>>>> Hi Chao,
> >>>>>
> >>>>> On 06/18, Chao Yu wrote:
> >>>>>> to make page content stable for special device like raid.
> >>>>>
> >>>>> Could you elaborate the problem a bit?
> >>>>
> >>>> Some devices like raid5 wants page content to be stable, because
> >>>> it will calculate parity info based page content, if page is not
> >>>> stable, parity info could be corrupted, result in data inconsistency
> >>>> in stripe.
> >>>
> >>> I don't get the point, since those pages are brand new pages which were not
> >>> modified before. If it's on writeback, we should not modify them regardless
> >>> of whatever raid configuration. For example, f2fs_new_node_page() waits for
> >>> writeback. Am I missing something?
> >>
> >> I think we should use f2fs_bug_on(, PageWriteback()) rather than
> >> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
> >> For other paths, we can keep rule that waiting for writeback before updating.
> >>
> >> How do you think?
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Signed-off-by: Chao Yu <[email protected]>
> >>>>>> ---
> >>>>>> fs/f2fs/dir.c | 2 ++
> >>>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
> >>>>>> fs/f2fs/f2fs.h | 2 +-
> >>>>>> fs/f2fs/file.c | 1 +
> >>>>>> fs/f2fs/inline.c | 2 ++
> >>>>>> fs/f2fs/inode.c | 3 +--
> >>>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
> >>>>>>
> >>>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >>>>>> index d35976785e8c..91e86747a604 100644
> >>>>>> --- a/fs/f2fs/dir.c
> >>>>>> +++ b/fs/f2fs/dir.c
> >>>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
> >>>>>> if (IS_ERR(dentry_page))
> >>>>>> return PTR_ERR(dentry_page);
> >>>>>>
> >>>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
> >>>>>> +
> >>>>>> dentry_blk = page_address(dentry_page);
> >>>>>>
> >>>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
> >>>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> >>>>>> index e60078460ad1..686c68b98610 100644
> >>>>>> --- a/fs/f2fs/extent_cache.c
> >>>>>> +++ b/fs/f2fs/extent_cache.c
> >>>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
> >>>>>> }
> >>>>>>
> >>>>>> /* return true, if inode page is changed */
> >>>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>>>> {
> >>>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
> >>>>>> struct extent_tree *et;
> >>>>>> struct extent_node *en;
> >>>>>> struct extent_info ei;
> >>>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>>>> if (!f2fs_may_extent_tree(inode)) {
> >>>>>> /* drop largest extent */
> >>>>>> if (i_ext && i_ext->len) {
> >>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >>>>>> i_ext->len = 0;
> >>>>>> - return true;
> >>>>>> + set_page_dirty(ipage);
> >>>>>> + return;
> >>>>>> }
> >>>>>> - return false;
> >>>>>> + return;
> >>>>>> }
> >>>>>>
> >>>>>> et = __grab_extent_tree(inode);
> >>>>>>
> >>>>>> if (!i_ext || !i_ext->len)
> >>>>>> - return false;
> >>>>>> + return;
> >>>>>>
> >>>>>> get_extent_info(&ei, i_ext);
> >>>>>>
> >>>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>>>> }
> >>>>>> out:
> >>>>>> write_unlock(&et->lock);
> >>>>>> - return false;
> >>>>>> }
> >>>>>>
> >>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>>>> {
> >>>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
> >>>>>> + __f2fs_init_extent_tree(inode, ipage);
> >>>>>>
> >>>>>> if (!F2FS_I(inode)->extent_tree)
> >>>>>> set_inode_flag(inode, FI_NO_EXTENT);
> >>>>>> -
> >>>>>> - return ret;
> >>>>>> }
> >>>>>>
> >>>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> >>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>> index b35a50f4953c..326c12fa0da5 100644
> >>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
> >>>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>>>>> struct rb_root_cached *root);
> >>>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
> >>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
> >>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
> >>>>>> void f2fs_drop_extent_tree(struct inode *inode);
> >>>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >>>>>> void f2fs_destroy_extent_tree(struct inode *inode);
> >>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>>>> index 3268f8dd59bb..1862073b96d2 100644
> >>>>>> --- a/fs/f2fs/file.c
> >>>>>> +++ b/fs/f2fs/file.c
> >>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
> >>>>>> f2fs_put_page(psrc, 1);
> >>>>>> return PTR_ERR(pdst);
> >>>>>> }
> >>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
> >
> > Do you mean pdst can be under writeback?
>
> Use f2fs_bug_on(, dirty || writeback) here?
>
> >
> >>>>>> f2fs_copy_page(psrc, pdst);
> >>>>>> set_page_dirty(pdst);
> >>>>>> f2fs_put_page(pdst, 1);
> >>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>>>>> index dbade310dc79..4bcbc486c9e2 100644
> >>>>>> --- a/fs/f2fs/inline.c
> >>>>>> +++ b/fs/f2fs/inline.c
> >>>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >>>>>> struct f2fs_dentry_ptr d;
> >>>>>> void *inline_dentry;
> >>>>>>
> >>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>
> f2fs_bug_on(, writeback)?

So, which case do you suspect unstable page for raid?

>
> Thanks,
>
> >>>>>> +
> >>>>>> inline_dentry = inline_data_addr(inode, ipage);
> >>>>>>
> >>>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
> >>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >>>>>> index 44582a4db513..7c156eb26dd7 100644
> >>>>>> --- a/fs/f2fs/inode.c
> >>>>>> +++ b/fs/f2fs/inode.c
> >>>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
> >>>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
> >>>>>> fi->i_dir_level = ri->i_dir_level;
> >>>>>>
> >>>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
> >>>>>> - set_page_dirty(node_page);
> >>>>>> + f2fs_init_extent_tree(inode, node_page);
> >>>>>>
> >>>>>> get_inline_info(inode, ri);
> >>>>>>
> >>>>>> --
> >>>>>> 2.18.0.rc1
> >>>>> .
> >>>>>
> >>> .
> >>>
> > .
> >

2020-06-22 00:56:19

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 2020/6/22 0:38, Jaegeuk Kim wrote:
> On 06/20, Chao Yu wrote:
>> On 2020/6/20 6:47, Jaegeuk Kim wrote:
>>> On 06/19, Chao Yu wrote:
>>>> On 2020/6/19 13:49, Jaegeuk Kim wrote:
>>>>> On 06/19, Chao Yu wrote:
>>>>>> Hi Jaegeuk,
>>>>>>
>>>>>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
>>>>>>> Hi Chao,
>>>>>>>
>>>>>>> On 06/18, Chao Yu wrote:
>>>>>>>> to make page content stable for special device like raid.
>>>>>>>
>>>>>>> Could you elaborate the problem a bit?
>>>>>>
>>>>>> Some devices like raid5 wants page content to be stable, because
>>>>>> it will calculate parity info based page content, if page is not
>>>>>> stable, parity info could be corrupted, result in data inconsistency
>>>>>> in stripe.
>>>>>
>>>>> I don't get the point, since those pages are brand new pages which were not
>>>>> modified before. If it's on writeback, we should not modify them regardless
>>>>> of whatever raid configuration. For example, f2fs_new_node_page() waits for
>>>>> writeback. Am I missing something?
>>>>
>>>> I think we should use f2fs_bug_on(, PageWriteback()) rather than
>>>> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
>>>> For other paths, we can keep rule that waiting for writeback before updating.
>>>>
>>>> How do you think?
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/f2fs/dir.c | 2 ++
>>>>>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
>>>>>>>> fs/f2fs/f2fs.h | 2 +-
>>>>>>>> fs/f2fs/file.c | 1 +
>>>>>>>> fs/f2fs/inline.c | 2 ++
>>>>>>>> fs/f2fs/inode.c | 3 +--
>>>>>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>>>>>>> index d35976785e8c..91e86747a604 100644
>>>>>>>> --- a/fs/f2fs/dir.c
>>>>>>>> +++ b/fs/f2fs/dir.c
>>>>>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
>>>>>>>> if (IS_ERR(dentry_page))
>>>>>>>> return PTR_ERR(dentry_page);
>>>>>>>>
>>>>>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
>>>>>>>> +
>>>>>>>> dentry_blk = page_address(dentry_page);
>>>>>>>>
>>>>>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
>>>>>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
>>>>>>>> index e60078460ad1..686c68b98610 100644
>>>>>>>> --- a/fs/f2fs/extent_cache.c
>>>>>>>> +++ b/fs/f2fs/extent_cache.c
>>>>>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
>>>>>>>> }
>>>>>>>>
>>>>>>>> /* return true, if inode page is changed */
>>>>>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>>>> {
>>>>>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
>>>>>>>> struct extent_tree *et;
>>>>>>>> struct extent_node *en;
>>>>>>>> struct extent_info ei;
>>>>>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>>>> if (!f2fs_may_extent_tree(inode)) {
>>>>>>>> /* drop largest extent */
>>>>>>>> if (i_ext && i_ext->len) {
>>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>>>>>> i_ext->len = 0;
>>>>>>>> - return true;
>>>>>>>> + set_page_dirty(ipage);
>>>>>>>> + return;
>>>>>>>> }
>>>>>>>> - return false;
>>>>>>>> + return;
>>>>>>>> }
>>>>>>>>
>>>>>>>> et = __grab_extent_tree(inode);
>>>>>>>>
>>>>>>>> if (!i_ext || !i_ext->len)
>>>>>>>> - return false;
>>>>>>>> + return;
>>>>>>>>
>>>>>>>> get_extent_info(&ei, i_ext);
>>>>>>>>
>>>>>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>>>> }
>>>>>>>> out:
>>>>>>>> write_unlock(&et->lock);
>>>>>>>> - return false;
>>>>>>>> }
>>>>>>>>
>>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>>>> {
>>>>>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
>>>>>>>> + __f2fs_init_extent_tree(inode, ipage);
>>>>>>>>
>>>>>>>> if (!F2FS_I(inode)->extent_tree)
>>>>>>>> set_inode_flag(inode, FI_NO_EXTENT);
>>>>>>>> -
>>>>>>>> - return ret;
>>>>>>>> }
>>>>>>>>
>>>>>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>> index b35a50f4953c..326c12fa0da5 100644
>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
>>>>>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
>>>>>>>> struct rb_root_cached *root);
>>>>>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
>>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
>>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
>>>>>>>> void f2fs_drop_extent_tree(struct inode *inode);
>>>>>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
>>>>>>>> void f2fs_destroy_extent_tree(struct inode *inode);
>>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>>> index 3268f8dd59bb..1862073b96d2 100644
>>>>>>>> --- a/fs/f2fs/file.c
>>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>>>>>>>> f2fs_put_page(psrc, 1);
>>>>>>>> return PTR_ERR(pdst);
>>>>>>>> }
>>>>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>>>
>>> Do you mean pdst can be under writeback?
>>
>> Use f2fs_bug_on(, dirty || writeback) here?
>>
>>>
>>>>>>>> f2fs_copy_page(psrc, pdst);
>>>>>>>> set_page_dirty(pdst);
>>>>>>>> f2fs_put_page(pdst, 1);
>>>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>>>>>> index dbade310dc79..4bcbc486c9e2 100644
>>>>>>>> --- a/fs/f2fs/inline.c
>>>>>>>> +++ b/fs/f2fs/inline.c
>>>>>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>>>>>> struct f2fs_dentry_ptr d;
>>>>>>>> void *inline_dentry;
>>>>>>>>
>>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>
>> f2fs_bug_on(, writeback)?
>
> So, which case do you suspect unstable page for raid?

- gc_node_segment
- f2fs_move_node_page
- __write_node_page
- set_page_writeback

- do_read_inode
- f2fs_init_extent_tree
- __f2fs_init_extent_tree
i_ext->len = 0;

>
>>
>> Thanks,
>>
>>>>>>>> +
>>>>>>>> inline_dentry = inline_data_addr(inode, ipage);
>>>>>>>>
>>>>>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
>>>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>>>>>> index 44582a4db513..7c156eb26dd7 100644
>>>>>>>> --- a/fs/f2fs/inode.c
>>>>>>>> +++ b/fs/f2fs/inode.c
>>>>>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
>>>>>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
>>>>>>>> fi->i_dir_level = ri->i_dir_level;
>>>>>>>>
>>>>>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
>>>>>>>> - set_page_dirty(node_page);
>>>>>>>> + f2fs_init_extent_tree(inode, node_page);
>>>>>>>>
>>>>>>>> get_inline_info(inode, ri);
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.18.0.rc1
>>>>>>> .
>>>>>>>
>>>>> .
>>>>>
>>> .
>>>
> .
>

2020-06-24 15:56:53

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 06/22, Chao Yu wrote:
> On 2020/6/22 0:38, Jaegeuk Kim wrote:
> > On 06/20, Chao Yu wrote:
> >> On 2020/6/20 6:47, Jaegeuk Kim wrote:
> >>> On 06/19, Chao Yu wrote:
> >>>> On 2020/6/19 13:49, Jaegeuk Kim wrote:
> >>>>> On 06/19, Chao Yu wrote:
> >>>>>> Hi Jaegeuk,
> >>>>>>
> >>>>>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
> >>>>>>> Hi Chao,
> >>>>>>>
> >>>>>>> On 06/18, Chao Yu wrote:
> >>>>>>>> to make page content stable for special device like raid.
> >>>>>>>
> >>>>>>> Could you elaborate the problem a bit?
> >>>>>>
> >>>>>> Some devices like raid5 wants page content to be stable, because
> >>>>>> it will calculate parity info based page content, if page is not
> >>>>>> stable, parity info could be corrupted, result in data inconsistency
> >>>>>> in stripe.
> >>>>>
> >>>>> I don't get the point, since those pages are brand new pages which were not
> >>>>> modified before. If it's on writeback, we should not modify them regardless
> >>>>> of whatever raid configuration. For example, f2fs_new_node_page() waits for
> >>>>> writeback. Am I missing something?
> >>>>
> >>>> I think we should use f2fs_bug_on(, PageWriteback()) rather than
> >>>> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
> >>>> For other paths, we can keep rule that waiting for writeback before updating.
> >>>>
> >>>> How do you think?
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>>
> >>>>>>>
> >>>>>>>>
> >>>>>>>> Signed-off-by: Chao Yu <[email protected]>
> >>>>>>>> ---
> >>>>>>>> fs/f2fs/dir.c | 2 ++
> >>>>>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
> >>>>>>>> fs/f2fs/f2fs.h | 2 +-
> >>>>>>>> fs/f2fs/file.c | 1 +
> >>>>>>>> fs/f2fs/inline.c | 2 ++
> >>>>>>>> fs/f2fs/inode.c | 3 +--
> >>>>>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >>>>>>>> index d35976785e8c..91e86747a604 100644
> >>>>>>>> --- a/fs/f2fs/dir.c
> >>>>>>>> +++ b/fs/f2fs/dir.c
> >>>>>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
> >>>>>>>> if (IS_ERR(dentry_page))
> >>>>>>>> return PTR_ERR(dentry_page);
> >>>>>>>>
> >>>>>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
> >>>>>>>> +
> >>>>>>>> dentry_blk = page_address(dentry_page);
> >>>>>>>>
> >>>>>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
> >>>>>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> >>>>>>>> index e60078460ad1..686c68b98610 100644
> >>>>>>>> --- a/fs/f2fs/extent_cache.c
> >>>>>>>> +++ b/fs/f2fs/extent_cache.c
> >>>>>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> /* return true, if inode page is changed */
> >>>>>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>>>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>>>>>> {
> >>>>>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
> >>>>>>>> struct extent_tree *et;
> >>>>>>>> struct extent_node *en;
> >>>>>>>> struct extent_info ei;
> >>>>>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>>>>>> if (!f2fs_may_extent_tree(inode)) {
> >>>>>>>> /* drop largest extent */
> >>>>>>>> if (i_ext && i_ext->len) {
> >>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >>>>>>>> i_ext->len = 0;
> >>>>>>>> - return true;
> >>>>>>>> + set_page_dirty(ipage);
> >>>>>>>> + return;
> >>>>>>>> }
> >>>>>>>> - return false;
> >>>>>>>> + return;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> et = __grab_extent_tree(inode);
> >>>>>>>>
> >>>>>>>> if (!i_ext || !i_ext->len)
> >>>>>>>> - return false;
> >>>>>>>> + return;
> >>>>>>>>
> >>>>>>>> get_extent_info(&ei, i_ext);
> >>>>>>>>
> >>>>>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
> >>>>>>>> }
> >>>>>>>> out:
> >>>>>>>> write_unlock(&et->lock);
> >>>>>>>> - return false;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
> >>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
> >>>>>>>> {
> >>>>>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
> >>>>>>>> + __f2fs_init_extent_tree(inode, ipage);
> >>>>>>>>
> >>>>>>>> if (!F2FS_I(inode)->extent_tree)
> >>>>>>>> set_inode_flag(inode, FI_NO_EXTENT);
> >>>>>>>> -
> >>>>>>>> - return ret;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> >>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>>>>>>> index b35a50f4953c..326c12fa0da5 100644
> >>>>>>>> --- a/fs/f2fs/f2fs.h
> >>>>>>>> +++ b/fs/f2fs/f2fs.h
> >>>>>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
> >>>>>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
> >>>>>>>> struct rb_root_cached *root);
> >>>>>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
> >>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
> >>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
> >>>>>>>> void f2fs_drop_extent_tree(struct inode *inode);
> >>>>>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
> >>>>>>>> void f2fs_destroy_extent_tree(struct inode *inode);
> >>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>>>>>> index 3268f8dd59bb..1862073b96d2 100644
> >>>>>>>> --- a/fs/f2fs/file.c
> >>>>>>>> +++ b/fs/f2fs/file.c
> >>>>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
> >>>>>>>> f2fs_put_page(psrc, 1);
> >>>>>>>> return PTR_ERR(pdst);
> >>>>>>>> }
> >>>>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
> >>>
> >>> Do you mean pdst can be under writeback?
> >>
> >> Use f2fs_bug_on(, dirty || writeback) here?
> >>
> >>>
> >>>>>>>> f2fs_copy_page(psrc, pdst);
> >>>>>>>> set_page_dirty(pdst);
> >>>>>>>> f2fs_put_page(pdst, 1);
> >>>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>>>>>>> index dbade310dc79..4bcbc486c9e2 100644
> >>>>>>>> --- a/fs/f2fs/inline.c
> >>>>>>>> +++ b/fs/f2fs/inline.c
> >>>>>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >>>>>>>> struct f2fs_dentry_ptr d;
> >>>>>>>> void *inline_dentry;
> >>>>>>>>
> >>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
> >>
> >> f2fs_bug_on(, writeback)?
> >
> > So, which case do you suspect unstable page for raid?
>
> - gc_node_segment
> - f2fs_move_node_page
> - __write_node_page
> - set_page_writeback
>
> - do_read_inode
> - f2fs_init_extent_tree
> - __f2fs_init_extent_tree
> i_ext->len = 0;

Could you please add wait_on_writeback on this specific case only
with this backtrace in the description?

Thanks,

>
> >
> >>
> >> Thanks,
> >>
> >>>>>>>> +
> >>>>>>>> inline_dentry = inline_data_addr(inode, ipage);
> >>>>>>>>
> >>>>>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
> >>>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> >>>>>>>> index 44582a4db513..7c156eb26dd7 100644
> >>>>>>>> --- a/fs/f2fs/inode.c
> >>>>>>>> +++ b/fs/f2fs/inode.c
> >>>>>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
> >>>>>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
> >>>>>>>> fi->i_dir_level = ri->i_dir_level;
> >>>>>>>>
> >>>>>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
> >>>>>>>> - set_page_dirty(node_page);
> >>>>>>>> + f2fs_init_extent_tree(inode, node_page);
> >>>>>>>>
> >>>>>>>> get_inline_info(inode, ri);
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.18.0.rc1
> >>>>>>> .
> >>>>>>>
> >>>>> .
> >>>>>
> >>> .
> >>>
> > .
> >

2020-06-24 22:31:52

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 5/5] f2fs: show more debug info for per-temperature log

On 06/18, Chao Yu wrote:
> - Add to account and show per-log dirty_seg, full_seg and valid_blocks
> in debugfs.
> - reformat printed info.
>
> TYPE segno secno zoneno dirty_seg full_seg valid_blk
> - COLD data: 1523 1523 1523 1 0 399
> - WARM data: 769 769 769 20 255 133098
> - HOT data: 767 767 767 9 0 167
> - Dir dnode: 22 22 22 3 0 70
> - File dnode: 722 722 722 14 10 6505
> - Indir nodes: 2 2 2 1 0 3
>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/debug.c | 67 ++++++++++++++++++++++++++++++++++++++++---------
> fs/f2fs/f2fs.h | 3 +++
> 2 files changed, 58 insertions(+), 12 deletions(-)
>
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 0dbcb0f9c019..aa1fd2de11ba 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -174,6 +174,29 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> for (i = META_CP; i < META_MAX; i++)
> si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
>
> + for (i = 0; i < NO_CHECK_TYPE; i++) {
> + si->dirty_seg[i] = 0;
> + si->full_seg[i] = 0;
> + si->valid_blks[i] = 0;
> + }
> +
> + for (i = 0; i < MAIN_SEGS(sbi); i++) {
> + int blks = get_seg_entry(sbi, i)->valid_blocks;
> + int type = get_seg_entry(sbi, i)->type;
> +
> + if (!blks)
> + continue;
> +
> + if (IS_CURSEG(sbi, i))
> + continue;

How about adding current segments as well? Especially, it's hard to see any
valid blocks for cold node with this.

> +
> + if (blks == sbi->blocks_per_seg)
> + si->full_seg[type]++;
> + else
> + si->dirty_seg[type]++;
> + si->valid_blks[type] += blks;
> + }
> +
> for (i = 0; i < 2; i++) {
> si->segment_count[i] = sbi->segment_count[i];
> si->block_count[i] = sbi->block_count[i];
> @@ -329,30 +352,50 @@ static int stat_show(struct seq_file *s, void *v)
> seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
> si->main_area_segs, si->main_area_sections,
> si->main_area_zones);
> - seq_printf(s, " - COLD data: %d, %d, %d\n",
> + seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n",
> + "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
> + seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_COLD_DATA],
> si->cursec[CURSEG_COLD_DATA],
> - si->curzone[CURSEG_COLD_DATA]);
> - seq_printf(s, " - WARM data: %d, %d, %d\n",
> + si->curzone[CURSEG_COLD_DATA],
> + si->dirty_seg[CURSEG_COLD_DATA],
> + si->full_seg[CURSEG_COLD_DATA],
> + si->valid_blks[CURSEG_COLD_DATA]);
> + seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_WARM_DATA],
> si->cursec[CURSEG_WARM_DATA],
> - si->curzone[CURSEG_WARM_DATA]);
> - seq_printf(s, " - HOT data: %d, %d, %d\n",
> + si->curzone[CURSEG_WARM_DATA],
> + si->dirty_seg[CURSEG_WARM_DATA],
> + si->full_seg[CURSEG_WARM_DATA],
> + si->valid_blks[CURSEG_WARM_DATA]);
> + seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_HOT_DATA],
> si->cursec[CURSEG_HOT_DATA],
> - si->curzone[CURSEG_HOT_DATA]);
> - seq_printf(s, " - Dir dnode: %d, %d, %d\n",
> + si->curzone[CURSEG_HOT_DATA],
> + si->dirty_seg[CURSEG_HOT_DATA],
> + si->full_seg[CURSEG_HOT_DATA],
> + si->valid_blks[CURSEG_HOT_DATA]);
> + seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_HOT_NODE],
> si->cursec[CURSEG_HOT_NODE],
> - si->curzone[CURSEG_HOT_NODE]);
> - seq_printf(s, " - File dnode: %d, %d, %d\n",
> + si->curzone[CURSEG_HOT_NODE],
> + si->dirty_seg[CURSEG_HOT_NODE],
> + si->full_seg[CURSEG_HOT_NODE],
> + si->valid_blks[CURSEG_HOT_NODE]);
> + seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_WARM_NODE],
> si->cursec[CURSEG_WARM_NODE],
> - si->curzone[CURSEG_WARM_NODE]);
> - seq_printf(s, " - Indir nodes: %d, %d, %d\n",
> + si->curzone[CURSEG_WARM_NODE],
> + si->dirty_seg[CURSEG_WARM_NODE],
> + si->full_seg[CURSEG_WARM_NODE],
> + si->valid_blks[CURSEG_WARM_NODE]);
> + seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
> si->curseg[CURSEG_COLD_NODE],
> si->cursec[CURSEG_COLD_NODE],
> - si->curzone[CURSEG_COLD_NODE]);
> + si->curzone[CURSEG_COLD_NODE],
> + si->dirty_seg[CURSEG_COLD_NODE],
> + si->full_seg[CURSEG_COLD_NODE],
> + si->valid_blks[CURSEG_COLD_NODE]);
> seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
> si->main_area_segs - si->dirty_count -
> si->prefree_count - si->free_segs,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 72a667f1d678..70565d81320b 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3536,6 +3536,9 @@ struct f2fs_stat_info {
> int curseg[NR_CURSEG_TYPE];
> int cursec[NR_CURSEG_TYPE];
> int curzone[NR_CURSEG_TYPE];
> + unsigned int dirty_seg[NR_CURSEG_TYPE];
> + unsigned int full_seg[NR_CURSEG_TYPE];
> + unsigned int valid_blks[NR_CURSEG_TYPE];
>
> unsigned int meta_count[META_MAX];
> unsigned int segment_count[2];
> --
> 2.18.0.rc1

2020-06-28 01:27:46

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 2020/6/24 23:55, Jaegeuk Kim wrote:
> On 06/22, Chao Yu wrote:
>> On 2020/6/22 0:38, Jaegeuk Kim wrote:
>>> On 06/20, Chao Yu wrote:
>>>> On 2020/6/20 6:47, Jaegeuk Kim wrote:
>>>>> On 06/19, Chao Yu wrote:
>>>>>> On 2020/6/19 13:49, Jaegeuk Kim wrote:
>>>>>>> On 06/19, Chao Yu wrote:
>>>>>>>> Hi Jaegeuk,
>>>>>>>>
>>>>>>>> On 2020/6/19 7:59, Jaegeuk Kim wrote:
>>>>>>>>> Hi Chao,
>>>>>>>>>
>>>>>>>>> On 06/18, Chao Yu wrote:
>>>>>>>>>> to make page content stable for special device like raid.
>>>>>>>>>
>>>>>>>>> Could you elaborate the problem a bit?
>>>>>>>>
>>>>>>>> Some devices like raid5 wants page content to be stable, because
>>>>>>>> it will calculate parity info based page content, if page is not
>>>>>>>> stable, parity info could be corrupted, result in data inconsistency
>>>>>>>> in stripe.
>>>>>>>
>>>>>>> I don't get the point, since those pages are brand new pages which were not
>>>>>>> modified before. If it's on writeback, we should not modify them regardless
>>>>>>> of whatever raid configuration. For example, f2fs_new_node_page() waits for
>>>>>>> writeback. Am I missing something?
>>>>>>
>>>>>> I think we should use f2fs_bug_on(, PageWriteback()) rather than
>>>>>> f2fs_wait_on_page_writeback() for brand new page which is allocated just now.
>>>>>> For other paths, we can keep rule that waiting for writeback before updating.
>>>>>>
>>>>>> How do you think?
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>>>> ---
>>>>>>>>>> fs/f2fs/dir.c | 2 ++
>>>>>>>>>> fs/f2fs/extent_cache.c | 18 +++++++++---------
>>>>>>>>>> fs/f2fs/f2fs.h | 2 +-
>>>>>>>>>> fs/f2fs/file.c | 1 +
>>>>>>>>>> fs/f2fs/inline.c | 2 ++
>>>>>>>>>> fs/f2fs/inode.c | 3 +--
>>>>>>>>>> 6 files changed, 16 insertions(+), 12 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
>>>>>>>>>> index d35976785e8c..91e86747a604 100644
>>>>>>>>>> --- a/fs/f2fs/dir.c
>>>>>>>>>> +++ b/fs/f2fs/dir.c
>>>>>>>>>> @@ -495,6 +495,8 @@ static int make_empty_dir(struct inode *inode,
>>>>>>>>>> if (IS_ERR(dentry_page))
>>>>>>>>>> return PTR_ERR(dentry_page);
>>>>>>>>>>
>>>>>>>>>> + f2fs_bug_on(F2FS_I_SB(inode), PageWriteback(dentry_page));
>>>>>>>>>> +
>>>>>>>>>> dentry_blk = page_address(dentry_page);
>>>>>>>>>>
>>>>>>>>>> make_dentry_ptr_block(NULL, &d, dentry_blk);
>>>>>>>>>> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
>>>>>>>>>> index e60078460ad1..686c68b98610 100644
>>>>>>>>>> --- a/fs/f2fs/extent_cache.c
>>>>>>>>>> +++ b/fs/f2fs/extent_cache.c
>>>>>>>>>> @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> /* return true, if inode page is changed */
>>>>>>>>>> -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>>>>>> +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>>>>>> {
>>>>>>>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>>>>>> + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
>>>>>>>>>> struct extent_tree *et;
>>>>>>>>>> struct extent_node *en;
>>>>>>>>>> struct extent_info ei;
>>>>>>>>>> @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>>>>>> if (!f2fs_may_extent_tree(inode)) {
>>>>>>>>>> /* drop largest extent */
>>>>>>>>>> if (i_ext && i_ext->len) {
>>>>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>>>>>>>> i_ext->len = 0;
>>>>>>>>>> - return true;
>>>>>>>>>> + set_page_dirty(ipage);
>>>>>>>>>> + return;
>>>>>>>>>> }
>>>>>>>>>> - return false;
>>>>>>>>>> + return;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> et = __grab_extent_tree(inode);
>>>>>>>>>>
>>>>>>>>>> if (!i_ext || !i_ext->len)
>>>>>>>>>> - return false;
>>>>>>>>>> + return;
>>>>>>>>>>
>>>>>>>>>> get_extent_info(&ei, i_ext);
>>>>>>>>>>
>>>>>>>>>> @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
>>>>>>>>>> }
>>>>>>>>>> out:
>>>>>>>>>> write_unlock(&et->lock);
>>>>>>>>>> - return false;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
>>>>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
>>>>>>>>>> {
>>>>>>>>>> - bool ret = __f2fs_init_extent_tree(inode, i_ext);
>>>>>>>>>> + __f2fs_init_extent_tree(inode, ipage);
>>>>>>>>>>
>>>>>>>>>> if (!F2FS_I(inode)->extent_tree)
>>>>>>>>>> set_inode_flag(inode, FI_NO_EXTENT);
>>>>>>>>>> -
>>>>>>>>>> - return ret;
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
>>>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>>>> index b35a50f4953c..326c12fa0da5 100644
>>>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>>>> @@ -3795,7 +3795,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
>>>>>>>>>> bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
>>>>>>>>>> struct rb_root_cached *root);
>>>>>>>>>> unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
>>>>>>>>>> -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
>>>>>>>>>> +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
>>>>>>>>>> void f2fs_drop_extent_tree(struct inode *inode);
>>>>>>>>>> unsigned int f2fs_destroy_extent_node(struct inode *inode);
>>>>>>>>>> void f2fs_destroy_extent_tree(struct inode *inode);
>>>>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>>>>> index 3268f8dd59bb..1862073b96d2 100644
>>>>>>>>>> --- a/fs/f2fs/file.c
>>>>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>>>>>>>>>> f2fs_put_page(psrc, 1);
>>>>>>>>>> return PTR_ERR(pdst);
>>>>>>>>>> }
>>>>>>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>>>>>
>>>>> Do you mean pdst can be under writeback?
>>>>
>>>> Use f2fs_bug_on(, dirty || writeback) here?
>>>>
>>>>>
>>>>>>>>>> f2fs_copy_page(psrc, pdst);
>>>>>>>>>> set_page_dirty(pdst);
>>>>>>>>>> f2fs_put_page(pdst, 1);
>>>>>>>>>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
>>>>>>>>>> index dbade310dc79..4bcbc486c9e2 100644
>>>>>>>>>> --- a/fs/f2fs/inline.c
>>>>>>>>>> +++ b/fs/f2fs/inline.c
>>>>>>>>>> @@ -340,6 +340,8 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
>>>>>>>>>> struct f2fs_dentry_ptr d;
>>>>>>>>>> void *inline_dentry;
>>>>>>>>>>
>>>>>>>>>> + f2fs_wait_on_page_writeback(ipage, NODE, true, true);
>>>>
>>>> f2fs_bug_on(, writeback)?
>>>
>>> So, which case do you suspect unstable page for raid?
>>
>> - gc_node_segment
>> - f2fs_move_node_page
>> - __write_node_page
>> - set_page_writeback
>>
>> - do_read_inode
>> - f2fs_init_extent_tree
>> - __f2fs_init_extent_tree
>> i_ext->len = 0;
>
> Could you please add wait_on_writeback on this specific case only
> with this backtrace in the description?

Sure, :)

Thanks,

>
> Thanks,
>
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>>>>>> +
>>>>>>>>>> inline_dentry = inline_data_addr(inode, ipage);
>>>>>>>>>>
>>>>>>>>>> make_dentry_ptr_inline(inode, &d, inline_dentry);
>>>>>>>>>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>>>>>>>>>> index 44582a4db513..7c156eb26dd7 100644
>>>>>>>>>> --- a/fs/f2fs/inode.c
>>>>>>>>>> +++ b/fs/f2fs/inode.c
>>>>>>>>>> @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
>>>>>>>>>> fi->i_pino = le32_to_cpu(ri->i_pino);
>>>>>>>>>> fi->i_dir_level = ri->i_dir_level;
>>>>>>>>>>
>>>>>>>>>> - if (f2fs_init_extent_tree(inode, &ri->i_ext))
>>>>>>>>>> - set_page_dirty(node_page);
>>>>>>>>>> + f2fs_init_extent_tree(inode, node_page);
>>>>>>>>>>
>>>>>>>>>> get_inline_info(inode, ri);
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> 2.18.0.rc1
>>>>>>>>> .
>>>>>>>>>
>>>>>>> .
>>>>>>>
>>>>> .
>>>>>
>>> .
>>>
> .
>

2020-06-28 01:37:31

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 5/5] f2fs: show more debug info for per-temperature log

On 2020/6/25 6:31, Jaegeuk Kim wrote:
> On 06/18, Chao Yu wrote:
>> - Add to account and show per-log dirty_seg, full_seg and valid_blocks
>> in debugfs.
>> - reformat printed info.
>>
>> TYPE segno secno zoneno dirty_seg full_seg valid_blk
>> - COLD data: 1523 1523 1523 1 0 399
>> - WARM data: 769 769 769 20 255 133098
>> - HOT data: 767 767 767 9 0 167
>> - Dir dnode: 22 22 22 3 0 70
>> - File dnode: 722 722 722 14 10 6505
>> - Indir nodes: 2 2 2 1 0 3
>>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/debug.c | 67 ++++++++++++++++++++++++++++++++++++++++---------
>> fs/f2fs/f2fs.h | 3 +++
>> 2 files changed, 58 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index 0dbcb0f9c019..aa1fd2de11ba 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -174,6 +174,29 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>> for (i = META_CP; i < META_MAX; i++)
>> si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
>>
>> + for (i = 0; i < NO_CHECK_TYPE; i++) {
>> + si->dirty_seg[i] = 0;
>> + si->full_seg[i] = 0;
>> + si->valid_blks[i] = 0;
>> + }
>> +
>> + for (i = 0; i < MAIN_SEGS(sbi); i++) {
>> + int blks = get_seg_entry(sbi, i)->valid_blocks;
>> + int type = get_seg_entry(sbi, i)->type;
>> +
>> + if (!blks)
>> + continue;
>> +
>> + if (IS_CURSEG(sbi, i))
>> + continue;
>
> How about adding current segments as well? Especially, it's hard to see any
> valid blocks for cold node with this.

Better, let me update this patch.

>
>> +
>> + if (blks == sbi->blocks_per_seg)
>> + si->full_seg[type]++;
>> + else
>> + si->dirty_seg[type]++;
>> + si->valid_blks[type] += blks;
>> + }
>> +
>> for (i = 0; i < 2; i++) {
>> si->segment_count[i] = sbi->segment_count[i];
>> si->block_count[i] = sbi->block_count[i];
>> @@ -329,30 +352,50 @@ static int stat_show(struct seq_file *s, void *v)
>> seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
>> si->main_area_segs, si->main_area_sections,
>> si->main_area_zones);
>> - seq_printf(s, " - COLD data: %d, %d, %d\n",
>> + seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n",
>> + "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
>> + seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_COLD_DATA],
>> si->cursec[CURSEG_COLD_DATA],
>> - si->curzone[CURSEG_COLD_DATA]);
>> - seq_printf(s, " - WARM data: %d, %d, %d\n",
>> + si->curzone[CURSEG_COLD_DATA],
>> + si->dirty_seg[CURSEG_COLD_DATA],
>> + si->full_seg[CURSEG_COLD_DATA],
>> + si->valid_blks[CURSEG_COLD_DATA]);
>> + seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_WARM_DATA],
>> si->cursec[CURSEG_WARM_DATA],
>> - si->curzone[CURSEG_WARM_DATA]);
>> - seq_printf(s, " - HOT data: %d, %d, %d\n",
>> + si->curzone[CURSEG_WARM_DATA],
>> + si->dirty_seg[CURSEG_WARM_DATA],
>> + si->full_seg[CURSEG_WARM_DATA],
>> + si->valid_blks[CURSEG_WARM_DATA]);
>> + seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_HOT_DATA],
>> si->cursec[CURSEG_HOT_DATA],
>> - si->curzone[CURSEG_HOT_DATA]);
>> - seq_printf(s, " - Dir dnode: %d, %d, %d\n",
>> + si->curzone[CURSEG_HOT_DATA],
>> + si->dirty_seg[CURSEG_HOT_DATA],
>> + si->full_seg[CURSEG_HOT_DATA],
>> + si->valid_blks[CURSEG_HOT_DATA]);
>> + seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_HOT_NODE],
>> si->cursec[CURSEG_HOT_NODE],
>> - si->curzone[CURSEG_HOT_NODE]);
>> - seq_printf(s, " - File dnode: %d, %d, %d\n",
>> + si->curzone[CURSEG_HOT_NODE],
>> + si->dirty_seg[CURSEG_HOT_NODE],
>> + si->full_seg[CURSEG_HOT_NODE],
>> + si->valid_blks[CURSEG_HOT_NODE]);
>> + seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_WARM_NODE],
>> si->cursec[CURSEG_WARM_NODE],
>> - si->curzone[CURSEG_WARM_NODE]);
>> - seq_printf(s, " - Indir nodes: %d, %d, %d\n",
>> + si->curzone[CURSEG_WARM_NODE],
>> + si->dirty_seg[CURSEG_WARM_NODE],
>> + si->full_seg[CURSEG_WARM_NODE],
>> + si->valid_blks[CURSEG_WARM_NODE]);
>> + seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
>> si->curseg[CURSEG_COLD_NODE],
>> si->cursec[CURSEG_COLD_NODE],
>> - si->curzone[CURSEG_COLD_NODE]);
>> + si->curzone[CURSEG_COLD_NODE],
>> + si->dirty_seg[CURSEG_COLD_NODE],
>> + si->full_seg[CURSEG_COLD_NODE],
>> + si->valid_blks[CURSEG_COLD_NODE]);
>> seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
>> si->main_area_segs - si->dirty_count -
>> si->prefree_count - si->free_segs,
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 72a667f1d678..70565d81320b 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -3536,6 +3536,9 @@ struct f2fs_stat_info {
>> int curseg[NR_CURSEG_TYPE];
>> int cursec[NR_CURSEG_TYPE];
>> int curzone[NR_CURSEG_TYPE];
>> + unsigned int dirty_seg[NR_CURSEG_TYPE];
>> + unsigned int full_seg[NR_CURSEG_TYPE];
>> + unsigned int valid_blks[NR_CURSEG_TYPE];
>>
>> unsigned int meta_count[META_MAX];
>> unsigned int segment_count[2];
>> --
>> 2.18.0.rc1
> .
>

2024-03-08 06:20:49

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/5] f2fs: fix to wait page writeback before update

On 2020/6/20 6:47, Jaegeuk Kim wrote:
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 3268f8dd59bb..1862073b96d2 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -1250,6 +1250,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
>>>>>> f2fs_put_page(psrc, 1);
>>>>>> return PTR_ERR(pdst);
>>>>>> }
>>>>>> + f2fs_wait_on_page_writeback(pdst, DATA, true, true);
>
> Do you mean pdst can be under writeback?

Jaegeuk,

Look back this again, is it possible that pdst is writebacking
in below race condition? or am I missing something?

Thread A GC Thread
- f2fs_move_file_range
- filemap_write_and_wait_range(dst)
- gc_data_segment
- f2fs_down_write(dst)
- move_data_page
- set_page_writeback(dst_page)
- f2fs_submit_page_write
- f2fs_up_write(dst)
- f2fs_down_write(dst)
- __exchange_data_block
- __clone_blkaddrs
- f2fs_get_new_data_page
- memcpy_page

Thanks,