It would be better to split small and large IOs separately in order to get more
consecutive big writes.
The default threshold is set to 64KB (16 blocks), but it is configurable via sysfs/min_hot_blocks.
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 9 +++++++++
fs/f2fs/f2fs.h | 2 ++
fs/f2fs/segment.c | 13 ++++++-------
fs/f2fs/segment.h | 1 +
fs/f2fs/super.c | 2 ++
5 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 090413236b27..8f36080b47c4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
need_balance_fs = true;
else if (has_not_enough_free_secs(sbi, 0, 0))
goto redirty_out;
+ else
+ set_inode_flag(inode, FI_HOT_DATA);
err = -EAGAIN;
if (f2fs_has_inline_data(inode)) {
@@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
if (wbc->for_reclaim) {
f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
DATA, WRITE);
+ clear_inode_flag(inode, FI_HOT_DATA);
remove_dirty_inode(inode);
submitted = NULL;
}
@@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pagevec_init(&pvec, 0);
+ if (get_dirty_pages(mapping->host) <=
+ SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
+ set_inode_flag(mapping->host, FI_HOT_DATA);
+ else
+ clear_inode_flag(mapping->host, FI_HOT_DATA);
+
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a49518ee786..32d6f674c114 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -678,6 +678,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_hot_blocks; /* threshold for hot block allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
@@ -1717,6 +1718,7 @@ enum {
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_HOT_DATA, /* indicate file is hot */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index b5b2a4745328..bff3f3bc7827 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
if (p_type == DATA) {
struct inode *inode = page->mapping->host;
- if (S_ISDIR(inode->i_mode))
- return CURSEG_HOT_DATA;
- else if (is_cold_data(page) || file_is_cold(inode))
+ if (is_cold_data(page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
- else
- return CURSEG_WARM_DATA;
+ if (is_inode_flag_set(inode, FI_HOT_DATA))
+ return CURSEG_HOT_DATA;
+ return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(page))
return is_cold_node(page) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
- else
- return CURSEG_COLD_NODE;
+ return CURSEG_COLD_NODE;
}
}
@@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 31846b0fcb95..57e36c1ce7bd 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
+#define DEF_MIN_HOT_BLOCKS 16
enum {
F2FS_IPU_FORCE,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b4c5c6298698..2d78f3c76d18 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_hot_blocks),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
--
2.11.0
If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of separate 4KB IOs.
So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 15 +++++++++++++--
fs/f2fs/f2fs.h | 3 +++
fs/f2fs/super.c | 2 ++
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
last_idx = page->index;
}
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE) {
+ /* give a priority to WB_SYNC threads */
+ if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+ --wbc->nr_to_write <= 0) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, DATA);
+ /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req);
+ else if (atomic_read(&sbi->wb_sync_req))
+ goto skip_write;
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_dec(&sbi->wb_sync_req);
/*
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;
+ /* writeback control */
+ atomic_t wb_sync_req; /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
+ atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
--
2.11.0
Hi Jaegeuk,
On 2017/3/30 4:48, Jaegeuk Kim wrote:
> It would better split small and large IOs separately in order to get more
> consecutive big writes.
>
> The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 9 +++++++++
> fs/f2fs/f2fs.h | 2 ++
> fs/f2fs/segment.c | 13 ++++++-------
> fs/f2fs/segment.h | 1 +
> fs/f2fs/super.c | 2 ++
> 5 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 090413236b27..8f36080b47c4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
> need_balance_fs = true;
> else if (has_not_enough_free_secs(sbi, 0, 0))
> goto redirty_out;
> + else
> + set_inode_flag(inode, FI_HOT_DATA);
Why do we need this here? Can you explain more about it?
Thanks.
>
> err = -EAGAIN;
> if (f2fs_has_inline_data(inode)) {
> @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> if (wbc->for_reclaim) {
> f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> DATA, WRITE);
> + clear_inode_flag(inode, FI_HOT_DATA);
> remove_dirty_inode(inode);
> submitted = NULL;
> }
> @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>
> pagevec_init(&pvec, 0);
>
> + if (get_dirty_pages(mapping->host) <=
> + SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> + set_inode_flag(mapping->host, FI_HOT_DATA);
> + else
> + clear_inode_flag(mapping->host, FI_HOT_DATA);
> +
> if (wbc->range_cyclic) {
> writeback_index = mapping->writeback_index; /* prev offset */
> index = writeback_index;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5a49518ee786..32d6f674c114 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> unsigned int ipu_policy; /* in-place-update policy */
> unsigned int min_ipu_util; /* in-place-update threshold */
> unsigned int min_fsync_blocks; /* threshold for fsync */
> + unsigned int min_hot_blocks; /* threshold for hot block allocation */
>
> /* for flush command control */
> struct flush_cmd_control *fcc_info;
> @@ -1717,6 +1718,7 @@ enum {
> FI_DO_DEFRAG, /* indicate defragment is running */
> FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> + FI_HOT_DATA, /* indicate file is hot */
> };
>
> static inline void __mark_inode_dirty_flag(struct inode *inode,
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index b5b2a4745328..bff3f3bc7827 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
> if (p_type == DATA) {
> struct inode *inode = page->mapping->host;
>
> - if (S_ISDIR(inode->i_mode))
> - return CURSEG_HOT_DATA;
> - else if (is_cold_data(page) || file_is_cold(inode))
> + if (is_cold_data(page) || file_is_cold(inode))
> return CURSEG_COLD_DATA;
> - else
> - return CURSEG_WARM_DATA;
> + if (is_inode_flag_set(inode, FI_HOT_DATA))
> + return CURSEG_HOT_DATA;
> + return CURSEG_WARM_DATA;
> } else {
> if (IS_DNODE(page))
> return is_cold_node(page) ? CURSEG_WARM_NODE :
> CURSEG_HOT_NODE;
> - else
> - return CURSEG_COLD_NODE;
> + return CURSEG_COLD_NODE;
> }
> }
>
> @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>
> sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
>
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 31846b0fcb95..57e36c1ce7bd 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> */
> #define DEF_MIN_IPU_UTIL 70
> #define DEF_MIN_FSYNC_BLOCKS 8
> +#define DEF_MIN_HOT_BLOCKS 16
>
> enum {
> F2FS_IPU_FORCE,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index b4c5c6298698..2d78f3c76d18 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
> @@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
> ATTR_LIST(ipu_policy),
> ATTR_LIST(min_ipu_util),
> ATTR_LIST(min_fsync_blocks),
> + ATTR_LIST(min_hot_blocks),
> ATTR_LIST(max_victim_search),
> ATTR_LIST(dir_level),
> ATTR_LIST(ram_thresh),
>
Hi Jaegeuk,
I tried this patch and found that it fixes the case below:
kworker/u16:3-423 [002] .... 183.812347: submit_bio: kworker/u16:3(423): WRITE block 104749352 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812380: submit_bio: fio(2122): WRITE block 104749360 on mmcblk0p50 (24 sectors)
kworker/u16:3-423 [002] .... 183.812388: submit_bio: kworker/u16:3(423): WRITE block 104749384 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812403: submit_bio: fio(2122): WRITE block 104749392 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812404: submit_bio: kworker/u16:3(423): WRITE block 104749400 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812427: submit_bio: fio(2122): WRITE block 104749408 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812429: submit_bio: kworker/u16:3(423): WRITE block 104749424 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812450: submit_bio: fio(2122): WRITE block 104749432 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812455: submit_bio: kworker/u16:3(423): WRITE block 104749448 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812470: submit_bio: fio(2122): WRITE block 104749456 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812476: submit_bio: kworker/u16:3(423): WRITE block 104749464 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812492: submit_bio: fio(2122): WRITE block 104749472 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812497: submit_bio: kworker/u16:3(423): WRITE block 104749488 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812512: submit_bio: fio(2122): WRITE block 104749496 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812514: submit_bio: kworker/u16:3(423): WRITE block 104749504 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812532: submit_bio: fio(2122): WRITE block 104749512 on mmcblk0p50 (16 sectors)
... ...
Thanks.
On 2017/3/30 4:48, Jaegeuk Kim wrote:
> If two threads try to flush dirty pages in different inodes respectively,
> f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
> resulting in a lot of 4KB seperated IOs.
>
> So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
> IOs with a big WRITE_SYNC'ed bio.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 15 +++++++++++++--
> fs/f2fs/f2fs.h | 3 +++
> fs/f2fs/super.c | 2 ++
> 3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 8f36080b47c4..b1cac6d85bcb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> last_idx = page->index;
> }
>
> - if (--wbc->nr_to_write <= 0 &&
> - wbc->sync_mode == WB_SYNC_NONE) {
> + /* give a priority to WB_SYNC threads */
> + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
> + --wbc->nr_to_write <= 0) &&
> + wbc->sync_mode == WB_SYNC_NONE) {
> done = 1;
> break;
> }
> @@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
>
> trace_f2fs_writepages(mapping->host, wbc, DATA);
>
> + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_inc(&sbi->wb_sync_req);
> + else if (atomic_read(&sbi->wb_sync_req))
> + goto skip_write;
> +
> blk_start_plug(&plug);
> ret = f2fs_write_cache_pages(mapping, wbc);
> blk_finish_plug(&plug);
> +
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_dec(&sbi->wb_sync_req);
> /*
> * if some pages were truncated, we cannot guarantee its mapping->host
> * to detect pending bios.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 32d6f674c114..fd39db681226 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -888,6 +888,9 @@ struct f2fs_sb_info {
> /* # of allocated blocks */
> struct percpu_counter alloc_valid_block_count;
>
> + /* writeback control */
> + atomic_t wb_sync_req; /* count # of WB_SYNC threads */
> +
> /* valid inode count */
> struct percpu_counter total_valid_inode_count;
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2d78f3c76d18..cb65e6d0d275 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_COUNT_TYPE; i++)
> atomic_set(&sbi->nr_pages[i], 0);
>
> + atomic_set(&sbi->wb_sync_req, 0);
> +
> INIT_LIST_HEAD(&sbi->s_list);
> mutex_init(&sbi->umount_mutex);
> mutex_init(&sbi->wio_mutex[NODE]);
>
Hi Jaegeuk,
Can we split in-place-update bios into a separate sbi->f2fs_bio_info so that more
pages can be merged for out-of-place updates? The case can be shown as below:
in place update submit a bio with one page
out place update submit a bio with one page
in place update submit a bio with one page
out place update submit a bio with one page
... ...
just like the WB_SYNC_ALL and WB_SYNC_NONE case.
Thanks.
On 2017/3/30 4:48, Jaegeuk Kim wrote:
> If two threads try to flush dirty pages in different inodes respectively,
> f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
> resulting in a lot of 4KB seperated IOs.
>
> So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
> IOs with a big WRITE_SYNC'ed bio.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 15 +++++++++++++--
> fs/f2fs/f2fs.h | 3 +++
> fs/f2fs/super.c | 2 ++
> 3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 8f36080b47c4..b1cac6d85bcb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> last_idx = page->index;
> }
>
> - if (--wbc->nr_to_write <= 0 &&
> - wbc->sync_mode == WB_SYNC_NONE) {
> + /* give a priority to WB_SYNC threads */
> + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
> + --wbc->nr_to_write <= 0) &&
> + wbc->sync_mode == WB_SYNC_NONE) {
> done = 1;
> break;
> }
> @@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
>
> trace_f2fs_writepages(mapping->host, wbc, DATA);
>
> + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_inc(&sbi->wb_sync_req);
> + else if (atomic_read(&sbi->wb_sync_req))
> + goto skip_write;
> +
> blk_start_plug(&plug);
> ret = f2fs_write_cache_pages(mapping, wbc);
> blk_finish_plug(&plug);
> +
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_dec(&sbi->wb_sync_req);
> /*
> * if some pages were truncated, we cannot guarantee its mapping->host
> * to detect pending bios.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 32d6f674c114..fd39db681226 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -888,6 +888,9 @@ struct f2fs_sb_info {
> /* # of allocated blocks */
> struct percpu_counter alloc_valid_block_count;
>
> + /* writeback control */
> + atomic_t wb_sync_req; /* count # of WB_SYNC threads */
> +
> /* valid inode count */
> struct percpu_counter total_valid_inode_count;
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2d78f3c76d18..cb65e6d0d275 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_COUNT_TYPE; i++)
> atomic_set(&sbi->nr_pages[i], 0);
>
> + atomic_set(&sbi->wb_sync_req, 0);
> +
> INIT_LIST_HEAD(&sbi->s_list);
> mutex_init(&sbi->umount_mutex);
> mutex_init(&sbi->wio_mutex[NODE]);
>
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
>
> On 2017/3/30 4:48, Jaegeuk Kim wrote:
> > It would better split small and large IOs separately in order to get more
> > consecutive big writes.
> >
> > The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 9 +++++++++
> > fs/f2fs/f2fs.h | 2 ++
> > fs/f2fs/segment.c | 13 ++++++-------
> > fs/f2fs/segment.h | 1 +
> > fs/f2fs/super.c | 2 ++
> > 5 files changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 090413236b27..8f36080b47c4 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
> > need_balance_fs = true;
> > else if (has_not_enough_free_secs(sbi, 0, 0))
> > goto redirty_out;
> > + else
> > + set_inode_flag(inode, FI_HOT_DATA);
>
> Why here we need this, can you explain more about this?
I fixed this.
Please refer to the up-to-date patch that I've been testing.
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=6976ab59090395014368296f154426c9311d69dc
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=65f770f2ead7dfdf661b2da49af1aa814b662c93
Thanks,
>
> Thanks.
>
> >
> > err = -EAGAIN;
> > if (f2fs_has_inline_data(inode)) {
> > @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> > if (wbc->for_reclaim) {
> > f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> > DATA, WRITE);
> > + clear_inode_flag(inode, FI_HOT_DATA);
> > remove_dirty_inode(inode);
> > submitted = NULL;
> > }
> > @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >
> > pagevec_init(&pvec, 0);
> >
> > + if (get_dirty_pages(mapping->host) <=
> > + SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> > + set_inode_flag(mapping->host, FI_HOT_DATA);
> > + else
> > + clear_inode_flag(mapping->host, FI_HOT_DATA);
> > +
> > if (wbc->range_cyclic) {
> > writeback_index = mapping->writeback_index; /* prev offset */
> > index = writeback_index;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5a49518ee786..32d6f674c114 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> > unsigned int ipu_policy; /* in-place-update policy */
> > unsigned int min_ipu_util; /* in-place-update threshold */
> > unsigned int min_fsync_blocks; /* threshold for fsync */
> > + unsigned int min_hot_blocks; /* threshold for hot block allocation */
> >
> > /* for flush command control */
> > struct flush_cmd_control *fcc_info;
> > @@ -1717,6 +1718,7 @@ enum {
> > FI_DO_DEFRAG, /* indicate defragment is running */
> > FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> > FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> > + FI_HOT_DATA, /* indicate file is hot */
> > };
> >
> > static inline void __mark_inode_dirty_flag(struct inode *inode,
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index b5b2a4745328..bff3f3bc7827 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
> > if (p_type == DATA) {
> > struct inode *inode = page->mapping->host;
> >
> > - if (S_ISDIR(inode->i_mode))
> > - return CURSEG_HOT_DATA;
> > - else if (is_cold_data(page) || file_is_cold(inode))
> > + if (is_cold_data(page) || file_is_cold(inode))
> > return CURSEG_COLD_DATA;
> > - else
> > - return CURSEG_WARM_DATA;
> > + if (is_inode_flag_set(inode, FI_HOT_DATA))
> > + return CURSEG_HOT_DATA;
> > + return CURSEG_WARM_DATA;
> > } else {
> > if (IS_DNODE(page))
> > return is_cold_node(page) ? CURSEG_WARM_NODE :
> > CURSEG_HOT_NODE;
> > - else
> > - return CURSEG_COLD_NODE;
> > + return CURSEG_COLD_NODE;
> > }
> > }
> >
> > @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> > sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> > sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> > sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >
> > sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> >
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index 31846b0fcb95..57e36c1ce7bd 100644
> > --- a/fs/f2fs/segment.h
> > +++ b/fs/f2fs/segment.h
> > @@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> > */
> > #define DEF_MIN_IPU_UTIL 70
> > #define DEF_MIN_FSYNC_BLOCKS 8
> > +#define DEF_MIN_HOT_BLOCKS 16
> >
> > enum {
> > F2FS_IPU_FORCE,
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index b4c5c6298698..2d78f3c76d18 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> > +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
> > @@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
> > ATTR_LIST(ipu_policy),
> > ATTR_LIST(min_ipu_util),
> > ATTR_LIST(min_fsync_blocks),
> > + ATTR_LIST(min_hot_blocks),
> > ATTR_LIST(max_victim_search),
> > ATTR_LIST(dir_level),
> > ATTR_LIST(ram_thresh),
> >
On 03/31, heyunlei wrote:
> Hi Jaegeuk,
>
> Can we split in place update bios into single sbi->f2fs_bio_info for more page
> merged in out place update? This case can be show as below:
>
> in place update submit a bio with one page
> out place update submit a bio with one page
> in place update submit a bio with one page
> out place update submit a bio with one page
> ... ...
>
> just like WB_SYNC_ALL and WB_SYNC_NONE case.
Something like this?
From d9f00695c5e56c48611ade3ced89432ef2b59a27 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <[email protected]>
Date: Thu, 30 Mar 2017 21:02:46 -0700
Subject: [PATCH] f2fs: submit bio of in-place-update pages
This patch tries to split in-place-update bios from sequential bios.
Suggested-by: Yunlei He <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 2 +-
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/segment.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b1cac6d85bcb..1392e7c153bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1354,7 +1354,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(fio);
+ err = rewrite_data_page(fio);
set_inode_flag(inode, FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd39db681226..5a2b8cd13c92 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2207,7 +2207,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-void rewrite_data_page(struct f2fs_io_info *fio);
+int rewrite_data_page(struct f2fs_io_info *fio);
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bff3f3bc7827..eedbed62947f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1973,11 +1973,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
}
-void rewrite_data_page(struct f2fs_io_info *fio)
+int rewrite_data_page(struct f2fs_io_info *fio)
{
fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
- f2fs_submit_page_mbio(fio);
+ return f2fs_submit_page_bio(fio);
}
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
--
2.11.0