2017-03-29 20:48:48

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/2] f2fs: write small sized IO to hot log

It would better split small and large IOs separately in order to get more
consecutive big writes.

The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 9 +++++++++
fs/f2fs/f2fs.h | 2 ++
fs/f2fs/segment.c | 13 ++++++-------
fs/f2fs/segment.h | 1 +
fs/f2fs/super.c | 2 ++
5 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 090413236b27..8f36080b47c4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
need_balance_fs = true;
else if (has_not_enough_free_secs(sbi, 0, 0))
goto redirty_out;
+ else
+ set_inode_flag(inode, FI_HOT_DATA);

err = -EAGAIN;
if (f2fs_has_inline_data(inode)) {
@@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
if (wbc->for_reclaim) {
f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
DATA, WRITE);
+ clear_inode_flag(inode, FI_HOT_DATA);
remove_dirty_inode(inode);
submitted = NULL;
}
@@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,

pagevec_init(&pvec, 0);

+ if (get_dirty_pages(mapping->host) <=
+ SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
+ set_inode_flag(mapping->host, FI_HOT_DATA);
+ else
+ clear_inode_flag(mapping->host, FI_HOT_DATA);
+
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a49518ee786..32d6f674c114 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -678,6 +678,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_hot_blocks; /* threshold for hot block allocation */

/* for flush command control */
struct flush_cmd_control *fcc_info;
@@ -1717,6 +1718,7 @@ enum {
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_HOT_DATA, /* indicate file is hot */
};

static inline void __mark_inode_dirty_flag(struct inode *inode,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index b5b2a4745328..bff3f3bc7827 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
if (p_type == DATA) {
struct inode *inode = page->mapping->host;

- if (S_ISDIR(inode->i_mode))
- return CURSEG_HOT_DATA;
- else if (is_cold_data(page) || file_is_cold(inode))
+ if (is_cold_data(page) || file_is_cold(inode))
return CURSEG_COLD_DATA;
- else
- return CURSEG_WARM_DATA;
+ if (is_inode_flag_set(inode, FI_HOT_DATA))
+ return CURSEG_HOT_DATA;
+ return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(page))
return is_cold_node(page) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
- else
- return CURSEG_COLD_NODE;
+ return CURSEG_COLD_NODE;
}
}

@@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;

sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;

diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 31846b0fcb95..57e36c1ce7bd 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
+#define DEF_MIN_HOT_BLOCKS 16

enum {
F2FS_IPU_FORCE,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b4c5c6298698..2d78f3c76d18 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_hot_blocks),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
--
2.11.0


2017-03-29 20:48:47

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

If two threads try to flush dirty pages in different inodes respectively,
f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
resulting in a lot of 4KB seperated IOs.

So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
IOs with a big WRITE_SYNC'ed bio.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 15 +++++++++++++--
fs/f2fs/f2fs.h | 3 +++
fs/f2fs/super.c | 2 ++
3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f36080b47c4..b1cac6d85bcb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
last_idx = page->index;
}

- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE) {
+ /* give a priority to WB_SYNC threads */
+ if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
+ --wbc->nr_to_write <= 0) &&
+ wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
@@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,

trace_f2fs_writepages(mapping->host, wbc, DATA);

+ /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_inc(&sbi->wb_sync_req);
+ else if (atomic_read(&sbi->wb_sync_req))
+ goto skip_write;
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc);
blk_finish_plug(&plug);
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ atomic_dec(&sbi->wb_sync_req);
/*
* if some pages were truncated, we cannot guarantee its mapping->host
* to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 32d6f674c114..fd39db681226 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -888,6 +888,9 @@ struct f2fs_sb_info {
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;

+ /* writeback control */
+ atomic_t wb_sync_req; /* count # of WB_SYNC threads */
+
/* valid inode count */
struct percpu_counter total_valid_inode_count;

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d78f3c76d18..cb65e6d0d275 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);

+ atomic_set(&sbi->wb_sync_req, 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
--
2.11.0

2017-03-31 03:38:31

by heyunlei

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: write small sized IO to hot log

Hi Jaegeuk,

On 2017/3/30 4:48, Jaegeuk Kim wrote:
> It would better split small and large IOs separately in order to get more
> consecutive big writes.
>
> The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 9 +++++++++
> fs/f2fs/f2fs.h | 2 ++
> fs/f2fs/segment.c | 13 ++++++-------
> fs/f2fs/segment.h | 1 +
> fs/f2fs/super.c | 2 ++
> 5 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 090413236b27..8f36080b47c4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
> need_balance_fs = true;
> else if (has_not_enough_free_secs(sbi, 0, 0))
> goto redirty_out;
> + else
> + set_inode_flag(inode, FI_HOT_DATA);

Why here we need this, can you explain more about this?

Thanks.

>
> err = -EAGAIN;
> if (f2fs_has_inline_data(inode)) {
> @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> if (wbc->for_reclaim) {
> f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> DATA, WRITE);
> + clear_inode_flag(inode, FI_HOT_DATA);
> remove_dirty_inode(inode);
> submitted = NULL;
> }
> @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
>
> pagevec_init(&pvec, 0);
>
> + if (get_dirty_pages(mapping->host) <=
> + SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> + set_inode_flag(mapping->host, FI_HOT_DATA);
> + else
> + clear_inode_flag(mapping->host, FI_HOT_DATA);
> +
> if (wbc->range_cyclic) {
> writeback_index = mapping->writeback_index; /* prev offset */
> index = writeback_index;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5a49518ee786..32d6f674c114 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> unsigned int ipu_policy; /* in-place-update policy */
> unsigned int min_ipu_util; /* in-place-update threshold */
> unsigned int min_fsync_blocks; /* threshold for fsync */
> + unsigned int min_hot_blocks; /* threshold for hot block allocation */
>
> /* for flush command control */
> struct flush_cmd_control *fcc_info;
> @@ -1717,6 +1718,7 @@ enum {
> FI_DO_DEFRAG, /* indicate defragment is running */
> FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> + FI_HOT_DATA, /* indicate file is hot */
> };
>
> static inline void __mark_inode_dirty_flag(struct inode *inode,
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index b5b2a4745328..bff3f3bc7827 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
> if (p_type == DATA) {
> struct inode *inode = page->mapping->host;
>
> - if (S_ISDIR(inode->i_mode))
> - return CURSEG_HOT_DATA;
> - else if (is_cold_data(page) || file_is_cold(inode))
> + if (is_cold_data(page) || file_is_cold(inode))
> return CURSEG_COLD_DATA;
> - else
> - return CURSEG_WARM_DATA;
> + if (is_inode_flag_set(inode, FI_HOT_DATA))
> + return CURSEG_HOT_DATA;
> + return CURSEG_WARM_DATA;
> } else {
> if (IS_DNODE(page))
> return is_cold_node(page) ? CURSEG_WARM_NODE :
> CURSEG_HOT_NODE;
> - else
> - return CURSEG_COLD_NODE;
> + return CURSEG_COLD_NODE;
> }
> }
>
> @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
>
> sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
>
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 31846b0fcb95..57e36c1ce7bd 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> */
> #define DEF_MIN_IPU_UTIL 70
> #define DEF_MIN_FSYNC_BLOCKS 8
> +#define DEF_MIN_HOT_BLOCKS 16
>
> enum {
> F2FS_IPU_FORCE,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index b4c5c6298698..2d78f3c76d18 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
> F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
> @@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
> ATTR_LIST(ipu_policy),
> ATTR_LIST(min_ipu_util),
> ATTR_LIST(min_fsync_blocks),
> + ATTR_LIST(min_hot_blocks),
> ATTR_LIST(max_victim_search),
> ATTR_LIST(dir_level),
> ATTR_LIST(ram_thresh),
>

2017-03-31 03:42:20

by heyunlei

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

Hi Jaegeuk,

I try this patch and find it can fix below case:

kworker/u16:3-423 [002] .... 183.812347: submit_bio: kworker/u16:3(423): WRITE block 104749352 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812380: submit_bio: fio(2122): WRITE block 104749360 on mmcblk0p50 (24 sectors)
kworker/u16:3-423 [002] .... 183.812388: submit_bio: kworker/u16:3(423): WRITE block 104749384 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812403: submit_bio: fio(2122): WRITE block 104749392 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812404: submit_bio: kworker/u16:3(423): WRITE block 104749400 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812427: submit_bio: fio(2122): WRITE block 104749408 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812429: submit_bio: kworker/u16:3(423): WRITE block 104749424 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812450: submit_bio: fio(2122): WRITE block 104749432 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812455: submit_bio: kworker/u16:3(423): WRITE block 104749448 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812470: submit_bio: fio(2122): WRITE block 104749456 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812476: submit_bio: kworker/u16:3(423): WRITE block 104749464 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812492: submit_bio: fio(2122): WRITE block 104749472 on mmcblk0p50 (16 sectors)
kworker/u16:3-423 [002] .... 183.812497: submit_bio: kworker/u16:3(423): WRITE block 104749488 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812512: submit_bio: fio(2122): WRITE block 104749496 on mmcblk0p50 (8 sectors)
kworker/u16:3-423 [002] .... 183.812514: submit_bio: kworker/u16:3(423): WRITE block 104749504 on mmcblk0p50 (8 sectors)
fio-2122 [003] .... 183.812532: submit_bio: fio(2122): WRITE block 104749512 on mmcblk0p50 (16 sectors)

... ...

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:
> If two threads try to flush dirty pages in different inodes respectively,
> f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
> resulting in a lot of 4KB seperated IOs.
>
> So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
> IOs with a big WRITE_SYNC'ed bio.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 15 +++++++++++++--
> fs/f2fs/f2fs.h | 3 +++
> fs/f2fs/super.c | 2 ++
> 3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 8f36080b47c4..b1cac6d85bcb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> last_idx = page->index;
> }
>
> - if (--wbc->nr_to_write <= 0 &&
> - wbc->sync_mode == WB_SYNC_NONE) {
> + /* give a priority to WB_SYNC threads */
> + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
> + --wbc->nr_to_write <= 0) &&
> + wbc->sync_mode == WB_SYNC_NONE) {
> done = 1;
> break;
> }
> @@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
>
> trace_f2fs_writepages(mapping->host, wbc, DATA);
>
> + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_inc(&sbi->wb_sync_req);
> + else if (atomic_read(&sbi->wb_sync_req))
> + goto skip_write;
> +
> blk_start_plug(&plug);
> ret = f2fs_write_cache_pages(mapping, wbc);
> blk_finish_plug(&plug);
> +
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_dec(&sbi->wb_sync_req);
> /*
> * if some pages were truncated, we cannot guarantee its mapping->host
> * to detect pending bios.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 32d6f674c114..fd39db681226 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -888,6 +888,9 @@ struct f2fs_sb_info {
> /* # of allocated blocks */
> struct percpu_counter alloc_valid_block_count;
>
> + /* writeback control */
> + atomic_t wb_sync_req; /* count # of WB_SYNC threads */
> +
> /* valid inode count */
> struct percpu_counter total_valid_inode_count;
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2d78f3c76d18..cb65e6d0d275 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_COUNT_TYPE; i++)
> atomic_set(&sbi->nr_pages[i], 0);
>
> + atomic_set(&sbi->wb_sync_req, 0);
> +
> INIT_LIST_HEAD(&sbi->s_list);
> mutex_init(&sbi->umount_mutex);
> mutex_init(&sbi->wio_mutex[NODE]);
>

2017-03-31 03:52:36

by heyunlei

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

Hi Jaegeuk,

Can we split in place update bios into single sbi->f2fs_bio_info for more page
merged in out place update? This case can be show as below:

in place update submit a bio with one page
out place update submit a bio with one page
in place update submit a bio with one page
out place update submit a bio with one page
... ...

just like WB_SYNC_ALL and WB_SYNC_NONE case.

Thanks.

On 2017/3/30 4:48, Jaegeuk Kim wrote:
> If two threads try to flush dirty pages in different inodes respectively,
> f2fs_write_data_pages() will produce WRITE and WRITE_SYNC one at a time,
> resulting in a lot of 4KB seperated IOs.
>
> So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write
> IOs with a big WRITE_SYNC'ed bio.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 15 +++++++++++++--
> fs/f2fs/f2fs.h | 3 +++
> fs/f2fs/super.c | 2 ++
> 3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 8f36080b47c4..b1cac6d85bcb 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1605,8 +1605,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> last_idx = page->index;
> }
>
> - if (--wbc->nr_to_write <= 0 &&
> - wbc->sync_mode == WB_SYNC_NONE) {
> + /* give a priority to WB_SYNC threads */
> + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
> + --wbc->nr_to_write <= 0) &&
> + wbc->sync_mode == WB_SYNC_NONE) {
> done = 1;
> break;
> }
> @@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping,
>
> trace_f2fs_writepages(mapping->host, wbc, DATA);
>
> + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_inc(&sbi->wb_sync_req);
> + else if (atomic_read(&sbi->wb_sync_req))
> + goto skip_write;
> +
> blk_start_plug(&plug);
> ret = f2fs_write_cache_pages(mapping, wbc);
> blk_finish_plug(&plug);
> +
> + if (wbc->sync_mode == WB_SYNC_ALL)
> + atomic_dec(&sbi->wb_sync_req);
> /*
> * if some pages were truncated, we cannot guarantee its mapping->host
> * to detect pending bios.
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 32d6f674c114..fd39db681226 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -888,6 +888,9 @@ struct f2fs_sb_info {
> /* # of allocated blocks */
> struct percpu_counter alloc_valid_block_count;
>
> + /* writeback control */
> + atomic_t wb_sync_req; /* count # of WB_SYNC threads */
> +
> /* valid inode count */
> struct percpu_counter total_valid_inode_count;
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2d78f3c76d18..cb65e6d0d275 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_COUNT_TYPE; i++)
> atomic_set(&sbi->nr_pages[i], 0);
>
> + atomic_set(&sbi->wb_sync_req, 0);
> +
> INIT_LIST_HEAD(&sbi->s_list);
> mutex_init(&sbi->umount_mutex);
> mutex_init(&sbi->wio_mutex[NODE]);
>

2017-03-31 03:54:22

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: write small sized IO to hot log

On 03/31, heyunlei wrote:
> Hi Jaegeuk,
>
> On 2017/3/30 4:48, Jaegeuk Kim wrote:
> > It would better split small and large IOs separately in order to get more
> > consecutive big writes.
> >
> > The default threshold is set to 64KB, but configurable by sysfs/min_hot_blocks.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 9 +++++++++
> > fs/f2fs/f2fs.h | 2 ++
> > fs/f2fs/segment.c | 13 ++++++-------
> > fs/f2fs/segment.h | 1 +
> > fs/f2fs/super.c | 2 ++
> > 5 files changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 090413236b27..8f36080b47c4 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1432,6 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted,
> > need_balance_fs = true;
> > else if (has_not_enough_free_secs(sbi, 0, 0))
> > goto redirty_out;
> > + else
> > + set_inode_flag(inode, FI_HOT_DATA);
>
> Why here we need this, can you explain more about this?

I fixed this.
Please refer the up-to-date patch that I've been testing.

https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=6976ab59090395014368296f154426c9311d69dc
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev-test&id=65f770f2ead7dfdf661b2da49af1aa814b662c93

Thanks,

>
> Thanks.
>
> >
> > err = -EAGAIN;
> > if (f2fs_has_inline_data(inode)) {
> > @@ -1457,6 +1459,7 @@ static int __write_data_page(struct page *page, bool *submitted,
> > if (wbc->for_reclaim) {
> > f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
> > DATA, WRITE);
> > + clear_inode_flag(inode, FI_HOT_DATA);
> > remove_dirty_inode(inode);
> > submitted = NULL;
> > }
> > @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
> >
> > pagevec_init(&pvec, 0);
> >
> > + if (get_dirty_pages(mapping->host) <=
> > + SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
> > + set_inode_flag(mapping->host, FI_HOT_DATA);
> > + else
> > + clear_inode_flag(mapping->host, FI_HOT_DATA);
> > +
> > if (wbc->range_cyclic) {
> > writeback_index = mapping->writeback_index; /* prev offset */
> > index = writeback_index;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5a49518ee786..32d6f674c114 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -678,6 +678,7 @@ struct f2fs_sm_info {
> > unsigned int ipu_policy; /* in-place-update policy */
> > unsigned int min_ipu_util; /* in-place-update threshold */
> > unsigned int min_fsync_blocks; /* threshold for fsync */
> > + unsigned int min_hot_blocks; /* threshold for hot block allocation */
> >
> > /* for flush command control */
> > struct flush_cmd_control *fcc_info;
> > @@ -1717,6 +1718,7 @@ enum {
> > FI_DO_DEFRAG, /* indicate defragment is running */
> > FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> > FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> > + FI_HOT_DATA, /* indicate file is hot */
> > };
> >
> > static inline void __mark_inode_dirty_flag(struct inode *inode,
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index b5b2a4745328..bff3f3bc7827 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1841,18 +1841,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
> > if (p_type == DATA) {
> > struct inode *inode = page->mapping->host;
> >
> > - if (S_ISDIR(inode->i_mode))
> > - return CURSEG_HOT_DATA;
> > - else if (is_cold_data(page) || file_is_cold(inode))
> > + if (is_cold_data(page) || file_is_cold(inode))
> > return CURSEG_COLD_DATA;
> > - else
> > - return CURSEG_WARM_DATA;
> > + if (is_inode_flag_set(inode, FI_HOT_DATA))
> > + return CURSEG_HOT_DATA;
> > + return CURSEG_WARM_DATA;
> > } else {
> > if (IS_DNODE(page))
> > return is_cold_node(page) ? CURSEG_WARM_NODE :
> > CURSEG_HOT_NODE;
> > - else
> > - return CURSEG_COLD_NODE;
> > + return CURSEG_COLD_NODE;
> > }
> > }
> >
> > @@ -2959,6 +2957,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
> > sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
> > sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
> > sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
> > + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
> >
> > sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
> >
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index 31846b0fcb95..57e36c1ce7bd 100644
> > --- a/fs/f2fs/segment.h
> > +++ b/fs/f2fs/segment.h
> > @@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
> > */
> > #define DEF_MIN_IPU_UTIL 70
> > #define DEF_MIN_FSYNC_BLOCKS 8
> > +#define DEF_MIN_HOT_BLOCKS 16
> >
> > enum {
> > F2FS_IPU_FORCE,
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index b4c5c6298698..2d78f3c76d18 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
> > F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
> > +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
> > F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
> > @@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
> > ATTR_LIST(ipu_policy),
> > ATTR_LIST(min_ipu_util),
> > ATTR_LIST(min_fsync_blocks),
> > + ATTR_LIST(min_hot_blocks),
> > ATTR_LIST(max_victim_search),
> > ATTR_LIST(dir_level),
> > ATTR_LIST(ram_thresh),
> >

2017-03-31 04:18:13

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE

On 03/31, heyunlei wrote:
> Hi Jaegeuk,
>
> Can we split in place update bios into single sbi->f2fs_bio_info for more page
> merged in out place update? This case can be show as below:
>
> in place update submit a bio with one page
> out place update submit a bio with one page
> in place update submit a bio with one page
> out place update submit a bio with one page
> ... ...
>
> just like WB_SYNC_ALL and WB_SYNC_NONE case.

Something like this?

>From d9f00695c5e56c48611ade3ced89432ef2b59a27 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <[email protected]>
Date: Thu, 30 Mar 2017 21:02:46 -0700
Subject: [PATCH] f2fs: submit bio of in-place-update pages

This patch tries to split in-place-update bios from sequential bios.

Suggested-by: Yunlei He <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 2 +-
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/segment.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b1cac6d85bcb..1392e7c153bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1354,7 +1354,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(fio);
+ err = rewrite_data_page(fio);
set_inode_flag(inode, FI_UPDATE_WRITE);
trace_f2fs_do_write_data_page(page, IPU);
} else {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd39db681226..5a2b8cd13c92 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2207,7 +2207,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
-void rewrite_data_page(struct f2fs_io_info *fio);
+int rewrite_data_page(struct f2fs_io_info *fio);
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bff3f3bc7827..eedbed62947f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1973,11 +1973,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
}

-void rewrite_data_page(struct f2fs_io_info *fio)
+int rewrite_data_page(struct f2fs_io_info *fio)
{
fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
- f2fs_submit_page_mbio(fio);
+ return f2fs_submit_page_bio(fio);
}

void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
--
2.11.0