2013-09-30 10:57:48

by Yuan Zhong

[permalink] [raw]
Subject: [f2fs-dev][PATCH]f2fs: avoid congestion_wait when do_checkpoint for better performance


Previously, do_checkpoint() called congestion_wait() to wait for the previously submitted node/meta/data pages to be written back.
However, congestion_wait() waits for a fixed period (e.g. HZ / 50).
As a result, there are situations where the pages have already been written back, but the checkpoint thread is still waiting for congestion_wait() to return.
This is a problem especially when syncing a large number of small files or directories.
To avoid this, a wait queue is introduced: the checkpoint thread is put on the wait queue if the pages have not yet been written back, and it is woken up once their writeback has completed.

Signed-off-by: Yuan Zhong <[email protected]>
---
fs/f2fs/checkpoint.c | 3 +--
fs/f2fs/f2fs.h | 19 +++++++++++++++++++
fs/f2fs/segment.c | 1 +
fs/f2fs/super.c | 1 +
4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bb31220..cf6b4a5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -756,8 +756,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
f2fs_put_page(cp_page, 1);

/* wait for previous submitted node/meta pages writeback */
- while (get_pages(sbi, F2FS_WRITEBACK))
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ f2fs_writeback_wait(sbi);

filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 608f0df..f8b62cc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,6 +18,7 @@
#include <linux/crc32.h>
#include <linux/magic.h>
#include <linux/kobject.h>
+#include <linux/wait.h>

/*
* For mount options
@@ -430,6 +431,8 @@ struct f2fs_sb_info {
/* For sysfs suppport */
struct kobject s_kobj;
struct completion s_kobj_unregister;
+
+ wait_queue_head_t writeback_wqh;
};

/*
@@ -961,6 +964,22 @@ static inline int f2fs_readonly(struct super_block *sb)
return sb->s_flags & MS_RDONLY;
}

+static inline void f2fs_writeback_wait(struct f2fs_sb_info *sbi)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&sbi->writeback_wqh, &wait, TASK_UNINTERRUPTIBLE);
+ if (get_pages(sbi, F2FS_WRITEBACK))
+ io_schedule();
+ finish_wait(&sbi->writeback_wqh, &wait);
+}
+
+static inline void f2fs_writeback_wake(struct f2fs_sb_info *sbi)
+{
+ if (!get_pages(sbi, F2FS_WRITEBACK))
+ wake_up_all(&sbi->writeback_wqh);
+}
+
/*
* file.c
*/
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 09af9c7..79293fe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -597,6 +597,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)

if (p->is_sync)
complete(p->wait);
+ f2fs_writeback_wake(p->sbi);
kfree(p);
bio_put(bio);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13d0a0f..b31f686 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -818,6 +818,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ init_waitqueue_head(&sbi->writeback_wqh);
for (i = 0; i < NR_GLOBAL_LOCKS; i++)
mutex_init(&sbi->fs_lock[i]);
mutex_init(&sbi->node_write);


2013-10-07 02:42:13

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev][PATCH]f2fs: avoid congestion_wait when do_checkpoint for better performance

Hi,

Please run checkpatch.pl on your patch before sending it.
Thanks,

2013-09-30 (월), 18:28 +0800, yuan zhong:
> Previously, do_checkpoint() will call congestion_wait() for waiting the pages (previous submitted node/meta/data pages) to be written back.
> Because congestion_wait() will set a regular period (e.g. HZ / 50 ) for waiting.
> For this reason, there is a situation that after the pages have been written back, but the checkpoint thread still wait for congestion_wait to exit.
> This is a problem here, especially, when sync a large number of small files or dirs.
> In order to avoid this, a wait_list is introduced, the checkpoint thread will be dropped into the wait_list if the pages have not been written back, and will be waked up by contrast.
>
> Signed-off-by: Yuan Zhong <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 3 +--
> fs/f2fs/f2fs.h | 19 +++++++++++++++++++
> fs/f2fs/segment.c | 1 +
> fs/f2fs/super.c | 1 +
> 4 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bb31220..cf6b4a5 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -756,8 +756,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
> f2fs_put_page(cp_page, 1);
>
> /* wait for previous submitted node/meta pages writeback */
> - while (get_pages(sbi, F2FS_WRITEBACK))
> - congestion_wait(BLK_RW_ASYNC, HZ / 50);
> + f2fs_writeback_wait(sbi);
>
> filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
> filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 608f0df..f8b62cc 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -18,6 +18,7 @@
> #include <linux/crc32.h>
> #include <linux/magic.h>
> #include <linux/kobject.h>
> +#include <linux/wait.h>
>
> /*
> * For mount options
> @@ -430,6 +431,8 @@ struct f2fs_sb_info {
> /* For sysfs suppport */
> struct kobject s_kobj;
> struct completion s_kobj_unregister;
> +
> + wait_queue_head_t writeback_wqh;
> };
>
> /*
> @@ -961,6 +964,22 @@ static inline int f2fs_readonly(struct super_block *sb)
> return sb->s_flags & MS_RDONLY;
> }
>
> +static inline void f2fs_writeback_wait(struct f2fs_sb_info *sbi) {
> + DEFINE_WAIT(wait);
> +
> + prepare_to_wait(&sbi->writeback_wqh, &wait, TASK_UNINTERRUPTIBLE);
> + if (get_pages(sbi, F2FS_WRITEBACK))
> + io_schedule();
> + finish_wait(&sbi->writeback_wqh, &wait); }
> +
> +static inline void f2fs_writeback_wake(struct f2fs_sb_info *sbi) {
> + if (!get_pages(sbi, F2FS_WRITEBACK))
> + wake_up_all(&sbi->writeback_wqh);
> +}
> +
> /*
> * file.c
> */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 09af9c7..79293fe 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -597,6 +597,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
>
> if (p->is_sync)
> complete(p->wait);
> + f2fs_writeback_wake(p->sbi);
> kfree(p);
> bio_put(bio);
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 13d0a0f..b31f686 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -818,6 +818,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> mutex_init(&sbi->gc_mutex);
> mutex_init(&sbi->writepages);
> mutex_init(&sbi->cp_mutex);
> + init_waitqueue_head(&sbi->writeback_wqh);
> for (i = 0; i < NR_GLOBAL_LOCKS; i++)
> mutex_init(&sbi->fs_lock[i]);
> mutex_init(&sbi->node_write);
>

--
Jaegeuk Kim
Samsung