LinuxLists.cc - [PATCH 1/2] f2fs: run discard jobs when put

2019-01-23 00:05:28

Subject: [PATCH 1/2] f2fs: run discard jobs when put_super

When we umount f2fs, we need to avoid long delay due to discard commands, which
is actually taking tens of seconds, if storage is very slow on UNMAP. So, this
patch introduces timeout-based work on it.

By default, let me give 5 seconds for discard.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
fs/f2fs/f2fs.h | 5 ++++-
fs/f2fs/segment.c | 11 ++++++++++-
fs/f2fs/super.c | 17 ++++++++---------
fs/f2fs/sysfs.c | 3 +++
5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a7ce33199457..91822ce25831 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -86,6 +86,13 @@ Description:
The unit size is one block, now only support configuring in range
of [1, 512].

+What: /sys/fs/f2fs/<disk>/umount_discard_timeout
+Date: January 2019
+Contact: "Jaegeuk Kim" <[email protected]>
+Description:
+ Set timeout to issue discard commands during umount.
+ Default: 5 secs
+
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <[email protected]>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0f564883e078..6b6ec5600089 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -191,6 +191,7 @@ enum {
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */

struct cp_control {
int reason;
@@ -310,6 +311,7 @@ struct discard_policy {
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
+ int timeout; /* discard timeout for put_super */
};

struct discard_cmd_control {
@@ -1110,6 +1112,7 @@ enum {
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
+ UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};

@@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9b79056d705d..97e0faf09ebf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,

dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->timeout = MAX_TIME;

if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
int i, issued = 0;
bool io_interrupted = false;

+ if (dpolicy->timeout != MAX_TIME)
+ f2fs_update_time(sbi, dpolicy->timeout);
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (dpolicy->timeout != MAX_TIME &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (i + 1 < dpolicy->granularity)
break;

@@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
}

/* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)

__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
+ dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ea514acede36..7998ff5418f2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1029,6 +1029,9 @@ static void f2fs_put_super(struct super_block *sb)
int i;
bool dropped;

+ /* be sure to wait for any on-going discard commands */
+ dropped = f2fs_issue_discard_timeout(sbi);
+
f2fs_quota_off_umount(sb);

/* prevent remaining shrinker jobs */
@@ -1044,17 +1047,12 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
- f2fs_write_checkpoint(sbi, &cpc);
- }

- /* be sure to wait for any on-going discard commands */
- dropped = f2fs_wait_discard_bios(sbi);
+ if ((f2fs_hw_support_discard(sbi) ||
+ f2fs_hw_should_discard(sbi)) &&
+ !sbi->discard_blks && !dropped)
+ cpc.reason |= CP_TRIMMED;

- if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
- !sbi->discard_blks && !dropped) {
- struct cp_control cpc = {
- .reason = CP_UMOUNT | CP_TRIMMED,
- };
f2fs_write_checkpoint(sbi, &cpc);
}

@@ -2706,6 +2704,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+ sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);

for (i = 0; i < NR_COUNT_TYPE; i++)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 5b83e1d66cb3..18c627e8fc90 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
interval_time[DISCARD_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+ umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(idle_interval),
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
+ ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
--
2.19.0.605.g01d371f741-goog

2019-01-23 00:05:28

by Jaegeuk Kim

[permalink] [raw]

Subject: [PATCH 2/2] f2fs: sync filesystem after roll-forward recovery

Some works after roll-forward recovery can get an error which will release
all the data structures. Let's flush them in order to make it clean.

One possible corruption came from:

[ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null)
[ 90.675349] Call trace:
[ 90.677869] __list_del_entry_valid+0x94/0xb4
[ 90.682351] remove_dirty_inode+0xac/0x114
[ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8
[ 90.691302] f2fs_write_data_pages+0x40/0x4c
[ 90.695695] do_writepages+0x80/0xf0
[ 90.699372] __writeback_single_inode+0xdc/0x4ac
[ 90.704113] writeback_sb_inodes+0x280/0x440
[ 90.708501] wb_writeback+0x1b8/0x3d0
[ 90.712267] wb_workfn+0x1a8/0x4d4
[ 90.715765] process_one_work+0x1c0/0x3d4
[ 90.719883] worker_thread+0x224/0x344
[ 90.723739] kthread+0x120/0x130
[ 90.727055] ret_from_fork+0x10/0x18

Reported-by: Sahitya Tummala <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 5 +++--
fs/f2fs/node.c | 4 +++-
fs/f2fs/super.c | 42 +++++++++++++++++++++++++++++++-----------
3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f955cd3e0677..ccccf0ce2f06 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;

/* collect a number of dirty meta pages and write together */
- if (wbc->for_kupdate ||
- get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_META) <
+ nr_pages_to_skip(sbi, META))
goto skip_write;

/* if locked failed, cp will flush dirty pages instead */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4f450e573312..f6ff84e29749 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
f2fs_balance_fs_bg(sbi);

/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ if (wbc->sync_mode != WB_SYNC_ALL &&
+ get_pages(sbi, F2FS_DIRTY_NODES) <
+ nr_pages_to_skip(sbi, NODE))
goto skip_write;

if (wbc->sync_mode == WB_SYNC_ALL)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7998ff5418f2..2af0db2b738e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1456,9 +1456,16 @@ static int f2fs_enable_quotas(struct super_block *sb);

static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
{
+ unsigned int s_flags = sbi->sb->s_flags;
struct cp_control cpc;
- int err;
+ int err = 0;
+ int ret;

+ if (s_flags & SB_RDONLY) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "checkpoint=disable on readonly fs");
+ return -EINVAL;
+ }
sbi->sb->s_flags |= SB_ACTIVE;

f2fs_update_time(sbi, DISABLE_TIME);
@@ -1466,18 +1473,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
+ err = 0;
break;
+ }
if (err && err != -EAGAIN)
- return err;
+ break;
}

- err = sync_filesystem(sbi->sb);
- if (err)
- return err;
+ ret = sync_filesystem(sbi->sb);
+ if (ret || err) {
+ err = ret ? ret: err;
+ goto restore_flag;
+ }

- if (f2fs_disable_cp_again(sbi))
- return -EAGAIN;
+ if (f2fs_disable_cp_again(sbi)) {
+ err = -EAGAIN;
+ goto restore_flag;
+ }

mutex_lock(&sbi->gc_mutex);
cpc.reason = CP_PAUSE;
@@ -1486,7 +1499,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)

sbi->unusable_block_count = 0;
mutex_unlock(&sbi->gc_mutex);
- return 0;
+restore_flag:
+ sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
+ return err;
}

static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
@@ -3356,7 +3371,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
f2fs_enable_checkpoint(sbi);
}
@@ -3369,7 +3384,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
- goto free_meta;
+ goto sync_free_meta;
}
kvfree(options);

@@ -3391,6 +3406,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_update_time(sbi, REQ_TIME);
return 0;

+sync_free_meta:
+ /* safe to flush all the data */
+ sync_filesystem(sbi->sb);
+ retry = false;
+
free_meta:
#ifdef CONFIG_QUOTA
f2fs_truncate_quota_inode_pages(sb);
--
2.19.0.605.g01d371f741-goog

2019-01-23 12:16:46

by Chao Yu

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: run discard jobs when put_super

On 2019/1/23 8:02, Jaegeuk Kim wrote:
> When we umount f2fs, we need to avoid long delay due to discard commands, which
> is actually taking tens of seconds, if storage is very slow on UNMAP. So, this
> patch introduces timeout-based work on it.
>
> By default, let me give 5 seconds for discard.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
> fs/f2fs/f2fs.h | 5 ++++-
> fs/f2fs/segment.c | 11 ++++++++++-
> fs/f2fs/super.c | 17 ++++++++---------
> fs/f2fs/sysfs.c | 3 +++
> 5 files changed, 32 insertions(+), 11 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> index a7ce33199457..91822ce25831 100644
> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> @@ -86,6 +86,13 @@ Description:
> The unit size is one block, now only support configuring in range
> of [1, 512].
>
> +What: /sys/fs/f2fs/<disk>/umount_discard_timeout
> +Date: January 2019
> +Contact: "Jaegeuk Kim" <[email protected]>
> +Description:
> + Set timeout to issue discard commands during umount.
> + Default: 5 secs
> +
> What: /sys/fs/f2fs/<disk>/max_victim_search
> Date: January 2014
> Contact: "Jaegeuk Kim" <[email protected]>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0f564883e078..6b6ec5600089 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -191,6 +191,7 @@ enum {
> #define DEF_CP_INTERVAL 60 /* 60 secs */
> #define DEF_IDLE_INTERVAL 5 /* 5 secs */
> #define DEF_DISABLE_INTERVAL 5 /* 5 secs */
> +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
>
> struct cp_control {
> int reason;
> @@ -310,6 +311,7 @@ struct discard_policy {
> bool sync; /* submit discard with REQ_SYNC flag */
> bool ordered; /* issue discard by lba order */
> unsigned int granularity; /* discard granularity */
> + int timeout; /* discard timeout for put_super */
> };
>
> struct discard_cmd_control {
> @@ -1110,6 +1112,7 @@ enum {
> DISCARD_TIME,
> GC_TIME,
> DISABLE_TIME,
> + UMOUNT_DISCARD_TIMEOUT,
> MAX_TIME,

After the patch, MAX_TIME will be 6, so if we set @timeout to 6 via sysfs,
the configuration will not be enabled due to below condition, right?

+ if (dpolicy->timeout != MAX_TIME)
+ f2fs_update_time(sbi, dpolicy->timeout);

> };
>
> @@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
> bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
> void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
> void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
> -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
> +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
> void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
> struct cp_control *cpc);
> void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 9b79056d705d..97e0faf09ebf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>
> dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
> dpolicy->io_aware_gran = MAX_PLIST_NUM;
> + dpolicy->timeout = MAX_TIME;
>
> if (discard_type == DPOLICY_BG) {
> dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> @@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> int i, issued = 0;
> bool io_interrupted = false;
>
> + if (dpolicy->timeout != MAX_TIME)
> + f2fs_update_time(sbi, dpolicy->timeout);
> +
> for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
> + if (dpolicy->timeout != MAX_TIME &&
> + f2fs_time_over(sbi, dpolicy->timeout))
> + break;
> +
> if (i + 1 < dpolicy->granularity)
> break;
>
> @@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
> }
>
> /* This comes from f2fs_put_super */
> -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
> +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
> {
> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> struct discard_policy dpolicy;
> @@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
>
> __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
> dcc->discard_granularity);
> + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
> __issue_discard_cmd(sbi, &dpolicy);
> dropped = __drop_discard_cmd(sbi);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index ea514acede36..7998ff5418f2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1029,6 +1029,9 @@ static void f2fs_put_super(struct super_block *sb)
> int i;
> bool dropped;
>
> + /* be sure to wait for any on-going discard commands */
> + dropped = f2fs_issue_discard_timeout(sbi);
> +
> f2fs_quota_off_umount(sb);
>
> /* prevent remaining shrinker jobs */
> @@ -1044,17 +1047,12 @@ static void f2fs_put_super(struct super_block *sb)
> struct cp_control cpc = {
> .reason = CP_UMOUNT,
> };
> - f2fs_write_checkpoint(sbi, &cpc);
> - }
>
> - /* be sure to wait for any on-going discard commands */
> - dropped = f2fs_wait_discard_bios(sbi);
> + if ((f2fs_hw_support_discard(sbi) ||
> + f2fs_hw_should_discard(sbi)) &&
> + !sbi->discard_blks && !dropped)
> + cpc.reason |= CP_TRIMMED;
>
> - if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
> - !sbi->discard_blks && !dropped) {
> - struct cp_control cpc = {
> - .reason = CP_UMOUNT | CP_TRIMMED,
> - };
> f2fs_write_checkpoint(sbi, &cpc);

Still, there will be problematic as I commented in last patch, could you
check that?

Thanks,

> }
>
> @@ -2706,6 +2704,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
> sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
> sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
> + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT;
> clear_sbi_flag(sbi, SBI_NEED_FSCK);
>
> for (i = 0; i < NR_COUNT_TYPE; i++)
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 5b83e1d66cb3..18c627e8fc90 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
> interval_time[DISCARD_TIME]);
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
> +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
> + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
> @@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
> ATTR_LIST(idle_interval),
> ATTR_LIST(discard_idle_interval),
> ATTR_LIST(gc_idle_interval),
> + ATTR_LIST(umount_discard_timeout),
> ATTR_LIST(iostat_enable),
> ATTR_LIST(readdir_ra),
> ATTR_LIST(gc_pin_file_thresh),
>

2019-01-24 08:44:52

by Chao Yu

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: sync filesystem after roll-forward recovery

On 2019/1/23 8:02, Jaegeuk Kim wrote:
> Some works after roll-forward recovery can get an error which will release
> all the data structures. Let's flush them in order to make it clean.
>
> One possible corruption came from:
>
> [ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null)
> [ 90.675349] Call trace:
> [ 90.677869] __list_del_entry_valid+0x94/0xb4
> [ 90.682351] remove_dirty_inode+0xac/0x114
> [ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8
> [ 90.691302] f2fs_write_data_pages+0x40/0x4c
> [ 90.695695] do_writepages+0x80/0xf0
> [ 90.699372] __writeback_single_inode+0xdc/0x4ac
> [ 90.704113] writeback_sb_inodes+0x280/0x440
> [ 90.708501] wb_writeback+0x1b8/0x3d0
> [ 90.712267] wb_workfn+0x1a8/0x4d4
> [ 90.715765] process_one_work+0x1c0/0x3d4
> [ 90.719883] worker_thread+0x224/0x344
> [ 90.723739] kthread+0x120/0x130
> [ 90.727055] ret_from_fork+0x10/0x18
>
> Reported-by: Sahitya Tummala <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 5 +++--
> fs/f2fs/node.c | 4 +++-
> fs/f2fs/super.c | 42 +++++++++++++++++++++++++++++++-----------
> 3 files changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index f955cd3e0677..ccccf0ce2f06 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
> goto skip_write;
>
> /* collect a number of dirty meta pages and write together */
> - if (wbc->for_kupdate ||
> - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
> + if (wbc->sync_mode != WB_SYNC_ALL &&
> + get_pages(sbi, F2FS_DIRTY_META) <
> + nr_pages_to_skip(sbi, META))
> goto skip_write;
>
> /* if locked failed, cp will flush dirty pages instead */
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 4f450e573312..f6ff84e29749 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
> f2fs_balance_fs_bg(sbi);
>
> /* collect a number of dirty node pages and write together */
> - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
> + if (wbc->sync_mode != WB_SYNC_ALL &&
> + get_pages(sbi, F2FS_DIRTY_NODES) <
> + nr_pages_to_skip(sbi, NODE))
> goto skip_write;
>
> if (wbc->sync_mode == WB_SYNC_ALL)
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7998ff5418f2..2af0db2b738e 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1456,9 +1456,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
>
> static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> {
> + unsigned int s_flags = sbi->sb->s_flags;
> struct cp_control cpc;
> - int err;
> + int err = 0;
> + int ret;
>
> + if (s_flags & SB_RDONLY) {
> + f2fs_msg(sbi->sb, KERN_ERR,
> + "checkpoint=disable on readonly fs");
> + return -EINVAL;
> + }
> sbi->sb->s_flags |= SB_ACTIVE;
>
> f2fs_update_time(sbi, DISABLE_TIME);
> @@ -1466,18 +1473,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> while (!f2fs_time_over(sbi, DISABLE_TIME)) {
> mutex_lock(&sbi->gc_mutex);
> err = f2fs_gc(sbi, true, false, NULL_SEGNO);
> - if (err == -ENODATA)
> + if (err == -ENODATA) {
> + err = 0;
> break;
> + }
> if (err && err != -EAGAIN)
> - return err;
> + break;

Should be?

if (err) {
if (err == -EAGAIN)
err = 0;
else
break;
}

Thanks,

> }
>
> - err = sync_filesystem(sbi->sb);
> - if (err)
> - return err;
> + ret = sync_filesystem(sbi->sb);
> + if (ret || err) {
> + err = ret ? ret: err;
> + goto restore_flag;
> + }
>
> - if (f2fs_disable_cp_again(sbi))
> - return -EAGAIN;
> + if (f2fs_disable_cp_again(sbi)) {
> + err = -EAGAIN;
> + goto restore_flag;
> + }
>
> mutex_lock(&sbi->gc_mutex);
> cpc.reason = CP_PAUSE;
> @@ -1486,7 +1499,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
>
> sbi->unusable_block_count = 0;
> mutex_unlock(&sbi->gc_mutex);
> - return 0;
> +restore_flag:
> + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
> + return err;
> }
>
> static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
> @@ -3356,7 +3371,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> err = f2fs_disable_checkpoint(sbi);
> if (err)
> - goto free_meta;
> + goto sync_free_meta;
> } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
> f2fs_enable_checkpoint(sbi);
> }
> @@ -3369,7 +3384,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> /* After POR, we can run background GC thread.*/
> err = f2fs_start_gc_thread(sbi);
> if (err)
> - goto free_meta;
> + goto sync_free_meta;
> }
> kvfree(options);
>
> @@ -3391,6 +3406,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> f2fs_update_time(sbi, REQ_TIME);
> return 0;
>
> +sync_free_meta:
> + /* safe to flush all the data */
> + sync_filesystem(sbi->sb);
> + retry = false;
> +
> free_meta:
> #ifdef CONFIG_QUOTA
> f2fs_truncate_quota_inode_pages(sb);
>

2019-01-25 00:52:39

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: sync filesystem after roll-forward recovery

On 01/24, Chao Yu wrote:
> On 2019/1/23 8:02, Jaegeuk Kim wrote:
> > Some works after roll-forward recovery can get an error which will release
> > all the data structures. Let's flush them in order to make it clean.
> >
> > One possible corruption came from:
> >
> > [ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null)
> > [ 90.675349] Call trace:
> > [ 90.677869] __list_del_entry_valid+0x94/0xb4
> > [ 90.682351] remove_dirty_inode+0xac/0x114
> > [ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8
> > [ 90.691302] f2fs_write_data_pages+0x40/0x4c
> > [ 90.695695] do_writepages+0x80/0xf0
> > [ 90.699372] __writeback_single_inode+0xdc/0x4ac
> > [ 90.704113] writeback_sb_inodes+0x280/0x440
> > [ 90.708501] wb_writeback+0x1b8/0x3d0
> > [ 90.712267] wb_workfn+0x1a8/0x4d4
> > [ 90.715765] process_one_work+0x1c0/0x3d4
> > [ 90.719883] worker_thread+0x224/0x344
> > [ 90.723739] kthread+0x120/0x130
> > [ 90.727055] ret_from_fork+0x10/0x18
> >
> > Reported-by: Sahitya Tummala <[email protected]>
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/checkpoint.c | 5 +++--
> > fs/f2fs/node.c | 4 +++-
> > fs/f2fs/super.c | 42 +++++++++++++++++++++++++++++++-----------
> > 3 files changed, 37 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index f955cd3e0677..ccccf0ce2f06 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
> > goto skip_write;
> >
> > /* collect a number of dirty meta pages and write together */
> > - if (wbc->for_kupdate ||
> > - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
> > + if (wbc->sync_mode != WB_SYNC_ALL &&
> > + get_pages(sbi, F2FS_DIRTY_META) <
> > + nr_pages_to_skip(sbi, META))
> > goto skip_write;
> >
> > /* if locked failed, cp will flush dirty pages instead */
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 4f450e573312..f6ff84e29749 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
> > f2fs_balance_fs_bg(sbi);
> >
> > /* collect a number of dirty node pages and write together */
> > - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
> > + if (wbc->sync_mode != WB_SYNC_ALL &&
> > + get_pages(sbi, F2FS_DIRTY_NODES) <
> > + nr_pages_to_skip(sbi, NODE))
> > goto skip_write;
> >
> > if (wbc->sync_mode == WB_SYNC_ALL)
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 7998ff5418f2..2af0db2b738e 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1456,9 +1456,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
> >
> > static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> > {
> > + unsigned int s_flags = sbi->sb->s_flags;
> > struct cp_control cpc;
> > - int err;
> > + int err = 0;
> > + int ret;
> >
> > + if (s_flags & SB_RDONLY) {
> > + f2fs_msg(sbi->sb, KERN_ERR,
> > + "checkpoint=disable on readonly fs");
> > + return -EINVAL;
> > + }
> > sbi->sb->s_flags |= SB_ACTIVE;
> >
> > f2fs_update_time(sbi, DISABLE_TIME);
> > @@ -1466,18 +1473,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> > while (!f2fs_time_over(sbi, DISABLE_TIME)) {
> > mutex_lock(&sbi->gc_mutex);
> > err = f2fs_gc(sbi, true, false, NULL_SEGNO);
> > - if (err == -ENODATA)
> > + if (err == -ENODATA) {
> > + err = 0;
> > break;
> > + }
> > if (err && err != -EAGAIN)
> > - return err;
> > + break;
>
> Should be?
>
> if (err) {
> if (err == -EAGAIN)
> err = 0;

Have to report EAGAIN, but not for ENODATA.

> else
> break;
> }
>
> Thanks,
>
> > }
> >
> > - err = sync_filesystem(sbi->sb);
> > - if (err)
> > - return err;
> > + ret = sync_filesystem(sbi->sb);
> > + if (ret || err) {
> > + err = ret ? ret: err;
> > + goto restore_flag;
> > + }
> >
> > - if (f2fs_disable_cp_again(sbi))
> > - return -EAGAIN;
> > + if (f2fs_disable_cp_again(sbi)) {
> > + err = -EAGAIN;
> > + goto restore_flag;
> > + }
> >
> > mutex_lock(&sbi->gc_mutex);
> > cpc.reason = CP_PAUSE;
> > @@ -1486,7 +1499,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> >
> > sbi->unusable_block_count = 0;
> > mutex_unlock(&sbi->gc_mutex);
> > - return 0;
> > +restore_flag:
> > + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
> > + return err;
> > }
> >
> > static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
> > @@ -3356,7 +3371,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > if (test_opt(sbi, DISABLE_CHECKPOINT)) {
> > err = f2fs_disable_checkpoint(sbi);
> > if (err)
> > - goto free_meta;
> > + goto sync_free_meta;
> > } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
> > f2fs_enable_checkpoint(sbi);
> > }
> > @@ -3369,7 +3384,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > /* After POR, we can run background GC thread.*/
> > err = f2fs_start_gc_thread(sbi);
> > if (err)
> > - goto free_meta;
> > + goto sync_free_meta;
> > }
> > kvfree(options);
> >
> > @@ -3391,6 +3406,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > f2fs_update_time(sbi, REQ_TIME);
> > return 0;
> >
> > +sync_free_meta:
> > + /* safe to flush all the data */
> > + sync_filesystem(sbi->sb);
> > + retry = false;
> > +
> > free_meta:
> > #ifdef CONFIG_QUOTA
> > f2fs_truncate_quota_inode_pages(sb);
> >

2019-01-25 00:58:02

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [PATCH 1/2 v2] f2fs: run discard jobs when put_super

When we umount f2fs, we need to avoid long delay due to discard commands, which
is actually taking tens of seconds, if storage is very slow on UNMAP. So, this
patch introduces timeout-based work on it.

By default, let me give 5 seconds for discard.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Change log from v1:
- use 0 for initial timeout

Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
fs/f2fs/f2fs.h | 5 ++++-
fs/f2fs/segment.c | 11 ++++++++++-
fs/f2fs/super.c | 17 ++++++++---------
fs/f2fs/sysfs.c | 3 +++
5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a7ce33199457..91822ce25831 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -86,6 +86,13 @@ Description:
The unit size is one block, now only support configuring in range
of [1, 512].

+What: /sys/fs/f2fs/<disk>/umount_discard_timeout
+Date: January 2019
+Contact: "Jaegeuk Kim" <[email protected]>
+Description:
+ Set timeout to issue discard commands during umount.
+ Default: 5 secs
+
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <[email protected]>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0f564883e078..6b6ec5600089 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -191,6 +191,7 @@ enum {
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */

struct cp_control {
int reason;
@@ -310,6 +311,7 @@ struct discard_policy {
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
+ int timeout; /* discard timeout for put_super */
};

struct discard_cmd_control {
@@ -1110,6 +1112,7 @@ enum {
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
+ UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};

@@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9b79056d705d..5b2b9be6f28d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,

dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->timeout = 0;

if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
int i, issued = 0;
bool io_interrupted = false;

+ if (dpolicy->timeout != 0)
+ f2fs_update_time(sbi, dpolicy->timeout);
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (i + 1 < dpolicy->granularity)
break;

@@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
}

/* This comes from f2fs_put_super */
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
@@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)

__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
+ dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6f4dfbd6f325..2af0db2b738e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1029,6 +1029,9 @@ static void f2fs_put_super(struct super_block *sb)
int i;
bool dropped;

+ /* be sure to wait for any on-going discard commands */
+ dropped = f2fs_issue_discard_timeout(sbi);
+
f2fs_quota_off_umount(sb);

/* prevent remaining shrinker jobs */
@@ -1044,17 +1047,12 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
- f2fs_write_checkpoint(sbi, &cpc);
- }

- /* be sure to wait for any on-going discard commands */
- dropped = f2fs_wait_discard_bios(sbi);
+ if ((f2fs_hw_support_discard(sbi) ||
+ f2fs_hw_should_discard(sbi)) &&
+ !sbi->discard_blks && !dropped)
+ cpc.reason |= CP_TRIMMED;

- if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
- !sbi->discard_blks && !dropped) {
- struct cp_control cpc = {
- .reason = CP_UMOUNT | CP_TRIMMED,
- };
f2fs_write_checkpoint(sbi, &cpc);
}

@@ -2721,6 +2719,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
+ sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);

for (i = 0; i < NR_COUNT_TYPE; i++)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 5b83e1d66cb3..18c627e8fc90 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
interval_time[DISCARD_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
+ umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(idle_interval),
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
+ ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
--
2.19.0.605.g01d371f741-goog

2019-01-25 00:58:29

by Jaegeuk Kim

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: run discard jobs when put_super

On 01/23, Chao Yu wrote:
> On 2019/1/23 8:02, Jaegeuk Kim wrote:
> > When we umount f2fs, we need to avoid long delay due to discard commands, which
> > is actually taking tens of seconds, if storage is very slow on UNMAP. So, this
> > patch introduces timeout-based work on it.
> >
> > By default, let me give 5 seconds for discard.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
> > fs/f2fs/f2fs.h | 5 ++++-
> > fs/f2fs/segment.c | 11 ++++++++++-
> > fs/f2fs/super.c | 17 ++++++++---------
> > fs/f2fs/sysfs.c | 3 +++
> > 5 files changed, 32 insertions(+), 11 deletions(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
> > index a7ce33199457..91822ce25831 100644
> > --- a/Documentation/ABI/testing/sysfs-fs-f2fs
> > +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
> > @@ -86,6 +86,13 @@ Description:
> > The unit size is one block, now only support configuring in range
> > of [1, 512].
> >
> > +What: /sys/fs/f2fs/<disk>/umount_discard_timeout
> > +Date: January 2019
> > +Contact: "Jaegeuk Kim" <[email protected]>
> > +Description:
> > + Set timeout to issue discard commands during umount.
> > + Default: 5 secs
> > +
> > What: /sys/fs/f2fs/<disk>/max_victim_search
> > Date: January 2014
> > Contact: "Jaegeuk Kim" <[email protected]>
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 0f564883e078..6b6ec5600089 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -191,6 +191,7 @@ enum {
> > #define DEF_CP_INTERVAL 60 /* 60 secs */
> > #define DEF_IDLE_INTERVAL 5 /* 5 secs */
> > #define DEF_DISABLE_INTERVAL 5 /* 5 secs */
> > +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
> >
> > struct cp_control {
> > int reason;
> > @@ -310,6 +311,7 @@ struct discard_policy {
> > bool sync; /* submit discard with REQ_SYNC flag */
> > bool ordered; /* issue discard by lba order */
> > unsigned int granularity; /* discard granularity */
> > + int timeout; /* discard timeout for put_super */
> > };
> >
> > struct discard_cmd_control {
> > @@ -1110,6 +1112,7 @@ enum {
> > DISCARD_TIME,
> > GC_TIME,
> > DISABLE_TIME,
> > + UMOUNT_DISCARD_TIMEOUT,
> > MAX_TIME,
>
> After the patch, MAX_TIME will be 6, so if we set @timeout to 6 via sysfs,
> the configuration will not be enabled due to below condition, right?

Yup.

>
> + if (dpolicy->timeout != MAX_TIME)
> + f2fs_update_time(sbi, dpolicy->timeout);
>
> > };
> >
> > @@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
> > bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
> > void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
> > void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
> > -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
> > +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
> > void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
> > struct cp_control *cpc);
> > void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 9b79056d705d..97e0faf09ebf 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
> >
> > dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
> > dpolicy->io_aware_gran = MAX_PLIST_NUM;
> > + dpolicy->timeout = MAX_TIME;
> >
> > if (discard_type == DPOLICY_BG) {
> > dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> > @@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
> > int i, issued = 0;
> > bool io_interrupted = false;
> >
> > + if (dpolicy->timeout != MAX_TIME)
> > + f2fs_update_time(sbi, dpolicy->timeout);
> > +
> > for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
> > + if (dpolicy->timeout != MAX_TIME &&
> > + f2fs_time_over(sbi, dpolicy->timeout))
> > + break;
> > +
> > if (i + 1 < dpolicy->granularity)
> > break;
> >
> > @@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
> > }
> >
> > /* This comes from f2fs_put_super */
> > -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
> > +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
> > {
> > struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> > struct discard_policy dpolicy;
> > @@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
> >
> > __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
> > dcc->discard_granularity);
> > + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
> > __issue_discard_cmd(sbi, &dpolicy);
> > dropped = __drop_discard_cmd(sbi);
> >
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index ea514acede36..7998ff5418f2 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -1029,6 +1029,9 @@ static void f2fs_put_super(struct super_block *sb)
> > int i;
> > bool dropped;
> >
> > + /* be sure to wait for any on-going discard commands */
> > + dropped = f2fs_issue_discard_timeout(sbi);
> > +
> > f2fs_quota_off_umount(sb);
> >
> > /* prevent remaining shrinker jobs */
> > @@ -1044,17 +1047,12 @@ static void f2fs_put_super(struct super_block *sb)
> > struct cp_control cpc = {
> > .reason = CP_UMOUNT,
> > };
> > - f2fs_write_checkpoint(sbi, &cpc);
> > - }
> >
> > - /* be sure to wait for any on-going discard commands */
> > - dropped = f2fs_wait_discard_bios(sbi);
> > + if ((f2fs_hw_support_discard(sbi) ||
> > + f2fs_hw_should_discard(sbi)) &&
> > + !sbi->discard_blks && !dropped)
> > + cpc.reason |= CP_TRIMMED;
> >
> > - if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
> > - !sbi->discard_blks && !dropped) {
> > - struct cp_control cpc = {
> > - .reason = CP_UMOUNT | CP_TRIMMED,
> > - };
> > f2fs_write_checkpoint(sbi, &cpc);
>
> Still, there will be problematic as I commented in last patch, could you
> check that?
>
> Thanks,
>
> > }
> >
> > @@ -2706,6 +2704,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
> > sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
> > sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
> > sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
> > + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT;
> > clear_sbi_flag(sbi, SBI_NEED_FSCK);
> >
> > for (i = 0; i < NR_COUNT_TYPE; i++)
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index 5b83e1d66cb3..18c627e8fc90 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
> > F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
> > interval_time[DISCARD_TIME]);
> > F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
> > +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
> > + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
> > F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
> > F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
> > F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
> > @@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
> > ATTR_LIST(idle_interval),
> > ATTR_LIST(discard_idle_interval),
> > ATTR_LIST(gc_idle_interval),
> > + ATTR_LIST(umount_discard_timeout),
> > ATTR_LIST(iostat_enable),
> > ATTR_LIST(readdir_ra),
> > ATTR_LIST(gc_pin_file_thresh),
> >

2019-01-25 01:26:37

by Chao Yu

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: run discard jobs when put_super

On 2019/1/25 8:56, Jaegeuk Kim wrote:
> On 01/23, Chao Yu wrote:
>> On 2019/1/23 8:02, Jaegeuk Kim wrote:
>>> When we umount f2fs, we need to avoid long delay due to discard commands, which
>>> is actually taking tens of seconds, if storage is very slow on UNMAP. So, this
>>> patch introduces timeout-based work on it.
>>>
>>> By default, let me give 5 seconds for discard.
>>>
>>> Signed-off-by: Jaegeuk Kim <[email protected]>
>>> ---
>>> Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++
>>> fs/f2fs/f2fs.h | 5 ++++-
>>> fs/f2fs/segment.c | 11 ++++++++++-
>>> fs/f2fs/super.c | 17 ++++++++---------
>>> fs/f2fs/sysfs.c | 3 +++
>>> 5 files changed, 32 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> index a7ce33199457..91822ce25831 100644
>>> --- a/Documentation/ABI/testing/sysfs-fs-f2fs
>>> +++ b/Documentation/ABI/testing/sysfs-fs-f2fs
>>> @@ -86,6 +86,13 @@ Description:
>>> The unit size is one block, now only support configuring in range
>>> of [1, 512].
>>>
>>> +What: /sys/fs/f2fs/<disk>/umount_discard_timeout
>>> +Date: January 2019
>>> +Contact: "Jaegeuk Kim" <[email protected]>
>>> +Description:
>>> + Set timeout to issue discard commands during umount.
>>> + Default: 5 secs
>>> +
>>> What: /sys/fs/f2fs/<disk>/max_victim_search
>>> Date: January 2014
>>> Contact: "Jaegeuk Kim" <[email protected]>
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 0f564883e078..6b6ec5600089 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -191,6 +191,7 @@ enum {
>>> #define DEF_CP_INTERVAL 60 /* 60 secs */
>>> #define DEF_IDLE_INTERVAL 5 /* 5 secs */
>>> #define DEF_DISABLE_INTERVAL 5 /* 5 secs */
>>> +#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
>>>
>>> struct cp_control {
>>> int reason;
>>> @@ -310,6 +311,7 @@ struct discard_policy {
>>> bool sync; /* submit discard with REQ_SYNC flag */
>>> bool ordered; /* issue discard by lba order */
>>> unsigned int granularity; /* discard granularity */
>>> + int timeout; /* discard timeout for put_super */
>>> };
>>>
>>> struct discard_cmd_control {
>>> @@ -1110,6 +1112,7 @@ enum {
>>> DISCARD_TIME,
>>> GC_TIME,
>>> DISABLE_TIME,
>>> + UMOUNT_DISCARD_TIMEOUT,
>>> MAX_TIME,
>>
>> After the patch, MAX_TIME will be 6, so if we set @timeout to 6 via sysfs,
>> the configuration will not be enabled due to below condition, right?
>
> Yup.
>
>>
>> + if (dpolicy->timeout != MAX_TIME)
>> + f2fs_update_time(sbi, dpolicy->timeout);
>>
>>> };
>>>
>>> @@ -3006,7 +3009,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
>>> bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
>>> void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
>>> void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
>>> -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
>>> +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
>>> void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>>> struct cp_control *cpc);
>>> void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 9b79056d705d..97e0faf09ebf 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1037,6 +1037,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
>>>
>>> dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
>>> dpolicy->io_aware_gran = MAX_PLIST_NUM;
>>> + dpolicy->timeout = MAX_TIME;
>>>
>>> if (discard_type == DPOLICY_BG) {
>>> dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
>>> @@ -1424,7 +1425,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>>> int i, issued = 0;
>>> bool io_interrupted = false;
>>>
>>> + if (dpolicy->timeout != MAX_TIME)
>>> + f2fs_update_time(sbi, dpolicy->timeout);
>>> +
>>> for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
>>> + if (dpolicy->timeout != MAX_TIME &&
>>> + f2fs_time_over(sbi, dpolicy->timeout))
>>> + break;
>>> +
>>> if (i + 1 < dpolicy->granularity)
>>> break;
>>>
>>> @@ -1611,7 +1619,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
>>> }
>>>
>>> /* This comes from f2fs_put_super */
>>> -bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
>>> +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
>>> {
>>> struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
>>> struct discard_policy dpolicy;
>>> @@ -1619,6 +1627,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
>>>
>>> __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
>>> dcc->discard_granularity);
>>> + dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
>>> __issue_discard_cmd(sbi, &dpolicy);
>>> dropped = __drop_discard_cmd(sbi);
>>>
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index ea514acede36..7998ff5418f2 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1029,6 +1029,9 @@ static void f2fs_put_super(struct super_block *sb)
>>> int i;
>>> bool dropped;
>>>
>>> + /* be sure to wait for any on-going discard commands */
>>> + dropped = f2fs_issue_discard_timeout(sbi);
>>> +
>>> f2fs_quota_off_umount(sb);
>>>
>>> /* prevent remaining shrinker jobs */
>>> @@ -1044,17 +1047,12 @@ static void f2fs_put_super(struct super_block *sb)
>>> struct cp_control cpc = {
>>> .reason = CP_UMOUNT,
>>> };
>>> - f2fs_write_checkpoint(sbi, &cpc);
>>> - }
>>>
>>> - /* be sure to wait for any on-going discard commands */
>>> - dropped = f2fs_wait_discard_bios(sbi);
>>> + if ((f2fs_hw_support_discard(sbi) ||
>>> + f2fs_hw_should_discard(sbi)) &&
>>> + !sbi->discard_blks && !dropped)
>>> + cpc.reason |= CP_TRIMMED;
>>>
>>> - if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
>>> - !sbi->discard_blks && !dropped) {
>>> - struct cp_control cpc = {
>>> - .reason = CP_UMOUNT | CP_TRIMMED,
>>> - };
>>> f2fs_write_checkpoint(sbi, &cpc);
>>
>> Still, there will be problematic as I commented in last patch, could you
>> check that?

Ping,

If this checkpoint generate discard entry, tagged CP_TRIMMED flag should be
wrong, so the right order should be?

write_checkpoint(CP_UMOUNT)

dropped = f2fs_wait_discard_bios()

if (... && no_drop && no_remain_discard)
f2fs_write_checkpoint(sbi, CP_UMOUNT | CP_TRIMMED);

Am I missing something?

Thanks,

>>
>> Thanks,
>>
>>> }
>>>
>>> @@ -2706,6 +2704,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
>>> sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
>>> sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
>>> sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
>>> + sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] = DEF_UMOUNT_DISCARD_TIMEOUT;
>>> clear_sbi_flag(sbi, SBI_NEED_FSCK);
>>>
>>> for (i = 0; i < NR_COUNT_TYPE; i++)
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 5b83e1d66cb3..18c627e8fc90 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -426,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
>>> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
>>> interval_time[DISCARD_TIME]);
>>> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
>>> +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
>>> + umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
>>> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
>>> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
>>> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
>>> @@ -483,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
>>> ATTR_LIST(idle_interval),
>>> ATTR_LIST(discard_idle_interval),
>>> ATTR_LIST(gc_idle_interval),
>>> + ATTR_LIST(umount_discard_timeout),
>>> ATTR_LIST(iostat_enable),
>>> ATTR_LIST(readdir_ra),
>>> ATTR_LIST(gc_pin_file_thresh),
>>>
>
> .
>

2019-01-25 01:38:47

by Chao Yu

[permalink] [raw]

Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: sync filesystem after roll-forward recovery

On 2019/1/25 8:52, Jaegeuk Kim wrote:
> On 01/24, Chao Yu wrote:
>> On 2019/1/23 8:02, Jaegeuk Kim wrote:
>>> Some works after roll-forward recovery can get an error which will release
>>> all the data structures. Let's flush them in order to make it clean.
>>>
>>> One possible corruption came from:
>>>
>>> [ 90.400500] list_del corruption. prev->next should be ffffffed1f566208, but was (null)
>>> [ 90.675349] Call trace:
>>> [ 90.677869] __list_del_entry_valid+0x94/0xb4
>>> [ 90.682351] remove_dirty_inode+0xac/0x114
>>> [ 90.686563] __f2fs_write_data_pages+0x6a8/0x6c8
>>> [ 90.691302] f2fs_write_data_pages+0x40/0x4c
>>> [ 90.695695] do_writepages+0x80/0xf0
>>> [ 90.699372] __writeback_single_inode+0xdc/0x4ac
>>> [ 90.704113] writeback_sb_inodes+0x280/0x440
>>> [ 90.708501] wb_writeback+0x1b8/0x3d0
>>> [ 90.712267] wb_workfn+0x1a8/0x4d4
>>> [ 90.715765] process_one_work+0x1c0/0x3d4
>>> [ 90.719883] worker_thread+0x224/0x344
>>> [ 90.723739] kthread+0x120/0x130
>>> [ 90.727055] ret_from_fork+0x10/0x18
>>>
>>> Reported-by: Sahitya Tummala <[email protected]>
>>> Signed-off-by: Jaegeuk Kim <[email protected]>
>>> ---
>>> fs/f2fs/checkpoint.c | 5 +++--
>>> fs/f2fs/node.c | 4 +++-
>>> fs/f2fs/super.c | 42 +++++++++++++++++++++++++++++++-----------
>>> 3 files changed, 37 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index f955cd3e0677..ccccf0ce2f06 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
>>> goto skip_write;
>>>
>>> /* collect a number of dirty meta pages and write together */
>>> - if (wbc->for_kupdate ||
>>> - get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
>>> + if (wbc->sync_mode != WB_SYNC_ALL &&
>>> + get_pages(sbi, F2FS_DIRTY_META) <
>>> + nr_pages_to_skip(sbi, META))
>>> goto skip_write;
>>>
>>> /* if locked failed, cp will flush dirty pages instead */
>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>> index 4f450e573312..f6ff84e29749 100644
>>> --- a/fs/f2fs/node.c
>>> +++ b/fs/f2fs/node.c
>>> @@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>> f2fs_balance_fs_bg(sbi);
>>>
>>> /* collect a number of dirty node pages and write together */
>>> - if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
>>> + if (wbc->sync_mode != WB_SYNC_ALL &&
>>> + get_pages(sbi, F2FS_DIRTY_NODES) <
>>> + nr_pages_to_skip(sbi, NODE))
>>> goto skip_write;
>>>
>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index 7998ff5418f2..2af0db2b738e 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -1456,9 +1456,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
>>>
>>> static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
>>> {
>>> + unsigned int s_flags = sbi->sb->s_flags;
>>> struct cp_control cpc;
>>> - int err;
>>> + int err = 0;
>>> + int ret;
>>>
>>> + if (s_flags & SB_RDONLY) {
>>> + f2fs_msg(sbi->sb, KERN_ERR,
>>> + "checkpoint=disable on readonly fs");
>>> + return -EINVAL;
>>> + }
>>> sbi->sb->s_flags |= SB_ACTIVE;
>>>
>>> f2fs_update_time(sbi, DISABLE_TIME);
>>> @@ -1466,18 +1473,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
>>> while (!f2fs_time_over(sbi, DISABLE_TIME)) {
>>> mutex_lock(&sbi->gc_mutex);
>>> err = f2fs_gc(sbi, true, false, NULL_SEGNO);
>>> - if (err == -ENODATA)
>>> + if (err == -ENODATA) {
>>> + err = 0;
>>> break;
>>> + }
>>> if (err && err != -EAGAIN)
>>> - return err;
>>> + break;
>>
>> Should be?
>>
>> if (err) {
>> if (err == -EAGAIN)
>> err = 0;
>
> Have to report EAGAIN, but not for ENODATA.

Oh, right. :)

Reviewed-by: Chao Yu <[email protected]>

Thanks,

>
>> else
>> break;
>> }
>>
>> Thanks,
>>
>>> }
>>>
>>> - err = sync_filesystem(sbi->sb);
>>> - if (err)
>>> - return err;
>>> + ret = sync_filesystem(sbi->sb);
>>> + if (ret || err) {
>>> + err = ret ? ret: err;
>>> + goto restore_flag;
>>> + }
>>>
>>> - if (f2fs_disable_cp_again(sbi))
>>> - return -EAGAIN;
>>> + if (f2fs_disable_cp_again(sbi)) {
>>> + err = -EAGAIN;
>>> + goto restore_flag;
>>> + }
>>>
>>> mutex_lock(&sbi->gc_mutex);
>>> cpc.reason = CP_PAUSE;
>>> @@ -1486,7 +1499,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
>>>
>>> sbi->unusable_block_count = 0;
>>> mutex_unlock(&sbi->gc_mutex);
>>> - return 0;
>>> +restore_flag:
>>> + sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
>>> + return err;
>>> }
>>>
>>> static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
>>> @@ -3356,7 +3371,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>> if (test_opt(sbi, DISABLE_CHECKPOINT)) {
>>> err = f2fs_disable_checkpoint(sbi);
>>> if (err)
>>> - goto free_meta;
>>> + goto sync_free_meta;
>>> } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
>>> f2fs_enable_checkpoint(sbi);
>>> }
>>> @@ -3369,7 +3384,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>> /* After POR, we can run background GC thread.*/
>>> err = f2fs_start_gc_thread(sbi);
>>> if (err)
>>> - goto free_meta;
>>> + goto sync_free_meta;
>>> }
>>> kvfree(options);
>>>
>>> @@ -3391,6 +3406,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>> f2fs_update_time(sbi, REQ_TIME);
>>> return 0;
>>>
>>> +sync_free_meta:
>>> + /* safe to flush all the data */
>>> + sync_filesystem(sbi->sb);
>>> + retry = false;
>>> +
>>> free_meta:
>>> #ifdef CONFIG_QUOTA
>>> f2fs_truncate_quota_inode_pages(sb);
>>>
>
> .
>