2022-05-09 02:03:23

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/5] f2fs: stop allocating pinned sections if EAGAIN happens

EAGAIN doesn't guarantee to have a free section. Let's report it.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e4cf8b7b23aa..b307d96a0a7c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1685,7 +1685,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
- if (err && err != -ENODATA && err != -EAGAIN)
+ if (err && err != -ENODATA)
goto out_err;
}

--
2.36.0.512.ge40c2bad7a-goog



2022-05-09 03:13:31

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/5] f2fs: keep wait_ms if EAGAIN happens

In f2fs_gc thread, let's keep wait_ms when sec_freed was zero.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/gc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index aeffcc1d5c02..ec3f6f876e76 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -37,7 +37,8 @@ static int gc_thread_func(void *data)
unsigned int wait_ms;
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
- .should_migrate_blocks = false };
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false };

wait_ms = gc_th->min_sleep_time;

@@ -146,7 +147,6 @@ static int gc_thread_func(void *data)

gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
gc_control.no_bg_gc = foreground;
- gc_control.err_gc_skipped = sync_mode;

/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, &gc_control))
--
2.36.0.512.ge40c2bad7a-goog


2022-05-09 04:54:22

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 4/5] f2fs: do not stop GC when requiring a free section

The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
segment as a victim all the time, resulting in checkpoint=disable failure,
for example. Let's pick another one, if we fail to collect it.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 12 ++++++++----
fs/f2fs/gc.c | 11 +++++++----
fs/f2fs/segment.c | 3 ++-
fs/f2fs/super.c | 3 ++-
include/trace/events/f2fs.h | 11 ++++++++---
6 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d49b9b476592..27871f6efb01 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1282,6 +1282,7 @@ struct f2fs_gc_control {
bool no_bg_gc; /* check the space and stop bg_gc */
bool should_migrate_blocks; /* should migrate blocks */
bool err_gc_skipped; /* return EAGAIN if GC skipped */
+ unsigned int nr_free_secs; /* # of free sections to do GC */
};

/* For s_flag in struct f2fs_sb_info */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e7d101c3e65..7072c2b86b2f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 0 };
pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -2453,7 +2454,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
.no_bg_gc = false,
- .should_migrate_blocks = false };
+ .should_migrate_blocks = false,
+ .nr_free_secs = 0 };
__u32 sync;
int ret;

@@ -2494,7 +2496,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
.init_gc_type = range->sync ? FG_GC : BG_GC,
.no_bg_gc = false,
.should_migrate_blocks = false,
- .err_gc_skipped = range->sync };
+ .err_gc_skipped = range->sync,
+ .nr_free_secs = 0 };
u64 end;
int ret;

@@ -2940,7 +2943,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
struct f2fs_gc_control gc_control = {
.init_gc_type = FG_GC,
.should_migrate_blocks = true,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 0 };
int ret;

if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ec3f6f876e76..f63576ff1c2d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -147,6 +147,7 @@ static int gc_thread_func(void *data)

gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
gc_control.no_bg_gc = foreground;
+ gc_control.nr_free_secs = foreground ? 1 : 0;

/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, &gc_control))
@@ -1776,6 +1777,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
unsigned int skipped_round = 0, round = 0;

trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
+ gc_control->nr_free_secs,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1848,11 +1850,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;

- if (gc_control->init_gc_type == FG_GC)
- goto stop;
-
- if (!has_not_enough_free_secs(sbi, sec_freed, 0))
+ if ((gc_control->init_gc_type == FG_GC ||
+ !has_not_enough_free_secs(sbi, sec_freed, 0))) {
+ if (sec_freed < gc_control->nr_free_secs)
+ goto gc_more;
goto stop;
+ }

if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round) {

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bc63f0572c64..d6b9231ab0e1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -528,7 +528,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
.init_gc_type = BG_GC,
.no_bg_gc = true,
.should_migrate_blocks = false,
- .err_gc_skipped = false };
+ .err_gc_skipped = false,
+ .nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
f2fs_gc(sbi, &gc_control);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8b23fa6fc6b7..5d5b35067c3d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2084,7 +2084,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.victim_segno = NULL_SEGNO,
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 1 };

f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, &gc_control);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 6699174977a3..349679a72301 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -653,18 +653,21 @@ TRACE_EVENT(f2fs_background_gc,
TRACE_EVENT(f2fs_gc_begin,

TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
+ unsigned int nr_free_secs,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),

- TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
+ TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
+ dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),

TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, gc_type)
__field(bool, no_bg_gc)
+ __field(unsigned int, nr_free_secs)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
@@ -678,6 +681,7 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->dev = sb->s_dev;
__entry->gc_type = gc_type;
__entry->no_bg_gc = no_bg_gc;
+ __entry->nr_free_secs = nr_free_secs;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
@@ -687,12 +691,13 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),

- TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
- "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
+ TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
+ "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
show_gc_type(__entry->gc_type),
__entry->no_bg_gc,
+ __entry->nr_free_secs,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
--
2.36.0.512.ge40c2bad7a-goog


2022-05-09 05:17:19

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 3/5] f2fs: keep wait_ms if EAGAIN happens

On 2022/5/7 7:20, Jaegeuk Kim wrote:
> In f2fs_gc thread, let's keep wait_ms when sec_freed was zero.

sec_freed won't increase for background GC due to below statement:

if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;

It may cause gc thread migrates lots of segments in each round?

Thanks,

>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/gc.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index aeffcc1d5c02..ec3f6f876e76 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -37,7 +37,8 @@ static int gc_thread_func(void *data)
> unsigned int wait_ms;
> struct f2fs_gc_control gc_control = {
> .victim_segno = NULL_SEGNO,
> - .should_migrate_blocks = false };
> + .should_migrate_blocks = false,
> + .err_gc_skipped = false };
>
> wait_ms = gc_th->min_sleep_time;
>
> @@ -146,7 +147,6 @@ static int gc_thread_func(void *data)
>
> gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
> gc_control.no_bg_gc = foreground;
> - gc_control.err_gc_skipped = sync_mode;
>
> /* if return value is not zero, no victim was selected */
> if (f2fs_gc(sbi, &gc_control))

2022-05-09 06:36:29

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 5/5] f2fs: don't need inode lock for system hidden quota

Let's avoid false-alarmed lockdep warning.

[ 58.914674] [T1501146] -> #2 (&sb->s_type->i_mutex_key#20){+.+.}-{3:3}:
[ 58.915975] [T1501146] system_server: down_write+0x7c/0xe0
[ 58.916738] [T1501146] system_server: f2fs_quota_sync+0x60/0x1a8
[ 58.917563] [T1501146] system_server: block_operations+0x16c/0x43c
[ 58.918410] [T1501146] system_server: f2fs_write_checkpoint+0x114/0x318
[ 58.919312] [T1501146] system_server: f2fs_issue_checkpoint+0x178/0x21c
[ 58.920214] [T1501146] system_server: f2fs_sync_fs+0x48/0x6c
[ 58.920999] [T1501146] system_server: f2fs_do_sync_file+0x334/0x738
[ 58.921862] [T1501146] system_server: f2fs_sync_file+0x30/0x48
[ 58.922667] [T1501146] system_server: __arm64_sys_fsync+0x84/0xf8
[ 58.923506] [T1501146] system_server: el0_svc_common.llvm.12821150825140585682+0xd8/0x20c
[ 58.924604] [T1501146] system_server: do_el0_svc+0x28/0xa0
[ 58.925366] [T1501146] system_server: el0_svc+0x24/0x38
[ 58.926094] [T1501146] system_server: el0_sync_handler+0x88/0xec
[ 58.926920] [T1501146] system_server: el0_sync+0x1b4/0x1c0

[ 58.927681] [T1501146] -> #1 (&sbi->cp_global_sem){+.+.}-{3:3}:
[ 58.928889] [T1501146] system_server: down_write+0x7c/0xe0
[ 58.929650] [T1501146] system_server: f2fs_write_checkpoint+0xbc/0x318
[ 58.930541] [T1501146] system_server: f2fs_issue_checkpoint+0x178/0x21c
[ 58.931443] [T1501146] system_server: f2fs_sync_fs+0x48/0x6c
[ 58.932226] [T1501146] system_server: sync_filesystem+0xac/0x130
[ 58.933053] [T1501146] system_server: generic_shutdown_super+0x38/0x150
[ 58.933958] [T1501146] system_server: kill_block_super+0x24/0x58
[ 58.934791] [T1501146] system_server: kill_f2fs_super+0xcc/0x124
[ 58.935618] [T1501146] system_server: deactivate_locked_super+0x90/0x120
[ 58.936529] [T1501146] system_server: deactivate_super+0x74/0xac
[ 58.937356] [T1501146] system_server: cleanup_mnt+0x128/0x168
[ 58.938150] [T1501146] system_server: __cleanup_mnt+0x18/0x28
[ 58.938944] [T1501146] system_server: task_work_run+0xb8/0x14c
[ 58.939749] [T1501146] system_server: do_notify_resume+0x114/0x1e8
[ 58.940595] [T1501146] system_server: work_pending+0xc/0x5f0

[ 58.941375] [T1501146] -> #0 (&sbi->gc_lock){+.+.}-{3:3}:
[ 58.942519] [T1501146] system_server: __lock_acquire+0x1270/0x2868
[ 58.943366] [T1501146] system_server: lock_acquire+0x114/0x294
[ 58.944169] [T1501146] system_server: down_write+0x7c/0xe0
[ 58.944930] [T1501146] system_server: f2fs_issue_checkpoint+0x13c/0x21c
[ 58.945831] [T1501146] system_server: f2fs_sync_fs+0x48/0x6c
[ 58.946614] [T1501146] system_server: f2fs_do_sync_file+0x334/0x738
[ 58.947472] [T1501146] system_server: f2fs_ioc_commit_atomic_write+0xc8/0x14c
[ 58.948439] [T1501146] system_server: __f2fs_ioctl+0x674/0x154c
[ 58.949253] [T1501146] system_server: f2fs_ioctl+0x54/0x88
[ 58.950018] [T1501146] system_server: __arm64_sys_ioctl+0xa8/0x110
[ 58.950865] [T1501146] system_server: el0_svc_common.llvm.12821150825140585682+0xd8/0x20c
[ 58.951965] [T1501146] system_server: do_el0_svc+0x28/0xa0
[ 58.952727] [T1501146] system_server: el0_svc+0x24/0x38
[ 58.953454] [T1501146] system_server: el0_sync_handler+0x88/0xec
[ 58.954279] [T1501146] system_server: el0_sync+0x1b4/0x1c0

Cc: [email protected]
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/super.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5d5b35067c3d..09435280d856 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2700,7 +2700,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
if (!sb_has_quota_active(sb, cnt))
continue;

- inode_lock(dqopt->files[cnt]);
+ if (!f2fs_sb_has_quota_ino(sbi))
+ inode_lock(dqopt->files[cnt]);

/*
* do_quotactl
@@ -2719,7 +2720,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
f2fs_up_read(&sbi->quota_sem);
f2fs_unlock_op(sbi);

- inode_unlock(dqopt->files[cnt]);
+ if (!f2fs_sb_has_quota_ino(sbi))
+ inode_unlock(dqopt->files[cnt]);

if (ret)
break;
--
2.36.0.512.ge40c2bad7a-goog


2022-05-09 06:50:21

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 3/5] f2fs: keep wait_ms if EAGAIN happens

On 2022/5/8 22:41, Chao Yu wrote:
> On 2022/5/7 7:20, Jaegeuk Kim wrote:
>> In f2fs_gc thread, let's keep wait_ms when sec_freed was zero.
>
> sec_freed won't increase for background GC due to below statement:
>
> if (gc_type == FG_GC &&
> get_valid_blocks(sbi, segno, false) == 0)
> seg_freed++;
>
> It may cause gc thread migrates lots of segments in each round?

Please ignore this comment, I misunderstood it. :-P

>
> Thanks,
>
>>
>> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2022-05-09 08:20:34

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/5] f2fs: stop allocating pinned sections if EAGAIN happens

On 2022/5/7 7:20, Jaegeuk Kim wrote:
> EAGAIN doesn't guarantee to have a free section. Let's report it.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2022-05-09 08:31:35

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5] f2fs: do not stop GC when requiring a free section

On 2022/5/7 7:20, Jaegeuk Kim wrote:
> The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
> chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
> segment as a victim all the time, resulting in checkpoint=disable failure,
> for example. Let's pick another one, if we fail to collect it.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/file.c | 12 ++++++++----
> fs/f2fs/gc.c | 11 +++++++----
> fs/f2fs/segment.c | 3 ++-
> fs/f2fs/super.c | 3 ++-
> include/trace/events/f2fs.h | 11 ++++++++---
> 6 files changed, 28 insertions(+), 13 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index d49b9b476592..27871f6efb01 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1282,6 +1282,7 @@ struct f2fs_gc_control {
> bool no_bg_gc; /* check the space and stop bg_gc */
> bool should_migrate_blocks; /* should migrate blocks */
> bool err_gc_skipped; /* return EAGAIN if GC skipped */
> + unsigned int nr_free_secs; /* # of free sections to do GC */
> };
>
> /* For s_flag in struct f2fs_sb_info */
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 0e7d101c3e65..7072c2b86b2f 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> .init_gc_type = FG_GC,
> .should_migrate_blocks = false,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 0 };
> pgoff_t pg_start, pg_end;
> loff_t new_size = i_size_read(inode);
> loff_t off_end;
> @@ -2453,7 +2454,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> .no_bg_gc = false,
> - .should_migrate_blocks = false };
> + .should_migrate_blocks = false,
> + .nr_free_secs = 0 };
> __u32 sync;
> int ret;
>
> @@ -2494,7 +2496,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
> .init_gc_type = range->sync ? FG_GC : BG_GC,
> .no_bg_gc = false,
> .should_migrate_blocks = false,
> - .err_gc_skipped = range->sync };
> + .err_gc_skipped = range->sync,
> + .nr_free_secs = 0 };
> u64 end;
> int ret;
>
> @@ -2940,7 +2943,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
> struct f2fs_gc_control gc_control = {
> .init_gc_type = FG_GC,
> .should_migrate_blocks = true,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 0 };
> int ret;
>
> if (!capable(CAP_SYS_ADMIN))
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index ec3f6f876e76..f63576ff1c2d 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -147,6 +147,7 @@ static int gc_thread_func(void *data)
>
> gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
> gc_control.no_bg_gc = foreground;
> + gc_control.nr_free_secs = foreground ? 1 : 0;
>
> /* if return value is not zero, no victim was selected */
> if (f2fs_gc(sbi, &gc_control))
> @@ -1776,6 +1777,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> unsigned int skipped_round = 0, round = 0;
>
> trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> + gc_control->nr_free_secs,
> get_pages(sbi, F2FS_DIRTY_NODES),
> get_pages(sbi, F2FS_DIRTY_DENTS),
> get_pages(sbi, F2FS_DIRTY_IMETA),
> @@ -1848,11 +1850,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> if (gc_type == FG_GC)
> sbi->cur_victim_sec = NULL_SEGNO;
>
> - if (gc_control->init_gc_type == FG_GC)
> - goto stop;
> -
> - if (!has_not_enough_free_secs(sbi, sec_freed, 0))
> + if ((gc_control->init_gc_type == FG_GC ||
> + !has_not_enough_free_secs(sbi, sec_freed, 0))) {
> + if (sec_freed < gc_control->nr_free_secs)

I missed to reset segno here, it may cause GC failure in next round due to
sec_usage_check() returns true.

segno = NULL_SEGNO;

Thanks,

> + goto gc_more;
> goto stop;
> + }
>
> if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round) {
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index bc63f0572c64..d6b9231ab0e1 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -528,7 +528,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
> .init_gc_type = BG_GC,
> .no_bg_gc = true,
> .should_migrate_blocks = false,
> - .err_gc_skipped = false };
> + .err_gc_skipped = false,
> + .nr_free_secs = 1 };
> f2fs_down_write(&sbi->gc_lock);
> f2fs_gc(sbi, &gc_control);
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 8b23fa6fc6b7..5d5b35067c3d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2084,7 +2084,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> .victim_segno = NULL_SEGNO,
> .init_gc_type = FG_GC,
> .should_migrate_blocks = false,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 1 };
>
> f2fs_down_write(&sbi->gc_lock);
> err = f2fs_gc(sbi, &gc_control);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 6699174977a3..349679a72301 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -653,18 +653,21 @@ TRACE_EVENT(f2fs_background_gc,
> TRACE_EVENT(f2fs_gc_begin,
>
> TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
> + unsigned int nr_free_secs,
> long long dirty_nodes, long long dirty_dents,
> long long dirty_imeta, unsigned int free_sec,
> unsigned int free_seg, int reserved_seg,
> unsigned int prefree_seg),
>
> - TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
> + TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
> + dirty_dents, dirty_imeta,
> free_sec, free_seg, reserved_seg, prefree_seg),
>
> TP_STRUCT__entry(
> __field(dev_t, dev)
> __field(int, gc_type)
> __field(bool, no_bg_gc)
> + __field(unsigned int, nr_free_secs)
> __field(long long, dirty_nodes)
> __field(long long, dirty_dents)
> __field(long long, dirty_imeta)
> @@ -678,6 +681,7 @@ TRACE_EVENT(f2fs_gc_begin,
> __entry->dev = sb->s_dev;
> __entry->gc_type = gc_type;
> __entry->no_bg_gc = no_bg_gc;
> + __entry->nr_free_secs = nr_free_secs;
> __entry->dirty_nodes = dirty_nodes;
> __entry->dirty_dents = dirty_dents;
> __entry->dirty_imeta = dirty_imeta;
> @@ -687,12 +691,13 @@ TRACE_EVENT(f2fs_gc_begin,
> __entry->prefree_seg = prefree_seg;
> ),
>
> - TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
> - "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> + TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
> + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> "rsv_seg:%d, prefree_seg:%u",
> show_dev(__entry->dev),
> show_gc_type(__entry->gc_type),
> __entry->no_bg_gc,
> + __entry->nr_free_secs,
> __entry->dirty_nodes,
> __entry->dirty_dents,
> __entry->dirty_imeta,

2022-05-09 09:37:57

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/5] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

No functional change.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 11 +++++++++--
fs/f2fs/file.c | 30 +++++++++++++++++++++++++-----
fs/f2fs/gc.c | 29 ++++++++++++++++++-----------
fs/f2fs/segment.c | 8 +++++++-
fs/f2fs/super.c | 8 +++++++-
include/trace/events/f2fs.h | 18 +++++++++---------
6 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index efe5e80163a8..d49b9b476592 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1276,6 +1276,14 @@ struct atgc_management {
unsigned long long age_threshold; /* age threshold */
};

+struct f2fs_gc_control {
+ unsigned int victim_segno; /* target victim segment number */
+ int init_gc_type; /* FG_GC or BG_GC */
+ bool no_bg_gc; /* check the space and stop bg_gc */
+ bool should_migrate_blocks; /* should migrate blocks */
+ bool err_gc_skipped; /* return EAGAIN if GC skipped */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -3786,8 +3794,7 @@ extern const struct iomap_ops f2fs_iomap_ops;
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
- unsigned int segno);
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b307d96a0a7c..0e7d101c3e65 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1647,6 +1647,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1684,7 +1688,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err && err != -ENODATA)
goto out_err;
}
@@ -2447,6 +2451,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false };
__u32 sync;
int ret;

@@ -2472,7 +2479,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
+ gc_control.init_gc_type = sync ? FG_GC : BG_GC;
+ gc_control.err_gc_skipped = sync;
+ ret = f2fs_gc(sbi, &gc_control);
out:
mnt_drop_write_file(filp);
return ret;
@@ -2481,6 +2490,11 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = range->sync ? FG_GC : BG_GC,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = range->sync };
u64 end;
int ret;

@@ -2508,8 +2522,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, range->sync, true, false,
- GET_SEGNO(sbi, range->start));
+ gc_control.victim_segno = GET_SEGNO(sbi, range->start);
+ ret = f2fs_gc(sbi, &gc_control);
if (ret) {
if (ret == -EBUSY)
ret = -EAGAIN;
@@ -2923,6 +2937,10 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = true,
+ .err_gc_skipped = true };
int ret;

if (!capable(CAP_SYS_ADMIN))
@@ -2966,7 +2984,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
- ret = f2fs_gc(sbi, true, true, true, start_segno);
+
+ gc_control.victim_segno = start_segno;
+ ret = f2fs_gc(sbi, &gc_control);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3009c0a97ab4..aeffcc1d5c02 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -35,6 +35,9 @@ static int gc_thread_func(void *data)
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
unsigned int wait_ms;
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .should_migrate_blocks = false };

wait_ms = gc_th->min_sleep_time;

@@ -141,8 +144,12 @@ static int gc_thread_func(void *data)
if (foreground)
sync_mode = false;

+ gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
+ gc_control.no_bg_gc = foreground;
+ gc_control.err_gc_skipped = sync_mode;
+
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
+ if (f2fs_gc(sbi, &gc_control))
wait_ms = gc_th->no_gc_sleep_time;

if (foreground)
@@ -1753,14 +1760,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return seg_freed;
}

-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
- bool background, bool force, unsigned int segno)
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
- int gc_type = sync ? FG_GC : BG_GC;
+ int gc_type = gc_control->init_gc_type;
+ unsigned int segno = gc_control->victim_segno;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
- unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
@@ -1769,7 +1775,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;

- trace_f2fs_gc_begin(sbi->sb, sync, background,
+ trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1808,7 +1814,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
}

/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
- if (gc_type == BG_GC && !background) {
+ if (gc_type == BG_GC && gc_control->no_bg_gc) {
ret = -EINVAL;
goto stop;
}
@@ -1824,7 +1830,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
goto stop;
}

- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
+ gc_control->should_migrate_blocks);
if (gc_type == FG_GC &&
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
@@ -1841,7 +1848,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;

- if (sync)
+ if (gc_control->init_gc_type == FG_GC)
goto stop;

if (!has_not_enough_free_secs(sbi, sec_freed, 0))
@@ -1871,7 +1878,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
ret = f2fs_write_checkpoint(sbi, &cpc);
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
- SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
+ SIT_I(sbi)->last_victim[FLUSH_DEVICE] = gc_control->victim_segno;

if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
@@ -1889,7 +1896,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,

put_gc_inode(&gc_list);

- if (sync && !ret)
+ if (gc_control->err_gc_skipped && !ret)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 87ff2b3cdf94..bc63f0572c64 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -523,8 +523,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
io_schedule();
finish_wait(&sbi->gc_thread->fggc_wq, &wait);
} else {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = BG_GC,
+ .no_bg_gc = true,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false };
f2fs_down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+ f2fs_gc(sbi, &gc_control);
}
}
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7edb018a60a6..8b23fa6fc6b7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2080,8 +2080,14 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->gc_mode = GC_URGENT_HIGH;

while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
+
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err == -ENODATA) {
err = 0;
break;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 4d1ad64d4cab..6699174977a3 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -652,19 +652,19 @@ TRACE_EVENT(f2fs_background_gc,

TRACE_EVENT(f2fs_gc_begin,

- TP_PROTO(struct super_block *sb, bool sync, bool background,
+ TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),

- TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta,
+ TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),

TP_STRUCT__entry(
__field(dev_t, dev)
- __field(bool, sync)
- __field(bool, background)
+ __field(int, gc_type)
+ __field(bool, no_bg_gc)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
@@ -676,8 +676,8 @@ TRACE_EVENT(f2fs_gc_begin,

TP_fast_assign(
__entry->dev = sb->s_dev;
- __entry->sync = sync;
- __entry->background = background;
+ __entry->gc_type = gc_type;
+ __entry->no_bg_gc = no_bg_gc;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
@@ -687,12 +687,12 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),

- TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, "
+ TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
"dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
- __entry->sync,
- __entry->background,
+ show_gc_type(__entry->gc_type),
+ __entry->no_bg_gc,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
--
2.36.0.512.ge40c2bad7a-goog


2022-05-09 16:55:55

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

This was used in Android for a long time. Let's upstream it.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Change log from v1:
- fix tracepoint for the "don't care" entry

fs/f2fs/file.c | 58 ++++++++++++++++++++---
include/trace/events/f2fs.h | 94 +++++++++++++++++++++++++++++++++++++
2 files changed, 145 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 68ddf4c7ca64..51df34f95984 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4329,17 +4329,39 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
+ const loff_t pos = iocb->ki_pos;
ssize_t ret;

if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;

- if (f2fs_should_use_dio(inode, iocb, to))
- return f2fs_dio_read_iter(iocb, to);
+ if (trace_f2fs_dataread_start_enabled()) {
+ char *p = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
+ char *path;
+
+ if (!p)
+ goto skip_read_trace;
+
+ path = dentry_path_raw(file_dentry(iocb->ki_filp), p, PATH_MAX);
+ if (IS_ERR(path)) {
+ kfree(p);
+ goto skip_read_trace;
+ }

- ret = filemap_read(iocb, to, 0);
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ trace_f2fs_dataread_start(inode, pos, iov_iter_count(to),
+ current->pid, path, current->comm);
+ kfree(p);
+ }
+skip_read_trace:
+ if (f2fs_should_use_dio(inode, iocb, to)) {
+ ret = f2fs_dio_read_iter(iocb, to);
+ } else {
+ ret = filemap_read(iocb, to, 0);
+ if (ret > 0)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ }
+ if (trace_f2fs_dataread_end_enabled())
+ trace_f2fs_dataread_end(inode, pos, ret);
return ret;
}

@@ -4637,14 +4659,36 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
preallocated = f2fs_preallocate_blocks(iocb, from, dio);
- if (preallocated < 0)
+ if (preallocated < 0) {
ret = preallocated;
- else
+ } else {
+ if (trace_f2fs_datawrite_start_enabled()) {
+ char *p = f2fs_kmalloc(F2FS_I_SB(inode),
+ PATH_MAX, GFP_KERNEL);
+ char *path;
+
+ if (!p)
+ goto skip_write_trace;
+ path = dentry_path_raw(file_dentry(iocb->ki_filp),
+ p, PATH_MAX);
+ if (IS_ERR(path)) {
+ kfree(p);
+ goto skip_write_trace;
+ }
+ trace_f2fs_datawrite_start(inode, orig_pos, orig_count,
+ current->pid, path, current->comm);
+ kfree(p);
+ }
+skip_write_trace:
/* Do the actual write. */
ret = dio ?
f2fs_dio_write_iter(iocb, from, &may_need_sync):
f2fs_buffered_write_iter(iocb, from);

+ if (trace_f2fs_datawrite_end_enabled())
+ trace_f2fs_datawrite_end(inode, orig_pos, ret);
+ }
+
/* Don't leave any preallocated blocks around past i_size. */
if (preallocated && i_size_read(inode) < target_size) {
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f701bb23f83c..11f6b7147be2 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -2068,6 +2068,100 @@ TRACE_EVENT(f2fs_fiemap,
__entry->ret)
);

+DECLARE_EVENT_CLASS(f2fs__rw_start,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+ pid_t pid, char *pathname, char *command),
+
+ TP_ARGS(inode, offset, bytes, pid, pathname, command),
+
+ TP_STRUCT__entry(
+ __string(pathbuf, pathname)
+ __field(loff_t, offset)
+ __field(int, bytes)
+ __field(loff_t, i_size)
+ __string(cmdline, command)
+ __field(pid_t, pid)
+ __field(ino_t, ino)
+ ),
+
+ TP_fast_assign(
+ /*
+ * Replace the spaces in filenames and cmdlines
+ * because this screws up the tooling that parses
+ * the traces.
+ */
+ __assign_str(pathbuf, pathname);
+ (void)strreplace(__get_str(pathbuf), ' ', '_');
+ __entry->offset = offset;
+ __entry->bytes = bytes;
+ __entry->i_size = i_size_read(inode);
+ __assign_str(cmdline, command);
+ (void)strreplace(__get_str(cmdline), ' ', '_');
+ __entry->pid = pid;
+ __entry->ino = inode->i_ino;
+ ),
+
+ TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
+ " pid %d, i_size %llu, ino %lu",
+ __get_str(pathbuf), __entry->offset, __entry->bytes,
+ __get_str(cmdline), __entry->pid, __entry->i_size,
+ (unsigned long) __entry->ino)
+);
+
+DECLARE_EVENT_CLASS(f2fs__rw_end,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+
+ TP_ARGS(inode, offset, bytes),
+
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(loff_t, offset)
+ __field(int, bytes)
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->offset = offset;
+ __entry->bytes = bytes;
+ ),
+
+ TP_printk("ino %lu, offset %llu, bytes %d",
+ (unsigned long) __entry->ino,
+ __entry->offset, __entry->bytes)
+);
+
+DEFINE_EVENT(f2fs__rw_start, f2fs_dataread_start,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+ pid_t pid, char *pathname, char *command),
+
+ TP_ARGS(inode, offset, bytes, pid, pathname, command)
+);
+
+DEFINE_EVENT(f2fs__rw_end, f2fs_dataread_end,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+
+ TP_ARGS(inode, offset, bytes)
+);
+
+DEFINE_EVENT(f2fs__rw_start, f2fs_datawrite_start,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+ pid_t pid, char *pathname, char *command),
+
+ TP_ARGS(inode, offset, bytes, pid, pathname, command)
+);
+
+DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end,
+
+ TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+
+ TP_ARGS(inode, offset, bytes)
+);
+
#endif /* _TRACE_F2FS_H */

/* This part must be outside protection */
--
2.35.1.1021.g381101b075-goog


2022-05-09 17:07:15

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5] f2fs: do not stop GC when requiring a free section

On 05/07, Chao Yu wrote:
> On 2022/5/7 7:20, Jaegeuk Kim wrote:
> > The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
> > chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
> > segment as a victim all the time, resulting in checkpoint=disable failure,
> > for example. Let's pick another one, if we fail to collect it.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/f2fs.h | 1 +
> > fs/f2fs/file.c | 12 ++++++++----
> > fs/f2fs/gc.c | 11 +++++++----
> > fs/f2fs/segment.c | 3 ++-
> > fs/f2fs/super.c | 3 ++-
> > include/trace/events/f2fs.h | 11 ++++++++---
> > 6 files changed, 28 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index d49b9b476592..27871f6efb01 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1282,6 +1282,7 @@ struct f2fs_gc_control {
> > bool no_bg_gc; /* check the space and stop bg_gc */
> > bool should_migrate_blocks; /* should migrate blocks */
> > bool err_gc_skipped; /* return EAGAIN if GC skipped */
> > + unsigned int nr_free_secs; /* # of free sections to do GC */
> > };
> > /* For s_flag in struct f2fs_sb_info */
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 0e7d101c3e65..7072c2b86b2f 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> > struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 0 };
> > pgoff_t pg_start, pg_end;
> > loff_t new_size = i_size_read(inode);
> > loff_t off_end;
> > @@ -2453,7 +2454,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
> > struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> > .no_bg_gc = false,
> > - .should_migrate_blocks = false };
> > + .should_migrate_blocks = false,
> > + .nr_free_secs = 0 };
> > __u32 sync;
> > int ret;
> > @@ -2494,7 +2496,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
> > .init_gc_type = range->sync ? FG_GC : BG_GC,
> > .no_bg_gc = false,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = range->sync };
> > + .err_gc_skipped = range->sync,
> > + .nr_free_secs = 0 };
> > u64 end;
> > int ret;
> > @@ -2940,7 +2943,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
> > struct f2fs_gc_control gc_control = {
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = true,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 0 };
> > int ret;
> > if (!capable(CAP_SYS_ADMIN))
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index ec3f6f876e76..f63576ff1c2d 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -147,6 +147,7 @@ static int gc_thread_func(void *data)
> > gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
> > gc_control.no_bg_gc = foreground;
> > + gc_control.nr_free_secs = foreground ? 1 : 0;
> > /* if return value is not zero, no victim was selected */
> > if (f2fs_gc(sbi, &gc_control))
> > @@ -1776,6 +1777,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> > unsigned int skipped_round = 0, round = 0;
> > trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> > + gc_control->nr_free_secs,
> > get_pages(sbi, F2FS_DIRTY_NODES),
> > get_pages(sbi, F2FS_DIRTY_DENTS),
> > get_pages(sbi, F2FS_DIRTY_IMETA),
> > @@ -1848,11 +1850,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> > if (gc_type == FG_GC)
> > sbi->cur_victim_sec = NULL_SEGNO;
> > - if (gc_control->init_gc_type == FG_GC)
> > - goto stop;
> > -
> > - if (!has_not_enough_free_secs(sbi, sec_freed, 0))
> > + if ((gc_control->init_gc_type == FG_GC ||
> > + !has_not_enough_free_secs(sbi, sec_freed, 0))) {
> > + if (sec_freed < gc_control->nr_free_secs)
>
> I missed to reset segno here, it may cause GC failure in next round due to
> sec_usage_check() returns true.
>
> segno = NULL_SEGNO;

Added. Thanks.

>
> Thanks,
>
> > + goto gc_more;
> > goto stop;
> > + }
> > if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round) {
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index bc63f0572c64..d6b9231ab0e1 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -528,7 +528,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
> > .init_gc_type = BG_GC,
> > .no_bg_gc = true,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = false };
> > + .err_gc_skipped = false,
> > + .nr_free_secs = 1 };
> > f2fs_down_write(&sbi->gc_lock);
> > f2fs_gc(sbi, &gc_control);
> > }
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 8b23fa6fc6b7..5d5b35067c3d 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -2084,7 +2084,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> > .victim_segno = NULL_SEGNO,
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 1 };
> > f2fs_down_write(&sbi->gc_lock);
> > err = f2fs_gc(sbi, &gc_control);
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 6699174977a3..349679a72301 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -653,18 +653,21 @@ TRACE_EVENT(f2fs_background_gc,
> > TRACE_EVENT(f2fs_gc_begin,
> > TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
> > + unsigned int nr_free_secs,
> > long long dirty_nodes, long long dirty_dents,
> > long long dirty_imeta, unsigned int free_sec,
> > unsigned int free_seg, int reserved_seg,
> > unsigned int prefree_seg),
> > - TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
> > + TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
> > + dirty_dents, dirty_imeta,
> > free_sec, free_seg, reserved_seg, prefree_seg),
> > TP_STRUCT__entry(
> > __field(dev_t, dev)
> > __field(int, gc_type)
> > __field(bool, no_bg_gc)
> > + __field(unsigned int, nr_free_secs)
> > __field(long long, dirty_nodes)
> > __field(long long, dirty_dents)
> > __field(long long, dirty_imeta)
> > @@ -678,6 +681,7 @@ TRACE_EVENT(f2fs_gc_begin,
> > __entry->dev = sb->s_dev;
> > __entry->gc_type = gc_type;
> > __entry->no_bg_gc = no_bg_gc;
> > + __entry->nr_free_secs = nr_free_secs;
> > __entry->dirty_nodes = dirty_nodes;
> > __entry->dirty_dents = dirty_dents;
> > __entry->dirty_imeta = dirty_imeta;
> > @@ -687,12 +691,13 @@ TRACE_EVENT(f2fs_gc_begin,
> > __entry->prefree_seg = prefree_seg;
> > ),
> > - TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
> > - "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> > + TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
> > + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> > "rsv_seg:%d, prefree_seg:%u",
> > show_dev(__entry->dev),
> > show_gc_type(__entry->gc_type),
> > __entry->no_bg_gc,
> > + __entry->nr_free_secs,
> > __entry->dirty_nodes,
> > __entry->dirty_dents,
> > __entry->dirty_imeta,

2022-05-11 04:30:26

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

Jaegeuk,

Seems it includes a wrong android tracepoint patch?

Thanks,

On 2022/5/10 0:47, Jaegeuk Kim wrote:
> This was used in Android for a long time. Let's upstream it.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> Change log from v1:
> - fix tracepoint for the "don't care" entry
>
> fs/f2fs/file.c | 58 ++++++++++++++++++++---
> include/trace/events/f2fs.h | 94 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 145 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 68ddf4c7ca64..51df34f95984 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4329,17 +4329,39 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> {
> struct inode *inode = file_inode(iocb->ki_filp);
> + const loff_t pos = iocb->ki_pos;
> ssize_t ret;
>
> if (!f2fs_is_compress_backend_ready(inode))
> return -EOPNOTSUPP;
>
> - if (f2fs_should_use_dio(inode, iocb, to))
> - return f2fs_dio_read_iter(iocb, to);
> + if (trace_f2fs_dataread_start_enabled()) {
> + char *p = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
> + char *path;
> +
> + if (!p)
> + goto skip_read_trace;
> +
> + path = dentry_path_raw(file_dentry(iocb->ki_filp), p, PATH_MAX);
> + if (IS_ERR(path)) {
> + kfree(p);
> + goto skip_read_trace;
> + }
>
> - ret = filemap_read(iocb, to, 0);
> - if (ret > 0)
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
> + trace_f2fs_dataread_start(inode, pos, iov_iter_count(to),
> + current->pid, path, current->comm);
> + kfree(p);
> + }
> +skip_read_trace:
> + if (f2fs_should_use_dio(inode, iocb, to)) {
> + ret = f2fs_dio_read_iter(iocb, to);
> + } else {
> + ret = filemap_read(iocb, to, 0);
> + if (ret > 0)
> + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
> + }
> + if (trace_f2fs_dataread_end_enabled())
> + trace_f2fs_dataread_end(inode, pos, ret);
> return ret;
> }
>
> @@ -4637,14 +4659,36 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> /* Possibly preallocate the blocks for the write. */
> target_size = iocb->ki_pos + iov_iter_count(from);
> preallocated = f2fs_preallocate_blocks(iocb, from, dio);
> - if (preallocated < 0)
> + if (preallocated < 0) {
> ret = preallocated;
> - else
> + } else {
> + if (trace_f2fs_datawrite_start_enabled()) {
> + char *p = f2fs_kmalloc(F2FS_I_SB(inode),
> + PATH_MAX, GFP_KERNEL);
> + char *path;
> +
> + if (!p)
> + goto skip_write_trace;
> + path = dentry_path_raw(file_dentry(iocb->ki_filp),
> + p, PATH_MAX);
> + if (IS_ERR(path)) {
> + kfree(p);
> + goto skip_write_trace;
> + }
> + trace_f2fs_datawrite_start(inode, orig_pos, orig_count,
> + current->pid, path, current->comm);
> + kfree(p);
> + }
> +skip_write_trace:
> /* Do the actual write. */
> ret = dio ?
> f2fs_dio_write_iter(iocb, from, &may_need_sync):
> f2fs_buffered_write_iter(iocb, from);
>
> + if (trace_f2fs_datawrite_end_enabled())
> + trace_f2fs_datawrite_end(inode, orig_pos, ret);
> + }
> +
> /* Don't leave any preallocated blocks around past i_size. */
> if (preallocated && i_size_read(inode) < target_size) {
> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index f701bb23f83c..11f6b7147be2 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -2068,6 +2068,100 @@ TRACE_EVENT(f2fs_fiemap,
> __entry->ret)
> );
>
> +DECLARE_EVENT_CLASS(f2fs__rw_start,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> + pid_t pid, char *pathname, char *command),
> +
> + TP_ARGS(inode, offset, bytes, pid, pathname, command),
> +
> + TP_STRUCT__entry(
> + __string(pathbuf, pathname)
> + __field(loff_t, offset)
> + __field(int, bytes)
> + __field(loff_t, i_size)
> + __string(cmdline, command)
> + __field(pid_t, pid)
> + __field(ino_t, ino)
> + ),
> +
> + TP_fast_assign(
> + /*
> + * Replace the spaces in filenames and cmdlines
> + * because this screws up the tooling that parses
> + * the traces.
> + */
> + __assign_str(pathbuf, pathname);
> + (void)strreplace(__get_str(pathbuf), ' ', '_');
> + __entry->offset = offset;
> + __entry->bytes = bytes;
> + __entry->i_size = i_size_read(inode);
> + __assign_str(cmdline, command);
> + (void)strreplace(__get_str(cmdline), ' ', '_');
> + __entry->pid = pid;
> + __entry->ino = inode->i_ino;
> + ),
> +
> + TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
> + " pid %d, i_size %llu, ino %lu",
> + __get_str(pathbuf), __entry->offset, __entry->bytes,
> + __get_str(cmdline), __entry->pid, __entry->i_size,
> + (unsigned long) __entry->ino)
> +);
> +
> +DECLARE_EVENT_CLASS(f2fs__rw_end,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> +
> + TP_ARGS(inode, offset, bytes),
> +
> + TP_STRUCT__entry(
> + __field(ino_t, ino)
> + __field(loff_t, offset)
> + __field(int, bytes)
> + ),
> +
> + TP_fast_assign(
> + __entry->ino = inode->i_ino;
> + __entry->offset = offset;
> + __entry->bytes = bytes;
> + ),
> +
> + TP_printk("ino %lu, offset %llu, bytes %d",
> + (unsigned long) __entry->ino,
> + __entry->offset, __entry->bytes)
> +);
> +
> +DEFINE_EVENT(f2fs__rw_start, f2fs_dataread_start,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> + pid_t pid, char *pathname, char *command),
> +
> + TP_ARGS(inode, offset, bytes, pid, pathname, command)
> +);
> +
> +DEFINE_EVENT(f2fs__rw_end, f2fs_dataread_end,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> +
> + TP_ARGS(inode, offset, bytes)
> +);
> +
> +DEFINE_EVENT(f2fs__rw_start, f2fs_datawrite_start,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> + pid_t pid, char *pathname, char *command),
> +
> + TP_ARGS(inode, offset, bytes, pid, pathname, command)
> +);
> +
> +DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end,
> +
> + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> +
> + TP_ARGS(inode, offset, bytes)
> +);
> +
> #endif /* _TRACE_F2FS_H */
>
> /* This part must be outside protection */

2022-05-11 11:34:01

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

On 05/11, Chao Yu wrote:
> Jaegeuk,
>
> Seems it includes a wrong android tracepoint patch?

Oops. :)

>
> Thanks,
>
> On 2022/5/10 0:47, Jaegeuk Kim wrote:
> > This was used in Android for a long time. Let's upstream it.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > Change log from v1:
> > - fix tracepoint for the "don't care" entry
> >
> > fs/f2fs/file.c | 58 ++++++++++++++++++++---
> > include/trace/events/f2fs.h | 94 +++++++++++++++++++++++++++++++++++++
> > 2 files changed, 145 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 68ddf4c7ca64..51df34f95984 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4329,17 +4329,39 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > {
> > struct inode *inode = file_inode(iocb->ki_filp);
> > + const loff_t pos = iocb->ki_pos;
> > ssize_t ret;
> > if (!f2fs_is_compress_backend_ready(inode))
> > return -EOPNOTSUPP;
> > - if (f2fs_should_use_dio(inode, iocb, to))
> > - return f2fs_dio_read_iter(iocb, to);
> > + if (trace_f2fs_dataread_start_enabled()) {
> > + char *p = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
> > + char *path;
> > +
> > + if (!p)
> > + goto skip_read_trace;
> > +
> > + path = dentry_path_raw(file_dentry(iocb->ki_filp), p, PATH_MAX);
> > + if (IS_ERR(path)) {
> > + kfree(p);
> > + goto skip_read_trace;
> > + }
> > - ret = filemap_read(iocb, to, 0);
> > - if (ret > 0)
> > - f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
> > + trace_f2fs_dataread_start(inode, pos, iov_iter_count(to),
> > + current->pid, path, current->comm);
> > + kfree(p);
> > + }
> > +skip_read_trace:
> > + if (f2fs_should_use_dio(inode, iocb, to)) {
> > + ret = f2fs_dio_read_iter(iocb, to);
> > + } else {
> > + ret = filemap_read(iocb, to, 0);
> > + if (ret > 0)
> > + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
> > + }
> > + if (trace_f2fs_dataread_end_enabled())
> > + trace_f2fs_dataread_end(inode, pos, ret);
> > return ret;
> > }
> > @@ -4637,14 +4659,36 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > /* Possibly preallocate the blocks for the write. */
> > target_size = iocb->ki_pos + iov_iter_count(from);
> > preallocated = f2fs_preallocate_blocks(iocb, from, dio);
> > - if (preallocated < 0)
> > + if (preallocated < 0) {
> > ret = preallocated;
> > - else
> > + } else {
> > + if (trace_f2fs_datawrite_start_enabled()) {
> > + char *p = f2fs_kmalloc(F2FS_I_SB(inode),
> > + PATH_MAX, GFP_KERNEL);
> > + char *path;
> > +
> > + if (!p)
> > + goto skip_write_trace;
> > + path = dentry_path_raw(file_dentry(iocb->ki_filp),
> > + p, PATH_MAX);
> > + if (IS_ERR(path)) {
> > + kfree(p);
> > + goto skip_write_trace;
> > + }
> > + trace_f2fs_datawrite_start(inode, orig_pos, orig_count,
> > + current->pid, path, current->comm);
> > + kfree(p);
> > + }
> > +skip_write_trace:
> > /* Do the actual write. */
> > ret = dio ?
> > f2fs_dio_write_iter(iocb, from, &may_need_sync):
> > f2fs_buffered_write_iter(iocb, from);
> > + if (trace_f2fs_datawrite_end_enabled())
> > + trace_f2fs_datawrite_end(inode, orig_pos, ret);
> > + }
> > +
> > /* Don't leave any preallocated blocks around past i_size. */
> > if (preallocated && i_size_read(inode) < target_size) {
> > f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index f701bb23f83c..11f6b7147be2 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -2068,6 +2068,100 @@ TRACE_EVENT(f2fs_fiemap,
> > __entry->ret)
> > );
> > +DECLARE_EVENT_CLASS(f2fs__rw_start,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> > + pid_t pid, char *pathname, char *command),
> > +
> > + TP_ARGS(inode, offset, bytes, pid, pathname, command),
> > +
> > + TP_STRUCT__entry(
> > + __string(pathbuf, pathname)
> > + __field(loff_t, offset)
> > + __field(int, bytes)
> > + __field(loff_t, i_size)
> > + __string(cmdline, command)
> > + __field(pid_t, pid)
> > + __field(ino_t, ino)
> > + ),
> > +
> > + TP_fast_assign(
> > + /*
> > + * Replace the spaces in filenames and cmdlines
> > + * because this screws up the tooling that parses
> > + * the traces.
> > + */
> > + __assign_str(pathbuf, pathname);
> > + (void)strreplace(__get_str(pathbuf), ' ', '_');
> > + __entry->offset = offset;
> > + __entry->bytes = bytes;
> > + __entry->i_size = i_size_read(inode);
> > + __assign_str(cmdline, command);
> > + (void)strreplace(__get_str(cmdline), ' ', '_');
> > + __entry->pid = pid;
> > + __entry->ino = inode->i_ino;
> > + ),
> > +
> > + TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
> > + " pid %d, i_size %llu, ino %lu",
> > + __get_str(pathbuf), __entry->offset, __entry->bytes,
> > + __get_str(cmdline), __entry->pid, __entry->i_size,
> > + (unsigned long) __entry->ino)
> > +);
> > +
> > +DECLARE_EVENT_CLASS(f2fs__rw_end,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> > +
> > + TP_ARGS(inode, offset, bytes),
> > +
> > + TP_STRUCT__entry(
> > + __field(ino_t, ino)
> > + __field(loff_t, offset)
> > + __field(int, bytes)
> > + ),
> > +
> > + TP_fast_assign(
> > + __entry->ino = inode->i_ino;
> > + __entry->offset = offset;
> > + __entry->bytes = bytes;
> > + ),
> > +
> > + TP_printk("ino %lu, offset %llu, bytes %d",
> > + (unsigned long) __entry->ino,
> > + __entry->offset, __entry->bytes)
> > +);
> > +
> > +DEFINE_EVENT(f2fs__rw_start, f2fs_dataread_start,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> > + pid_t pid, char *pathname, char *command),
> > +
> > + TP_ARGS(inode, offset, bytes, pid, pathname, command)
> > +);
> > +
> > +DEFINE_EVENT(f2fs__rw_end, f2fs_dataread_end,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> > +
> > + TP_ARGS(inode, offset, bytes)
> > +);
> > +
> > +DEFINE_EVENT(f2fs__rw_start, f2fs_datawrite_start,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes,
> > + pid_t pid, char *pathname, char *command),
> > +
> > + TP_ARGS(inode, offset, bytes, pid, pathname, command)
> > +);
> > +
> > +DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, int bytes),
> > +
> > + TP_ARGS(inode, offset, bytes)
> > +);
> > +
> > #endif /* _TRACE_F2FS_H */
> > /* This part must be outside protection */

2022-05-11 13:19:56

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

No functional change.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Oops. :)

Change log from v1:
- fix tracepoint for the "don't care" entry

fs/f2fs/f2fs.h | 11 +++++++++--
fs/f2fs/file.c | 30 +++++++++++++++++++++++++-----
fs/f2fs/gc.c | 29 ++++++++++++++++++-----------
fs/f2fs/segment.c | 8 +++++++-
fs/f2fs/super.c | 8 +++++++-
include/trace/events/f2fs.h | 18 +++++++++---------
6 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index efe5e80163a8..d49b9b476592 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1276,6 +1276,14 @@ struct atgc_management {
unsigned long long age_threshold; /* age threshold */
};

+struct f2fs_gc_control {
+ unsigned int victim_segno; /* target victim segment number */
+ int init_gc_type; /* FG_GC or BG_GC */
+ bool no_bg_gc; /* check the space and stop bg_gc */
+ bool should_migrate_blocks; /* should migrate blocks */
+ bool err_gc_skipped; /* return EAGAIN if GC skipped */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -3786,8 +3794,7 @@ extern const struct iomap_ops f2fs_iomap_ops;
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
- unsigned int segno);
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b307d96a0a7c..0e7d101c3e65 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1647,6 +1647,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1684,7 +1688,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err && err != -ENODATA)
goto out_err;
}
@@ -2447,6 +2451,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false };
__u32 sync;
int ret;

@@ -2472,7 +2479,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
+ gc_control.init_gc_type = sync ? FG_GC : BG_GC;
+ gc_control.err_gc_skipped = sync;
+ ret = f2fs_gc(sbi, &gc_control);
out:
mnt_drop_write_file(filp);
return ret;
@@ -2481,6 +2490,11 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = range->sync ? FG_GC : BG_GC,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = range->sync };
u64 end;
int ret;

@@ -2508,8 +2522,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, range->sync, true, false,
- GET_SEGNO(sbi, range->start));
+ gc_control.victim_segno = GET_SEGNO(sbi, range->start);
+ ret = f2fs_gc(sbi, &gc_control);
if (ret) {
if (ret == -EBUSY)
ret = -EAGAIN;
@@ -2923,6 +2937,10 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = true,
+ .err_gc_skipped = true };
int ret;

if (!capable(CAP_SYS_ADMIN))
@@ -2966,7 +2984,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
- ret = f2fs_gc(sbi, true, true, true, start_segno);
+
+ gc_control.victim_segno = start_segno;
+ ret = f2fs_gc(sbi, &gc_control);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3009c0a97ab4..aeffcc1d5c02 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -35,6 +35,9 @@ static int gc_thread_func(void *data)
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
unsigned int wait_ms;
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .should_migrate_blocks = false };

wait_ms = gc_th->min_sleep_time;

@@ -141,8 +144,12 @@ static int gc_thread_func(void *data)
if (foreground)
sync_mode = false;

+ gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
+ gc_control.no_bg_gc = foreground;
+ gc_control.err_gc_skipped = sync_mode;
+
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
+ if (f2fs_gc(sbi, &gc_control))
wait_ms = gc_th->no_gc_sleep_time;

if (foreground)
@@ -1753,14 +1760,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return seg_freed;
}

-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
- bool background, bool force, unsigned int segno)
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
- int gc_type = sync ? FG_GC : BG_GC;
+ int gc_type = gc_control->init_gc_type;
+ unsigned int segno = gc_control->victim_segno;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
- unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
@@ -1769,7 +1775,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;

- trace_f2fs_gc_begin(sbi->sb, sync, background,
+ trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1808,7 +1814,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
}

/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
- if (gc_type == BG_GC && !background) {
+ if (gc_type == BG_GC && gc_control->no_bg_gc) {
ret = -EINVAL;
goto stop;
}
@@ -1824,7 +1830,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
goto stop;
}

- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
+ gc_control->should_migrate_blocks);
if (gc_type == FG_GC &&
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
@@ -1841,7 +1848,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;

- if (sync)
+ if (gc_control->init_gc_type == FG_GC)
goto stop;

if (!has_not_enough_free_secs(sbi, sec_freed, 0))
@@ -1871,7 +1878,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
ret = f2fs_write_checkpoint(sbi, &cpc);
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
- SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
+ SIT_I(sbi)->last_victim[FLUSH_DEVICE] = gc_control->victim_segno;

if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
@@ -1889,7 +1896,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,

put_gc_inode(&gc_list);

- if (sync && !ret)
+ if (gc_control->err_gc_skipped && !ret)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 87ff2b3cdf94..bc63f0572c64 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -523,8 +523,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
io_schedule();
finish_wait(&sbi->gc_thread->fggc_wq, &wait);
} else {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = BG_GC,
+ .no_bg_gc = true,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false };
f2fs_down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+ f2fs_gc(sbi, &gc_control);
}
}
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f0cd454d8f88..f6338f1a7364 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2079,8 +2079,14 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->gc_mode = GC_URGENT_HIGH;

while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
+
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err == -ENODATA) {
err = 0;
break;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 4d1ad64d4cab..7435aacb4c78 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -652,19 +652,19 @@ TRACE_EVENT(f2fs_background_gc,

TRACE_EVENT(f2fs_gc_begin,

- TP_PROTO(struct super_block *sb, bool sync, bool background,
+ TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),

- TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta,
+ TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),

TP_STRUCT__entry(
__field(dev_t, dev)
- __field(bool, sync)
- __field(bool, background)
+ __field(int, gc_type)
+ __field(bool, no_bg_gc)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
@@ -676,8 +676,8 @@ TRACE_EVENT(f2fs_gc_begin,

TP_fast_assign(
__entry->dev = sb->s_dev;
- __entry->sync = sync;
- __entry->background = background;
+ __entry->gc_type = gc_type;
+ __entry->no_bg_gc = no_bg_gc;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
@@ -687,12 +687,12 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),

- TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, "
+ TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
"dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
- __entry->sync,
- __entry->background,
+ show_gc_type(__entry->gc_type),
+ (__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
--
2.36.0.512.ge40c2bad7a-goog


2022-05-11 16:20:49

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

On 2022/5/11 11:30, Jaegeuk Kim wrote:
> No functional change.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2022-05-14 03:40:14

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 2/5 v2] f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters

No functional change.

- remove checkpoint=disable check for f2fs_write_checkpoint
- get sec_freed all the time

Reviewed-by: Chao Yu <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---

Change log from v1:
- clean up more

fs/f2fs/f2fs.h | 11 +++++-
fs/f2fs/file.c | 30 ++++++++++++---
fs/f2fs/gc.c | 74 ++++++++++++++++++++-----------------
fs/f2fs/segment.c | 8 +++-
fs/f2fs/super.c | 8 +++-
include/trace/events/f2fs.h | 18 ++++-----
6 files changed, 98 insertions(+), 51 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0699d7460d5d..9920b2d6af8f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1265,6 +1265,14 @@ struct atgc_management {
unsigned long long age_threshold; /* age threshold */
};

+struct f2fs_gc_control {
+ unsigned int victim_segno; /* target victim segment number */
+ int init_gc_type; /* FG_GC or BG_GC */
+ bool no_bg_gc; /* check the space and stop bg_gc */
+ bool should_migrate_blocks; /* should migrate blocks */
+ bool err_gc_skipped; /* return EAGAIN if GC skipped */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -3761,8 +3769,7 @@ extern const struct iomap_ops f2fs_iomap_ops;
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
- unsigned int segno);
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6da8a663de7b..d0547bef0851 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1647,6 +1647,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -1684,7 +1688,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err && err != -ENODATA)
goto out_err;
}
@@ -2344,6 +2348,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false };
__u32 sync;
int ret;

@@ -2369,7 +2376,9 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
+ gc_control.init_gc_type = sync ? FG_GC : BG_GC;
+ gc_control.err_gc_skipped = sync;
+ ret = f2fs_gc(sbi, &gc_control);
out:
mnt_drop_write_file(filp);
return ret;
@@ -2378,6 +2387,11 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = range->sync ? FG_GC : BG_GC,
+ .no_bg_gc = false,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = range->sync };
u64 end;
int ret;

@@ -2405,8 +2419,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
f2fs_down_write(&sbi->gc_lock);
}

- ret = f2fs_gc(sbi, range->sync, true, false,
- GET_SEGNO(sbi, range->start));
+ gc_control.victim_segno = GET_SEGNO(sbi, range->start);
+ ret = f2fs_gc(sbi, &gc_control);
if (ret) {
if (ret == -EBUSY)
ret = -EAGAIN;
@@ -2820,6 +2834,10 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
+ struct f2fs_gc_control gc_control = {
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = true,
+ .err_gc_skipped = true };
int ret;

if (!capable(CAP_SYS_ADMIN))
@@ -2863,7 +2881,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
- ret = f2fs_gc(sbi, true, true, true, start_segno);
+
+ gc_control.victim_segno = start_segno;
+ ret = f2fs_gc(sbi, &gc_control);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ba8e93e517be..f3d58d154240 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -35,6 +35,9 @@ static int gc_thread_func(void *data)
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
unsigned int wait_ms;
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .should_migrate_blocks = false };

wait_ms = gc_th->min_sleep_time;

@@ -141,8 +144,12 @@ static int gc_thread_func(void *data)
if (foreground)
sync_mode = false;

+ gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
+ gc_control.no_bg_gc = foreground;
+ gc_control.err_gc_skipped = sync_mode;
+
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
+ if (f2fs_gc(sbi, &gc_control))
wait_ms = gc_th->no_gc_sleep_time;

if (foreground)
@@ -1740,21 +1747,20 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return seg_freed;
}

-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
- bool background, bool force, unsigned int segno)
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
- int gc_type = sync ? FG_GC : BG_GC;
+ int gc_type = gc_control->init_gc_type;
+ unsigned int segno = gc_control->victim_segno;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
- unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned int skipped_round = 0, round = 0;

- trace_f2fs_gc_begin(sbi->sb, sync, background,
+ trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1781,8 +1787,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
- if (prefree_segments(sbi) &&
- !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
+ if (prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
@@ -1792,7 +1797,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
}

/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
- if (gc_type == BG_GC && !background) {
+ if (gc_type == BG_GC && gc_control->no_bg_gc) {
ret = -EINVAL;
goto stop;
}
@@ -1808,45 +1813,48 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
goto stop;
}

- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
- if (gc_type == FG_GC &&
- seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
- sec_freed++;
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
+ gc_control->should_migrate_blocks);
total_freed += seg_freed;

- if (gc_type == FG_GC) {
- if (sbi->skipped_gc_rwsem)
- skipped_round++;
- round++;
- }
+ if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
+ sec_freed++;

if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;

- if (sync)
+ if (gc_control->init_gc_type == FG_GC)
goto stop;

- if (!has_not_enough_free_secs(sbi, sec_freed, 0))
+ if (!has_not_enough_free_secs(sbi,
+ (gc_type == FG_GC) ? sec_freed : 0, 0))
goto stop;

- if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round) {
-
- /* Write checkpoint to reclaim prefree segments */
- if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
- prefree_segments(sbi) &&
- !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
+ /* FG_GC stops GC by skip_count */
+ if (gc_type == FG_GC) {
+ if (sbi->skipped_gc_rwsem)
+ skipped_round++;
+ round++;
+ if (skipped_round > MAX_SKIP_GC_COUNT &&
+ skipped_round * 2 >= round) {
ret = f2fs_write_checkpoint(sbi, &cpc);
- if (ret)
- goto stop;
+ goto stop;
}
- segno = NULL_SEGNO;
- goto gc_more;
}
- if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+
+ /* Write checkpoint to reclaim prefree segments */
+ if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
+ prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
+ if (ret)
+ goto stop;
+ }
+ segno = NULL_SEGNO;
+ goto gc_more;
+
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
- SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
+ SIT_I(sbi)->last_victim[FLUSH_DEVICE] = gc_control->victim_segno;

if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
@@ -1864,7 +1872,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,

put_gc_inode(&gc_list);

- if (sync && !ret)
+ if (gc_control->err_gc_skipped && !ret)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 47934995e2ca..8b4f2b1d2cca 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -399,8 +399,14 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
io_schedule();
finish_wait(&sbi->gc_thread->fggc_wq, &wait);
} else {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = BG_GC,
+ .no_bg_gc = true,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false };
f2fs_down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+ f2fs_gc(sbi, &gc_control);
}
}
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8c81dd324297..a28c27eed6d0 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2076,8 +2076,14 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->gc_mode = GC_URGENT_HIGH;

while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = FG_GC,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = true };
+
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, &gc_control);
if (err == -ENODATA) {
err = 0;
break;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7e915dbf3674..54ec9e543f09 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -644,19 +644,19 @@ TRACE_EVENT(f2fs_background_gc,

TRACE_EVENT(f2fs_gc_begin,

- TP_PROTO(struct super_block *sb, bool sync, bool background,
+ TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),

- TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta,
+ TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),

TP_STRUCT__entry(
__field(dev_t, dev)
- __field(bool, sync)
- __field(bool, background)
+ __field(int, gc_type)
+ __field(bool, no_bg_gc)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
@@ -668,8 +668,8 @@ TRACE_EVENT(f2fs_gc_begin,

TP_fast_assign(
__entry->dev = sb->s_dev;
- __entry->sync = sync;
- __entry->background = background;
+ __entry->gc_type = gc_type;
+ __entry->no_bg_gc = no_bg_gc;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
@@ -679,12 +679,12 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),

- TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, "
+ TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
"dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
- __entry->sync,
- __entry->background,
+ show_gc_type(__entry->gc_type),
+ (__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
--
2.36.0.550.gb090851708-goog


2022-05-14 03:44:20

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 4/5 v2] f2fs: do not stop GC when requiring a free section

The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
segment as a victim all the time, resulting in checkpoint=disable failure,
for example. Let's pick another one, if we fail to collect it.

Signed-off-by: Jaegeuk Kim <[email protected]>
---

Change log from v1:
- refactor the code path to avoid ambiguous condition like BG_GC/sec_freed

fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 12 ++++++++----
fs/f2fs/gc.c | 14 +++++++++-----
fs/f2fs/segment.c | 3 ++-
fs/f2fs/super.c | 3 ++-
include/trace/events/f2fs.h | 11 ++++++++---
6 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9920b2d6af8f..492af5b96de1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1271,6 +1271,7 @@ struct f2fs_gc_control {
bool no_bg_gc; /* check the space and stop bg_gc */
bool should_migrate_blocks; /* should migrate blocks */
bool err_gc_skipped; /* return EAGAIN if GC skipped */
+ unsigned int nr_free_secs; /* # of free sections to do GC */
};

/* For s_flag in struct f2fs_sb_info */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d0547bef0851..216081ea8c81 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 0 };
pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
@@ -2350,7 +2351,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
.no_bg_gc = false,
- .should_migrate_blocks = false };
+ .should_migrate_blocks = false,
+ .nr_free_secs = 0 };
__u32 sync;
int ret;

@@ -2391,7 +2393,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
.init_gc_type = range->sync ? FG_GC : BG_GC,
.no_bg_gc = false,
.should_migrate_blocks = false,
- .err_gc_skipped = range->sync };
+ .err_gc_skipped = range->sync,
+ .nr_free_secs = 0 };
u64 end;
int ret;

@@ -2837,7 +2840,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
struct f2fs_gc_control gc_control = {
.init_gc_type = FG_GC,
.should_migrate_blocks = true,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 0 };
int ret;

if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e275b72bc65f..10b24b0f13a5 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -147,6 +147,7 @@ static int gc_thread_func(void *data)

gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
gc_control.no_bg_gc = foreground;
+ gc_control.nr_free_secs = foreground ? 1 : 0;

/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, &gc_control))
@@ -1761,6 +1762,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
unsigned int skipped_round = 0, round = 0;

trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
+ gc_control->nr_free_secs,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1823,12 +1825,13 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;

- if (gc_control->init_gc_type == FG_GC)
- goto stop;
-
- if (!has_not_enough_free_secs(sbi,
- (gc_type == FG_GC) ? sec_freed : 0, 0))
+ if (gc_control->init_gc_type == FG_GC ||
+ !has_not_enough_free_secs(sbi,
+ (gc_type == FG_GC) ? sec_freed : 0, 0)) {
+ if (gc_mode == FG_GC && sec_freed < gc_control->nr_free_secs)
+ goto go_gc_more;
goto stop;
+ }

/* FG_GC stops GC by skip_count */
if (gc_type == FG_GC) {
@@ -1849,6 +1852,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
if (ret)
goto stop;
}
+go_gc_more:
segno = NULL_SEGNO;
goto gc_more;

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8b4f2b1d2cca..0a4180f64291 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -404,7 +404,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
.init_gc_type = BG_GC,
.no_bg_gc = true,
.should_migrate_blocks = false,
- .err_gc_skipped = false };
+ .err_gc_skipped = false,
+ .nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
f2fs_gc(sbi, &gc_control);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a28c27eed6d0..63daae67a9d9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2080,7 +2080,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.victim_segno = NULL_SEGNO,
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
- .err_gc_skipped = true };
+ .err_gc_skipped = true,
+ .nr_free_secs = 1 };

f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, &gc_control);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 54ec9e543f09..16c67ede85b6 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -645,18 +645,21 @@ TRACE_EVENT(f2fs_background_gc,
TRACE_EVENT(f2fs_gc_begin,

TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
+ unsigned int nr_free_secs,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),

- TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
+ TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
+ dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),

TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, gc_type)
__field(bool, no_bg_gc)
+ __field(unsigned int, nr_free_secs)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
@@ -670,6 +673,7 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->dev = sb->s_dev;
__entry->gc_type = gc_type;
__entry->no_bg_gc = no_bg_gc;
+ __entry->nr_free_secs = nr_free_secs;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
@@ -679,12 +683,13 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),

- TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
- "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
+ TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
+ "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
show_gc_type(__entry->gc_type),
(__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
+ __entry->nr_free_secs,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
--
2.36.0.550.gb090851708-goog


2022-05-16 09:42:59

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5 v2] f2fs: do not stop GC when requiring a free section

On 2022/5/13 4:50, Jaegeuk Kim wrote:
> The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
> chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
> segment as a victim all the time, resulting in checkpoint=disable failure,
> for example. Let's pick another one, if we fail to collect it.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
>
> Change log from v1:
> - refactor the code path to avoid ambiguous condition like BG_GC/sec_freed
>
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/file.c | 12 ++++++++----
> fs/f2fs/gc.c | 14 +++++++++-----
> fs/f2fs/segment.c | 3 ++-
> fs/f2fs/super.c | 3 ++-
> include/trace/events/f2fs.h | 11 ++++++++---
> 6 files changed, 30 insertions(+), 14 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 9920b2d6af8f..492af5b96de1 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1271,6 +1271,7 @@ struct f2fs_gc_control {
> bool no_bg_gc; /* check the space and stop bg_gc */
> bool should_migrate_blocks; /* should migrate blocks */
> bool err_gc_skipped; /* return EAGAIN if GC skipped */
> + unsigned int nr_free_secs; /* # of free sections to do GC */
> };
>
> /* For s_flag in struct f2fs_sb_info */
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index d0547bef0851..216081ea8c81 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> .init_gc_type = FG_GC,
> .should_migrate_blocks = false,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 0 };
> pgoff_t pg_start, pg_end;
> loff_t new_size = i_size_read(inode);
> loff_t off_end;
> @@ -2350,7 +2351,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> .no_bg_gc = false,
> - .should_migrate_blocks = false };
> + .should_migrate_blocks = false,
> + .nr_free_secs = 0 };
> __u32 sync;
> int ret;
>
> @@ -2391,7 +2393,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
> .init_gc_type = range->sync ? FG_GC : BG_GC,
> .no_bg_gc = false,
> .should_migrate_blocks = false,
> - .err_gc_skipped = range->sync };
> + .err_gc_skipped = range->sync,
> + .nr_free_secs = 0 };
> u64 end;
> int ret;
>
> @@ -2837,7 +2840,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
> struct f2fs_gc_control gc_control = {
> .init_gc_type = FG_GC,
> .should_migrate_blocks = true,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 0 };
> int ret;
>
> if (!capable(CAP_SYS_ADMIN))
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index e275b72bc65f..10b24b0f13a5 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -147,6 +147,7 @@ static int gc_thread_func(void *data)
>
> gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
> gc_control.no_bg_gc = foreground;
> + gc_control.nr_free_secs = foreground ? 1 : 0;
>
> /* if return value is not zero, no victim was selected */
> if (f2fs_gc(sbi, &gc_control))
> @@ -1761,6 +1762,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> unsigned int skipped_round = 0, round = 0;
>
> trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> + gc_control->nr_free_secs,
> get_pages(sbi, F2FS_DIRTY_NODES),
> get_pages(sbi, F2FS_DIRTY_DENTS),
> get_pages(sbi, F2FS_DIRTY_IMETA),
> @@ -1823,12 +1825,13 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> if (gc_type == FG_GC)
> sbi->cur_victim_sec = NULL_SEGNO;
>
> - if (gc_control->init_gc_type == FG_GC)
> - goto stop;
> -
> - if (!has_not_enough_free_secs(sbi,
> - (gc_type == FG_GC) ? sec_freed : 0, 0))
> + if (gc_control->init_gc_type == FG_GC ||
> + !has_not_enough_free_secs(sbi,
> + (gc_type == FG_GC) ? sec_freed : 0, 0)) {

In all gc_control->init_gc_type = BG_GC cases, gc_control->no_bg_gc is true,
if gc_type = BG_GC, then it should break out due to below condition.

/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && gc_control->no_bg_gc) {
ret = -EINVAL;
goto stop;
}

Otherwise gc_type should always be FG_GC in
!has_not_enough_free_secs(sbi, (gc_type == FG_GC) ? sec_freed : 0, 0), right?

Thanks,

> + if (gc_mode == FG_GC && sec_freed < gc_control->nr_free_secs)
> + goto go_gc_more;
> goto stop;
> + }
>
> /* FG_GC stops GC by skip_count */
> if (gc_type == FG_GC) {
> @@ -1849,6 +1852,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> if (ret)
> goto stop;
> }
> +go_gc_more:
> segno = NULL_SEGNO;
> goto gc_more;
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8b4f2b1d2cca..0a4180f64291 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -404,7 +404,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
> .init_gc_type = BG_GC,
> .no_bg_gc = true,
> .should_migrate_blocks = false,
> - .err_gc_skipped = false };
> + .err_gc_skipped = false,
> + .nr_free_secs = 1 };
> f2fs_down_write(&sbi->gc_lock);
> f2fs_gc(sbi, &gc_control);
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index a28c27eed6d0..63daae67a9d9 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2080,7 +2080,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> .victim_segno = NULL_SEGNO,
> .init_gc_type = FG_GC,
> .should_migrate_blocks = false,
> - .err_gc_skipped = true };
> + .err_gc_skipped = true,
> + .nr_free_secs = 1 };
>
> f2fs_down_write(&sbi->gc_lock);
> err = f2fs_gc(sbi, &gc_control);
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 54ec9e543f09..16c67ede85b6 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -645,18 +645,21 @@ TRACE_EVENT(f2fs_background_gc,
> TRACE_EVENT(f2fs_gc_begin,
>
> TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
> + unsigned int nr_free_secs,
> long long dirty_nodes, long long dirty_dents,
> long long dirty_imeta, unsigned int free_sec,
> unsigned int free_seg, int reserved_seg,
> unsigned int prefree_seg),
>
> - TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
> + TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
> + dirty_dents, dirty_imeta,
> free_sec, free_seg, reserved_seg, prefree_seg),
>
> TP_STRUCT__entry(
> __field(dev_t, dev)
> __field(int, gc_type)
> __field(bool, no_bg_gc)
> + __field(unsigned int, nr_free_secs)
> __field(long long, dirty_nodes)
> __field(long long, dirty_dents)
> __field(long long, dirty_imeta)
> @@ -670,6 +673,7 @@ TRACE_EVENT(f2fs_gc_begin,
> __entry->dev = sb->s_dev;
> __entry->gc_type = gc_type;
> __entry->no_bg_gc = no_bg_gc;
> + __entry->nr_free_secs = nr_free_secs;
> __entry->dirty_nodes = dirty_nodes;
> __entry->dirty_dents = dirty_dents;
> __entry->dirty_imeta = dirty_imeta;
> @@ -679,12 +683,13 @@ TRACE_EVENT(f2fs_gc_begin,
> __entry->prefree_seg = prefree_seg;
> ),
>
> - TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
> - "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> + TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
> + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> "rsv_seg:%d, prefree_seg:%u",
> show_dev(__entry->dev),
> show_gc_type(__entry->gc_type),
> (__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
> + __entry->nr_free_secs,
> __entry->dirty_nodes,
> __entry->dirty_dents,
> __entry->dirty_imeta,

2022-05-16 22:49:51

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5 v2] f2fs: do not stop GC when requiring a free section

On 05/15, Chao Yu wrote:
> On 2022/5/13 4:50, Jaegeuk Kim wrote:
> > The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
> > chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
> > segment as a victim all the time, resulting in checkpoint=disable failure,
> > for example. Let's pick another one, if we fail to collect it.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> >
> > Change log from v1:
> > - refactor the code path to avoid ambiguous condition like BG_GC/sec_freed
> >
> > fs/f2fs/f2fs.h | 1 +
> > fs/f2fs/file.c | 12 ++++++++----
> > fs/f2fs/gc.c | 14 +++++++++-----
> > fs/f2fs/segment.c | 3 ++-
> > fs/f2fs/super.c | 3 ++-
> > include/trace/events/f2fs.h | 11 ++++++++---
> > 6 files changed, 30 insertions(+), 14 deletions(-)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 9920b2d6af8f..492af5b96de1 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1271,6 +1271,7 @@ struct f2fs_gc_control {
> > bool no_bg_gc; /* check the space and stop bg_gc */
> > bool should_migrate_blocks; /* should migrate blocks */
> > bool err_gc_skipped; /* return EAGAIN if GC skipped */
> > + unsigned int nr_free_secs; /* # of free sections to do GC */
> > };
> > /* For s_flag in struct f2fs_sb_info */
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index d0547bef0851..216081ea8c81 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
> > struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 0 };
> > pgoff_t pg_start, pg_end;
> > loff_t new_size = i_size_read(inode);
> > loff_t off_end;
> > @@ -2350,7 +2351,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
> > struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
> > .no_bg_gc = false,
> > - .should_migrate_blocks = false };
> > + .should_migrate_blocks = false,
> > + .nr_free_secs = 0 };
> > __u32 sync;
> > int ret;
> > @@ -2391,7 +2393,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
> > .init_gc_type = range->sync ? FG_GC : BG_GC,
> > .no_bg_gc = false,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = range->sync };
> > + .err_gc_skipped = range->sync,
> > + .nr_free_secs = 0 };
> > u64 end;
> > int ret;
> > @@ -2837,7 +2840,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
> > struct f2fs_gc_control gc_control = {
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = true,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 0 };
> > int ret;
> > if (!capable(CAP_SYS_ADMIN))
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index e275b72bc65f..10b24b0f13a5 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -147,6 +147,7 @@ static int gc_thread_func(void *data)
> > gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
> > gc_control.no_bg_gc = foreground;
> > + gc_control.nr_free_secs = foreground ? 1 : 0;
> > /* if return value is not zero, no victim was selected */
> > if (f2fs_gc(sbi, &gc_control))
> > @@ -1761,6 +1762,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> > unsigned int skipped_round = 0, round = 0;
> > trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
> > + gc_control->nr_free_secs,
> > get_pages(sbi, F2FS_DIRTY_NODES),
> > get_pages(sbi, F2FS_DIRTY_DENTS),
> > get_pages(sbi, F2FS_DIRTY_IMETA),
> > @@ -1823,12 +1825,13 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> > if (gc_type == FG_GC)
> > sbi->cur_victim_sec = NULL_SEGNO;
> > - if (gc_control->init_gc_type == FG_GC)
> > - goto stop;
> > -
> > - if (!has_not_enough_free_secs(sbi,
> > - (gc_type == FG_GC) ? sec_freed : 0, 0))
> > + if (gc_control->init_gc_type == FG_GC ||
> > + !has_not_enough_free_secs(sbi,
> > + (gc_type == FG_GC) ? sec_freed : 0, 0)) {
>
> In all gc_control->init_gc_type = BG_GC cases, gc_control->no_bg_gc is true,
> if gc_type = BG_GC, then it should break out due to below condition.
>
> /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
> if (gc_type == BG_GC && gc_control->no_bg_gc) {
> ret = -EINVAL;
> goto stop;
> }
>
> Otherwise gc_type should always be FG_GC in
> !has_not_enough_free_secs(sbi, (gc_type == FG_GC) ? sec_freed : 0, 0), right?

We can have gc_type=BG_GC and gc_control->no_bg_gc=false, which is a normal
background GC path.

>
> Thanks,
>
> > + if (gc_mode == FG_GC && sec_freed < gc_control->nr_free_secs)
> > + goto go_gc_more;
> > goto stop;
> > + }
> > /* FG_GC stops GC by skip_count */
> > if (gc_type == FG_GC) {
> > @@ -1849,6 +1852,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
> > if (ret)
> > goto stop;
> > }
> > +go_gc_more:
> > segno = NULL_SEGNO;
> > goto gc_more;
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 8b4f2b1d2cca..0a4180f64291 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -404,7 +404,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
> > .init_gc_type = BG_GC,
> > .no_bg_gc = true,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = false };
> > + .err_gc_skipped = false,
> > + .nr_free_secs = 1 };
> > f2fs_down_write(&sbi->gc_lock);
> > f2fs_gc(sbi, &gc_control);
> > }
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index a28c27eed6d0..63daae67a9d9 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -2080,7 +2080,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
> > .victim_segno = NULL_SEGNO,
> > .init_gc_type = FG_GC,
> > .should_migrate_blocks = false,
> > - .err_gc_skipped = true };
> > + .err_gc_skipped = true,
> > + .nr_free_secs = 1 };
> > f2fs_down_write(&sbi->gc_lock);
> > err = f2fs_gc(sbi, &gc_control);
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 54ec9e543f09..16c67ede85b6 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -645,18 +645,21 @@ TRACE_EVENT(f2fs_background_gc,
> > TRACE_EVENT(f2fs_gc_begin,
> > TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
> > + unsigned int nr_free_secs,
> > long long dirty_nodes, long long dirty_dents,
> > long long dirty_imeta, unsigned int free_sec,
> > unsigned int free_seg, int reserved_seg,
> > unsigned int prefree_seg),
> > - TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
> > + TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
> > + dirty_dents, dirty_imeta,
> > free_sec, free_seg, reserved_seg, prefree_seg),
> > TP_STRUCT__entry(
> > __field(dev_t, dev)
> > __field(int, gc_type)
> > __field(bool, no_bg_gc)
> > + __field(unsigned int, nr_free_secs)
> > __field(long long, dirty_nodes)
> > __field(long long, dirty_dents)
> > __field(long long, dirty_imeta)
> > @@ -670,6 +673,7 @@ TRACE_EVENT(f2fs_gc_begin,
> > __entry->dev = sb->s_dev;
> > __entry->gc_type = gc_type;
> > __entry->no_bg_gc = no_bg_gc;
> > + __entry->nr_free_secs = nr_free_secs;
> > __entry->dirty_nodes = dirty_nodes;
> > __entry->dirty_dents = dirty_dents;
> > __entry->dirty_imeta = dirty_imeta;
> > @@ -679,12 +683,13 @@ TRACE_EVENT(f2fs_gc_begin,
> > __entry->prefree_seg = prefree_seg;
> > ),
> > - TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
> > - "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> > + TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
> > + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
> > "rsv_seg:%d, prefree_seg:%u",
> > show_dev(__entry->dev),
> > show_gc_type(__entry->gc_type),
> > (__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
> > + __entry->nr_free_secs,
> > __entry->dirty_nodes,
> > __entry->dirty_dents,
> > __entry->dirty_imeta,

2022-05-17 07:38:18

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5 v2] f2fs: do not stop GC when requiring a free section

On 2022/5/17 1:29, Jaegeuk Kim wrote:
> On 05/15, Chao Yu wrote:
>> On 2022/5/13 4:50, Jaegeuk Kim wrote:
>>> The f2fs_gc uses a bitmap to indicate pinned sections, but when disabling
>>> chckpoint, we call f2fs_gc() with NULL_SEGNO which selects the same dirty
>>> segment as a victim all the time, resulting in checkpoint=disable failure,
>>> for example. Let's pick another one, if we fail to collect it.
>>>
>>> Signed-off-by: Jaegeuk Kim <[email protected]>
>>> ---
>>>
>>> Change log from v1:
>>> - refactor the code path to avoid ambiguous condition like BG_GC/sec_freed
>>>
>>> fs/f2fs/f2fs.h | 1 +
>>> fs/f2fs/file.c | 12 ++++++++----
>>> fs/f2fs/gc.c | 14 +++++++++-----
>>> fs/f2fs/segment.c | 3 ++-
>>> fs/f2fs/super.c | 3 ++-
>>> include/trace/events/f2fs.h | 11 ++++++++---
>>> 6 files changed, 30 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 9920b2d6af8f..492af5b96de1 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1271,6 +1271,7 @@ struct f2fs_gc_control {
>>> bool no_bg_gc; /* check the space and stop bg_gc */
>>> bool should_migrate_blocks; /* should migrate blocks */
>>> bool err_gc_skipped; /* return EAGAIN if GC skipped */
>>> + unsigned int nr_free_secs; /* # of free sections to do GC */
>>> };
>>> /* For s_flag in struct f2fs_sb_info */
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index d0547bef0851..216081ea8c81 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -1650,7 +1650,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
>>> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
>>> .init_gc_type = FG_GC,
>>> .should_migrate_blocks = false,
>>> - .err_gc_skipped = true };
>>> + .err_gc_skipped = true,
>>> + .nr_free_secs = 0 };
>>> pgoff_t pg_start, pg_end;
>>> loff_t new_size = i_size_read(inode);
>>> loff_t off_end;
>>> @@ -2350,7 +2351,8 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>> struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
>>> .no_bg_gc = false,
>>> - .should_migrate_blocks = false };
>>> + .should_migrate_blocks = false,
>>> + .nr_free_secs = 0 };
>>> __u32 sync;
>>> int ret;
>>> @@ -2391,7 +2393,8 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
>>> .init_gc_type = range->sync ? FG_GC : BG_GC,
>>> .no_bg_gc = false,
>>> .should_migrate_blocks = false,
>>> - .err_gc_skipped = range->sync };
>>> + .err_gc_skipped = range->sync,
>>> + .nr_free_secs = 0 };
>>> u64 end;
>>> int ret;
>>> @@ -2837,7 +2840,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
>>> struct f2fs_gc_control gc_control = {
>>> .init_gc_type = FG_GC,
>>> .should_migrate_blocks = true,
>>> - .err_gc_skipped = true };
>>> + .err_gc_skipped = true,
>>> + .nr_free_secs = 0 };
>>> int ret;
>>> if (!capable(CAP_SYS_ADMIN))
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index e275b72bc65f..10b24b0f13a5 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -147,6 +147,7 @@ static int gc_thread_func(void *data)
>>> gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
>>> gc_control.no_bg_gc = foreground;
>>> + gc_control.nr_free_secs = foreground ? 1 : 0;
>>> /* if return value is not zero, no victim was selected */
>>> if (f2fs_gc(sbi, &gc_control))
>>> @@ -1761,6 +1762,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
>>> unsigned int skipped_round = 0, round = 0;
>>> trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
>>> + gc_control->nr_free_secs,
>>> get_pages(sbi, F2FS_DIRTY_NODES),
>>> get_pages(sbi, F2FS_DIRTY_DENTS),
>>> get_pages(sbi, F2FS_DIRTY_IMETA),
>>> @@ -1823,12 +1825,13 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
>>> if (gc_type == FG_GC)
>>> sbi->cur_victim_sec = NULL_SEGNO;
>>> - if (gc_control->init_gc_type == FG_GC)
>>> - goto stop;
>>> -
>>> - if (!has_not_enough_free_secs(sbi,
>>> - (gc_type == FG_GC) ? sec_freed : 0, 0))
>>> + if (gc_control->init_gc_type == FG_GC ||
>>> + !has_not_enough_free_secs(sbi,
>>> + (gc_type == FG_GC) ? sec_freed : 0, 0)) {
>>
>> In all gc_control->init_gc_type = BG_GC cases, gc_control->no_bg_gc is true,
>> if gc_type = BG_GC, then it should break out due to below condition.
>>
>> /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
>> if (gc_type == BG_GC && gc_control->no_bg_gc) {
>> ret = -EINVAL;
>> goto stop;
>> }
>>
>> Otherwise gc_type should always be FG_GC in
>> !has_not_enough_free_secs(sbi, (gc_type == FG_GC) ? sec_freed : 0, 0), right?
>
> We can have gc_type=BG_GC and gc_control->no_bg_gc=false, which is a normal
> background GC path.

Okay, actually if gc_type = BG_GC, sec_freed should be zero at the most time.

Looks fine to me.

Reviewed-by: Chao Yu <[email protected]>

Thanks,

>
>>
>> Thanks,
>>
>>> + if (gc_mode == FG_GC && sec_freed < gc_control->nr_free_secs)
>>> + goto go_gc_more;
>>> goto stop;
>>> + }
>>> /* FG_GC stops GC by skip_count */
>>> if (gc_type == FG_GC) {
>>> @@ -1849,6 +1852,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
>>> if (ret)
>>> goto stop;
>>> }
>>> +go_gc_more:
>>> segno = NULL_SEGNO;
>>> goto gc_more;
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 8b4f2b1d2cca..0a4180f64291 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -404,7 +404,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
>>> .init_gc_type = BG_GC,
>>> .no_bg_gc = true,
>>> .should_migrate_blocks = false,
>>> - .err_gc_skipped = false };
>>> + .err_gc_skipped = false,
>>> + .nr_free_secs = 1 };
>>> f2fs_down_write(&sbi->gc_lock);
>>> f2fs_gc(sbi, &gc_control);
>>> }
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index a28c27eed6d0..63daae67a9d9 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -2080,7 +2080,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
>>> .victim_segno = NULL_SEGNO,
>>> .init_gc_type = FG_GC,
>>> .should_migrate_blocks = false,
>>> - .err_gc_skipped = true };
>>> + .err_gc_skipped = true,
>>> + .nr_free_secs = 1 };
>>> f2fs_down_write(&sbi->gc_lock);
>>> err = f2fs_gc(sbi, &gc_control);
>>> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
>>> index 54ec9e543f09..16c67ede85b6 100644
>>> --- a/include/trace/events/f2fs.h
>>> +++ b/include/trace/events/f2fs.h
>>> @@ -645,18 +645,21 @@ TRACE_EVENT(f2fs_background_gc,
>>> TRACE_EVENT(f2fs_gc_begin,
>>> TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
>>> + unsigned int nr_free_secs,
>>> long long dirty_nodes, long long dirty_dents,
>>> long long dirty_imeta, unsigned int free_sec,
>>> unsigned int free_seg, int reserved_seg,
>>> unsigned int prefree_seg),
>>> - TP_ARGS(sb, gc_type, no_bg_gc, dirty_nodes, dirty_dents, dirty_imeta,
>>> + TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
>>> + dirty_dents, dirty_imeta,
>>> free_sec, free_seg, reserved_seg, prefree_seg),
>>> TP_STRUCT__entry(
>>> __field(dev_t, dev)
>>> __field(int, gc_type)
>>> __field(bool, no_bg_gc)
>>> + __field(unsigned int, nr_free_secs)
>>> __field(long long, dirty_nodes)
>>> __field(long long, dirty_dents)
>>> __field(long long, dirty_imeta)
>>> @@ -670,6 +673,7 @@ TRACE_EVENT(f2fs_gc_begin,
>>> __entry->dev = sb->s_dev;
>>> __entry->gc_type = gc_type;
>>> __entry->no_bg_gc = no_bg_gc;
>>> + __entry->nr_free_secs = nr_free_secs;
>>> __entry->dirty_nodes = dirty_nodes;
>>> __entry->dirty_dents = dirty_dents;
>>> __entry->dirty_imeta = dirty_imeta;
>>> @@ -679,12 +683,13 @@ TRACE_EVENT(f2fs_gc_begin,
>>> __entry->prefree_seg = prefree_seg;
>>> ),
>>> - TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nodes = %lld, "
>>> - "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
>>> + TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
>>> + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
>>> "rsv_seg:%d, prefree_seg:%u",
>>> show_dev(__entry->dev),
>>> show_gc_type(__entry->gc_type),
>>> (__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
>>> + __entry->nr_free_secs,
>>> __entry->dirty_nodes,
>>> __entry->dirty_dents,
>>> __entry->dirty_imeta,