2022-03-10 00:24:55

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/2] f2fs: evict inode cache for frozen fs

Let's purge inode cache in order to avoid the below deadlock.

[freeze test] shrinkder
freeze_super
- pwercpu_down_write(SB_FREEZE_FS)
- super_cache_scan
- down_read(&sb->s_umount)
- prune_icache_sb
- dispose_list
- evict
- f2fs_evict_inode
thaw_super
- down_write(&sb->s_umount);
- __percpu_down_read(SB_FREEZE_FS)

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/debug.c | 1 +
fs/f2fs/f2fs.h | 1 +
fs/f2fs/inode.c | 6 ++++--
fs/f2fs/super.c | 4 ++++
4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 9a13902c7702..cba5eab24595 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -338,6 +338,7 @@ static char *s_flag[] = {
[SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
[SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
[SBI_IS_RESIZEFS] = " resizefs",
+ [SBI_IS_FREEZING] = " freezefs",
};

static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 68d791ec8b27..da729f53daa8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1293,6 +1293,7 @@ enum {
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
SBI_IS_RESIZEFS, /* resizefs is in process */
+ SBI_IS_FREEZING, /* freezefs is in process */
};

enum {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ab8e0c06c78c..71f232dcf3c2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);

- sb_start_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
@@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode)
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
}
- sb_end_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8e3840973077..4b570b5c2674 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
/* ensure no checkpoint required */
if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
return -EINVAL;
+
+ /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
+ set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}

static int f2fs_unfreeze(struct super_block *sb)
{
+ clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}

--
2.35.1.616.g0bdcbb4464-goog


2022-03-10 10:23:56

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/2 v2] f2fs: do not expose unwritten blocks to user by DIO

DIO preallocates physical blocks before writing data, but if an error occurrs
or power-cut happens, we can see block contents from the disk. This patch tries
to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
unwritten blocks from error or power-cut.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
- log from v1: added documentation

fs/f2fs/data.c | 5 ++++-
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/file.c | 27 ++++++++++++++++++---------
fs/f2fs/inode.c | 8 ++++++++
4 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3db0f3049b90..9c867de1ec29 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6f196621f772..d7435fcb9658 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -654,6 +654,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80

#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)

@@ -681,6 +682,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)

+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0

enum {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 808a7c24d993..e1445cf915ea 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1687,6 +1687,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);

up_write(&sbi->pin_sem);

@@ -4257,6 +4258,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
/* If it will be an out-of-place direct write, don't bother. */
if (dio && f2fs_lfs_mode(sbi))
return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;

/* No-wait I/O can't allocate blocks. */
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -4292,8 +4300,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
}

ret = f2fs_map_blocks(inode, &map, 1, flag);
- /* -ENOSPC is only a fatal error if no blocks could be allocated. */
- if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
+ /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+ if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
return ret;
if (ret == 0)
set_inode_flag(inode, FI_PREALLOCATED_ALL);
@@ -4359,20 +4367,21 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
preallocated = f2fs_preallocate_blocks(iocb, from);
- if (preallocated < 0) {
+ if (preallocated < 0)
ret = preallocated;
- goto out_unlock;
- }
-
- ret = __generic_file_write_iter(iocb, from);
+ else
+ ret = __generic_file_write_iter(iocb, from);

/* Don't leave any preallocated blocks around past i_size. */
- if (preallocated > 0 && i_size_read(inode) < target_size) {
+ if (preallocated && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}

clear_inode_flag(inode, FI_PREALLOCATED_ALL);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f8b2df3e1e0..6998eb1d6bdb 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -544,6 +544,14 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
--
2.34.1.400.ga245620fadb-goog

2022-03-10 13:34:14

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/2] f2fs: use spin_lock to avoid hang

[14696.634553] task:cat state:D stack: 0 pid:1613738 ppid:1613735 flags:0x00000004
[14696.638285] Call Trace:
[14696.639038] <TASK>
[14696.640032] __schedule+0x302/0x930
[14696.640969] schedule+0x58/0xd0
[14696.641799] schedule_preempt_disabled+0x18/0x30
[14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.644035] ? mod_objcg_state+0x10c/0x310
[14696.645040] ? obj_cgroup_charge+0xe1/0x170
[14696.646067] __mutex_lock_slowpath+0x13/0x20
[14696.647126] mutex_lock+0x34/0x40
[14696.648070] stat_show+0x25/0x17c0 [f2fs]
[14696.649218] seq_read_iter+0x120/0x4b0
[14696.650289] ? aa_file_perm+0x12a/0x500
[14696.651357] ? lru_cache_add+0x1c/0x20
[14696.652470] seq_read+0xfd/0x140
[14696.653445] full_proxy_read+0x5c/0x80
[14696.654535] vfs_read+0xa0/0x1a0
[14696.655497] ksys_read+0x67/0xe0
[14696.656502] __x64_sys_read+0x1a/0x20
[14696.657580] do_syscall_64+0x3b/0xc0
[14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae
[14696.660068] RIP: 0033:0x7efe39df1cb2
[14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2
[14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003
[14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010
[14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000
[14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[14696.671965] </TASK>
[14696.672826] task:umount state:D stack: 0 pid:1614985 ppid:1614984 flags:0x00004000
[14696.674930] Call Trace:
[14696.675903] <TASK>
[14696.676780] __schedule+0x302/0x930
[14696.677927] schedule+0x58/0xd0
[14696.679019] schedule_preempt_disabled+0x18/0x30
[14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0
[14696.681783] ? destroy_inode+0x65/0x80
[14696.683006] __mutex_lock_slowpath+0x13/0x20
[14696.684305] mutex_lock+0x34/0x40
[14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs]
[14696.686803] f2fs_put_super+0x158/0x390 [f2fs]
[14696.688238] generic_shutdown_super+0x7a/0x120
[14696.689621] kill_block_super+0x27/0x50
[14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs]
[14696.692311] deactivate_locked_super+0x35/0xa0
[14696.693698] deactivate_super+0x40/0x50
[14696.694985] cleanup_mnt+0x139/0x190
[14696.696209] __cleanup_mnt+0x12/0x20
[14696.697390] task_work_run+0x64/0xa0
[14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0
[14696.700053] syscall_exit_to_user_mode+0x27/0x50
[14696.701418] do_syscall_64+0x48/0xc0
[14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/debug.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index cba5eab24595..6d26872c7364 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,7 +21,7 @@
#include "gc.h"

static LIST_HEAD(f2fs_stat_list);
-static DEFINE_MUTEX(f2fs_stat_mutex);
+static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
#ifdef CONFIG_DEBUG_FS
static struct dentry *f2fs_debugfs_root;
#endif
@@ -345,8 +345,9 @@ static int stat_show(struct seq_file *s, void *v)
{
struct f2fs_stat_info *si;
int i = 0, j = 0;
+ unsigned long flags;

- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);

@@ -577,7 +578,7 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0;
}

@@ -588,6 +589,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
+ unsigned long flags;
int i;

si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -623,9 +625,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);

- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_add_tail(&si->stat_list, &f2fs_stat_list);
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);

return 0;
}
@@ -633,10 +635,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ unsigned long flags;

- mutex_lock(&f2fs_stat_mutex);
+ raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_del(&si->stat_list);
- mutex_unlock(&f2fs_stat_mutex);
+ raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);

kfree(si);
}
--
2.35.1.616.g0bdcbb4464-goog

2022-03-10 14:36:43

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: evict inode cache for frozen fs

On 2022/3/10 5:48, Jaegeuk Kim wrote:
> Let's purge inode cache in order to avoid the below deadlock.
>
> [freeze test] shrinkder
> freeze_super
> - pwercpu_down_write(SB_FREEZE_FS)
> - super_cache_scan
> - down_read(&sb->s_umount)
> - prune_icache_sb
> - dispose_list
> - evict
> - f2fs_evict_inode
> thaw_super
> - down_write(&sb->s_umount);
> - __percpu_down_read(SB_FREEZE_FS)

Ah, finally we catch this. :)

>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/debug.c | 1 +
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/inode.c | 6 ++++--
> fs/f2fs/super.c | 4 ++++
> 4 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 9a13902c7702..cba5eab24595 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -338,6 +338,7 @@ static char *s_flag[] = {
> [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
> [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
> [SBI_IS_RESIZEFS] = " resizefs",
> + [SBI_IS_FREEZING] = " freezefs",

Could you please update description of "sb_status" entry in
Documentation/ABI/testing/sysfs-fs-f2fs as well?

Thanks,

> };
>
> static int stat_show(struct seq_file *s, void *v)
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 68d791ec8b27..da729f53daa8 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1293,6 +1293,7 @@ enum {
> SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
> SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
> SBI_IS_RESIZEFS, /* resizefs is in process */
> + SBI_IS_FREEZING, /* freezefs is in process */
> };
>
> enum {
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index ab8e0c06c78c..71f232dcf3c2 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
> f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
>
> - sb_start_intwrite(inode->i_sb);
> + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
> + sb_start_intwrite(inode->i_sb);
> set_inode_flag(inode, FI_NO_ALLOC);
> i_size_write(inode, 0);
> retry:
> @@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode)
> if (dquot_initialize_needed(inode))
> set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> }
> - sb_end_intwrite(inode->i_sb);
> + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
> + sb_end_intwrite(inode->i_sb);
> no_delete:
> dquot_drop(inode);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 8e3840973077..4b570b5c2674 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
> /* ensure no checkpoint required */
> if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
> return -EINVAL;
> +
> + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
> + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
> return 0;
> }
>
> static int f2fs_unfreeze(struct super_block *sb)
> {
> + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
> return 0;
> }
>

2022-03-11 07:02:32

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: use spin_lock to avoid hang

On 2022/3/10 5:48, Jaegeuk Kim wrote:
> [14696.634553] task:cat state:D stack: 0 pid:1613738 ppid:1613735 flags:0x00000004
> [14696.638285] Call Trace:
> [14696.639038] <TASK>
> [14696.640032] __schedule+0x302/0x930
> [14696.640969] schedule+0x58/0xd0
> [14696.641799] schedule_preempt_disabled+0x18/0x30
> [14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0
> [14696.644035] ? mod_objcg_state+0x10c/0x310
> [14696.645040] ? obj_cgroup_charge+0xe1/0x170
> [14696.646067] __mutex_lock_slowpath+0x13/0x20
> [14696.647126] mutex_lock+0x34/0x40
> [14696.648070] stat_show+0x25/0x17c0 [f2fs]
> [14696.649218] seq_read_iter+0x120/0x4b0
> [14696.650289] ? aa_file_perm+0x12a/0x500
> [14696.651357] ? lru_cache_add+0x1c/0x20
> [14696.652470] seq_read+0xfd/0x140
> [14696.653445] full_proxy_read+0x5c/0x80
> [14696.654535] vfs_read+0xa0/0x1a0
> [14696.655497] ksys_read+0x67/0xe0
> [14696.656502] __x64_sys_read+0x1a/0x20
> [14696.657580] do_syscall_64+0x3b/0xc0
> [14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae
> [14696.660068] RIP: 0033:0x7efe39df1cb2
> [14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
> [14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2
> [14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003
> [14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010
> [14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000
> [14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
> [14696.671965] </TASK>
> [14696.672826] task:umount state:D stack: 0 pid:1614985 ppid:1614984 flags:0x00004000
> [14696.674930] Call Trace:
> [14696.675903] <TASK>
> [14696.676780] __schedule+0x302/0x930
> [14696.677927] schedule+0x58/0xd0
> [14696.679019] schedule_preempt_disabled+0x18/0x30
> [14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0
> [14696.681783] ? destroy_inode+0x65/0x80
> [14696.683006] __mutex_lock_slowpath+0x13/0x20
> [14696.684305] mutex_lock+0x34/0x40
> [14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs]
> [14696.686803] f2fs_put_super+0x158/0x390 [f2fs]
> [14696.688238] generic_shutdown_super+0x7a/0x120
> [14696.689621] kill_block_super+0x27/0x50
> [14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs]
> [14696.692311] deactivate_locked_super+0x35/0xa0
> [14696.693698] deactivate_super+0x40/0x50
> [14696.694985] cleanup_mnt+0x139/0x190
> [14696.696209] __cleanup_mnt+0x12/0x20
> [14696.697390] task_work_run+0x64/0xa0
> [14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0
> [14696.700053] syscall_exit_to_user_mode+0x27/0x50
> [14696.701418] do_syscall_64+0x48/0xc0
> [14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae

Any race case here? I didn't catch the root cause here...
Thanks,

2022-03-11 22:15:19

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: evict inode cache for frozen fs

Jaegeuk,

Could you please send v2 patch to mailing list? as I saw the revised
one has been merged in dev branch.

Otherwise, I've no idea where I should reply "Reviewed-by" tag to...

Thanks,

On 2022/3/10 9:53, Chao Yu wrote:
> On 2022/3/10 5:48, Jaegeuk Kim wrote:
>> Let's purge inode cache in order to avoid the below deadlock.
>>
>> [freeze test]                         shrinkder
>> freeze_super
>>   - pwercpu_down_write(SB_FREEZE_FS)
>>                                         - super_cache_scan
>>                                           - down_read(&sb->s_umount)
>>                                             - prune_icache_sb
>>                                              - dispose_list
>>                                               - evict
>>                                                - f2fs_evict_inode
>> thaw_super
>>   - down_write(&sb->s_umount);
>>                                                - __percpu_down_read(SB_FREEZE_FS)
>
> Ah, finally we catch this. :)
>
>>
>> Signed-off-by: Jaegeuk Kim <[email protected]>
>> ---
>>   fs/f2fs/debug.c | 1 +
>>   fs/f2fs/f2fs.h  | 1 +
>>   fs/f2fs/inode.c | 6 ++++--
>>   fs/f2fs/super.c | 4 ++++
>>   4 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
>> index 9a13902c7702..cba5eab24595 100644
>> --- a/fs/f2fs/debug.c
>> +++ b/fs/f2fs/debug.c
>> @@ -338,6 +338,7 @@ static char *s_flag[] = {
>>       [SBI_QUOTA_SKIP_FLUSH]    = " quota_skip_flush",
>>       [SBI_QUOTA_NEED_REPAIR]    = " quota_need_repair",
>>       [SBI_IS_RESIZEFS]    = " resizefs",
>> +    [SBI_IS_FREEZING]    = " freezefs",
>
> Could you please update description of "sb_status" entry in
> Documentation/ABI/testing/sysfs-fs-f2fs as well?
>
> Thanks,
>
>>   };
>>   static int stat_show(struct seq_file *s, void *v)
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 68d791ec8b27..da729f53daa8 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1293,6 +1293,7 @@ enum {
>>       SBI_QUOTA_SKIP_FLUSH,            /* skip flushing quota in current CP */
>>       SBI_QUOTA_NEED_REPAIR,            /* quota file may be corrupted */
>>       SBI_IS_RESIZEFS,            /* resizefs is in process */
>> +    SBI_IS_FREEZING,            /* freezefs is in process */
>>   };
>>   enum {
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index ab8e0c06c78c..71f232dcf3c2 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
>>       f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>>       f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
>> -    sb_start_intwrite(inode->i_sb);
>> +    if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
>> +        sb_start_intwrite(inode->i_sb);
>>       set_inode_flag(inode, FI_NO_ALLOC);
>>       i_size_write(inode, 0);
>>   retry:
>> @@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode)
>>           if (dquot_initialize_needed(inode))
>>               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
>>       }
>> -    sb_end_intwrite(inode->i_sb);
>> +    if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
>> +        sb_end_intwrite(inode->i_sb);
>>   no_delete:
>>       dquot_drop(inode);
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 8e3840973077..4b570b5c2674 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
>>       /* ensure no checkpoint required */
>>       if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
>>           return -EINVAL;
>> +
>> +    /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
>> +    set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
>>       return 0;
>>   }
>>   static int f2fs_unfreeze(struct super_block *sb)
>>   {
>> +    clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
>>       return 0;
>>   }
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2022-03-11 22:18:39

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/2 v2] f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs

Let's purge inode cache in order to avoid the below deadlock.

[freeze test] shrinkder
freeze_super
- pwercpu_down_write(SB_FREEZE_FS)
- super_cache_scan
- down_read(&sb->s_umount)
- prune_icache_sb
- dispose_list
- evict
- f2fs_evict_inode
thaw_super
- down_write(&sb->s_umount);
- __percpu_down_read(SB_FREEZE_FS)

Signed-off-by: Jaegeuk Kim <[email protected]>
---

Change log from v1:
- add doc

Documentation/ABI/testing/sysfs-fs-f2fs | 1 +
fs/f2fs/debug.c | 1 +
fs/f2fs/f2fs.h | 1 +
fs/f2fs/inode.c | 6 ++++--
fs/f2fs/super.c | 4 ++++
5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 58bf0dc83712..5a5f3c5445f6 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -458,6 +458,7 @@ Description: Show status of f2fs superblock in real time.
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
0x2000 SBI_IS_RESIZEFS resizefs is in process
+ 0x4000 SBI_IS_FREEZING freefs is in process
====== ===================== =================================

What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 9a13902c7702..cba5eab24595 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -338,6 +338,7 @@ static char *s_flag[] = {
[SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
[SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
[SBI_IS_RESIZEFS] = " resizefs",
+ [SBI_IS_FREEZING] = " freezefs",
};

static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 68d791ec8b27..da729f53daa8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1293,6 +1293,7 @@ enum {
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
SBI_IS_RESIZEFS, /* resizefs is in process */
+ SBI_IS_FREEZING, /* freezefs is in process */
};

enum {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ab8e0c06c78c..71f232dcf3c2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);

- sb_start_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
@@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode)
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
}
- sb_end_intwrite(inode->i_sb);
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8e3840973077..4b570b5c2674 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
/* ensure no checkpoint required */
if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
return -EINVAL;
+
+ /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
+ set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}

static int f2fs_unfreeze(struct super_block *sb)
{
+ clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
return 0;
}

--
2.35.1.723.g4982287a31-goog

2022-03-11 22:24:51

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/2] f2fs: evict inode cache for frozen fs

On 03/11, Chao Yu wrote:
> Jaegeuk,
>
> Could you please send v2 patch to mailing list? as I saw the revised
> one has been merged in dev branch.

Oops, it seems I sent a wrong patch as v2. I sent it again.

>
> Otherwise, I've no idea where I should reply "Reviewed-by" tag to...
>
> Thanks,
>
> On 2022/3/10 9:53, Chao Yu wrote:
> > On 2022/3/10 5:48, Jaegeuk Kim wrote:
> > > Let's purge inode cache in order to avoid the below deadlock.
> > >
> > > [freeze test]???????????????????????? shrinkder
> > > freeze_super
> > > ? - pwercpu_down_write(SB_FREEZE_FS)
> > > ??????????????????????????????????????? - super_cache_scan
> > > ????????????????????????????????????????? - down_read(&sb->s_umount)
> > > ??????????????????????????????????????????? - prune_icache_sb
> > > ???????????????????????????????????????????? - dispose_list
> > > ????????????????????????????????????????????? - evict
> > > ?????????????????????????????????????????????? - f2fs_evict_inode
> > > thaw_super
> > > ? - down_write(&sb->s_umount);
> > > ?????????????????????????????????????????????? - __percpu_down_read(SB_FREEZE_FS)
> >
> > Ah, finally we catch this. :)
> >
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > ---
> > > ? fs/f2fs/debug.c | 1 +
> > > ? fs/f2fs/f2fs.h? | 1 +
> > > ? fs/f2fs/inode.c | 6 ++++--
> > > ? fs/f2fs/super.c | 4 ++++
> > > ? 4 files changed, 10 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> > > index 9a13902c7702..cba5eab24595 100644
> > > --- a/fs/f2fs/debug.c
> > > +++ b/fs/f2fs/debug.c
> > > @@ -338,6 +338,7 @@ static char *s_flag[] = {
> > > ????? [SBI_QUOTA_SKIP_FLUSH]??? = " quota_skip_flush",
> > > ????? [SBI_QUOTA_NEED_REPAIR]??? = " quota_need_repair",
> > > ????? [SBI_IS_RESIZEFS]??? = " resizefs",
> > > +??? [SBI_IS_FREEZING]??? = " freezefs",
> >
> > Could you please update description of "sb_status" entry in
> > Documentation/ABI/testing/sysfs-fs-f2fs as well?
> >
> > Thanks,
> >
> > > ? };
> > > ? static int stat_show(struct seq_file *s, void *v)
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index 68d791ec8b27..da729f53daa8 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -1293,6 +1293,7 @@ enum {
> > > ????? SBI_QUOTA_SKIP_FLUSH,??????????? /* skip flushing quota in current CP */
> > > ????? SBI_QUOTA_NEED_REPAIR,??????????? /* quota file may be corrupted */
> > > ????? SBI_IS_RESIZEFS,??????????? /* resizefs is in process */
> > > +??? SBI_IS_FREEZING,??????????? /* freezefs is in process */
> > > ? };
> > > ? enum {
> > > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > > index ab8e0c06c78c..71f232dcf3c2 100644
> > > --- a/fs/f2fs/inode.c
> > > +++ b/fs/f2fs/inode.c
> > > @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode)
> > > ????? f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
> > > ????? f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
> > > -??? sb_start_intwrite(inode->i_sb);
> > > +??? if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
> > > +??????? sb_start_intwrite(inode->i_sb);
> > > ????? set_inode_flag(inode, FI_NO_ALLOC);
> > > ????? i_size_write(inode, 0);
> > > ? retry:
> > > @@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode)
> > > ????????? if (dquot_initialize_needed(inode))
> > > ????????????? set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
> > > ????? }
> > > -??? sb_end_intwrite(inode->i_sb);
> > > +??? if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
> > > +??????? sb_end_intwrite(inode->i_sb);
> > > ? no_delete:
> > > ????? dquot_drop(inode);
> > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > index 8e3840973077..4b570b5c2674 100644
> > > --- a/fs/f2fs/super.c
> > > +++ b/fs/f2fs/super.c
> > > @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb)
> > > ????? /* ensure no checkpoint required */
> > > ????? if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list))
> > > ????????? return -EINVAL;
> > > +
> > > +??? /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */
> > > +??? set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
> > > ????? return 0;
> > > ? }
> > > ? static int f2fs_unfreeze(struct super_block *sb)
> > > ? {
> > > +??? clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING);
> > > ????? return 0;
> > > ? }
> >
> >
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2022-03-17 06:05:03

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/2] f2fs: use spin_lock to avoid hang

On 2022/3/10 5:48, Jaegeuk Kim wrote:
> [14696.634553] task:cat state:D stack: 0 pid:1613738 ppid:1613735 flags:0x00000004
> [14696.638285] Call Trace:
> [14696.639038] <TASK>
> [14696.640032] __schedule+0x302/0x930
> [14696.640969] schedule+0x58/0xd0
> [14696.641799] schedule_preempt_disabled+0x18/0x30
> [14696.642890] __mutex_lock.constprop.0+0x2fb/0x4f0
> [14696.644035] ? mod_objcg_state+0x10c/0x310
> [14696.645040] ? obj_cgroup_charge+0xe1/0x170
> [14696.646067] __mutex_lock_slowpath+0x13/0x20
> [14696.647126] mutex_lock+0x34/0x40
> [14696.648070] stat_show+0x25/0x17c0 [f2fs]
> [14696.649218] seq_read_iter+0x120/0x4b0
> [14696.650289] ? aa_file_perm+0x12a/0x500
> [14696.651357] ? lru_cache_add+0x1c/0x20
> [14696.652470] seq_read+0xfd/0x140
> [14696.653445] full_proxy_read+0x5c/0x80
> [14696.654535] vfs_read+0xa0/0x1a0
> [14696.655497] ksys_read+0x67/0xe0
> [14696.656502] __x64_sys_read+0x1a/0x20
> [14696.657580] do_syscall_64+0x3b/0xc0
> [14696.658671] entry_SYSCALL_64_after_hwframe+0x44/0xae
> [14696.660068] RIP: 0033:0x7efe39df1cb2
> [14696.661133] RSP: 002b:00007ffc8badd948 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
> [14696.662958] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007efe39df1cb2
> [14696.664757] RDX: 0000000000020000 RSI: 00007efe399df000 RDI: 0000000000000003
> [14696.666542] RBP: 00007efe399df000 R08: 00007efe399de010 R09: 00007efe399de010
> [14696.668363] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000000000
> [14696.670155] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
> [14696.671965] </TASK>
> [14696.672826] task:umount state:D stack: 0 pid:1614985 ppid:1614984 flags:0x00004000
> [14696.674930] Call Trace:
> [14696.675903] <TASK>
> [14696.676780] __schedule+0x302/0x930
> [14696.677927] schedule+0x58/0xd0
> [14696.679019] schedule_preempt_disabled+0x18/0x30
> [14696.680412] __mutex_lock.constprop.0+0x2fb/0x4f0
> [14696.681783] ? destroy_inode+0x65/0x80
> [14696.683006] __mutex_lock_slowpath+0x13/0x20
> [14696.684305] mutex_lock+0x34/0x40
> [14696.685442] f2fs_destroy_stats+0x1e/0x60 [f2fs]
> [14696.686803] f2fs_put_super+0x158/0x390 [f2fs]
> [14696.688238] generic_shutdown_super+0x7a/0x120
> [14696.689621] kill_block_super+0x27/0x50
> [14696.690894] kill_f2fs_super+0x7f/0x100 [f2fs]
> [14696.692311] deactivate_locked_super+0x35/0xa0
> [14696.693698] deactivate_super+0x40/0x50
> [14696.694985] cleanup_mnt+0x139/0x190
> [14696.696209] __cleanup_mnt+0x12/0x20
> [14696.697390] task_work_run+0x64/0xa0
> [14696.698587] exit_to_user_mode_prepare+0x1b7/0x1c0
> [14696.700053] syscall_exit_to_user_mode+0x27/0x50
> [14696.701418] do_syscall_64+0x48/0xc0
> [14696.702630] entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,