2018-09-28 02:36:33

by Sheng Yong

[permalink] [raw]
Subject: [PATCH] f2fs: cleanup dirty pages if recover failed

During recover, we will try to create new dentries for inodes with
dentry_mark. But if the parent is missing (e.g. killed by fsck),
recover will break. But those recovered dirty pages are not cleanup.
This will hit f2fs_bug_on:

[ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint
[ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3
[ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2
[ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2
[ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295
[ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320
[ 53.546174] Modules linked in:
[ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1
[ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320
[ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8
[ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282
[ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33
[ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590
[ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3
[ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff
[ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009
[ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000
[ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0
[ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 53.546242] Call Trace:
[ 53.546248] f2fs_submit_page_bio+0x95/0x740
[ 53.546253] read_node_page+0x161/0x1e0
[ 53.546271] ? truncate_node+0x650/0x650
[ 53.546283] ? add_to_page_cache_lru+0x12c/0x170
[ 53.546288] ? pagecache_get_page+0x262/0x2d0
[ 53.546292] __get_node_page+0x200/0x660
[ 53.546302] f2fs_update_inode_page+0x4a/0x160
[ 53.546306] f2fs_write_inode+0x86/0xb0
[ 53.546317] __writeback_single_inode+0x49c/0x620
[ 53.546322] writeback_single_inode+0xe4/0x1e0
[ 53.546326] sync_inode_metadata+0x93/0xd0
[ 53.546330] ? sync_inode+0x10/0x10
[ 53.546342] ? do_raw_spin_unlock+0xed/0x100
[ 53.546347] f2fs_sync_inode_meta+0xe0/0x130
[ 53.546351] f2fs_fill_super+0x287d/0x2d10
[ 53.546367] ? vsnprintf+0x742/0x7a0
[ 53.546372] ? f2fs_commit_super+0x180/0x180
[ 53.546379] ? up_write+0x20/0x40
[ 53.546385] ? set_blocksize+0x5f/0x140
[ 53.546391] ? f2fs_commit_super+0x180/0x180
[ 53.546402] mount_bdev+0x181/0x200
[ 53.546406] mount_fs+0x94/0x180
[ 53.546411] vfs_kern_mount+0x6c/0x1e0
[ 53.546415] do_mount+0xe5e/0x1510
[ 53.546420] ? fs_reclaim_release+0x9/0x30
[ 53.546424] ? copy_mount_string+0x20/0x20
[ 53.546428] ? fs_reclaim_acquire+0xd/0x30
[ 53.546435] ? __might_sleep+0x2c/0xc0
[ 53.546440] ? ___might_sleep+0x53/0x170
[ 53.546453] ? __might_fault+0x4c/0x60
[ 53.546468] ? _copy_from_user+0x95/0xa0
[ 53.546474] ? memdup_user+0x39/0x60
[ 53.546478] ksys_mount+0x88/0xb0
[ 53.546482] __x64_sys_mount+0x5d/0x70
[ 53.546495] do_syscall_64+0x65/0x130
[ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 53.547639] ---[ end trace b804d1ea2fec893e ]---

So if recover fails, we need to drop all recovered data.

Signed-off-by: Sheng Yong <[email protected]>
---
fs/f2fs/recovery.c | 19 ++++++++++++-------
fs/f2fs/super.c | 15 ++++++++++++++-
2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fb24a6d734e9..064b91544a84 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -96,8 +96,12 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
return ERR_PTR(err);
}

-static void del_fsync_inode(struct fsync_inode_entry *entry)
+static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
{
+ if (drop) {
+ make_bad_inode(entry->inode);
+ f2fs_inode_synced(entry->inode);
+ }
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
@@ -337,12 +341,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
return err;
}

-static void destroy_fsync_dnodes(struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head, int drop)
{
struct fsync_inode_entry *entry, *tmp;

list_for_each_entry_safe(entry, tmp, head, list)
- del_fsync_inode(entry);
+ del_fsync_inode(entry, drop);
}

static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -631,7 +635,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
}

if (entry->blkaddr == blkaddr)
- del_fsync_inode(entry);
+ del_fsync_inode(entry, 0);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@@ -697,7 +701,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
skip:
- destroy_fsync_dnodes(&inode_list);
+ destroy_fsync_dnodes(&inode_list, err);

/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
@@ -706,13 +710,14 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
+ } else {
+ clear_sbi_flag(sbi, SBI_POR_DOING);
}

- clear_sbi_flag(sbi, SBI_POR_DOING);
mutex_unlock(&sbi->cp_mutex);

/* let's drop all the directory inodes for clean checkpoint */
- destroy_fsync_dnodes(&dir_list);
+ destroy_fsync_dnodes(&dir_list, err);

if (need_writecp) {
set_sbi_flag(sbi, SBI_IS_RECOVERED);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c47b1ef2685a..9cc3c43a0b35 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1883,6 +1883,19 @@ void f2fs_quota_off_umount(struct super_block *sb)
}
}

+static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!dqopt->files[type])
+ continue;
+ f2fs_inode_synced(dqopt->files[type]);
+ truncate_inode_pages_final(dqopt->files[type]->i_mapping);
+ }
+}
+
static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
{
*projid = F2FS_I(inode)->i_projid;
@@ -3135,10 +3148,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)

free_meta:
#ifdef CONFIG_QUOTA
+ f2fs_truncate_quota_inode_pages(sb);
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
- f2fs_sync_inode_meta(sbi);
/*
* Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
--
2.17.1



2018-09-29 10:58:19

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH] f2fs: cleanup dirty pages if recover failed

On 2018/9/28 10:34, Sheng Yong wrote:
> During recover, we will try to create new dentries for inodes with
> dentry_mark. But if the parent is missing (e.g. killed by fsck),
> recover will break. But those recovered dirty pages are not cleanup.
> This will hit f2fs_bug_on:
>
> [ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint
> [ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3
> [ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2
> [ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2
> [ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295
> [ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320
> [ 53.546174] Modules linked in:
> [ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1
> [ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
> [ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320
> [ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8
> [ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282
> [ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33
> [ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590
> [ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3
> [ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff
> [ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009
> [ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000
> [ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0
> [ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 53.546242] Call Trace:
> [ 53.546248] f2fs_submit_page_bio+0x95/0x740
> [ 53.546253] read_node_page+0x161/0x1e0
> [ 53.546271] ? truncate_node+0x650/0x650
> [ 53.546283] ? add_to_page_cache_lru+0x12c/0x170
> [ 53.546288] ? pagecache_get_page+0x262/0x2d0
> [ 53.546292] __get_node_page+0x200/0x660
> [ 53.546302] f2fs_update_inode_page+0x4a/0x160
> [ 53.546306] f2fs_write_inode+0x86/0xb0
> [ 53.546317] __writeback_single_inode+0x49c/0x620
> [ 53.546322] writeback_single_inode+0xe4/0x1e0
> [ 53.546326] sync_inode_metadata+0x93/0xd0
> [ 53.546330] ? sync_inode+0x10/0x10
> [ 53.546342] ? do_raw_spin_unlock+0xed/0x100
> [ 53.546347] f2fs_sync_inode_meta+0xe0/0x130
> [ 53.546351] f2fs_fill_super+0x287d/0x2d10
> [ 53.546367] ? vsnprintf+0x742/0x7a0
> [ 53.546372] ? f2fs_commit_super+0x180/0x180
> [ 53.546379] ? up_write+0x20/0x40
> [ 53.546385] ? set_blocksize+0x5f/0x140
> [ 53.546391] ? f2fs_commit_super+0x180/0x180
> [ 53.546402] mount_bdev+0x181/0x200
> [ 53.546406] mount_fs+0x94/0x180
> [ 53.546411] vfs_kern_mount+0x6c/0x1e0
> [ 53.546415] do_mount+0xe5e/0x1510
> [ 53.546420] ? fs_reclaim_release+0x9/0x30
> [ 53.546424] ? copy_mount_string+0x20/0x20
> [ 53.546428] ? fs_reclaim_acquire+0xd/0x30
> [ 53.546435] ? __might_sleep+0x2c/0xc0
> [ 53.546440] ? ___might_sleep+0x53/0x170
> [ 53.546453] ? __might_fault+0x4c/0x60
> [ 53.546468] ? _copy_from_user+0x95/0xa0
> [ 53.546474] ? memdup_user+0x39/0x60
> [ 53.546478] ksys_mount+0x88/0xb0
> [ 53.546482] __x64_sys_mount+0x5d/0x70
> [ 53.546495] do_syscall_64+0x65/0x130
> [ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 53.547639] ---[ end trace b804d1ea2fec893e ]---
>
> So if recover fails, we need to drop all recovered data.
>
> Signed-off-by: Sheng Yong <[email protected]>
> ---
> fs/f2fs/recovery.c | 19 ++++++++++++-------
> fs/f2fs/super.c | 15 ++++++++++++++-
> 2 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index fb24a6d734e9..064b91544a84 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -96,8 +96,12 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
> return ERR_PTR(err);
> }
>
> -static void del_fsync_inode(struct fsync_inode_entry *entry)
> +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
> {
> + if (drop) {
> + make_bad_inode(entry->inode);
> + f2fs_inode_synced(entry->inode);
> + }
> iput(entry->inode);
> list_del(&entry->list);
> kmem_cache_free(fsync_entry_slab, entry);
> @@ -337,12 +341,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
> return err;
> }
>
> -static void destroy_fsync_dnodes(struct list_head *head)
> +static void destroy_fsync_dnodes(struct list_head *head, int drop)
> {
> struct fsync_inode_entry *entry, *tmp;
>
> list_for_each_entry_safe(entry, tmp, head, list)
> - del_fsync_inode(entry);
> + del_fsync_inode(entry, drop);
> }
>
> static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
> @@ -631,7 +635,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
> }
>
> if (entry->blkaddr == blkaddr)
> - del_fsync_inode(entry);
> + del_fsync_inode(entry, 0);
> next:
> /* check next segment */
> blkaddr = next_blkaddr_of_node(page);
> @@ -697,7 +701,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
> if (!err)
> f2fs_bug_on(sbi, !list_empty(&inode_list));
> skip:
> - destroy_fsync_dnodes(&inode_list);
> + destroy_fsync_dnodes(&inode_list, err);
>
> /* truncate meta pages to be used by the recovery */
> truncate_inode_pages_range(META_MAPPING(sbi),
> @@ -706,13 +710,14 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
> if (err) {
> truncate_inode_pages_final(NODE_MAPPING(sbi));
> truncate_inode_pages_final(META_MAPPING(sbi));

How about removing SB_ACTIVE before destroy_fsync_dnodes() to let iput
trash all dirty data in inode.

Once more thing is, do we need to track all orphan inode in a list, once we
fail in later flow, we can remove SB_ACTIVE to let iput evicting inode
immediately, instead there are still dirty inode tracked in
sbi->inode_list[DIRTY_META] list.

Thanks,

> + } else {
> + clear_sbi_flag(sbi, SBI_POR_DOING);
> }
>
> - clear_sbi_flag(sbi, SBI_POR_DOING);
> mutex_unlock(&sbi->cp_mutex);
>
> /* let's drop all the directory inodes for clean checkpoint */
> - destroy_fsync_dnodes(&dir_list);
> + destroy_fsync_dnodes(&dir_list, err);
>
> if (need_writecp) {
> set_sbi_flag(sbi, SBI_IS_RECOVERED);
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index c47b1ef2685a..9cc3c43a0b35 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1883,6 +1883,19 @@ void f2fs_quota_off_umount(struct super_block *sb)
> }
> }
>
> +static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
> +{
> + struct quota_info *dqopt = sb_dqopt(sb);
> + int type;
> +
> + for (type = 0; type < MAXQUOTAS; type++) {
> + if (!dqopt->files[type])
> + continue;
> + f2fs_inode_synced(dqopt->files[type]);
> + truncate_inode_pages_final(dqopt->files[type]->i_mapping);
> + }
> +}
> +
> static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
> {
> *projid = F2FS_I(inode)->i_projid;
> @@ -3135,10 +3148,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>
> free_meta:
> #ifdef CONFIG_QUOTA
> + f2fs_truncate_quota_inode_pages(sb);
> if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
> f2fs_quota_off_umount(sbi->sb);
> #endif
> - f2fs_sync_inode_meta(sbi);
> /*
> * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
> * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
>


2018-10-12 10:51:52

by Sheng Yong

[permalink] [raw]
Subject: [PATCH v2] f2fs: cleanup dirty pages if recover failed

During recover, we will try to create new dentries for inodes with
dentry_mark. But if the parent is missing (e.g. killed by fsck),
recover will break. But those recovered dirty pages are not cleanup.
This will hit f2fs_bug_on:

[ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint
[ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3
[ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2
[ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2
[ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295
[ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320
[ 53.546174] Modules linked in:
[ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1
[ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320
[ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8
[ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282
[ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33
[ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590
[ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3
[ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff
[ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009
[ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000
[ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0
[ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 53.546242] Call Trace:
[ 53.546248] f2fs_submit_page_bio+0x95/0x740
[ 53.546253] read_node_page+0x161/0x1e0
[ 53.546271] ? truncate_node+0x650/0x650
[ 53.546283] ? add_to_page_cache_lru+0x12c/0x170
[ 53.546288] ? pagecache_get_page+0x262/0x2d0
[ 53.546292] __get_node_page+0x200/0x660
[ 53.546302] f2fs_update_inode_page+0x4a/0x160
[ 53.546306] f2fs_write_inode+0x86/0xb0
[ 53.546317] __writeback_single_inode+0x49c/0x620
[ 53.546322] writeback_single_inode+0xe4/0x1e0
[ 53.546326] sync_inode_metadata+0x93/0xd0
[ 53.546330] ? sync_inode+0x10/0x10
[ 53.546342] ? do_raw_spin_unlock+0xed/0x100
[ 53.546347] f2fs_sync_inode_meta+0xe0/0x130
[ 53.546351] f2fs_fill_super+0x287d/0x2d10
[ 53.546367] ? vsnprintf+0x742/0x7a0
[ 53.546372] ? f2fs_commit_super+0x180/0x180
[ 53.546379] ? up_write+0x20/0x40
[ 53.546385] ? set_blocksize+0x5f/0x140
[ 53.546391] ? f2fs_commit_super+0x180/0x180
[ 53.546402] mount_bdev+0x181/0x200
[ 53.546406] mount_fs+0x94/0x180
[ 53.546411] vfs_kern_mount+0x6c/0x1e0
[ 53.546415] do_mount+0xe5e/0x1510
[ 53.546420] ? fs_reclaim_release+0x9/0x30
[ 53.546424] ? copy_mount_string+0x20/0x20
[ 53.546428] ? fs_reclaim_acquire+0xd/0x30
[ 53.546435] ? __might_sleep+0x2c/0xc0
[ 53.546440] ? ___might_sleep+0x53/0x170
[ 53.546453] ? __might_fault+0x4c/0x60
[ 53.546468] ? _copy_from_user+0x95/0xa0
[ 53.546474] ? memdup_user+0x39/0x60
[ 53.546478] ksys_mount+0x88/0xb0
[ 53.546482] __x64_sys_mount+0x5d/0x70
[ 53.546495] do_syscall_64+0x65/0x130
[ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 53.547639] ---[ end trace b804d1ea2fec893e ]---

So if recover fails, we need to drop all recovered data.

Signed-off-by: Sheng Yong <[email protected]>
---
v2->v1:
* track all recovered inodes so that if recovery fail, we can find and
iput all those inodes
* clear SB_ACTIVE before destroy_fsync_dnoes to let iput trash all data
* do not truncate pages of quota inodes, evict will do that
* no need to track orphan inodes, they are evict immediatelly during
recovery (i_count = 0)
---
fs/f2fs/recovery.c | 32 +++++++++++++++++++++-----------
fs/f2fs/super.c | 15 ++++++++++++++-
2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 93e3b6c46aae..0f36ea758160 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -96,8 +96,12 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
return ERR_PTR(err);
}

-static void del_fsync_inode(struct fsync_inode_entry *entry)
+static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
{
+ if (drop) {
+ /* inode should not be recovered, drop it */
+ f2fs_inode_synced(entry->inode);
+ }
iput(entry->inode);
list_del(&entry->list);
kmem_cache_free(fsync_entry_slab, entry);
@@ -337,12 +341,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
return err;
}

-static void destroy_fsync_dnodes(struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head, int drop)
{
struct fsync_inode_entry *entry, *tmp;

list_for_each_entry_safe(entry, tmp, head, list)
- del_fsync_inode(entry);
+ del_fsync_inode(entry, drop);
}

static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -579,7 +583,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}

static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
- struct list_head *dir_list)
+ struct list_head *tmp_inode_list, struct list_head *dir_list)
{
struct curseg_info *curseg;
struct page *page = NULL;
@@ -633,7 +637,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
}

if (entry->blkaddr == blkaddr)
- del_fsync_inode(entry);
+ list_move_tail(&entry->list, tmp_inode_list);
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@@ -646,7 +650,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,

int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
- struct list_head inode_list;
+ struct list_head inode_list, tmp_inode_list;
struct list_head dir_list;
int err;
int ret = 0;
@@ -677,6 +681,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}

INIT_LIST_HEAD(&inode_list);
+ INIT_LIST_HEAD(&tmp_inode_list);
INIT_LIST_HEAD(&dir_list);

/* prevent checkpoint */
@@ -695,11 +700,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
need_writecp = true;

/* step #2: recover data */
- err = recover_data(sbi, &inode_list, &dir_list);
+ err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
+ else {
+ /* restore s_flags to let iput() trash data */
+ sbi->sb->s_flags = s_flags;
+ }
skip:
- destroy_fsync_dnodes(&inode_list);
+ destroy_fsync_dnodes(&inode_list, err);
+ destroy_fsync_dnodes(&tmp_inode_list, err);

/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
@@ -708,13 +718,13 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
truncate_inode_pages_final(META_MAPPING(sbi));
+ } else {
+ clear_sbi_flag(sbi, SBI_POR_DOING);
}
-
- clear_sbi_flag(sbi, SBI_POR_DOING);
mutex_unlock(&sbi->cp_mutex);

/* let's drop all the directory inodes for clean checkpoint */
- destroy_fsync_dnodes(&dir_list);
+ destroy_fsync_dnodes(&dir_list, err);

if (need_writecp) {
set_sbi_flag(sbi, SBI_IS_RECOVERED);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 79060c96da5b..f95df93096d5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1992,6 +1992,19 @@ void f2fs_quota_off_umount(struct super_block *sb)
}
}

+static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!dqopt->files[type])
+ continue;
+ f2fs_inode_synced(dqopt->files[type]);
+ }
+}
+
+
static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
{
*projid = F2FS_I(inode)->i_projid;
@@ -3282,10 +3295,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)

free_meta:
#ifdef CONFIG_QUOTA
+ f2fs_truncate_quota_inode_pages(sb);
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
f2fs_quota_off_umount(sbi->sb);
#endif
- f2fs_sync_inode_meta(sbi);
/*
* Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
--
2.17.1


2018-10-16 12:39:12

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2] f2fs: cleanup dirty pages if recover failed

On 2018/10/12 18:49, Sheng Yong wrote:
> During recover, we will try to create new dentries for inodes with
> dentry_mark. But if the parent is missing (e.g. killed by fsck),
> recover will break. But those recovered dirty pages are not cleanup.
> This will hit f2fs_bug_on:
>
> [ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint
> [ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3
> [ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2
> [ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2
> [ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295
> [ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320
> [ 53.546174] Modules linked in:
> [ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1
> [ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
> [ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320
> [ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8
> [ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282
> [ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33
> [ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590
> [ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3
> [ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff
> [ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009
> [ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000
> [ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0
> [ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 53.546242] Call Trace:
> [ 53.546248] f2fs_submit_page_bio+0x95/0x740
> [ 53.546253] read_node_page+0x161/0x1e0
> [ 53.546271] ? truncate_node+0x650/0x650
> [ 53.546283] ? add_to_page_cache_lru+0x12c/0x170
> [ 53.546288] ? pagecache_get_page+0x262/0x2d0
> [ 53.546292] __get_node_page+0x200/0x660
> [ 53.546302] f2fs_update_inode_page+0x4a/0x160
> [ 53.546306] f2fs_write_inode+0x86/0xb0
> [ 53.546317] __writeback_single_inode+0x49c/0x620
> [ 53.546322] writeback_single_inode+0xe4/0x1e0
> [ 53.546326] sync_inode_metadata+0x93/0xd0
> [ 53.546330] ? sync_inode+0x10/0x10
> [ 53.546342] ? do_raw_spin_unlock+0xed/0x100
> [ 53.546347] f2fs_sync_inode_meta+0xe0/0x130
> [ 53.546351] f2fs_fill_super+0x287d/0x2d10
> [ 53.546367] ? vsnprintf+0x742/0x7a0
> [ 53.546372] ? f2fs_commit_super+0x180/0x180
> [ 53.546379] ? up_write+0x20/0x40
> [ 53.546385] ? set_blocksize+0x5f/0x140
> [ 53.546391] ? f2fs_commit_super+0x180/0x180
> [ 53.546402] mount_bdev+0x181/0x200
> [ 53.546406] mount_fs+0x94/0x180
> [ 53.546411] vfs_kern_mount+0x6c/0x1e0
> [ 53.546415] do_mount+0xe5e/0x1510
> [ 53.546420] ? fs_reclaim_release+0x9/0x30
> [ 53.546424] ? copy_mount_string+0x20/0x20
> [ 53.546428] ? fs_reclaim_acquire+0xd/0x30
> [ 53.546435] ? __might_sleep+0x2c/0xc0
> [ 53.546440] ? ___might_sleep+0x53/0x170
> [ 53.546453] ? __might_fault+0x4c/0x60
> [ 53.546468] ? _copy_from_user+0x95/0xa0
> [ 53.546474] ? memdup_user+0x39/0x60
> [ 53.546478] ksys_mount+0x88/0xb0
> [ 53.546482] __x64_sys_mount+0x5d/0x70
> [ 53.546495] do_syscall_64+0x65/0x130
> [ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [ 53.547639] ---[ end trace b804d1ea2fec893e ]---
>
> So if recover fails, we need to drop all recovered data.
>
> Signed-off-by: Sheng Yong <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,