This patch fixes the following deadlock bug that occurs during recovery.
INFO: task mount:1322 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
mount D ffffffff81125870 0 1322 1266 0x00000000
ffff8801207e39d8 0000000000000046 ffff88012ab1dee0 0000000000000046
ffff8801207e3a08 ffff880115903f40 ffff8801207e3fd8 ffff8801207e3fd8
ffff8801207e3fd8 ffff880115903f40 ffff8801207e39d8 ffff88012fc94520
Call Trace:
[<ffffffff81125870>] ? __lock_page+0x70/0x70
[<ffffffff816a92d9>] schedule+0x29/0x70
[<ffffffff816a93af>] io_schedule+0x8f/0xd0
[<ffffffff8112587e>] sleep_on_page+0xe/0x20
[<ffffffff816a649a>] __wait_on_bit_lock+0x5a/0xc0
[<ffffffff81125867>] __lock_page+0x67/0x70
[<ffffffff8106c7b0>] ? autoremove_wake_function+0x40/0x40
[<ffffffff81126857>] find_lock_page+0x67/0x80
[<ffffffff8112698f>] find_or_create_page+0x3f/0xb0
[<ffffffffa03901a8>] ? sync_inode_page+0xa8/0xd0 [f2fs]
[<ffffffffa038fdf7>] get_node_page+0x67/0x180 [f2fs]
[<ffffffffa039818b>] recover_fsync_data+0xacb/0xff0 [f2fs]
[<ffffffff816aaa1e>] ? _raw_spin_unlock+0x3e/0x40
[<ffffffffa0389634>] f2fs_fill_super+0x7d4/0x850 [f2fs]
[<ffffffff81184cf9>] mount_bdev+0x1c9/0x210
[<ffffffffa0388e60>] ? validate_superblock+0x180/0x180 [f2fs]
[<ffffffffa0387635>] f2fs_mount+0x15/0x20 [f2fs]
[<ffffffff81185a13>] mount_fs+0x43/0x1b0
[<ffffffff81145ba0>] ? __alloc_percpu+0x10/0x20
[<ffffffff811a0796>] vfs_kern_mount+0x76/0x120
[<ffffffff811a2cb7>] do_mount+0x237/0xa10
[<ffffffff81140b9b>] ? strndup_user+0x5b/0x80
[<ffffffff811a3520>] SyS_mount+0x90/0xe0
[<ffffffff816b3502>] system_call_fastpath+0x16/0x1b
The bug is triggered when check_index_in_prev_nodes tries to get the direct
node page by calling get_node_page.
At this point, if the direct node page was already locked by get_dnode_of_data
in its caller, we get a deadlock.
This patch adds an additional condition check for reusing the locked direct
node pages prior to the get_node_page call.
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 2 +-
fs/f2fs/recovery.c | 26 +++++++++++++++++++++-----
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6594ce1..7b05029 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -900,6 +900,7 @@ void truncate_data_blocks(struct dnode_of_data *);
void f2fs_truncate(struct inode *);
int f2fs_setattr(struct dentry *, struct iattr *);
int truncate_hole(struct inode *, pgoff_t, pgoff_t);
+int truncate_data_blocks_range(struct dnode_of_data *, int);
long f2fs_ioctl(struct file *, unsigned int, unsigned long);
long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 316bcfe..deefd25 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -168,7 +168,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
int nr_free = 0, ofs = dn->ofs_in_node;
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 5148d90..eceb665 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -189,14 +189,14 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
}
static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
- block_t blkaddr)
+ block_t blkaddr, struct dnode_of_data *dn)
{
struct seg_entry *sentry;
unsigned int segno = GET_SEGNO(sbi, blkaddr);
unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
(sbi->blocks_per_seg - 1);
struct f2fs_summary sum;
- nid_t ino;
+ nid_t ino, nid;
void *kaddr;
struct inode *inode;
struct page *node_page;
@@ -224,10 +224,26 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
f2fs_put_page(sum_page, 1);
}
+ /* Use the locked dnode page and inode */
+ nid = le32_to_cpu(sum.nid);
+ if (dn->inode->i_ino == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.nid = nid;
+ tdn.node_page = dn->inode_page;
+ tdn.ofs_in_node = sum.ofs_in_node;
+ truncate_data_blocks_range(&tdn, 1);
+ return;
+ } else if (dn->nid == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.ofs_in_node = sum.ofs_in_node;
+ truncate_data_blocks_range(&tdn, 1);
+ return;
+ }
+
/* Get the node page */
- node_page = get_node_page(sbi, le32_to_cpu(sum.nid));
+ node_page = get_node_page(sbi, nid);
bidx = start_bidx_of_node(ofs_of_node(node_page)) +
- le16_to_cpu(sum.ofs_in_node);
+ le16_to_cpu(sum.ofs_in_node);
ino = ino_of_node(node_page);
f2fs_put_page(node_page, 1);
@@ -285,7 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* Check the previous node page having this index */
- check_index_in_prev_nodes(sbi, dest);
+ check_index_in_prev_nodes(sbi, dest, &dn);
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
--
1.8.1.3.566.gaa39828
This patch adds error handling code to check_index_in_prev_nodes and its
caller, do_recover_data.
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/recovery.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index eceb665..dcd8e86 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -188,7 +188,7 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
}
}
-static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
block_t blkaddr, struct dnode_of_data *dn)
{
struct seg_entry *sentry;
@@ -205,7 +205,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
sentry = get_seg_entry(sbi, segno);
if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
- return;
+ return 0;
/* Get the previous summary */
for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
@@ -232,16 +232,18 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
tdn.node_page = dn->inode_page;
tdn.ofs_in_node = sum.ofs_in_node;
truncate_data_blocks_range(&tdn, 1);
- return;
+ return 0;
} else if (dn->nid == nid) {
struct dnode_of_data tdn = *dn;
tdn.ofs_in_node = sum.ofs_in_node;
truncate_data_blocks_range(&tdn, 1);
- return;
+ return 0;
}
/* Get the node page */
node_page = get_node_page(sbi, nid);
+ if (IS_ERR(node_page))
+ return PTR_ERR(node_page);
bidx = start_bidx_of_node(ofs_of_node(node_page)) +
le16_to_cpu(sum.ofs_in_node);
ino = ino_of_node(node_page);
@@ -250,10 +252,11 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
/* Deallocate previous index in the node page */
inode = f2fs_iget(sbi->sb, ino);
if (IS_ERR(inode))
- return;
+ return PTR_ERR(inode);
truncate_hole(inode, bidx, bidx + 1);
iput(inode);
+ return 0;
}
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
@@ -301,7 +304,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* Check the previous node page having this index */
- check_index_in_prev_nodes(sbi, dest, &dn);
+ err = check_index_in_prev_nodes(sbi, dest, &dn);
+ if (err)
+ goto err;
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
@@ -324,13 +329,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
set_page_dirty(dn.node_page);
recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
+err:
f2fs_put_dnode(&dn);
mutex_unlock_op(sbi, ilock);
f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
- "recovered_data = %d blocks",
- inode->i_ino, recovered);
- return 0;
+ "recovered_data = %d blocks, err = %d",
+ inode->i_ino, recovered, err);
+ return err;
}
static int recover_data(struct f2fs_sb_info *sbi,
--
1.8.1.3.566.gaa39828