The FS got stuck in the below stack when the storage is almost
full/dirty condition (when FG_GC is being done).
schedule_timeout
io_schedule_timeout
congestion_wait
f2fs_drop_inmem_pages_all
f2fs_gc
f2fs_balance_fs
__write_node_page
f2fs_fsync_node_pages
f2fs_do_sync_file
f2fs_ioctl
The root cause for this issue is there is a potential infinite loop
in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
and when there an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
not set. Fix this by keeping track of the total atomic files
currently opened and using that to exit from this condition.
Fix-suggested-by: Chao Yu <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
Signed-off-by: Sahitya Tummala <[email protected]>
---
v2:
- change fix as per Chao's suggestion
- decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] and
only when atomic flag is cleared for the first time, otherwise, the count
goes to an invalid/high value as f2fs_drop_inmem_pages() can be called from
two contexts at the same time.
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 1 +
fs/f2fs/segment.c | 21 +++++++++++++++------
3 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c681f51..e04a665 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
/* for skip statistic */
+ unsigned int atomic_files; /* # of opened atomic file */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f6c038e..22c4949 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(&fi->inmem_ilist))
list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+ sbi->atomic_files++;
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
/* add inode in inmem_list first and set atomic_file */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index da830fc..0b7a33b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
struct inode *inode;
struct f2fs_inode_info *fi;
+ unsigned int count = sbi->atomic_files;
+ unsigned int looped = 0;
next:
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(head)) {
@@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
}
fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
inode = igrab(&fi->vfs_inode);
+ if (inode)
+ list_move_tail(&fi->inmem_ilist, head);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
if (inode) {
if (gc_failure) {
- if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
- goto drop;
- goto skip;
+ if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
+ goto skip;
}
-drop:
set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_drop_inmem_pages(inode);
+skip:
iput(inode);
}
-skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
+ if (gc_failure) {
+ if (++looped >= count)
+ return;
+ }
goto next;
}
@@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
mutex_unlock(&fi->inmem_lock);
}
- clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ sbi->atomic_files--;
+ }
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
On 11/13, Sahitya Tummala wrote:
> The FS got stuck in the below stack when the storage is almost
> full/dirty condition (when FG_GC is being done).
>
> schedule_timeout
> io_schedule_timeout
> congestion_wait
> f2fs_drop_inmem_pages_all
> f2fs_gc
> f2fs_balance_fs
> __write_node_page
> f2fs_fsync_node_pages
> f2fs_do_sync_file
> f2fs_ioctl
>
> The root cause for this issue is there is a potential infinite loop
> in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
> and when there an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
> not set. Fix this by keeping track of the total atomic files
> currently opened and using that to exit from this condition.
>
> Fix-suggested-by: Chao Yu <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> Signed-off-by: Sahitya Tummala <[email protected]>
> ---
> v2:
> - change fix as per Chao's suggestion
> - decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] and
> only when atomic flag is cleared for the first time, otherwise, the count
> goes to an invalid/high value as f2fs_drop_inmem_pages() can be called from
> two contexts at the same time.
>
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/file.c | 1 +
> fs/f2fs/segment.c | 21 +++++++++++++++------
> 3 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index c681f51..e04a665 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
> unsigned int gc_mode; /* current GC state */
> unsigned int next_victim_seg[2]; /* next segment in victim section */
> /* for skip statistic */
> + unsigned int atomic_files; /* # of opened atomic file */
> unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
> unsigned long long skipped_gc_rwsem; /* FG_GC only */
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index f6c038e..22c4949 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
> spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> if (list_empty(&fi->inmem_ilist))
> list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
> + sbi->atomic_files++;
> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>
> /* add inode in inmem_list first and set atomic_file */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index da830fc..0b7a33b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
> struct inode *inode;
> struct f2fs_inode_info *fi;
> + unsigned int count = sbi->atomic_files;
> + unsigned int looped = 0;
> next:
> spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> if (list_empty(head)) {
> @@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> }
> fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
> inode = igrab(&fi->vfs_inode);
> + if (inode)
> + list_move_tail(&fi->inmem_ilist, head);
> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>
> if (inode) {
> if (gc_failure) {
> - if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
> - goto drop;
> - goto skip;
> + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
> + goto skip;
> }
> -drop:
> set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> f2fs_drop_inmem_pages(inode);
> +skip:
> iput(inode);
> }
> -skip:
> congestion_wait(BLK_RW_ASYNC, HZ/50);
> cond_resched();
> + if (gc_failure) {
> + if (++looped >= count)
There is a race condition when handling sbi->atomic_files?
> + return;
> + }
> goto next;
> }
>
> @@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
> mutex_unlock(&fi->inmem_lock);
> }
>
> - clear_inode_flag(inode, FI_ATOMIC_FILE);
> fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
> stat_dec_atomic_write(inode);
>
> spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> if (!list_empty(&fi->inmem_ilist))
> list_del_init(&fi->inmem_ilist);
> + if (f2fs_is_atomic_file(inode)) {
> + clear_inode_flag(inode, FI_ATOMIC_FILE);
> + sbi->atomic_files--;
> + }
> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> }
>
> --
> Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
Hi Jaegeuk,
On Fri, Nov 22, 2019 at 08:53:28AM -0800, Jaegeuk Kim wrote:
> On 11/13, Sahitya Tummala wrote:
> > The FS got stuck in the below stack when the storage is almost
> > full/dirty condition (when FG_GC is being done).
> >
> > schedule_timeout
> > io_schedule_timeout
> > congestion_wait
> > f2fs_drop_inmem_pages_all
> > f2fs_gc
> > f2fs_balance_fs
> > __write_node_page
> > f2fs_fsync_node_pages
> > f2fs_do_sync_file
> > f2fs_ioctl
> >
> > The root cause for this issue is there is a potential infinite loop
> > in f2fs_drop_inmem_pages_all() for the case where gc_failure is true
> > and when there an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is
> > not set. Fix this by keeping track of the total atomic files
> > currently opened and using that to exit from this condition.
> >
> > Fix-suggested-by: Chao Yu <[email protected]>
> > Signed-off-by: Chao Yu <[email protected]>
> > Signed-off-by: Sahitya Tummala <[email protected]>
> > ---
> > v2:
> > - change fix as per Chao's suggestion
> > - decrement sbi->atomic_files protected under sbi->inode_lock[ATOMIC_FILE] and
> > only when atomic flag is cleared for the first time, otherwise, the count
> > goes to an invalid/high value as f2fs_drop_inmem_pages() can be called from
> > two contexts at the same time.
> >
> > fs/f2fs/f2fs.h | 1 +
> > fs/f2fs/file.c | 1 +
> > fs/f2fs/segment.c | 21 +++++++++++++++------
> > 3 files changed, 17 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index c681f51..e04a665 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1297,6 +1297,7 @@ struct f2fs_sb_info {
> > unsigned int gc_mode; /* current GC state */
> > unsigned int next_victim_seg[2]; /* next segment in victim section */
> > /* for skip statistic */
> > + unsigned int atomic_files; /* # of opened atomic file */
> > unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
> > unsigned long long skipped_gc_rwsem; /* FG_GC only */
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index f6c038e..22c4949 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -1919,6 +1919,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (list_empty(&fi->inmem_ilist))
> > list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
> > + sbi->atomic_files++;
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >
> > /* add inode in inmem_list first and set atomic_file */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index da830fc..0b7a33b 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> > struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
> > struct inode *inode;
> > struct f2fs_inode_info *fi;
> > + unsigned int count = sbi->atomic_files;
> > + unsigned int looped = 0;
> > next:
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (list_empty(head)) {
> > @@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> > }
> > fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
> > inode = igrab(&fi->vfs_inode);
> > + if (inode)
> > + list_move_tail(&fi->inmem_ilist, head);
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >
> > if (inode) {
> > if (gc_failure) {
> > - if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
> > - goto drop;
> > - goto skip;
> > + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
> > + goto skip;
> > }
> > -drop:
> > set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> > f2fs_drop_inmem_pages(inode);
> > +skip:
> > iput(inode);
> > }
> > -skip:
> > congestion_wait(BLK_RW_ASYNC, HZ/50);
> > cond_resched();
> > + if (gc_failure) {
> > + if (++looped >= count)
>
> There is a race condition when handling sbi->atomic_files?
>
There is no concern here in this function w.r.t sbi->atomic_files value.
Since when we loop over all the atomic files, the looped counter will increment
and will exit when all the files are looped at least once.
There is an issue with f2fs_drop_inmem_pages() which is actually decrementing
the sbi->atomic_files and that was handled below.
Thanks,
Sahitya.
> > + return;
> > + }
> > goto next;
> > }
> >
> > @@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
> > mutex_unlock(&fi->inmem_lock);
> > }
> >
> > - clear_inode_flag(inode, FI_ATOMIC_FILE);
> > fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
> > stat_dec_atomic_write(inode);
> >
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (!list_empty(&fi->inmem_ilist))
> > list_del_init(&fi->inmem_ilist);
> > + if (f2fs_is_atomic_file(inode)) {
> > + clear_inode_flag(inode, FI_ATOMIC_FILE);
> > + sbi->atomic_files--;
> > + }
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> > }
> >
> > --
> > Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
> > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
--
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.