For some corner case, f2fs_gc selects one target victim but cannot free
that victim segment due to some reason (e.g. the segment has some blocks
of atomic file which is not commited yet), in this case, the victim
segment may probably be selected over and over, and then f2fs_gc will
go to dead loop. This patch identifies the dead-loop segment, and skips
it in __get_victim next time.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/f2fs.h | 8 ++++++++
fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
fs/f2fs/super.c | 3 +++
3 files changed, 45 insertions(+)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ca6b0c9..b75851b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -115,6 +115,13 @@ struct f2fs_mount_info {
unsigned int opt;
};
+struct gc_loop_info {
+ int count;
+ unsigned int segno;
+ unsigned long *segmap;
+};
+#define GC_LOOP_MAX 10
+
#define F2FS_FEATURE_ENCRYPT 0x0001
#define F2FS_FEATURE_BLKZONED 0x0002
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
@@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
/* threshold for converting bg victims for fg */
u64 fggc_threshold;
+ struct gc_loop_info gc_loop;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5d5bba4..4ee9e1b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
if (no_fggc_candidate(sbi, secno))
continue;
+ if (sbi->gc_loop.segmap &&
+ test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
+ continue;
+
clear_bit(secno, dirty_i->victim_secmap);
return GET_SEG_FROM_SEC(sbi, secno);
}
@@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (gc_type == FG_GC && p.alloc_mode == LFS &&
no_fggc_candidate(sbi, secno))
goto next;
+ if (gc_type == FG_GC && p.alloc_mode == LFS &&
+ sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
+ goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
sec_freed++;
+ else if (gc_type == FG_GC && seg_freed == 0) {
+ if (!sbi->gc_loop.segmap) {
+ sbi->gc_loop.segmap =
+ kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
+ sbi->gc_loop.count = 0;
+ sbi->gc_loop.segno = NULL_SEGNO;
+ }
+ if (segno == sbi->gc_loop.segno) {
+ if (sbi->gc_loop.count > GC_LOOP_MAX) {
+ f2fs_bug_on(sbi, 1);
+ set_bit(segno, sbi->gc_loop.segmap);
+ sbi->gc_loop.count = 0;
+ sbi->gc_loop.segno = NULL_SEGNO;
+ }
+ else
+ sbi->gc_loop.count++;
+ } else {
+ sbi->gc_loop.segno = segno;
+ sbi->gc_loop.count = 0;
+ }
+ }
total_freed += seg_freed;
if (gc_type == FG_GC)
@@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (sync)
ret = sec_freed ? 0 : -EAGAIN;
+ if (sbi->gc_loop.segmap) {
+ kvfree(sbi->gc_loop.segmap);
+ sbi->gc_loop.segmap = NULL;
+ sbi->gc_loop.count = 0;
+ sbi->gc_loop.segno = NULL_SEGNO;
+ }
return ret;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 031cb26..76f0b72 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->reserved_blocks = 0;
sbi->current_reserved_blocks = 0;
+ sbi->gc_loop.segmap = NULL;
+ sbi->gc_loop.count = 0;
+ sbi->gc_loop.segno = NULL_SEGNO;
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
--
1.8.5.2
On 2017/12/23 21:09, Yunlong Song wrote:
> For some corner case, f2fs_gc selects one target victim but cannot free
> that victim segment due to some reason (e.g. the segment has some blocks
> of atomic file which is not commited yet), in this case, the victim
File should not be atomic opened for long time since normally sqlite
transaction will finish quickly, so we can expect that gc loop could be
ended up soon, right?
Thanks,
> segment may probably be selected over and over, and then f2fs_gc will
> go to dead loop. This patch identifies the dead-loop segment, and skips
> it in __get_victim next time.
>
> Signed-off-by: Yunlong Song <[email protected]>
> ---
> fs/f2fs/f2fs.h | 8 ++++++++
> fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
> fs/f2fs/super.c | 3 +++
> 3 files changed, 45 insertions(+)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ca6b0c9..b75851b 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
> unsigned int opt;
> };
>
> +struct gc_loop_info {
> + int count;
> + unsigned int segno;
> + unsigned long *segmap;
> +};
> +#define GC_LOOP_MAX 10
> +
> #define F2FS_FEATURE_ENCRYPT 0x0001
> #define F2FS_FEATURE_BLKZONED 0x0002
> #define F2FS_FEATURE_ATOMIC_WRITE 0x0004
> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>
> /* threshold for converting bg victims for fg */
> u64 fggc_threshold;
> + struct gc_loop_info gc_loop;
>
> /* maximum # of trials to find a victim segment for SSR and GC */
> unsigned int max_victim_search;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 5d5bba4..4ee9e1b 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
> if (no_fggc_candidate(sbi, secno))
> continue;
>
> + if (sbi->gc_loop.segmap &&
> + test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
> + continue;
> +
> clear_bit(secno, dirty_i->victim_secmap);
> return GET_SEG_FROM_SEC(sbi, secno);
> }
> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
> if (gc_type == FG_GC && p.alloc_mode == LFS &&
> no_fggc_candidate(sbi, secno))
> goto next;
> + if (gc_type == FG_GC && p.alloc_mode == LFS &&
> + sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
> + goto next;
>
> cost = get_gc_cost(sbi, segno, &p);
>
> @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
> if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
> sec_freed++;
> + else if (gc_type == FG_GC && seg_freed == 0) {
> + if (!sbi->gc_loop.segmap) {
> + sbi->gc_loop.segmap =
> + kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
> + sbi->gc_loop.count = 0;
> + sbi->gc_loop.segno = NULL_SEGNO;
> + }
> + if (segno == sbi->gc_loop.segno) {
> + if (sbi->gc_loop.count > GC_LOOP_MAX) {
> + f2fs_bug_on(sbi, 1);
> + set_bit(segno, sbi->gc_loop.segmap);
> + sbi->gc_loop.count = 0;
> + sbi->gc_loop.segno = NULL_SEGNO;
> + }
> + else
> + sbi->gc_loop.count++;
> + } else {
> + sbi->gc_loop.segno = segno;
> + sbi->gc_loop.count = 0;
> + }
> + }
> total_freed += seg_freed;
>
> if (gc_type == FG_GC)
> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>
> if (sync)
> ret = sec_freed ? 0 : -EAGAIN;
> + if (sbi->gc_loop.segmap) {
> + kvfree(sbi->gc_loop.segmap);
> + sbi->gc_loop.segmap = NULL;
> + sbi->gc_loop.count = 0;
> + sbi->gc_loop.segno = NULL_SEGNO;
> + }
> return ret;
> }
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 031cb26..76f0b72 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> sbi->last_valid_block_count = sbi->total_valid_block_count;
> sbi->reserved_blocks = 0;
> sbi->current_reserved_blocks = 0;
> + sbi->gc_loop.segmap = NULL;
> + sbi->gc_loop.count = 0;
> + sbi->gc_loop.segno = NULL_SEGNO;
>
> for (i = 0; i < NR_INODE_TYPE; i++) {
> INIT_LIST_HEAD(&sbi->inode_list[i]);
>
What if the application starts atomic write but forgets to commit, e.g.
bugs in application or the application
is a malicious software itself?
On 2017/12/25 11:44, Chao Yu wrote:
> On 2017/12/23 21:09, Yunlong Song wrote:
>> For some corner case, f2fs_gc selects one target victim but cannot free
>> that victim segment due to some reason (e.g. the segment has some blocks
>> of atomic file which is not commited yet), in this case, the victim
> File should not be atomic opened for long time since normally sqlite
> transaction will finish quickly, so we can expect that gc loop could be
> ended up soon, right?
>
> Thanks,
>
>> segment may probably be selected over and over, and then f2fs_gc will
>> go to dead loop. This patch identifies the dead-loop segment, and skips
>> it in __get_victim next time.
>>
>> Signed-off-by: Yunlong Song <[email protected]>
>> ---
>> fs/f2fs/f2fs.h | 8 ++++++++
>> fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
>> fs/f2fs/super.c | 3 +++
>> 3 files changed, 45 insertions(+)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index ca6b0c9..b75851b 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
>> unsigned int opt;
>> };
>>
>> +struct gc_loop_info {
>> + int count;
>> + unsigned int segno;
>> + unsigned long *segmap;
>> +};
>> +#define GC_LOOP_MAX 10
>> +
>> #define F2FS_FEATURE_ENCRYPT 0x0001
>> #define F2FS_FEATURE_BLKZONED 0x0002
>> #define F2FS_FEATURE_ATOMIC_WRITE 0x0004
>> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>>
>> /* threshold for converting bg victims for fg */
>> u64 fggc_threshold;
>> + struct gc_loop_info gc_loop;
>>
>> /* maximum # of trials to find a victim segment for SSR and GC */
>> unsigned int max_victim_search;
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index 5d5bba4..4ee9e1b 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
>> if (no_fggc_candidate(sbi, secno))
>> continue;
>>
>> + if (sbi->gc_loop.segmap &&
>> + test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
>> + continue;
>> +
>> clear_bit(secno, dirty_i->victim_secmap);
>> return GET_SEG_FROM_SEC(sbi, secno);
>> }
>> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>> if (gc_type == FG_GC && p.alloc_mode == LFS &&
>> no_fggc_candidate(sbi, secno))
>> goto next;
>> + if (gc_type == FG_GC && p.alloc_mode == LFS &&
>> + sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
>> + goto next;
>>
>> cost = get_gc_cost(sbi, segno, &p);
>>
>> @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>> seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
>> if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
>> sec_freed++;
>> + else if (gc_type == FG_GC && seg_freed == 0) {
>> + if (!sbi->gc_loop.segmap) {
>> + sbi->gc_loop.segmap =
>> + kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
>> + sbi->gc_loop.count = 0;
>> + sbi->gc_loop.segno = NULL_SEGNO;
>> + }
>> + if (segno == sbi->gc_loop.segno) {
>> + if (sbi->gc_loop.count > GC_LOOP_MAX) {
>> + f2fs_bug_on(sbi, 1);
>> + set_bit(segno, sbi->gc_loop.segmap);
>> + sbi->gc_loop.count = 0;
>> + sbi->gc_loop.segno = NULL_SEGNO;
>> + }
>> + else
>> + sbi->gc_loop.count++;
>> + } else {
>> + sbi->gc_loop.segno = segno;
>> + sbi->gc_loop.count = 0;
>> + }
>> + }
>> total_freed += seg_freed;
>>
>> if (gc_type == FG_GC)
>> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>
>> if (sync)
>> ret = sec_freed ? 0 : -EAGAIN;
>> + if (sbi->gc_loop.segmap) {
>> + kvfree(sbi->gc_loop.segmap);
>> + sbi->gc_loop.segmap = NULL;
>> + sbi->gc_loop.count = 0;
>> + sbi->gc_loop.segno = NULL_SEGNO;
>> + }
>> return ret;
>> }
>>
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 031cb26..76f0b72 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>> sbi->last_valid_block_count = sbi->total_valid_block_count;
>> sbi->reserved_blocks = 0;
>> sbi->current_reserved_blocks = 0;
>> + sbi->gc_loop.segmap = NULL;
>> + sbi->gc_loop.count = 0;
>> + sbi->gc_loop.segno = NULL_SEGNO;
>>
>> for (i = 0; i < NR_INODE_TYPE; i++) {
>> INIT_LIST_HEAD(&sbi->inode_list[i]);
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2017/12/25 14:15, Yunlong Song wrote:
> What if the application starts atomic write but forgets to commit, e.g.
> bugs in application or the application
> is a malicious software itself?
I agree we should consider robustness of f2fs in security aspect, but
please consider more scenario of these sqlite customized interface usage,
it looks just skipping gc is not enough, for example, if there is one large
size db in our partition, with random write, its data spreads in each
segment, once this db has been atomic opened, foreground gc may loop for ever.
How about checking opened time of atomic or volatile file in
f2fs_balance_fs, if it exceeds threshold, we can restore the file to normal
one to avoid potential security issue.
Thanks,
>
> On 2017/12/25 11:44, Chao Yu wrote:
>> On 2017/12/23 21:09, Yunlong Song wrote:
>>> For some corner case, f2fs_gc selects one target victim but cannot free
>>> that victim segment due to some reason (e.g. the segment has some blocks
>>> of atomic file which is not commited yet), in this case, the victim
>> File should not be atomic opened for long time since normally sqlite
>> transaction will finish quickly, so we can expect that gc loop could be
>> ended up soon, right?
>>
>> Thanks,
>>
>>> segment may probably be selected over and over, and then f2fs_gc will
>>> go to dead loop. This patch identifies the dead-loop segment, and skips
>>> it in __get_victim next time.
>>>
>>> Signed-off-by: Yunlong Song <[email protected]>
>>> ---
>>> fs/f2fs/f2fs.h | 8 ++++++++
>>> fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
>>> fs/f2fs/super.c | 3 +++
>>> 3 files changed, 45 insertions(+)
>>>
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index ca6b0c9..b75851b 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
>>> unsigned int opt;
>>> };
>>>
>>> +struct gc_loop_info {
>>> + int count;
>>> + unsigned int segno;
>>> + unsigned long *segmap;
>>> +};
>>> +#define GC_LOOP_MAX 10
>>> +
>>> #define F2FS_FEATURE_ENCRYPT 0x0001
>>> #define F2FS_FEATURE_BLKZONED 0x0002
>>> #define F2FS_FEATURE_ATOMIC_WRITE 0x0004
>>> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>>>
>>> /* threshold for converting bg victims for fg */
>>> u64 fggc_threshold;
>>> + struct gc_loop_info gc_loop;
>>>
>>> /* maximum # of trials to find a victim segment for SSR and GC */
>>> unsigned int max_victim_search;
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index 5d5bba4..4ee9e1b 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
>>> if (no_fggc_candidate(sbi, secno))
>>> continue;
>>>
>>> + if (sbi->gc_loop.segmap &&
>>> + test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
>>> + continue;
>>> +
>>> clear_bit(secno, dirty_i->victim_secmap);
>>> return GET_SEG_FROM_SEC(sbi, secno);
>>> }
>>> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>>> if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>> no_fggc_candidate(sbi, secno))
>>> goto next;
>>> + if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>> + sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
>>> + goto next;
>>>
>>> cost = get_gc_cost(sbi, segno, &p);
>>>
>>> @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>> seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
>>> if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
>>> sec_freed++;
>>> + else if (gc_type == FG_GC && seg_freed == 0) {
>>> + if (!sbi->gc_loop.segmap) {
>>> + sbi->gc_loop.segmap =
>>> + kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
>>> + sbi->gc_loop.count = 0;
>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>> + }
>>> + if (segno == sbi->gc_loop.segno) {
>>> + if (sbi->gc_loop.count > GC_LOOP_MAX) {
>>> + f2fs_bug_on(sbi, 1);
>>> + set_bit(segno, sbi->gc_loop.segmap);
>>> + sbi->gc_loop.count = 0;
>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>> + }
>>> + else
>>> + sbi->gc_loop.count++;
>>> + } else {
>>> + sbi->gc_loop.segno = segno;
>>> + sbi->gc_loop.count = 0;
>>> + }
>>> + }
>>> total_freed += seg_freed;
>>>
>>> if (gc_type == FG_GC)
>>> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>
>>> if (sync)
>>> ret = sec_freed ? 0 : -EAGAIN;
>>> + if (sbi->gc_loop.segmap) {
>>> + kvfree(sbi->gc_loop.segmap);
>>> + sbi->gc_loop.segmap = NULL;
>>> + sbi->gc_loop.count = 0;
>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>> + }
>>> return ret;
>>> }
>>>
>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>> index 031cb26..76f0b72 100644
>>> --- a/fs/f2fs/super.c
>>> +++ b/fs/f2fs/super.c
>>> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>> sbi->last_valid_block_count = sbi->total_valid_block_count;
>>> sbi->reserved_blocks = 0;
>>> sbi->current_reserved_blocks = 0;
>>> + sbi->gc_loop.segmap = NULL;
>>> + sbi->gc_loop.count = 0;
>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>
>>> for (i = 0; i < NR_INODE_TYPE; i++) {
>>> INIT_LIST_HEAD(&sbi->inode_list[i]);
>>>
>>
>> .
>>
>
In this case, f2fs_gc will skip all the victims and return with no dead
loop. The atomic file will
use SSR to OPU, it?s OK.
On 2017/12/25 17:45, Chao Yu wrote:
> On 2017/12/25 14:15, Yunlong Song wrote:
>> What if the application starts atomic write but forgets to commit, e.g.
>> bugs in application or the application
>> is a malicious software itself?
> I agree we should consider robustness of f2fs in security aspect, but
> please consider more scenario of these sqlite customized interface usage,
> it looks just skipping gc is not enough, for example, if there is one large
> size db in our partition, with random write, its data spreads in each
> segment, once this db has been atomic opened, foreground gc may loop for ever.
>
> How about checking opened time of atomic or volatile file in
> f2fs_balance_fs, if it exceeds threshold, we can restore the file to normal
> one to avoid potential security issue.
>
> Thanks,
>
>> On 2017/12/25 11:44, Chao Yu wrote:
>>> On 2017/12/23 21:09, Yunlong Song wrote:
>>>> For some corner case, f2fs_gc selects one target victim but cannot free
>>>> that victim segment due to some reason (e.g. the segment has some blocks
>>>> of atomic file which is not commited yet), in this case, the victim
>>> File should not be atomic opened for long time since normally sqlite
>>> transaction will finish quickly, so we can expect that gc loop could be
>>> ended up soon, right?
>>>
>>> Thanks,
>>>
>>>> segment may probably be selected over and over, and then f2fs_gc will
>>>> go to dead loop. This patch identifies the dead-loop segment, and skips
>>>> it in __get_victim next time.
>>>>
>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>> ---
>>>> fs/f2fs/f2fs.h | 8 ++++++++
>>>> fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++++
>>>> fs/f2fs/super.c | 3 +++
>>>> 3 files changed, 45 insertions(+)
>>>>
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index ca6b0c9..b75851b 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
>>>> unsigned int opt;
>>>> };
>>>>
>>>> +struct gc_loop_info {
>>>> + int count;
>>>> + unsigned int segno;
>>>> + unsigned long *segmap;
>>>> +};
>>>> +#define GC_LOOP_MAX 10
>>>> +
>>>> #define F2FS_FEATURE_ENCRYPT 0x0001
>>>> #define F2FS_FEATURE_BLKZONED 0x0002
>>>> #define F2FS_FEATURE_ATOMIC_WRITE 0x0004
>>>> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>>>>
>>>> /* threshold for converting bg victims for fg */
>>>> u64 fggc_threshold;
>>>> + struct gc_loop_info gc_loop;
>>>>
>>>> /* maximum # of trials to find a victim segment for SSR and GC */
>>>> unsigned int max_victim_search;
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index 5d5bba4..4ee9e1b 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
>>>> if (no_fggc_candidate(sbi, secno))
>>>> continue;
>>>>
>>>> + if (sbi->gc_loop.segmap &&
>>>> + test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
>>>> + continue;
>>>> +
>>>> clear_bit(secno, dirty_i->victim_secmap);
>>>> return GET_SEG_FROM_SEC(sbi, secno);
>>>> }
>>>> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>>>> if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>> no_fggc_candidate(sbi, secno))
>>>> goto next;
>>>> + if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>> + sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
>>>> + goto next;
>>>>
>>>> cost = get_gc_cost(sbi, segno, &p);
>>>>
>>>> @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>> seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
>>>> if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
>>>> sec_freed++;
>>>> + else if (gc_type == FG_GC && seg_freed == 0) {
>>>> + if (!sbi->gc_loop.segmap) {
>>>> + sbi->gc_loop.segmap =
>>>> + kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
>>>> + sbi->gc_loop.count = 0;
>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>> + }
>>>> + if (segno == sbi->gc_loop.segno) {
>>>> + if (sbi->gc_loop.count > GC_LOOP_MAX) {
>>>> + f2fs_bug_on(sbi, 1);
>>>> + set_bit(segno, sbi->gc_loop.segmap);
>>>> + sbi->gc_loop.count = 0;
>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>> + }
>>>> + else
>>>> + sbi->gc_loop.count++;
>>>> + } else {
>>>> + sbi->gc_loop.segno = segno;
>>>> + sbi->gc_loop.count = 0;
>>>> + }
>>>> + }
>>>> total_freed += seg_freed;
>>>>
>>>> if (gc_type == FG_GC)
>>>> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>>
>>>> if (sync)
>>>> ret = sec_freed ? 0 : -EAGAIN;
>>>> + if (sbi->gc_loop.segmap) {
>>>> + kvfree(sbi->gc_loop.segmap);
>>>> + sbi->gc_loop.segmap = NULL;
>>>> + sbi->gc_loop.count = 0;
>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>> + }
>>>> return ret;
>>>> }
>>>>
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 031cb26..76f0b72 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>> sbi->last_valid_block_count = sbi->total_valid_block_count;
>>>> sbi->reserved_blocks = 0;
>>>> sbi->current_reserved_blocks = 0;
>>>> + sbi->gc_loop.segmap = NULL;
>>>> + sbi->gc_loop.count = 0;
>>>> + sbi->gc_loop.segno = NULL_SEGNO;
>>>>
>>>> for (i = 0; i < NR_INODE_TYPE; i++) {
>>>> INIT_LIST_HEAD(&sbi->inode_list[i]);
>>>>
>>> .
>>>
>
> .
>
--
Thanks,
Yunlong Song
On 2017/12/25 17:56, Yunlong Song wrote:
> In this case, f2fs_gc will skip all the victims and return with no dead loop. The atomic file will
> use SSR to OPU, it?s OK.
Nope, SSR trigger condition is limited, don't rely on it.
Thanks,
>
> On 2017/12/25 17:45, Chao Yu wrote:
>> On 2017/12/25 14:15, Yunlong Song wrote:
>>> What if the application starts atomic write but forgets to commit, e.g.
>>> bugs in application or the application
>>> is a malicious software itself?
>> I agree we should consider robustness of f2fs in security aspect, but
>> please consider more scenario of these sqlite customized interface usage,
>> it looks just skipping gc is not enough, for example, if there is one large
>> size db in our partition, with random write, its data spreads in each
>> segment, once this db has been atomic opened, foreground gc may loop for ever.
>>
>> How about checking opened time of atomic or volatile file in
>> f2fs_balance_fs, if it exceeds threshold, we can restore the file to normal
>> one to avoid potential security issue.
>>
>> Thanks,
>>
>>> On 2017/12/25 11:44, Chao Yu wrote:
>>>> On 2017/12/23 21:09, Yunlong Song wrote:
>>>>> For some corner case, f2fs_gc selects one target victim but cannot free
>>>>> that victim segment due to some reason (e.g. the segment has some blocks
>>>>> of atomic file which is not commited yet), in this case, the victim
>>>> File should not be atomic opened for long time since normally sqlite
>>>> transaction will finish quickly, so we can expect that gc loop could be
>>>> ended up soon, right?
>>>>
>>>> Thanks,
>>>>
>>>>> segment may probably be selected over and over, and then f2fs_gc will
>>>>> go to dead loop. This patch identifies the dead-loop segment, and skips
>>>>> it in __get_victim next time.
>>>>>
>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>> ---
>>>>> ?? fs/f2fs/f2fs.h? |? 8 ++++++++
>>>>> ?? fs/f2fs/gc.c??? | 34 ++++++++++++++++++++++++++++++++++
>>>>> ?? fs/f2fs/super.c |? 3 +++
>>>>> ?? 3 files changed, 45 insertions(+)
>>>>>
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index ca6b0c9..b75851b 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -115,6 +115,13 @@ struct f2fs_mount_info {
>>>>> ?????? unsigned int??? opt;
>>>>> ?? };
>>>>> ?? +struct gc_loop_info {
>>>>> +??? int count;
>>>>> +??? unsigned int segno;
>>>>> +??? unsigned long *segmap;
>>>>> +};
>>>>> +#define GC_LOOP_MAX 10
>>>>> +
>>>>> ?? #define F2FS_FEATURE_ENCRYPT??????? 0x0001
>>>>> ?? #define F2FS_FEATURE_BLKZONED??????? 0x0002
>>>>> ?? #define F2FS_FEATURE_ATOMIC_WRITE??? 0x0004
>>>>> @@ -1125,6 +1132,7 @@ struct f2fs_sb_info {
>>>>> ?? ?????? /* threshold for converting bg victims for fg */
>>>>> ?????? u64 fggc_threshold;
>>>>> +??? struct gc_loop_info gc_loop;
>>>>> ?? ?????? /* maximum # of trials to find a victim segment for SSR and GC */
>>>>> ?????? unsigned int max_victim_search;
>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>> index 5d5bba4..4ee9e1b 100644
>>>>> --- a/fs/f2fs/gc.c
>>>>> +++ b/fs/f2fs/gc.c
>>>>> @@ -229,6 +229,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
>>>>> ?????????? if (no_fggc_candidate(sbi, secno))
>>>>> ?????????????? continue;
>>>>> ?? +??????? if (sbi->gc_loop.segmap &&
>>>>> +??????????? test_bit(GET_SEG_FROM_SEC(sbi, secno), sbi->gc_loop.segmap))
>>>>> +??????????? continue;
>>>>> +
>>>>> ?????????? clear_bit(secno, dirty_i->victim_secmap);
>>>>> ?????????? return GET_SEG_FROM_SEC(sbi, secno);
>>>>> ?????? }
>>>>> @@ -371,6 +375,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
>>>>> ?????????? if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>>> ?????????????????????? no_fggc_candidate(sbi, secno))
>>>>> ?????????????? goto next;
>>>>> +??????? if (gc_type == FG_GC && p.alloc_mode == LFS &&
>>>>> +??????????? sbi->gc_loop.segmap && test_bit(segno, sbi->gc_loop.segmap))
>>>>> +??????????? goto next;
>>>>> ?? ?????????? cost = get_gc_cost(sbi, segno, &p);
>>>>> ?? @@ -1042,6 +1049,27 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>>> ?????? seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
>>>>> ?????? if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
>>>>> ?????????? sec_freed++;
>>>>> +??? else if (gc_type == FG_GC && seg_freed == 0) {
>>>>> +??????? if (!sbi->gc_loop.segmap) {
>>>>> +??????????? sbi->gc_loop.segmap =
>>>>> +??????????????? kvzalloc(f2fs_bitmap_size(MAIN_SEGS(sbi)), GFP_KERNEL);
>>>>> +??????????? sbi->gc_loop.count = 0;
>>>>> +??????????? sbi->gc_loop.segno = NULL_SEGNO;
>>>>> +??????? }
>>>>> +??????? if (segno == sbi->gc_loop.segno) {
>>>>> +??????????? if (sbi->gc_loop.count > GC_LOOP_MAX) {
>>>>> +??????????????? f2fs_bug_on(sbi, 1);
>>>>> +??????????????? set_bit(segno, sbi->gc_loop.segmap);
>>>>> +??????????????? sbi->gc_loop.count = 0;
>>>>> +??????????????? sbi->gc_loop.segno = NULL_SEGNO;
>>>>> +??????????? }
>>>>> +??????????? else
>>>>> +??????????????? sbi->gc_loop.count++;
>>>>> +??????? } else {
>>>>> +??????????? sbi->gc_loop.segno = segno;
>>>>> +??????????? sbi->gc_loop.count = 0;
>>>>> +??????? }
>>>>> +??? }
>>>>> ?????? total_freed += seg_freed;
>>>>> ?? ?????? if (gc_type == FG_GC)
>>>>> @@ -1075,6 +1103,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
>>>>> ?? ?????? if (sync)
>>>>> ?????????? ret = sec_freed ? 0 : -EAGAIN;
>>>>> +??? if (sbi->gc_loop.segmap) {
>>>>> +??????? kvfree(sbi->gc_loop.segmap);
>>>>> +??????? sbi->gc_loop.segmap = NULL;
>>>>> +??????? sbi->gc_loop.count = 0;
>>>>> +??????? sbi->gc_loop.segno = NULL_SEGNO;
>>>>> +??? }
>>>>> ?????? return ret;
>>>>> ?? }
>>>>> ?? diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index 031cb26..76f0b72 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -2562,6 +2562,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>> ?????? sbi->last_valid_block_count = sbi->total_valid_block_count;
>>>>> ?????? sbi->reserved_blocks = 0;
>>>>> ?????? sbi->current_reserved_blocks = 0;
>>>>> +??? sbi->gc_loop.segmap = NULL;
>>>>> +??? sbi->gc_loop.count = 0;
>>>>> +??? sbi->gc_loop.segno = NULL_SEGNO;
>>>>> ?? ?????? for (i = 0; i < NR_INODE_TYPE; i++) {
>>>>> ?????????? INIT_LIST_HEAD(&sbi->inode_list[i]);
>>>>>
>>>> .
>>>>
>>
>> .
>>
>