f2fs has some bugs in section:segment > 1 and lfs mode, so fix them.
Yunlong Song (5):
f2fs: do not set free of current section
f2fs: clear the remaining prefree_map of the section
f2fs: blk_finish_plug of submit_bio in lfs mode
f2fs: disable small discard in lfs mode
f2fs: do not __punch_discard_cmd in lfs mode
fs/f2fs/data.c | 2 +-
fs/f2fs/segment.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++------
fs/f2fs/segment.h | 10 +++++-
fs/f2fs/sysfs.c | 4 +++
4 files changed, 102 insertions(+), 12 deletions(-)
--
1.8.5.2
For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
example, if the section prefree_map is ...previous section | current
section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
will skip x + 3 and x + 4, but their bitmap is still set, which will
cause duplicated f2fs_issue_discard of this same section in the next
write_checkpoint, so fix it.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/segment.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 47b6595..fd38b61 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
start = start_segno + sbi->segs_per_sec;
if (start < end)
goto next;
- else
- end = start - 1;
+ else {
+ start_segno = start;
+
+ while (1) {
+ start = find_next_bit(prefree_map, start_segno,
+ end + 1);
+ if (start >= start_segno)
+ break;
+ end = find_next_zero_bit(prefree_map, start_segno,
+ start + 1);
+ for (i = start; i < end; i++)
+ clear_bit(i, prefree_map);
+ dirty_i->nr_dirty[PRE] -= end - start;
+ }
+
+ end = start_segno - 1;
+ }
}
mutex_unlock(&dirty_i->seglist_lock);
--
1.8.5.2
Expand the blk_finish_plug action from blkzoned to normal lfs mode,
since plug will cause the out-of-order IO submission, which is not
friendly to flash in lfs mode.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/data.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 70813a4..f12151d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -263,7 +263,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
--
1.8.5.2
In lfs mode, it is better to send the discard of the overall section
each time to avoid missing alignment with flash.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/segment.c | 3 ++-
fs/f2fs/sysfs.c | 4 ++++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fd38b61..f6c20e0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1766,7 +1766,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&dcc->issing_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
- dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+ dcc->max_discards = test_opt(sbi, LFS) ? 0 :
+ MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
dcc->root = RB_ROOT;
dcc->rbtree_check = false;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2e7e611..4b6c457 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -271,6 +271,10 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
+ if (!strcmp(a->attr.name, "max_small_discards") &&
+ test_opt(sbi, LFS))
+ return -EINVAL;
+
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
--
1.8.5.2
For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
example, if segment 1 is just used and allocate new segment 2, and the
blocks of segment 1 is invalidated, at this time, the previous code will
use __set_test_and_free to free the free_secmap and free_sections++,
this is not correct since it is still a current section, so fix it.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/segment.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b5bd328..5049551 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
+ if (IS_CURSEC(sbi, secno))
+ goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
@@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
+skip_free:
spin_unlock(&free_i->segmap_lock);
}
--
1.8.5.2
In lfs mode, it is better to submit and wait for discard of the
new_blkaddr's overall section, rather than punch it which makes
more small discards and is not friendly with flash alignment. And
f2fs does not have to wait discard of each new_blkaddr except for the
start_block of each section with this patch.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/segment.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
fs/f2fs/segment.h | 7 ++++-
2 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f6c20e0..bce321a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -893,7 +893,19 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(dc->bdev->bd_super);
+ if (test_opt(sbi, LFS)) {
+ unsigned int segno = GET_SEGNO(sbi, dc->lstart);
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ int cnt = (dc->len >> sbi->log_blocks_per_seg) /
+ sbi->segs_per_sec;
+
+ while (cnt--) {
+ set_bit(secno, FREE_I(sbi)->discard_secmap);
+ secno++;
+ }
+ }
dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
complete_all(&dc->wait);
@@ -1349,8 +1361,15 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
NULL, blkaddr);
if (dc) {
- if (dc->state == D_PREP) {
+ if (dc->state == D_PREP && !test_opt(sbi, LFS))
__punch_discard_cmd(sbi, dc, blkaddr);
+ else if (dc->state == D_PREP && test_opt(sbi, LFS)) {
+ struct discard_policy dpolicy;
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ __submit_discard_cmd(sbi, &dpolicy, dc);
+ dc->ref++;
+ need_wait = true;
} else {
dc->ref++;
need_wait = true;
@@ -2071,9 +2090,10 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
unsigned int left_start = hint;
- bool init = true;
+ bool init = true, check_discard = test_opt(sbi, LFS) ? true : false;
int go_left = 0;
int i;
+ unsigned long *free_secmap;
spin_lock(&free_i->segmap_lock);
@@ -2084,11 +2104,25 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
goto got_it;
}
find_other_zone:
- secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
+ if (check_discard) {
+ int entries = f2fs_bitmap_size(MAIN_SECS(sbi)) / sizeof(unsigned long);
+
+ free_secmap = free_i->tmp_secmap;
+ for (i = 0; i < entries; i++)
+ free_secmap[i] = (!(free_i->free_secmap[i] ^
+ free_i->discard_secmap[i])) | free_i->free_secmap[i];
+ } else
+ free_secmap = free_i->free_secmap;
+
+ secno = find_next_zero_bit(free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
- secno = find_next_zero_bit(free_i->free_secmap,
+ secno = find_next_zero_bit(free_secmap,
MAIN_SECS(sbi), 0);
+ if (secno >= MAIN_SECS(sbi) && check_discard) {
+ check_discard = false;
+ goto find_other_zone;
+ }
f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
} else {
go_left = 1;
@@ -2098,13 +2132,17 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
if (go_left == 0)
goto skip_left;
- while (test_bit(left_start, free_i->free_secmap)) {
+ while (test_bit(left_start, free_secmap)) {
if (left_start > 0) {
left_start--;
continue;
}
- left_start = find_next_zero_bit(free_i->free_secmap,
+ left_start = find_next_zero_bit(free_secmap,
MAIN_SECS(sbi), 0);
+ if (left_start >= MAIN_SECS(sbi) && check_discard) {
+ check_discard = false;
+ goto find_other_zone;
+ }
f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
break;
}
@@ -2719,7 +2757,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
- f2fs_wait_discard_bio(sbi, *new_blkaddr);
+ if (test_opt(sbi, LFS)) {
+ unsigned int start_segno, secno;
+
+ secno = GET_SEC_FROM_SEG(sbi, curseg->segno);
+ start_segno = secno * sbi->segs_per_sec;
+ if (*new_blkaddr == START_BLOCK(sbi, start_segno) &&
+ !test_bit(secno, FREE_I(sbi)->discard_secmap))
+ f2fs_wait_discard_bio(sbi, *new_blkaddr);
+ f2fs_bug_on(sbi, !test_bit(secno, FREE_I(sbi)->discard_secmap));
+ }
+ else
+ f2fs_wait_discard_bio(sbi, *new_blkaddr);
/*
* __add_sum_entry should be resided under the curseg_mutex
@@ -3648,10 +3697,21 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
if (!free_i->free_secmap)
return -ENOMEM;
+ free_i->discard_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
+ if (!free_i->discard_secmap)
+ return -ENOMEM;
+
+ free_i->tmp_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
+ if (!free_i->tmp_secmap)
+ return -ENOMEM;
+
/* set all segments as dirty temporarily */
memset(free_i->free_segmap, 0xff, bitmap_size);
memset(free_i->free_secmap, 0xff, sec_bitmap_size);
+ /* set all sections as discarded temporarily */
+ memset(free_i->discard_secmap, 0xff, sec_bitmap_size);
+
/* init free segmap information */
free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
free_i->free_segments = 0;
@@ -4047,6 +4107,8 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
+ kvfree(free_i->discard_secmap);
+ kvfree(free_i->tmp_secmap);
kfree(free_i);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5049551..b37a909 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -259,6 +259,8 @@ struct free_segmap_info {
spinlock_t segmap_lock; /* free segmap lock */
unsigned long *free_segmap; /* free segment bitmap */
unsigned long *free_secmap; /* free section bitmap */
+ unsigned long *discard_secmap; /* discard section bitmap */
+ unsigned long *tmp_secmap; /* bitmap for temporal use */
};
/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */
@@ -453,8 +455,11 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
- if (test_and_clear_bit(secno, free_i->free_secmap))
+ if (test_and_clear_bit(secno, free_i->free_secmap)) {
free_i->free_sections++;
+ if (test_opt(sbi, LFS))
+ clear_bit(secno, free_i->discard_secmap);
+ }
}
}
skip_free:
--
1.8.5.2
On 2018/7/12 23:09, Yunlong Song wrote:
> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
> example, if segment 1 is just used and allocate new segment 2, and the
> blocks of segment 1 is invalidated, at this time, the previous code will
> use __set_test_and_free to free the free_secmap and free_sections++,
> this is not correct since it is still a current section, so fix it.
>
> Signed-off-by: Yunlong Song <[email protected]>
Reviewed-by: Chao Yu <[email protected]>
Thanks,
On 2018/7/12 23:09, Yunlong Song wrote:
> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
> example, if the section prefree_map is ...previous section | current
> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
> will skip x + 3 and x + 4, but their bitmap is still set, which will
> cause duplicated f2fs_issue_discard of this same section in the next
> write_checkpoint, so fix it.
I didn't get it, if # 2 segment is not prefree state, so it still has valid
blocks there, so we won't issue discard due to below condition, right?
if (!IS_CURSEC(sbi, secno) &&
!get_valid_blocks(sbi, start, true))
Thanks,
>
> Signed-off-by: Yunlong Song <[email protected]>
> ---
> fs/f2fs/segment.c | 19 +++++++++++++++++--
> 1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 47b6595..fd38b61 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
> start = start_segno + sbi->segs_per_sec;
> if (start < end)
> goto next;
> - else
> - end = start - 1;
> + else {
> + start_segno = start;
> +
> + while (1) {
> + start = find_next_bit(prefree_map, start_segno,
> + end + 1);
> + if (start >= start_segno)
> + break;
> + end = find_next_zero_bit(prefree_map, start_segno,
> + start + 1);
> + for (i = start; i < end; i++)
> + clear_bit(i, prefree_map);
> + dirty_i->nr_dirty[PRE] -= end - start;
> + }
> +
> + end = start_segno - 1;
> + }
> }
> mutex_unlock(&dirty_i->seglist_lock);
>
>
On 2018/7/12 23:09, Yunlong Song wrote:
> Expand the blk_finish_plug action from blkzoned to normal lfs mode,
> since plug will cause the out-of-order IO submission, which is not
> friendly to flash in lfs mode.
>
> Signed-off-by: Yunlong Song <[email protected]>
Reviewed-by: Chao Yu <[email protected]>
Thanks,
On 2018/7/12 23:09, Yunlong Song wrote:
> In lfs mode, it is better to send the discard of the overall section
> each time to avoid missing alignment with flash.
Hmm.. I think LFS mode can be used widely on different kind of device instead of
just on zoned block device, so let's just keep old implementation here.
Thanks,
>
> Signed-off-by: Yunlong Song <[email protected]>
> ---
> fs/f2fs/segment.c | 3 ++-
> fs/f2fs/sysfs.c | 4 ++++
> 2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index fd38b61..f6c20e0 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1766,7 +1766,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
> atomic_set(&dcc->issing_discard, 0);
> atomic_set(&dcc->discard_cmd_cnt, 0);
> dcc->nr_discards = 0;
> - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
> + dcc->max_discards = test_opt(sbi, LFS) ? 0 :
> + MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
> dcc->undiscard_blks = 0;
> dcc->root = RB_ROOT;
> dcc->rbtree_check = false;
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 2e7e611..4b6c457 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -271,6 +271,10 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> return count;
> }
>
> + if (!strcmp(a->attr.name, "max_small_discards") &&
> + test_opt(sbi, LFS))
> + return -EINVAL;
> +
> *ui = t;
>
> if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>
On 2018/7/12 23:09, Yunlong Song wrote:
> In lfs mode, it is better to submit and wait for discard of the
> new_blkaddr's overall section, rather than punch it which makes
> more small discards and is not friendly with flash alignment. And
> f2fs does not have to wait discard of each new_blkaddr except for the
> start_block of each section with this patch.
For non-zoned block device, unaligned discard can be allowed; and if synchronous
discard is very slow, it will block block allocator here, rather than that, I
prefer just punch 4k lba of discard entry for performance.
If you don't want to encounter this condition, I suggest issue large size
discard more quickly.
Thanks,
>
> Signed-off-by: Yunlong Song <[email protected]>
> ---
> fs/f2fs/segment.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> fs/f2fs/segment.h | 7 ++++-
> 2 files changed, 75 insertions(+), 8 deletions(-)
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index f6c20e0..bce321a 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -893,7 +893,19 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
> static void f2fs_submit_discard_endio(struct bio *bio)
> {
> struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
> + struct f2fs_sb_info *sbi = F2FS_SB(dc->bdev->bd_super);
>
> + if (test_opt(sbi, LFS)) {
> + unsigned int segno = GET_SEGNO(sbi, dc->lstart);
> + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> + int cnt = (dc->len >> sbi->log_blocks_per_seg) /
> + sbi->segs_per_sec;
> +
> + while (cnt--) {
> + set_bit(secno, FREE_I(sbi)->discard_secmap);
> + secno++;
> + }
> + }
> dc->error = blk_status_to_errno(bio->bi_status);
> dc->state = D_DONE;
> complete_all(&dc->wait);
> @@ -1349,8 +1361,15 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
> dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
> NULL, blkaddr);
> if (dc) {
> - if (dc->state == D_PREP) {
> + if (dc->state == D_PREP && !test_opt(sbi, LFS))
> __punch_discard_cmd(sbi, dc, blkaddr);
> + else if (dc->state == D_PREP && test_opt(sbi, LFS)) {
> + struct discard_policy dpolicy;
> +
> + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
> + __submit_discard_cmd(sbi, &dpolicy, dc);
> + dc->ref++;
> + need_wait = true;
> } else {
> dc->ref++;
> need_wait = true;
> @@ -2071,9 +2090,10 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
> unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
> unsigned int left_start = hint;
> - bool init = true;
> + bool init = true, check_discard = test_opt(sbi, LFS) ? true : false;
> int go_left = 0;
> int i;
> + unsigned long *free_secmap;
>
> spin_lock(&free_i->segmap_lock);
>
> @@ -2084,11 +2104,25 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> goto got_it;
> }
> find_other_zone:
> - secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
> + if (check_discard) {
> + int entries = f2fs_bitmap_size(MAIN_SECS(sbi)) / sizeof(unsigned long);
> +
> + free_secmap = free_i->tmp_secmap;
> + for (i = 0; i < entries; i++)
> + free_secmap[i] = (!(free_i->free_secmap[i] ^
> + free_i->discard_secmap[i])) | free_i->free_secmap[i];
> + } else
> + free_secmap = free_i->free_secmap;
> +
> + secno = find_next_zero_bit(free_secmap, MAIN_SECS(sbi), hint);
> if (secno >= MAIN_SECS(sbi)) {
> if (dir == ALLOC_RIGHT) {
> - secno = find_next_zero_bit(free_i->free_secmap,
> + secno = find_next_zero_bit(free_secmap,
> MAIN_SECS(sbi), 0);
> + if (secno >= MAIN_SECS(sbi) && check_discard) {
> + check_discard = false;
> + goto find_other_zone;
> + }
> f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
> } else {
> go_left = 1;
> @@ -2098,13 +2132,17 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> if (go_left == 0)
> goto skip_left;
>
> - while (test_bit(left_start, free_i->free_secmap)) {
> + while (test_bit(left_start, free_secmap)) {
> if (left_start > 0) {
> left_start--;
> continue;
> }
> - left_start = find_next_zero_bit(free_i->free_secmap,
> + left_start = find_next_zero_bit(free_secmap,
> MAIN_SECS(sbi), 0);
> + if (left_start >= MAIN_SECS(sbi) && check_discard) {
> + check_discard = false;
> + goto find_other_zone;
> + }
> f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
> break;
> }
> @@ -2719,7 +2757,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>
> *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>
> - f2fs_wait_discard_bio(sbi, *new_blkaddr);
> + if (test_opt(sbi, LFS)) {
> + unsigned int start_segno, secno;
> +
> + secno = GET_SEC_FROM_SEG(sbi, curseg->segno);
> + start_segno = secno * sbi->segs_per_sec;
> + if (*new_blkaddr == START_BLOCK(sbi, start_segno) &&
> + !test_bit(secno, FREE_I(sbi)->discard_secmap))
> + f2fs_wait_discard_bio(sbi, *new_blkaddr);
> + f2fs_bug_on(sbi, !test_bit(secno, FREE_I(sbi)->discard_secmap));
> + }
> + else
> + f2fs_wait_discard_bio(sbi, *new_blkaddr);
>
> /*
> * __add_sum_entry should be resided under the curseg_mutex
> @@ -3648,10 +3697,21 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
> if (!free_i->free_secmap)
> return -ENOMEM;
>
> + free_i->discard_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
> + if (!free_i->discard_secmap)
> + return -ENOMEM;
> +
> + free_i->tmp_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
> + if (!free_i->tmp_secmap)
> + return -ENOMEM;
> +
> /* set all segments as dirty temporarily */
> memset(free_i->free_segmap, 0xff, bitmap_size);
> memset(free_i->free_secmap, 0xff, sec_bitmap_size);
>
> + /* set all sections as discarded temporarily */
> + memset(free_i->discard_secmap, 0xff, sec_bitmap_size);
> +
> /* init free segmap information */
> free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
> free_i->free_segments = 0;
> @@ -4047,6 +4107,8 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
> SM_I(sbi)->free_info = NULL;
> kvfree(free_i->free_segmap);
> kvfree(free_i->free_secmap);
> + kvfree(free_i->discard_secmap);
> + kvfree(free_i->tmp_secmap);
> kfree(free_i);
> }
>
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 5049551..b37a909 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -259,6 +259,8 @@ struct free_segmap_info {
> spinlock_t segmap_lock; /* free segmap lock */
> unsigned long *free_segmap; /* free segment bitmap */
> unsigned long *free_secmap; /* free section bitmap */
> + unsigned long *discard_secmap; /* discard section bitmap */
> + unsigned long *tmp_secmap; /* bitmap for temporal use */
> };
>
> /* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */
> @@ -453,8 +455,11 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
> next = find_next_bit(free_i->free_segmap,
> start_segno + sbi->segs_per_sec, start_segno);
> if (next >= start_segno + sbi->segs_per_sec) {
> - if (test_and_clear_bit(secno, free_i->free_secmap))
> + if (test_and_clear_bit(secno, free_i->free_secmap)) {
> free_i->free_sections++;
> + if (test_opt(sbi, LFS))
> + clear_bit(secno, free_i->discard_secmap);
> + }
> }
> }
> skip_free:
>
round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0
then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0
write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0,
prefree of NO.2 is cleared, and no discard issued
round2: rm data block NO.0, NO.1, NO.3, NO.4
all invalid, but prefree bit of NO.2 is set and cleared in round1, then
prefree_map: 1 1 0 1 1
write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no
valid blocks of this section, so discard issued
but this time prefree bit of NO.3 and NO.4 is skipped...
round3:
write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 - >
0 0 0 0 0, no valid blocks of this section, so discard issued
this time prefree bit of NO.3 and NO.4 is cleared, but the discard of
this section is sent again...
On 2018/7/13 11:13, Chao Yu wrote:
> On 2018/7/12 23:09, Yunlong Song wrote:
>> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
>> example, if the section prefree_map is ...previous section | current
>> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
>> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
>> will skip x + 3 and x + 4, but their bitmap is still set, which will
>> cause duplicated f2fs_issue_discard of this same section in the next
>> write_checkpoint, so fix it.
> I didn't get it, if # 2 segment is not prefree state, so it still has valid
> blocks there, so we won't issue discard due to below condition, right?
>
> if (!IS_CURSEC(sbi, secno) &&
> !get_valid_blocks(sbi, start, true))
>
> Thanks,
>
>> Signed-off-by: Yunlong Song <[email protected]>
>> ---
>> fs/f2fs/segment.c | 19 +++++++++++++++++--
>> 1 file changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 47b6595..fd38b61 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>> start = start_segno + sbi->segs_per_sec;
>> if (start < end)
>> goto next;
>> - else
>> - end = start - 1;
>> + else {
>> + start_segno = start;
>> +
>> + while (1) {
>> + start = find_next_bit(prefree_map, start_segno,
>> + end + 1);
>> + if (start >= start_segno)
>> + break;
>> + end = find_next_zero_bit(prefree_map, start_segno,
>> + start + 1);
>> + for (i = start; i < end; i++)
>> + clear_bit(i, prefree_map);
>> + dirty_i->nr_dirty[PRE] -= end - start;
>> + }
>> +
>> + end = start_segno - 1;
>> + }
>> }
>> mutex_unlock(&dirty_i->seglist_lock);
>>
>>
>
> .
>
--
Thanks,
Yunlong Song
How about change test_opt(sbi, LFS) to f2fs_sb_has_blkzoned(sbi->sb) in
this patch, we apply
this patch to zoned block device?
On 2018/7/13 11:17, Chao Yu wrote:
> On 2018/7/12 23:09, Yunlong Song wrote:
>> In lfs mode, it is better to send the discard of the overall section
>> each time to avoid missing alignment with flash.
> Hmm.. I think LFS mode can be used widely on different kind of device instead of
> just on zoned block device, so let's just keep old implementation here.
>
> Thanks,
>
>> Signed-off-by: Yunlong Song <[email protected]>
>> ---
>> fs/f2fs/segment.c | 3 ++-
>> fs/f2fs/sysfs.c | 4 ++++
>> 2 files changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index fd38b61..f6c20e0 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1766,7 +1766,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>> atomic_set(&dcc->issing_discard, 0);
>> atomic_set(&dcc->discard_cmd_cnt, 0);
>> dcc->nr_discards = 0;
>> - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
>> + dcc->max_discards = test_opt(sbi, LFS) ? 0 :
>> + MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
>> dcc->undiscard_blks = 0;
>> dcc->root = RB_ROOT;
>> dcc->rbtree_check = false;
>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>> index 2e7e611..4b6c457 100644
>> --- a/fs/f2fs/sysfs.c
>> +++ b/fs/f2fs/sysfs.c
>> @@ -271,6 +271,10 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
>> return count;
>> }
>>
>> + if (!strcmp(a->attr.name, "max_small_discards") &&
>> + test_opt(sbi, LFS))
>> + return -EINVAL;
>> +
>> *ui = t;
>>
>> if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/7/13 11:28, Yunlong Song wrote:
> round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0
> then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0
> write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0,
> prefree of NO.2 is cleared, and no discard issued
>
> round2: rm data block NO.0, NO.1, NO.3, NO.4
> all invalid, but prefree bit of NO.2 is set and cleared in round1, then
> prefree_map: 1 1 0 1 1
> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no
Why prefree_map is not 0 0 0 0 0?
Thanks,
> valid blocks of this section, so discard issued
> but this time prefree bit of NO.3 and NO.4 is skipped...
>
> round3:
> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 - >
> 0 0 0 0 0, no valid blocks of this section, so discard issued
> this time prefree bit of NO.3 and NO.4 is cleared, but the discard of
> this section is sent again...
>
> On 2018/7/13 11:13, Chao Yu wrote:
>> On 2018/7/12 23:09, Yunlong Song wrote:
>>> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
>>> example, if the section prefree_map is ...previous section | current
>>> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
>>> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
>>> will skip x + 3 and x + 4, but their bitmap is still set, which will
>>> cause duplicated f2fs_issue_discard of this same section in the next
>>> write_checkpoint, so fix it.
>> I didn't get it, if # 2 segment is not prefree state, so it still has valid
>> blocks there, so we won't issue discard due to below condition, right?
>>
>> if (!IS_CURSEC(sbi, secno) &&
>> !get_valid_blocks(sbi, start, true))
>>
>> Thanks,
>>
>>> Signed-off-by: Yunlong Song <[email protected]>
>>> ---
>>> fs/f2fs/segment.c | 19 +++++++++++++++++--
>>> 1 file changed, 17 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 47b6595..fd38b61 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>>> start = start_segno + sbi->segs_per_sec;
>>> if (start < end)
>>> goto next;
>>> - else
>>> - end = start - 1;
>>> + else {
>>> + start_segno = start;
>>> +
>>> + while (1) {
>>> + start = find_next_bit(prefree_map, start_segno,
>>> + end + 1);
>>> + if (start >= start_segno)
>>> + break;
>>> + end = find_next_zero_bit(prefree_map, start_segno,
>>> + start + 1);
>>> + for (i = start; i < end; i++)
>>> + clear_bit(i, prefree_map);
>>> + dirty_i->nr_dirty[PRE] -= end - start;
>>> + }
>>> +
>>> + end = start_segno - 1;
>>> + }
>>> }
>>> mutex_unlock(&dirty_i->seglist_lock);
>>>
>>>
>>
>> .
>>
>
How about change test_opt(sbi, LFS) to f2fs_sb_has_blkzoned(sbi->sb) in
this patch, which can
avoid punch discard (creating small discard) in zoned block device.
On 2018/7/13 11:26, Chao Yu wrote:
> On 2018/7/12 23:09, Yunlong Song wrote:
>> In lfs mode, it is better to submit and wait for discard of the
>> new_blkaddr's overall section, rather than punch it which makes
>> more small discards and is not friendly with flash alignment. And
>> f2fs does not have to wait discard of each new_blkaddr except for the
>> start_block of each section with this patch.
> For non-zoned block device, unaligned discard can be allowed; and if synchronous
> discard is very slow, it will block block allocator here, rather than that, I
> prefer just punch 4k lba of discard entry for performance.
>
> If you don't want to encounter this condition, I suggest issue large size
> discard more quickly.
>
> Thanks,
>
>> Signed-off-by: Yunlong Song <[email protected]>
>> ---
>> fs/f2fs/segment.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
>> fs/f2fs/segment.h | 7 ++++-
>> 2 files changed, 75 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index f6c20e0..bce321a 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -893,7 +893,19 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
>> static void f2fs_submit_discard_endio(struct bio *bio)
>> {
>> struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
>> + struct f2fs_sb_info *sbi = F2FS_SB(dc->bdev->bd_super);
>>
>> + if (test_opt(sbi, LFS)) {
>> + unsigned int segno = GET_SEGNO(sbi, dc->lstart);
>> + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
>> + int cnt = (dc->len >> sbi->log_blocks_per_seg) /
>> + sbi->segs_per_sec;
>> +
>> + while (cnt--) {
>> + set_bit(secno, FREE_I(sbi)->discard_secmap);
>> + secno++;
>> + }
>> + }
>> dc->error = blk_status_to_errno(bio->bi_status);
>> dc->state = D_DONE;
>> complete_all(&dc->wait);
>> @@ -1349,8 +1361,15 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
>> dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
>> NULL, blkaddr);
>> if (dc) {
>> - if (dc->state == D_PREP) {
>> + if (dc->state == D_PREP && !test_opt(sbi, LFS))
>> __punch_discard_cmd(sbi, dc, blkaddr);
>> + else if (dc->state == D_PREP && test_opt(sbi, LFS)) {
>> + struct discard_policy dpolicy;
>> +
>> + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
>> + __submit_discard_cmd(sbi, &dpolicy, dc);
>> + dc->ref++;
>> + need_wait = true;
>> } else {
>> dc->ref++;
>> need_wait = true;
>> @@ -2071,9 +2090,10 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
>> unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
>> unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
>> unsigned int left_start = hint;
>> - bool init = true;
>> + bool init = true, check_discard = test_opt(sbi, LFS) ? true : false;
>> int go_left = 0;
>> int i;
>> + unsigned long *free_secmap;
>>
>> spin_lock(&free_i->segmap_lock);
>>
>> @@ -2084,11 +2104,25 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
>> goto got_it;
>> }
>> find_other_zone:
>> - secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
>> + if (check_discard) {
>> + int entries = f2fs_bitmap_size(MAIN_SECS(sbi)) / sizeof(unsigned long);
>> +
>> + free_secmap = free_i->tmp_secmap;
>> + for (i = 0; i < entries; i++)
>> + free_secmap[i] = (!(free_i->free_secmap[i] ^
>> + free_i->discard_secmap[i])) | free_i->free_secmap[i];
>> + } else
>> + free_secmap = free_i->free_secmap;
>> +
>> + secno = find_next_zero_bit(free_secmap, MAIN_SECS(sbi), hint);
>> if (secno >= MAIN_SECS(sbi)) {
>> if (dir == ALLOC_RIGHT) {
>> - secno = find_next_zero_bit(free_i->free_secmap,
>> + secno = find_next_zero_bit(free_secmap,
>> MAIN_SECS(sbi), 0);
>> + if (secno >= MAIN_SECS(sbi) && check_discard) {
>> + check_discard = false;
>> + goto find_other_zone;
>> + }
>> f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
>> } else {
>> go_left = 1;
>> @@ -2098,13 +2132,17 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
>> if (go_left == 0)
>> goto skip_left;
>>
>> - while (test_bit(left_start, free_i->free_secmap)) {
>> + while (test_bit(left_start, free_secmap)) {
>> if (left_start > 0) {
>> left_start--;
>> continue;
>> }
>> - left_start = find_next_zero_bit(free_i->free_secmap,
>> + left_start = find_next_zero_bit(free_secmap,
>> MAIN_SECS(sbi), 0);
>> + if (left_start >= MAIN_SECS(sbi) && check_discard) {
>> + check_discard = false;
>> + goto find_other_zone;
>> + }
>> f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
>> break;
>> }
>> @@ -2719,7 +2757,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>>
>> *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
>>
>> - f2fs_wait_discard_bio(sbi, *new_blkaddr);
>> + if (test_opt(sbi, LFS)) {
>> + unsigned int start_segno, secno;
>> +
>> + secno = GET_SEC_FROM_SEG(sbi, curseg->segno);
>> + start_segno = secno * sbi->segs_per_sec;
>> + if (*new_blkaddr == START_BLOCK(sbi, start_segno) &&
>> + !test_bit(secno, FREE_I(sbi)->discard_secmap))
>> + f2fs_wait_discard_bio(sbi, *new_blkaddr);
>> + f2fs_bug_on(sbi, !test_bit(secno, FREE_I(sbi)->discard_secmap));
>> + }
>> + else
>> + f2fs_wait_discard_bio(sbi, *new_blkaddr);
>>
>> /*
>> * __add_sum_entry should be resided under the curseg_mutex
>> @@ -3648,10 +3697,21 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
>> if (!free_i->free_secmap)
>> return -ENOMEM;
>>
>> + free_i->discard_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
>> + if (!free_i->discard_secmap)
>> + return -ENOMEM;
>> +
>> + free_i->tmp_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
>> + if (!free_i->tmp_secmap)
>> + return -ENOMEM;
>> +
>> /* set all segments as dirty temporarily */
>> memset(free_i->free_segmap, 0xff, bitmap_size);
>> memset(free_i->free_secmap, 0xff, sec_bitmap_size);
>>
>> + /* set all sections as discarded temporarily */
>> + memset(free_i->discard_secmap, 0xff, sec_bitmap_size);
>> +
>> /* init free segmap information */
>> free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
>> free_i->free_segments = 0;
>> @@ -4047,6 +4107,8 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
>> SM_I(sbi)->free_info = NULL;
>> kvfree(free_i->free_segmap);
>> kvfree(free_i->free_secmap);
>> + kvfree(free_i->discard_secmap);
>> + kvfree(free_i->tmp_secmap);
>> kfree(free_i);
>> }
>>
>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>> index 5049551..b37a909 100644
>> --- a/fs/f2fs/segment.h
>> +++ b/fs/f2fs/segment.h
>> @@ -259,6 +259,8 @@ struct free_segmap_info {
>> spinlock_t segmap_lock; /* free segmap lock */
>> unsigned long *free_segmap; /* free segment bitmap */
>> unsigned long *free_secmap; /* free section bitmap */
>> + unsigned long *discard_secmap; /* discard section bitmap */
>> + unsigned long *tmp_secmap; /* bitmap for temporal use */
>> };
>>
>> /* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */
>> @@ -453,8 +455,11 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
>> next = find_next_bit(free_i->free_segmap,
>> start_segno + sbi->segs_per_sec, start_segno);
>> if (next >= start_segno + sbi->segs_per_sec) {
>> - if (test_and_clear_bit(secno, free_i->free_secmap))
>> + if (test_and_clear_bit(secno, free_i->free_secmap)) {
>> free_i->free_sections++;
>> + if (test_opt(sbi, LFS))
>> + clear_bit(secno, free_i->discard_secmap);
>> + }
>> }
>> }
>> skip_free:
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/7/13 11:39, Yunlong Song wrote:
> How about change test_opt(sbi, LFS) to f2fs_sb_has_blkzoned(sbi->sb) in
> this patch, we apply
> this patch to zoned block device?
IIRC, we will use blkdev_reset_zones instead of __queue_discard_cmd for such
kind of device, so we will not encounter the issue you described.
Thanks,
>
> On 2018/7/13 11:17, Chao Yu wrote:
>> On 2018/7/12 23:09, Yunlong Song wrote:
>>> In lfs mode, it is better to send the discard of the overall section
>>> each time to avoid missing alignment with flash.
>> Hmm.. I think LFS mode can be used widely on different kind of device instead of
>> just on zoned block device, so let's just keep old implementation here.
>>
>> Thanks,
>>
>>> Signed-off-by: Yunlong Song <[email protected]>
>>> ---
>>> fs/f2fs/segment.c | 3 ++-
>>> fs/f2fs/sysfs.c | 4 ++++
>>> 2 files changed, 6 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index fd38b61..f6c20e0 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1766,7 +1766,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
>>> atomic_set(&dcc->issing_discard, 0);
>>> atomic_set(&dcc->discard_cmd_cnt, 0);
>>> dcc->nr_discards = 0;
>>> - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
>>> + dcc->max_discards = test_opt(sbi, LFS) ? 0 :
>>> + MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
>>> dcc->undiscard_blks = 0;
>>> dcc->root = RB_ROOT;
>>> dcc->rbtree_check = false;
>>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
>>> index 2e7e611..4b6c457 100644
>>> --- a/fs/f2fs/sysfs.c
>>> +++ b/fs/f2fs/sysfs.c
>>> @@ -271,6 +271,10 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
>>> return count;
>>> }
>>>
>>> + if (!strcmp(a->attr.name, "max_small_discards") &&
>>> + test_opt(sbi, LFS))
>>> + return -EINVAL;
>>> +
>>> *ui = t;
>>>
>>> if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
>>>
>>
>> .
>>
>
Because in f2fs_clear_prefree_segments, the codes:
...
while (1) {
int i;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
for (i = start; i < end; i++)
clear_bit(i, prefree_map);
...
next:
secno = GET_SEC_FROM_SEG(sbi, start);
start_segno = GET_SEG_FROM_SEC(sbi, secno);
if (!IS_CURSEC(sbi, secno) &&
!get_valid_blocks(sbi, start, true))
f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
sbi->segs_per_sec << sbi->log_blocks_per_seg);
start = start_segno + sbi->segs_per_sec;
if (start < end)
goto next;
else
end = start - 1;
...
In round 2, for prefree_map: 1 1 0 1 1, start = 0, end = 2, then
start = start_segno + sbi->segs_per_sec makes start = 5
if (start < end) --> start = 5, end = 2
so end = start -1 --> end = 4, then return to while again, this time skips
prefree bit 3 and 4.
On 2018/7/13 11:42, Chao Yu wrote:
> On 2018/7/13 11:28, Yunlong Song wrote:
>> round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0
>> then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0
>> write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0,
>> prefree of NO.2 is cleared, and no discard issued
>>
>> round2: rm data block NO.0, NO.1, NO.3, NO.4
>> all invalid, but prefree bit of NO.2 is set and cleared in round1, then
>> prefree_map: 1 1 0 1 1
>> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no
> Why prefree_map is not 0 0 0 0 0?
>
> Thanks,
>
>> valid blocks of this section, so discard issued
>> but this time prefree bit of NO.3 and NO.4 is skipped...
>>
>> round3:
>> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 - >
>> 0 0 0 0 0, no valid blocks of this section, so discard issued
>> this time prefree bit of NO.3 and NO.4 is cleared, but the discard of
>> this section is sent again...
>>
>> On 2018/7/13 11:13, Chao Yu wrote:
>>> On 2018/7/12 23:09, Yunlong Song wrote:
>>>> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
>>>> example, if the section prefree_map is ...previous section | current
>>>> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
>>>> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
>>>> will skip x + 3 and x + 4, but their bitmap is still set, which will
>>>> cause duplicated f2fs_issue_discard of this same section in the next
>>>> write_checkpoint, so fix it.
>>> I didn't get it, if # 2 segment is not prefree state, so it still has valid
>>> blocks there, so we won't issue discard due to below condition, right?
>>>
>>> if (!IS_CURSEC(sbi, secno) &&
>>> !get_valid_blocks(sbi, start, true))
>>>
>>> Thanks,
>>>
>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>> ---
>>>> fs/f2fs/segment.c | 19 +++++++++++++++++--
>>>> 1 file changed, 17 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 47b6595..fd38b61 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>>>> start = start_segno + sbi->segs_per_sec;
>>>> if (start < end)
>>>> goto next;
>>>> - else
>>>> - end = start - 1;
>>>> + else {
>>>> + start_segno = start;
>>>> +
>>>> + while (1) {
>>>> + start = find_next_bit(prefree_map, start_segno,
>>>> + end + 1);
>>>> + if (start >= start_segno)
>>>> + break;
>>>> + end = find_next_zero_bit(prefree_map, start_segno,
>>>> + start + 1);
>>>> + for (i = start; i < end; i++)
>>>> + clear_bit(i, prefree_map);
>>>> + dirty_i->nr_dirty[PRE] -= end - start;
>>>> + }
>>>> +
>>>> + end = start_segno - 1;
>>>> + }
>>>> }
>>>> mutex_unlock(&dirty_i->seglist_lock);
>>>>
>>>>
>>> .
>>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/7/13 11:51, Yunlong Song wrote:
> Because in f2fs_clear_prefree_segments, the codes:
> ...
> while (1) {
> int i;
> start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
> if (start >= MAIN_SEGS(sbi))
> break;
> end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
> start + 1);
>
> for (i = start; i < end; i++)
> clear_bit(i, prefree_map);
> ...
> next:
> secno = GET_SEC_FROM_SEG(sbi, start);
> start_segno = GET_SEG_FROM_SEC(sbi, secno);
> if (!IS_CURSEC(sbi, secno) &&
> !get_valid_blocks(sbi, start, true))
> f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
> sbi->segs_per_sec << sbi->log_blocks_per_seg);
>
> start = start_segno + sbi->segs_per_sec;
> if (start < end)
> goto next;
> else
> end = start - 1;
> ...
> In round 2, for prefree_map: 1 1 0 1 1, start = 0, end = 2, then
>
> start = start_segno + sbi->segs_per_sec makes start = 5
>
> if (start < end) --> start = 5, end = 2
>
> so end = start -1 --> end = 4, then return to while again, this time skips
> prefree bit 3 and 4.
I got it, thanks for the explanation.
If we are in LFS mode & with big section, what about aligning start/end to
section boundary for fstrim or real-time discard operation, and decide to issue
discard only when whole section is invalid and the last segment in section is
freed in current checkpoint? so that we can issue discard aligned to section
size as much as possible and avoid redundant discard.
Thanks,
>
> On 2018/7/13 11:42, Chao Yu wrote:
>> On 2018/7/13 11:28, Yunlong Song wrote:
>>> round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0
>>> then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0
>>> write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0,
>>> prefree of NO.2 is cleared, and no discard issued
>>>
>>> round2: rm data block NO.0, NO.1, NO.3, NO.4
>>> all invalid, but prefree bit of NO.2 is set and cleared in round1, then
>>> prefree_map: 1 1 0 1 1
>>> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no
>> Why prefree_map is not 0 0 0 0 0?
>>
>> Thanks,
>>
>>> valid blocks of this section, so discard issued
>>> but this time prefree bit of NO.3 and NO.4 is skipped...
>>>
>>> round3:
>>> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 - >
>>> 0 0 0 0 0, no valid blocks of this section, so discard issued
>>> this time prefree bit of NO.3 and NO.4 is cleared, but the discard of
>>> this section is sent again...
>>>
>>> On 2018/7/13 11:13, Chao Yu wrote:
>>>> On 2018/7/12 23:09, Yunlong Song wrote:
>>>>> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
>>>>> example, if the section prefree_map is ...previous section | current
>>>>> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
>>>>> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
>>>>> will skip x + 3 and x + 4, but their bitmap is still set, which will
>>>>> cause duplicated f2fs_issue_discard of this same section in the next
>>>>> write_checkpoint, so fix it.
>>>> I didn't get it, if # 2 segment is not prefree state, so it still has valid
>>>> blocks there, so we won't issue discard due to below condition, right?
>>>>
>>>> if (!IS_CURSEC(sbi, secno) &&
>>>> !get_valid_blocks(sbi, start, true))
>>>>
>>>> Thanks,
>>>>
>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>> ---
>>>>> fs/f2fs/segment.c | 19 +++++++++++++++++--
>>>>> 1 file changed, 17 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index 47b6595..fd38b61 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>>>>> start = start_segno + sbi->segs_per_sec;
>>>>> if (start < end)
>>>>> goto next;
>>>>> - else
>>>>> - end = start - 1;
>>>>> + else {
>>>>> + start_segno = start;
>>>>> +
>>>>> + while (1) {
>>>>> + start = find_next_bit(prefree_map, start_segno,
>>>>> + end + 1);
>>>>> + if (start >= start_segno)
>>>>> + break;
>>>>> + end = find_next_zero_bit(prefree_map, start_segno,
>>>>> + start + 1);
>>>>> + for (i = start; i < end; i++)
>>>>> + clear_bit(i, prefree_map);
>>>>> + dirty_i->nr_dirty[PRE] -= end - start;
>>>>> + }
>>>>> +
>>>>> + end = start_segno - 1;
>>>>> + }
>>>>> }
>>>>> mutex_unlock(&dirty_i->seglist_lock);
>>>>>
>>>>>
>>>> .
>>>>
>>
>> .
>>
>
On 2018/7/13 11:28, Yunlong Song wrote:
> round 1: section bitmap : 1 1 1 1 1, all valid, prefree_map: 0 0 0 0 0
> then rm data block NO.2, block NO.2 becomes invalid, prefree_map: 0 0 1 0 0
> write_checkpoint: section bitmap: 1 1 0 1 1, prefree_map: 0 0 0 0 0,
> prefree of NO.2 is cleared, and no discard issued
>
> round2: rm data block NO.0, NO.1, NO.3, NO.4
> all invalid, but prefree bit of NO.2 is set and cleared in round1, then
> prefree_map: 1 1 0 1 1
> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1, no
> valid blocks of this section, so discard issued
> but this time prefree bit of NO.3 and NO.4 is skipped...
due to start = start_segno + sbi->segs_per_sec;
>
> round3:
> write_checkpoint: section bitmap: 0 0 0 0 0, prefree_map: 0 0 0 1 1 - >
> 0 0 0 0 0, no valid blocks of this section, so discard issued
> this time prefree bit of NO.3 and NO.4 is cleared, but the discard of
> this section is sent again...
Could you add this into commit log?
Thanks,
>
> On 2018/7/13 11:13, Chao Yu wrote:
>> On 2018/7/12 23:09, Yunlong Song wrote:
>>> For the case when sbi->segs_per_sec > 1, take section:segment = 5 for
>>> example, if the section prefree_map is ...previous section | current
>>> section (1 1 0 1 1) | next section..., then the start = x, end = x + 1,
>>> after start = start_segno + sbi->segs_per_sec, start = x + 5, then it
>>> will skip x + 3 and x + 4, but their bitmap is still set, which will
>>> cause duplicated f2fs_issue_discard of this same section in the next
>>> write_checkpoint, so fix it.
>> I didn't get it, if # 2 segment is not prefree state, so it still has valid
>> blocks there, so we won't issue discard due to below condition, right?
>>
>> if (!IS_CURSEC(sbi, secno) &&
>> !get_valid_blocks(sbi, start, true))
>>
>> Thanks,
>>
>>> Signed-off-by: Yunlong Song <[email protected]>
>>> ---
>>> fs/f2fs/segment.c | 19 +++++++++++++++++--
>>> 1 file changed, 17 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 47b6595..fd38b61 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1684,8 +1684,23 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
>>> start = start_segno + sbi->segs_per_sec;
>>> if (start < end)
>>> goto next;
>>> - else
>>> - end = start - 1;
>>> + else {
>>> + start_segno = start;
>>> +
>>> + while (1) {
>>> + start = find_next_bit(prefree_map, start_segno,
>>> + end + 1);
>>> + if (start >= start_segno)
>>> + break;
>>> + end = find_next_zero_bit(prefree_map, start_segno,
>>> + start + 1);
>>> + for (i = start; i < end; i++)
>>> + clear_bit(i, prefree_map);
>>> + dirty_i->nr_dirty[PRE] -= end - start;
>>> + }
>>> +
>>> + end = start_segno - 1;
>>> + }
>>> }
>>> mutex_unlock(&dirty_i->seglist_lock);
>>>
>>>
>>
>> .
>>
>