2024-02-13 17:38:55

by Daeho Jeong

Subject: [PATCH v3 1/2] f2fs: separate f2fs_gc_range() to use GC for a range

From: Daeho Jeong <[email protected]>

Make f2fs_gc_range() an external function so that it can be used for GC over a range.

Signed-off-by: Daeho Jeong <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/gc.c | 49 ++++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 8a9cdc5a72c5..a089a938355b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1961,10 +1961,34 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
init_atgc_management(sbi);
}

+static int f2fs_gc_range(struct f2fs_sb_info *sbi,
+ unsigned int start_seg, unsigned int end_seg, bool dry_run)
+{
+ unsigned int segno;
+
+ for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+ };
+
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
+ put_gc_inode(&gc_list);
+
+ if (!dry_run && get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTSYS;
+ }
+
+ return 0;
+}
+
static int free_segment_range(struct f2fs_sb_info *sbi,
- unsigned int secs, bool gc_only)
+ unsigned int secs, bool dry_run)
{
- unsigned int segno, next_inuse, start, end;
+ unsigned int next_inuse, start, end;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
int gc_mode, gc_type;
int err = 0;
@@ -1990,25 +2014,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
f2fs_allocate_segment_for_resize(sbi, type, start, end);

/* do GC to move out valid blocks in the range */
- for (segno = start; segno <= end; segno += SEGS_PER_SEC(sbi)) {
- struct gc_inode_list gc_list = {
- .ilist = LIST_HEAD_INIT(gc_list.ilist),
- .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
- };
-
- do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
- put_gc_inode(&gc_list);
-
- if (!gc_only && get_valid_blocks(sbi, segno, true)) {
- err = -EAGAIN;
- goto out;
- }
- if (fatal_signal_pending(current)) {
- err = -ERESTARTSYS;
- goto out;
- }
- }
- if (gc_only)
+ err = f2fs_gc_range(sbi, start, end, dry_run);
+ if (err || dry_run)
goto out;

stat_inc_cp_call_count(sbi, TOTAL_CALL);
--
2.43.0.687.g38aa6559b0-goog



2024-02-13 17:39:08

by Daeho Jeong

Subject: [PATCH v3 2/2] f2fs: support file pinning for zoned devices

From: Daeho Jeong <[email protected]>

Support file pinning using the conventional storage area of zoned devices.

Signed-off-by: Daeho Jeong <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
v3: check the hole when migrating blocks for swap.
do not use the remainder of cold pin section.
v2: flush previous dirty pages before swapon.
do not re-check for the last extent of swap area.
merge this patch with swap file pinning support patch.
---
fs/f2fs/data.c | 58 ++++++++++++++++++++++++++-------------
fs/f2fs/f2fs.h | 17 +++++++++++-
fs/f2fs/file.c | 24 ++++++++++++-----
fs/f2fs/gc.c | 14 +++++++---
fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
fs/f2fs/segment.h | 10 +++++++
6 files changed, 154 insertions(+), 38 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 828c797cd47c..0c9aa3082fcf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
unsigned int blkofs;
unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
unsigned int secidx = start_blk / blk_per_sec;
- unsigned int end_sec = secidx + blkcnt / blk_per_sec;
+ unsigned int end_sec;
int ret = 0;

+ if (!blkcnt)
+ return 0;
+ end_sec = secidx + (blkcnt - 1) / blk_per_sec;
+
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);

set_inode_flag(inode, FI_ALIGNED_WRITE);
set_inode_flag(inode, FI_OPU_WRITE);

- for (; secidx < end_sec; secidx++) {
+ for (; secidx <= end_sec; secidx++) {
+ unsigned int blkofs_end = secidx == end_sec ?
+ (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
+
f2fs_down_write(&sbi->pin_sem);

- f2fs_lock_op(sbi);
- f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
- f2fs_unlock_op(sbi);
+ ret = f2fs_allocate_pinning_section(sbi);
+ if (ret) {
+ f2fs_up_write(&sbi->pin_sem);
+ break;
+ }

set_inode_flag(inode, FI_SKIP_WRITES);

- for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
+ for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
struct page *page;
unsigned int blkidx = secidx * blk_per_sec + blkofs;

@@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
nr_pblocks = map.m_len;

if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
- nr_pblocks & sec_blks_mask) {
+ nr_pblocks & sec_blks_mask ||
+ !f2fs_valid_pinned_area(sbi, pblock)) {
+ bool last_extent = false;
+
not_aligned++;

nr_pblocks = roundup(nr_pblocks, blks_per_sec);
if (cur_lblock + nr_pblocks > sis->max)
nr_pblocks -= blks_per_sec;

+ /* this extent is last one */
if (!nr_pblocks) {
- /* this extent is last one */
- nr_pblocks = map.m_len;
- f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
- goto next;
+ nr_pblocks = last_lblock - cur_lblock;
+ last_extent = true;
}

ret = f2fs_migrate_blocks(inode, cur_lblock,
nr_pblocks);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
goto out;
- goto retry;
+ }
+
+ if (!last_extent)
+ goto retry;
}
-next:
+
if (cur_lblock + nr_pblocks >= sis->max)
nr_pblocks = sis->max - cur_lblock;

@@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
struct inode *inode = file_inode(file);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;

if (!S_ISREG(inode->i_mode))
return -EINVAL;

- if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ if (f2fs_readonly(sbi->sb))
return -EROFS;

- if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
- f2fs_err(F2FS_I_SB(inode),
- "Swapfile not supported in LFS mode");
+ if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Swapfile not supported in LFS mode");
return -EINVAL;
}

@@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,

f2fs_precache_extents(inode);

+ ret = filemap_fdatawrite(inode->i_mapping);
+ if (ret < 0)
+ return ret;
+
ret = check_swap_activate(sis, file, span);
if (ret < 0)
return ret;

stat_inc_swapfile_inode(inode);
set_inode_flag(inode, FI_PIN_FILE);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
return ret;
}

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 40eb590ed646..351133a11518 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+ unsigned int start_seg, unsigned int end_seg,
+ bool dry_run, unsigned int dry_run_sections);
int f2fs_resize_fs(struct file *filp, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
@@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}

+static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ int devi = f2fs_target_device_index(sbi, blkaddr);
+
+ return !bdev_is_zoned(FDEV(devi).bdev);
+ }
+ return true;
+}
+
static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
{
return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2c13b340c8a0..21c3aa93a8db 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,

f2fs_down_write(&sbi->pin_sem);

- f2fs_lock_op(sbi);
- f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
- f2fs_unlock_op(sbi);
+ err = f2fs_allocate_pinning_section(sbi);
+ if (err) {
+ f2fs_up_write(&sbi->pin_sem);
+ goto out_err;
+ }

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
@@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
__u32 pin;
int ret = 0;

@@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (!S_ISREG(inode->i_mode))
return -EINVAL;

- if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ if (f2fs_readonly(sbi->sb))
return -EROFS;

ret = mnt_want_write_file(filp);
@@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
clear_inode_flag(inode, FI_PIN_FILE);
f2fs_i_gc_failures_write(inode, 0);
goto done;
+ } else if (f2fs_is_pinned_file(inode)) {
+ goto done;
}

- if (f2fs_should_update_outplace(inode, NULL)) {
+ if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
+ ret = -EFBIG;
+ goto out;
+ }
+
+ /* Let's allow file pinning on zoned device. */
+ if (!f2fs_sb_has_blkzoned(sbi) &&
+ f2fs_should_update_outplace(inode, NULL)) {
ret = -EINVAL;
goto out;
}
@@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
set_inode_flag(inode, FI_PIN_FILE);
ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a089a938355b..3ff126316d42 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
init_atgc_management(sbi);
}

-static int f2fs_gc_range(struct f2fs_sb_info *sbi,
- unsigned int start_seg, unsigned int end_seg, bool dry_run)
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+ unsigned int start_seg, unsigned int end_seg,
+ bool dry_run, unsigned int dry_run_sections)
{
unsigned int segno;
+ unsigned int gc_secs = dry_run_sections;

for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
struct gc_inode_list gc_list = {
@@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};

- do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC,
+ dry_run_sections == 0);
put_gc_inode(&gc_list);

if (!dry_run && get_valid_blocks(sbi, segno, true))
return -EAGAIN;
+ if (dry_run && dry_run_sections &&
+ !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
+ break;

if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
f2fs_allocate_segment_for_resize(sbi, type, start, end);

/* do GC to move out valid blocks in the range */
- err = f2fs_gc_range(sbi, start, end, dry_run);
+ err = f2fs_gc_range(sbi, start, end, dry_run, 0);
if (err || dry_run)
goto out;

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4e985750c938..0b72c8536ccf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
* This function should be returned with success, otherwise BUG
*/
static void get_new_segment(struct f2fs_sb_info *sbi,
- unsigned int *newseg, bool new_sec)
+ unsigned int *newseg, bool new_sec, bool pinning)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno, secno, zoneno;
@@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
goto got_it;
}
+
+ /*
+ * If we format f2fs on zoned storage, let's try to get pinned sections
+ * from beginning of the storage, which should be a conventional one.
+ */
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
+ hint = GET_SEC_FROM_SEG(sbi, segno);
+ }
+
find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
@@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
* Allocate a current working segment.
* This function always allocates a free segment in LFS manner.
*/
-static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
+static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno;
+ bool pinning = type == CURSEG_COLD_DATA_PINNED;

if (curseg->inited)
write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
+
segno = __get_next_segno(sbi, type);
- get_new_segment(sbi, &segno, new_sec);
+ get_new_segment(sbi, &segno, new_sec, pinning);
+ if (new_sec && pinning &&
+ !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
+ __set_free(sbi, segno);
+ return -EAGAIN;
+ }
+
curseg->next_segno = segno;
reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
get_random_u32_inclusive(1, sbi->max_fragment_chunk);
+ return 0;
}

static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
f2fs_up_read(&SM_I(sbi)->curseg_lock);
}

-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
bool new_sec, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
!curseg->next_blkoff &&
!get_valid_blocks(sbi, curseg->segno, new_sec) &&
!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
- return;
+ return 0;

old_segno = curseg->segno;
- new_curseg(sbi, type, true);
+ if (new_curseg(sbi, type, true))
+ return -EAGAIN;
stat_inc_seg_type(sbi, curseg);
locate_dirty_segment(sbi, old_segno);
+ return 0;
}

-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
+ int ret;
+
f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
- __allocate_new_segment(sbi, type, true, force);
+ ret = __allocate_new_segment(sbi, type, true, force);
up_write(&SIT_I(sbi)->sentry_lock);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+ return ret;
+}
+
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
+{
+ int err;
+ bool gc_required = true;
+
+retry:
+ f2fs_lock_op(sbi);
+ err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
+ f2fs_unlock_op(sbi);
+
+ if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
+ f2fs_down_write(&sbi->gc_lock);
+ f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
+ f2fs_up_write(&sbi->gc_lock);
+
+ gc_required = false;
+ goto retry;
+ }
+
+ return err;
}

void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
* new segment.
*/
if (segment_full) {
+ if (type == CURSEG_COLD_DATA_PINNED &&
+ !((curseg->segno + 1) % sbi->segs_per_sec))
+ goto skip_new_segment;
+
if (from_gc) {
get_atssr_segment(sbi, type, se->type,
AT_SSR, se->mtime);
@@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_seg_type(sbi, curseg);
}
}
+
+skip_new_segment:
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 60d93a16f2ac..953af072915f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
dcc->discard_wake = true;
wake_up_interruptible_all(&dcc->discard_wait_queue);
}
+
+static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
+{
+ int devi;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++)
+ if (bdev_is_zoned(FDEV(devi).bdev))
+ return GET_SEGNO(sbi, FDEV(devi).start_blk);
+ return 0;
+}
--
2.43.0.687.g38aa6559b0-goog


2024-02-20 08:37:46

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3 1/2] f2fs: separate f2fs_gc_range() to use GC for a range

On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Make f2fs_gc_range() an external function so that it can be used for GC over a range.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2024-02-21 18:10:49

by patchwork-bot+f2fs

Subject: Re: [f2fs-dev] [PATCH v3 1/2] f2fs: separate f2fs_gc_range() to use GC for a range

Hello:

This series was applied to jaegeuk/f2fs.git (dev)
by Jaegeuk Kim <[email protected]>:

On Tue, 13 Feb 2024 09:38:11 -0800 you wrote:
> From: Daeho Jeong <[email protected]>
>
> Make f2fs_gc_range() an external function so that it can be used for GC over a range.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
>
> [...]

Here is the summary with links:
- [f2fs-dev,v3,1/2] f2fs: separate f2fs_gc_range() to use GC for a range
https://git.kernel.org/jaegeuk/f2fs/c/50581e3bda8f
- [f2fs-dev,v3,2/2] f2fs: support file pinning for zoned devices
https://git.kernel.org/jaegeuk/f2fs/c/2fded30d8dce

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



2024-02-23 03:23:40

by Chao Yu

Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices

Hi Daeho,

On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Support file pinning using the conventional storage area of zoned devices.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> v3: check the hole when migrating blocks for swap.
> do not use the remainder of cold pin section.
> v2: flush previous dirty pages before swapon.
> do not re-check for the last extent of swap area.
> merge this patch with swap file pinning support patch.
> ---
> fs/f2fs/data.c | 58 ++++++++++++++++++++++++++-------------
> fs/f2fs/f2fs.h | 17 +++++++++++-
> fs/f2fs/file.c | 24 ++++++++++++-----
> fs/f2fs/gc.c | 14 +++++++---
> fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
> fs/f2fs/segment.h | 10 +++++++
> 6 files changed, 154 insertions(+), 38 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 828c797cd47c..0c9aa3082fcf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
> unsigned int blkofs;
> unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
> unsigned int secidx = start_blk / blk_per_sec;
> - unsigned int end_sec = secidx + blkcnt / blk_per_sec;
> + unsigned int end_sec;
> int ret = 0;
>
> + if (!blkcnt)
> + return 0;
> + end_sec = secidx + (blkcnt - 1) / blk_per_sec;
> +
> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> set_inode_flag(inode, FI_ALIGNED_WRITE);
> set_inode_flag(inode, FI_OPU_WRITE);
>
> - for (; secidx < end_sec; secidx++) {
> + for (; secidx <= end_sec; secidx++) {
> + unsigned int blkofs_end = secidx == end_sec ?
> + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;

(start_blk + blkcnt - 1) % blk_per_sec ?
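
A small standalone sketch (user-space C, hypothetical numbers chosen only to
illustrate the point) of where the two expressions diverge when start_blk is
not section-aligned:

#include <stdio.h>

int main(void)
{
	/* hypothetical geometry: 512 blocks per section, start_blk sits
	 * 100 blocks into section 1, i.e. not section-aligned */
	unsigned int blk_per_sec = 512;
	unsigned int start_blk = 612, blkcnt = 500;

	/* offset of the last block within its section, as in the patch */
	printf("patch:     %u\n", (blkcnt - 1) % blk_per_sec);             /* 499 */
	/* offset of the last block within its section, as suggested here */
	printf("suggested: %u\n", (start_blk + blkcnt - 1) % blk_per_sec); /* 87 */
	return 0;
}

When start_blk is section-aligned the two values coincide, which makes the
difference easy to miss.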

> +
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + ret = f2fs_allocate_pinning_section(sbi);
> + if (ret) {
> + f2fs_up_write(&sbi->pin_sem);
> + break;
> + }
>
> set_inode_flag(inode, FI_SKIP_WRITES);
>
> - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
> + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
> struct page *page;
> unsigned int blkidx = secidx * blk_per_sec + blkofs;
>
> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
> nr_pblocks = map.m_len;
>
> if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
> - nr_pblocks & sec_blks_mask) {
> + nr_pblocks & sec_blks_mask ||
> + !f2fs_valid_pinned_area(sbi, pblock)) {
> + bool last_extent = false;
> +
> not_aligned++;
>
> nr_pblocks = roundup(nr_pblocks, blks_per_sec);
> if (cur_lblock + nr_pblocks > sis->max)
> nr_pblocks -= blks_per_sec;
>
> + /* this extent is last one */
> if (!nr_pblocks) {
> - /* this extent is last one */
> - nr_pblocks = map.m_len;
> - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
> - goto next;
> + nr_pblocks = last_lblock - cur_lblock;
> + last_extent = true;
> }
>
> ret = f2fs_migrate_blocks(inode, cur_lblock,
> nr_pblocks);
> - if (ret)
> + if (ret) {
> + if (ret == -ENOENT)
> + ret = -EINVAL;
> goto out;
> - goto retry;
> + }
> +
> + if (!last_extent)
> + goto retry;
> }
> -next:
> +
> if (cur_lblock + nr_pblocks >= sis->max)
> nr_pblocks = sis->max - cur_lblock;
>
> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> sector_t *span)
> {
> struct inode *inode = file_inode(file);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> int ret;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> - if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
> - f2fs_err(F2FS_I_SB(inode),
> - "Swapfile not supported in LFS mode");
> + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
> + f2fs_err(sbi, "Swapfile not supported in LFS mode");
> return -EINVAL;
> }
>
> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>
> f2fs_precache_extents(inode);
>
> + ret = filemap_fdatawrite(inode->i_mapping);
> + if (ret < 0)
> + return ret;

What do you think of exchanging the positions of f2fs_precache_extents()
and filemap_fdatawrite(), so that f2fs_precache_extents() can load the
extent info after the physical addresses of all data are fixed?
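
A minimal sketch of that ordering in f2fs_swap_activate(), assuming no other
changes to the function:

	/* sketch only: write back dirty pages first so the block addresses
	 * are final, then cache the extents that check_swap_activate() walks */
	ret = filemap_fdatawrite(inode->i_mapping);
	if (ret < 0)
		return ret;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;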

Thanks,

> [...]

2024-02-23 03:43:53

by Chao Yu

Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices

On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Support file pinning using the conventional storage area of zoned devices.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> [...]
> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> * Allocate a current working segment.
> * This function always allocates a free segment in LFS manner.
> */
> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> unsigned int segno = curseg->segno;
> + bool pinning = type == CURSEG_COLD_DATA_PINNED;
>
> if (curseg->inited)
> write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
> +
> segno = __get_next_segno(sbi, type);

If type is CURSEG_COLD_DATA_PINNED, can we let __get_next_segno() return 0?
Then we could allocate free segments from the conventional zone first.
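
A rough sketch of that idea, shown as a hypothetical early return at the top
of __get_next_segno() (hypothetical; not from the posted patch):

	/* hypothetical: bias pinned-data allocation toward segment 0, which
	 * the zoned layout places in the conventional area, mirroring what
	 * get_new_segment() does for pinning in this patch */
	if (f2fs_sb_has_blkzoned(sbi) && type == CURSEG_COLD_DATA_PINNED)
		return 0;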

Thanks,

> [...]

2024-02-23 03:53:33

by Chao Yu

Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices

On 2024/2/14 1:38, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Support file pinning using the conventional storage area of zoned devices.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> v3: check the hole when migrating blocks for swap.
> do not use the remainder of cold pin section.
> v2: flush previous dirty pages before swapon.
> do not re-check for the last extent of swap area.
> merge this patch with swap file pinning support patch.
> ---
> fs/f2fs/data.c | 58 ++++++++++++++++++++++++++-------------
> fs/f2fs/f2fs.h | 17 +++++++++++-
> fs/f2fs/file.c | 24 ++++++++++++-----
> fs/f2fs/gc.c | 14 +++++++---
> fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------
> fs/f2fs/segment.h | 10 +++++++
> 6 files changed, 154 insertions(+), 38 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 828c797cd47c..0c9aa3082fcf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
> unsigned int blkofs;
> unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
> unsigned int secidx = start_blk / blk_per_sec;
> - unsigned int end_sec = secidx + blkcnt / blk_per_sec;
> + unsigned int end_sec;
> int ret = 0;
>
> + if (!blkcnt)
> + return 0;
> + end_sec = secidx + (blkcnt - 1) / blk_per_sec;
> +
> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> set_inode_flag(inode, FI_ALIGNED_WRITE);
> set_inode_flag(inode, FI_OPU_WRITE);
>
> - for (; secidx < end_sec; secidx++) {
> + for (; secidx <= end_sec; secidx++) {
> + unsigned int blkofs_end = secidx == end_sec ?
> + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
> +
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + ret = f2fs_allocate_pinning_section(sbi);
> + if (ret) {
> + f2fs_up_write(&sbi->pin_sem);
> + break;
> + }
>
> set_inode_flag(inode, FI_SKIP_WRITES);
>
> - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
> + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
> struct page *page;
> unsigned int blkidx = secidx * blk_per_sec + blkofs;
>
> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
> nr_pblocks = map.m_len;
>
> if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
> - nr_pblocks & sec_blks_mask) {
> + nr_pblocks & sec_blks_mask ||
> + !f2fs_valid_pinned_area(sbi, pblock)) {
> + bool last_extent = false;
> +
> not_aligned++;
>
> nr_pblocks = roundup(nr_pblocks, blks_per_sec);
> if (cur_lblock + nr_pblocks > sis->max)
> nr_pblocks -= blks_per_sec;
>
> + /* this extent is last one */
> if (!nr_pblocks) {
> - /* this extent is last one */
> - nr_pblocks = map.m_len;
> - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
> - goto next;
> + nr_pblocks = last_lblock - cur_lblock;
> + last_extent = true;
> }
>
> ret = f2fs_migrate_blocks(inode, cur_lblock,
> nr_pblocks);
> - if (ret)
> + if (ret) {
> + if (ret == -ENOENT)
> + ret = -EINVAL;
> goto out;
> - goto retry;
> + }
> +
> + if (!last_extent)
> + goto retry;
> }
> -next:
> +
> if (cur_lblock + nr_pblocks >= sis->max)
> nr_pblocks = sis->max - cur_lblock;
>
> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
> sector_t *span)
> {
> struct inode *inode = file_inode(file);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> int ret;
>
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> - if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
> - f2fs_err(F2FS_I_SB(inode),
> - "Swapfile not supported in LFS mode");
> + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
> + f2fs_err(sbi, "Swapfile not supported in LFS mode");
> return -EINVAL;
> }
>
> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
>
> f2fs_precache_extents(inode);
>
> + ret = filemap_fdatawrite(inode->i_mapping);
> + if (ret < 0)
> + return ret;
> +
> ret = check_swap_activate(sis, file, span);
> if (ret < 0)
> return ret;
>
> stat_inc_swapfile_inode(inode);
> set_inode_flag(inode, FI_PIN_FILE);
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> return ret;
> }
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 40eb590ed646..351133a11518 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
> unsigned int *newseg, bool new_sec, int dir);
> void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> unsigned int start, unsigned int end);
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
> int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
> bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
> @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
> block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
> void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections);
> int f2fs_resize_fs(struct file *filp, __u64 block_count);
> int __init f2fs_create_garbage_collection_cache(void);
> void f2fs_destroy_garbage_collection_cache(void);
> @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
> return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
> }
>
> +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
> + block_t blkaddr)
> +{
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + int devi = f2fs_target_device_index(sbi, blkaddr);
> +
> + return !bdev_is_zoned(FDEV(devi).bdev);
> + }
> + return true;
> +}
> +
> static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
> {
> return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 2c13b340c8a0..21c3aa93a8db 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
>
> f2fs_down_write(&sbi->pin_sem);
>
> - f2fs_lock_op(sbi);
> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> - f2fs_unlock_op(sbi);
> + err = f2fs_allocate_pinning_section(sbi);
> + if (err) {
> + f2fs_up_write(&sbi->pin_sem);
> + goto out_err;
> + }
>
> map.m_seg_type = CURSEG_COLD_DATA_PINNED;
> err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
> @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
> static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> {
> struct inode *inode = file_inode(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> __u32 pin;
> int ret = 0;
>
> @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> if (!S_ISREG(inode->i_mode))
> return -EINVAL;
>
> - if (f2fs_readonly(F2FS_I_SB(inode)->sb))
> + if (f2fs_readonly(sbi->sb))
> return -EROFS;
>
> ret = mnt_want_write_file(filp);
> @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> clear_inode_flag(inode, FI_PIN_FILE);
> f2fs_i_gc_failures_write(inode, 0);
> goto done;
> + } else if (f2fs_is_pinned_file(inode)) {
> + goto done;
> }
>
> - if (f2fs_should_update_outplace(inode, NULL)) {
> + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
> + ret = -EFBIG;
> + goto out;
> + }
> +
> + /* Let's allow file pinning on zoned device. */
> + if (!f2fs_sb_has_blkzoned(sbi) &&
> + f2fs_should_update_outplace(inode, NULL)) {
> ret = -EINVAL;
> goto out;
> }
> @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
> set_inode_flag(inode, FI_PIN_FILE);
> ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
> done:
> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> + f2fs_update_time(sbi, REQ_TIME);
> out:
> inode_unlock(inode);
> mnt_drop_write_file(filp);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index a089a938355b..3ff126316d42 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
> init_atgc_management(sbi);
> }
>
> -static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> - unsigned int start_seg, unsigned int end_seg, bool dry_run)
> +int f2fs_gc_range(struct f2fs_sb_info *sbi,
> + unsigned int start_seg, unsigned int end_seg,
> + bool dry_run, unsigned int dry_run_sections)
> {
> unsigned int segno;
> + unsigned int gc_secs = dry_run_sections;
>
> for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
> struct gc_inode_list gc_list = {
> @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
> };
>
> - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
> + do_garbage_collect(sbi, segno, &gc_list, FG_GC,
> + dry_run_sections == 0);
> put_gc_inode(&gc_list);
>
> if (!dry_run && get_valid_blocks(sbi, segno, true))
> return -EAGAIN;
> + if (dry_run && dry_run_sections &&
> + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
> + break;
>
> if (fatal_signal_pending(current))
> return -ERESTARTSYS;
> @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
> f2fs_allocate_segment_for_resize(sbi, type, start, end);
>
> /* do GC to move out valid blocks in the range */
> - err = f2fs_gc_range(sbi, start, end, dry_run);
> + err = f2fs_gc_range(sbi, start, end, dry_run, 0);
> if (err || dry_run)
> goto out;
>
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4e985750c938..0b72c8536ccf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
> * This function should be returned with success, otherwise BUG
> */
> static void get_new_segment(struct f2fs_sb_info *sbi,
> - unsigned int *newseg, bool new_sec)
> + unsigned int *newseg, bool new_sec, bool pinning)
> {
> struct free_segmap_info *free_i = FREE_I(sbi);
> unsigned int segno, secno, zoneno;
> @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
> if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
> goto got_it;
> }
> +
> + /*
> + * If we format f2fs on zoned storage, let's try to get pinned sections
> + * from beginning of the storage, which should be a conventional one.
> + */
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
> + hint = GET_SEC_FROM_SEG(sbi, segno);
> + }
> +
> find_other_zone:
> secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
> if (secno >= MAIN_SECS(sbi)) {
> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
> * Allocate a current working segment.
> * This function always allocates a free segment in LFS manner.
> */
> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> unsigned int segno = curseg->segno;
> + bool pinning = type == CURSEG_COLD_DATA_PINNED;
>
> if (curseg->inited)
> write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
> +
> segno = __get_next_segno(sbi, type);
> - get_new_segment(sbi, &segno, new_sec);
> + get_new_segment(sbi, &segno, new_sec, pinning);
> + if (new_sec && pinning &&
> + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
> + __set_free(sbi, segno);
> + return -EAGAIN;
> + }
> +
> curseg->next_segno = segno;
> reset_curseg(sbi, type, 1);
> curseg->alloc_type = LFS;
> if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
> curseg->fragment_remained_chunk =
> get_random_u32_inclusive(1, sbi->max_fragment_chunk);
> + return 0;
> }
>
> static int __next_free_blkoff(struct f2fs_sb_info *sbi,
> @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> }
>
> -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> bool new_sec, bool force)
> {
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> !curseg->next_blkoff &&
> !get_valid_blocks(sbi, curseg->segno, new_sec) &&
> !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
> - return;
> + return 0;
>
> old_segno = curseg->segno;
> - new_curseg(sbi, type, true);
> + if (new_curseg(sbi, type, true))
> + return -EAGAIN;
> stat_inc_seg_type(sbi, curseg);
> locate_dirty_segment(sbi, old_segno);
> + return 0;
> }
>
> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
> {
> + int ret;
> +
> f2fs_down_read(&SM_I(sbi)->curseg_lock);
> down_write(&SIT_I(sbi)->sentry_lock);
> - __allocate_new_segment(sbi, type, true, force);
> + ret = __allocate_new_segment(sbi, type, true, force);
> up_write(&SIT_I(sbi)->sentry_lock);
> f2fs_up_read(&SM_I(sbi)->curseg_lock);
> +
> + return ret;
> +}
> +
> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
> +{
> + int err;
> + bool gc_required = true;
> +
> +retry:
> + f2fs_lock_op(sbi);
> + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
> + f2fs_unlock_op(sbi);
> +
> + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
> + f2fs_down_write(&sbi->gc_lock);
> + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
> + f2fs_up_write(&sbi->gc_lock);
> +
> + gc_required = false;
> + goto retry;
> + }
> +
> + return err;
> }
>
> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
> @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> * new segment.
> */
> if (segment_full) {
> + if (type == CURSEG_COLD_DATA_PINNED &&
> + !((curseg->segno + 1) % sbi->segs_per_sec))
> + goto skip_new_segment;

Before we skip allocating a new segment for the pinned log, how about
tagging curseg as an uninitialized one via curseg->inited = false and
curseg->segno = NULL_SEGNO? That way we keep __f2fs_save_inmem_curseg()
from touching this log, and we avoid showing an incorrect segno for the
pinned log in /sys/kernel/debug/f2fs/status.
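
As a rough, untested sketch of the idea (just to illustrate; whether the
in-memory summary block needs any extra handling at this point is not
something I have checked):

	if (type == CURSEG_COLD_DATA_PINNED &&
			!((curseg->segno + 1) % sbi->segs_per_sec)) {
		/* park the pinned log with no backing segment */
		curseg->inited = false;
		curseg->segno = NULL_SEGNO;
		goto skip_new_segment;
	}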

Thanks,

> +
> if (from_gc) {
> get_atssr_segment(sbi, type, se->type,
> AT_SSR, se->mtime);
> @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> stat_inc_seg_type(sbi, curseg);
> }
> }
> +
> +skip_new_segment:
> /*
> * segment dirty status should be updated after segment allocation,
> * so we just need to update status only one time after previous
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index 60d93a16f2ac..953af072915f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
> dcc->discard_wake = true;
> wake_up_interruptible_all(&dcc->discard_wait_queue);
> }
> +
> +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> +{
> + int devi;
> +
> + for (devi = 0; devi < sbi->s_ndevs; devi++)
> + if (bdev_is_zoned(FDEV(devi).bdev))
> + return GET_SEGNO(sbi, FDEV(devi).start_blk);
> + return 0;
> +}

2024-02-23 17:38:14

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices

Hi Chao,

I've tested the patch and queued it in -dev, so can you take a look at it and
propose any changes on top of it? Then we can discuss them further.

On 02/23, Chao Yu wrote:
> On 2024/2/14 1:38, Daeho Jeong wrote:
> > @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >  	if (segment_full) {
> > +		if (type == CURSEG_COLD_DATA_PINNED &&
> > +			!((curseg->segno + 1) % sbi->segs_per_sec))
> > +			goto skip_new_segment;
>
> Before we skip allocating a new segment for the pinned log, how about
> tagging curseg as an uninitialized one via curseg->inited = false and
> curseg->segno = NULL_SEGNO? That way we keep __f2fs_save_inmem_curseg()
> from touching this log, and we avoid showing an incorrect segno for the
> pinned log in /sys/kernel/debug/f2fs/status.
>
> Thanks,

2024-02-25 06:31:04

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3 2/2] f2fs: support file pinning for zoned devices

On 2024/2/24 1:31, Jaegeuk Kim wrote:
> Hi Chao,
>
> I've tested the patch and queued it in -dev, so can you take a look at it and
> propose any changes on top of it? Then we can discuss them further.

Okay, let me send patches for comments.

Thanks,
