2024-04-07 13:59:05

by Chao Yu

[permalink] [raw]
Subject: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

max open zone may be larger than log header number of f2fs, for
such case, it doesn't need to wait last IO in previous zone, let's
introduce available_open_zone semaphore, and reduce it once we
submit first write IO in a zone, and increase it after completion
of last IO in the zone.

Cc: Daeho Jeong <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
v3:
- avoid race condition in between __submit_merged_bio()
and __allocate_new_segment().
fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
fs/f2fs/f2fs.h | 34 ++++++++++++---
fs/f2fs/iostat.c | 7 ++++
fs/f2fs/iostat.h | 2 +
fs/f2fs/segment.c | 43 ++++++++++++++++---
fs/f2fs/segment.h | 12 +++++-
fs/f2fs/super.c | 2 +
7 files changed, 156 insertions(+), 49 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0d88649c60a5..18a4ac0a06bc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
#ifdef CONFIG_BLK_DEV_ZONED
static void f2fs_zone_write_end_io(struct bio *bio)
{
- struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
+ struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);

- bio->bi_private = io->bi_private;
- complete(&io->zone_wait);
f2fs_write_end_io(bio);
+ up(&sbi->available_open_zones);
}
#endif

@@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;

+#ifdef CONFIG_BLK_DEV_ZONED
+ if (io->open_zone) {
+ /*
+ * if there is no open zone, it will wait for last IO in
+ * previous zone before submitting new IO.
+ */
+ down(&fio->sbi->available_open_zones);
+ io->open_zone = false;
+ io->zone_openned = true;
+ }
+
+ if (io->close_zone) {
+ io->bio->bi_end_io = f2fs_zone_write_end_io;
+ io->zone_openned = false;
+ io->close_zone = false;
+ }
+#endif
+
if (is_read_io(fio->op)) {
trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
@@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
- init_completion(&sbi->write_io[i][j].zone_wait);
- sbi->write_io[i][j].zone_pending_bio = NULL;
- sbi->write_io[i][j].bi_private = NULL;
+ sbi->write_io[i][j].open_zone = false;
+ sbi->write_io[i][j].zone_openned = false;
+ sbi->write_io[i][j].close_zone = false;
#endif
}
}
@@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
f2fs_up_write(&io->io_rwsem);
}

+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ struct f2fs_bio_info *io;
+
+ if (!f2fs_sb_has_blkzoned(sbi))
+ return;
+
+ io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
+
+ f2fs_down_write(&io->io_rwsem);
+ if (io->zone_openned) {
+ if (io->bio) {
+ io->close_zone = true;
+ __submit_merged_bio(io);
+ } else if (io->zone_openned) {
+ up(&sbi->available_open_zones);
+ io->zone_openned = false;
+ }
+ }
+ f2fs_up_write(&io->io_rwsem);
+#endif
+
+}
+
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, struct page *page,
nid_t ino, enum page_type type, bool force)
@@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
}

#ifdef CONFIG_BLK_DEV_ZONED
-static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
+static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
+ block_t blkaddr, bool start)
{
- int devi = 0;
+ if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
+ return false;
+
+ if (start)
+ return (blkaddr % sbi->blocks_per_blkz) == 0;
+ return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);

- if (f2fs_is_multi_device(sbi)) {
- devi = f2fs_target_device_index(sbi, blkaddr);
- if (blkaddr < FDEV(devi).start_blk ||
- blkaddr > FDEV(devi).end_blk) {
- f2fs_err(sbi, "Invalid block %x", blkaddr);
- return false;
- }
- blkaddr -= FDEV(devi).start_blk;
- }
- return bdev_is_zoned(FDEV(devi).bdev) &&
- f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
- (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
}
#endif

@@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
enum count_type type;
+#ifdef CONFIG_BLK_DEV_ZONED
+ bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
+#endif

f2fs_bug_on(sbi, is_read_io(fio->op));

f2fs_down_write(&io->io_rwsem);
next:
-#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
- wait_for_completion_io(&io->zone_wait);
- bio_put(io->zone_pending_bio);
- io->zone_pending_bio = NULL;
- io->bi_private = NULL;
- }
-#endif
-
if (fio->in_list) {
spin_lock(&io->io_lock);
if (list_empty(&io->io_list)) {
@@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
type = WB_DATA_TYPE(bio_page, fio->compressed_page);
inc_page_count(sbi, type);

+#ifdef CONFIG_BLK_DEV_ZONED
+ if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
+ io->open_zone = true;
+#endif
+
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
fio->new_blkaddr) ||
@@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
io->last_block_in_bio = fio->new_blkaddr;

trace_f2fs_submit_page_write(fio->page, fio);
+
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
- is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
- bio_get(io->bio);
- reinit_completion(&io->zone_wait);
- io->bi_private = io->bio->bi_private;
- io->bio->bi_private = io;
- io->bio->bi_end_io = f2fs_zone_write_end_io;
- io->zone_pending_bio = io->bio;
+ if (blkzoned &&
+ is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
+ io->close_zone = true;
__submit_merged_bio(io);
}
#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 694f8a52cb84..8a26530cf4fb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
-#ifdef CONFIG_BLK_DEV_ZONED
- struct completion zone_wait; /* condition value for the previous open zone to close */
- struct bio *zone_pending_bio; /* pending bio for the previous zone */
- void *bi_private; /* previous bi_private for pending bio */
-#endif
struct f2fs_rwsem io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
struct list_head bio_list; /* bio entry list head */
struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
+#ifdef CONFIG_BLK_DEV_ZONED
+ bool open_zone; /* open a zone */
+ bool zone_openned; /* zone has been openned */
+ bool close_zone; /* close a zone */
+#endif
};

#define FDEV(i) (sbi->devs[i])
@@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
unsigned int max_open_zones; /* max open zone resources of the zoned device */
+ struct semaphore available_open_zones; /* available open zones */
#endif

/* for node-related operations */
@@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
enum page_type type);
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
+void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, struct page *page,
@@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,

return test_bit(zno, FDEV(devi).blkz_seq);
}
+
+static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ int devi = 0;
+
+ if (f2fs_is_multi_device(sbi)) {
+ devi = f2fs_target_device_index(sbi, blkaddr);
+ if (blkaddr < FDEV(devi).start_blk ||
+ blkaddr > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkaddr);
+ return false;
+ }
+ blkaddr -= FDEV(devi).start_blk;
+ }
+
+ if (!bdev_is_zoned(FDEV(devi).bdev) ||
+ !f2fs_blkz_is_seq(sbi, devi, blkaddr))
+ return false;
+
+ return true;
+}
#endif

static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index f8703038e1d8..a8626e297876 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
}

+void *iostat_get_bio_private(struct bio *bio)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+ return iostat_ctx->sbi;
+}
+
void iostat_update_and_unbind_ctx(struct bio *bio)
{
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index eb99d05cf272..9006c3d41590 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
return iostat_ctx->post_read_ctx;
}

+extern void *iostat_get_bio_private(struct bio *bio);
extern void iostat_update_and_unbind_ctx(struct bio *bio);
extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx);
@@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
#else
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
enum iostat_type type, unsigned long long io_bytes) {}
+static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx) {}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4fd76e867e0a..4a3cf2888faf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
return err;
stat_inc_seg_type(sbi, curseg);
locate_dirty_segment(sbi, old_segno);
+
+ f2fs_blkzoned_submit_merged_write(sbi, type);
+
return 0;
}

@@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
f2fs_bug_on(fio->sbi, true);
}

- if (IS_HOT(type))
- fio->temp = HOT;
- else if (IS_WARM(type))
- fio->temp = WARM;
- else
- fio->temp = COLD;
+ fio->temp = type_to_temp(type);
return type;
}

@@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
return -EINVAL;
}

+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ for (type = 0; type < NR_PERSISTENT_LOG; type++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ enum page_type ptype;
+ enum temp_type temp;
+
+ if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
+ continue;
+
+ if (!f2fs_blkaddr_in_seqzone(sbi,
+ START_BLOCK(sbi, curseg->segno)))
+ continue;
+
+ ptype = PAGE_TYPE(type);
+ temp = type_to_temp(type);
+ down(&sbi->available_open_zones);
+ sbi->write_io[ptype][temp].zone_openned = true;
+ }
+ }
+#endif
return 0;
}

@@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_CURSEG_TYPE; i++) {
kfree(array[i].sum_blk);
kfree(array[i].journal);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ enum page_type ptype = PAGE_TYPE(i);
+ enum temp_type temp = type_to_temp(i);
+
+ if (sbi->write_io[ptype][temp].zone_openned) {
+ up(&sbi->available_open_zones);
+ sbi->write_io[ptype][temp].zone_openned = false;
+ }
+ }
+#endif
}
kfree(array);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e1c0f418aa11..855978ca869f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -24,7 +24,8 @@

#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
-#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
+#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
+#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))

static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
unsigned short seg_type)
@@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
return GET_SEGNO(sbi, FDEV(devi).start_blk);
return 0;
}
+
+static inline enum temp_type type_to_temp(int type)
+{
+ if (IS_HOT(type))
+ return HOT;
+ else if (IS_WARM(type))
+ return WARM;
+ return COLD;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fdf358c7f808..954baa6c100d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
return -EINVAL;
}
+
+ sema_init(&sbi->available_open_zones, sbi->max_open_zones);
}

zone_sectors = bdev_zone_sectors(bdev);
--
2.40.1



2024-04-10 18:38:42

by Daeho Jeong

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On Sun, Apr 7, 2024 at 6:58 AM Chao Yu <[email protected]> wrote:
>
> max open zone may be larger than log header number of f2fs, for
> such case, it doesn't need to wait last IO in previous zone, let's
> introduce available_open_zone semaphore, and reduce it once we
> submit first write IO in a zone, and increase it after completion
> of last IO in the zone.
>
> Cc: Daeho Jeong <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - avoid race condition in between __submit_merged_bio()
> and __allocate_new_segment().
> fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
> fs/f2fs/f2fs.h | 34 ++++++++++++---
> fs/f2fs/iostat.c | 7 ++++
> fs/f2fs/iostat.h | 2 +
> fs/f2fs/segment.c | 43 ++++++++++++++++---
> fs/f2fs/segment.h | 12 +++++-
> fs/f2fs/super.c | 2 +
> 7 files changed, 156 insertions(+), 49 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 0d88649c60a5..18a4ac0a06bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> #ifdef CONFIG_BLK_DEV_ZONED
> static void f2fs_zone_write_end_io(struct bio *bio)
> {
> - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>
> - bio->bi_private = io->bi_private;
> - complete(&io->zone_wait);
> f2fs_write_end_io(bio);
> + up(&sbi->available_open_zones);
> }
> #endif
>
> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> if (!io->bio)
> return;
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (io->open_zone) {
> + /*
> + * if there is no open zone, it will wait for last IO in
> + * previous zone before submitting new IO.
> + */
> + down(&fio->sbi->available_open_zones);
> + io->open_zone = false;
> + io->zone_openned = true;
> + }
> +
> + if (io->close_zone) {
> + io->bio->bi_end_io = f2fs_zone_write_end_io;
> + io->zone_openned = false;
> + io->close_zone = false;
> + }
> +#endif
> +
> if (is_read_io(fio->op)) {
> trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> #ifdef CONFIG_BLK_DEV_ZONED
> - init_completion(&sbi->write_io[i][j].zone_wait);
> - sbi->write_io[i][j].zone_pending_bio = NULL;
> - sbi->write_io[i][j].bi_private = NULL;
> + sbi->write_io[i][j].open_zone = false;
> + sbi->write_io[i][j].zone_openned = false;
> + sbi->write_io[i][j].close_zone = false;
> #endif
> }
> }
> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> f2fs_up_write(&io->io_rwsem);
> }
>
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> +{
> +#ifdef CONFIG_BLK_DEV_ZONED
> + struct f2fs_bio_info *io;
> +
> + if (!f2fs_sb_has_blkzoned(sbi))
> + return;
> +
> + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> +
> + f2fs_down_write(&io->io_rwsem);
> + if (io->zone_openned) {
> + if (io->bio) {
> + io->close_zone = true;
> + __submit_merged_bio(io);
> + } else if (io->zone_openned) {

We already checked if this value is true above.

Thanks,

> + up(&sbi->available_open_zones);
> + io->zone_openned = false;
> + }
> + }
> + f2fs_up_write(&io->io_rwsem);
> +#endif
> +
> +}
> +
> static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> struct inode *inode, struct page *page,
> nid_t ino, enum page_type type, bool force)
> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> }
>
> #ifdef CONFIG_BLK_DEV_ZONED
> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> + block_t blkaddr, bool start)
> {
> - int devi = 0;
> + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> + return false;
> +
> + if (start)
> + return (blkaddr % sbi->blocks_per_blkz) == 0;
> + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>
> - if (f2fs_is_multi_device(sbi)) {
> - devi = f2fs_target_device_index(sbi, blkaddr);
> - if (blkaddr < FDEV(devi).start_blk ||
> - blkaddr > FDEV(devi).end_blk) {
> - f2fs_err(sbi, "Invalid block %x", blkaddr);
> - return false;
> - }
> - blkaddr -= FDEV(devi).start_blk;
> - }
> - return bdev_is_zoned(FDEV(devi).bdev) &&
> - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> }
> #endif
>
> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> struct page *bio_page;
> enum count_type type;
> +#ifdef CONFIG_BLK_DEV_ZONED
> + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> +#endif
>
> f2fs_bug_on(sbi, is_read_io(fio->op));
>
> f2fs_down_write(&io->io_rwsem);
> next:
> -#ifdef CONFIG_BLK_DEV_ZONED
> - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> - wait_for_completion_io(&io->zone_wait);
> - bio_put(io->zone_pending_bio);
> - io->zone_pending_bio = NULL;
> - io->bi_private = NULL;
> - }
> -#endif
> -
> if (fio->in_list) {
> spin_lock(&io->io_lock);
> if (list_empty(&io->io_list)) {
> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> inc_page_count(sbi, type);
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> + io->open_zone = true;
> +#endif
> +
> if (io->bio &&
> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> fio->new_blkaddr) ||
> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> io->last_block_in_bio = fio->new_blkaddr;
>
> trace_f2fs_submit_page_write(fio->page, fio);
> +
> #ifdef CONFIG_BLK_DEV_ZONED
> - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> - bio_get(io->bio);
> - reinit_completion(&io->zone_wait);
> - io->bi_private = io->bio->bi_private;
> - io->bio->bi_private = io;
> - io->bio->bi_end_io = f2fs_zone_write_end_io;
> - io->zone_pending_bio = io->bio;
> + if (blkzoned &&
> + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> + io->close_zone = true;
> __submit_merged_bio(io);
> }
> #endif
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 694f8a52cb84..8a26530cf4fb 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> struct bio *bio; /* bios to merge */
> sector_t last_block_in_bio; /* last block number */
> struct f2fs_io_info fio; /* store buffered io info. */
> -#ifdef CONFIG_BLK_DEV_ZONED
> - struct completion zone_wait; /* condition value for the previous open zone to close */
> - struct bio *zone_pending_bio; /* pending bio for the previous zone */
> - void *bi_private; /* previous bi_private for pending bio */
> -#endif
> struct f2fs_rwsem io_rwsem; /* blocking op for bio */
> spinlock_t io_lock; /* serialize DATA/NODE IOs */
> struct list_head io_list; /* track fios */
> struct list_head bio_list; /* bio entry list head */
> struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
> +#ifdef CONFIG_BLK_DEV_ZONED
> + bool open_zone; /* open a zone */
> + bool zone_openned; /* zone has been openned */
> + bool close_zone; /* close a zone */
> +#endif
> };
>
> #define FDEV(i) (sbi->devs[i])
> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> #ifdef CONFIG_BLK_DEV_ZONED
> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
> unsigned int max_open_zones; /* max open zone resources of the zoned device */
> + struct semaphore available_open_zones; /* available open zones */
> #endif
>
> /* for node-related operations */
> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> enum page_type type);
> int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> struct inode *inode, struct page *page,
> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>
> return test_bit(zno, FDEV(devi).blkz_seq);
> }
> +
> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> + block_t blkaddr)
> +{
> + int devi = 0;
> +
> + if (f2fs_is_multi_device(sbi)) {
> + devi = f2fs_target_device_index(sbi, blkaddr);
> + if (blkaddr < FDEV(devi).start_blk ||
> + blkaddr > FDEV(devi).end_blk) {
> + f2fs_err(sbi, "Invalid block %x", blkaddr);
> + return false;
> + }
> + blkaddr -= FDEV(devi).start_blk;
> + }
> +
> + if (!bdev_is_zoned(FDEV(devi).bdev) ||
> + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> + return false;
> +
> + return true;
> +}
> #endif
>
> static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index f8703038e1d8..a8626e297876 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> }
>
> +void *iostat_get_bio_private(struct bio *bio)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> + return iostat_ctx->sbi;
> +}
> +
> void iostat_update_and_unbind_ctx(struct bio *bio)
> {
> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> index eb99d05cf272..9006c3d41590 100644
> --- a/fs/f2fs/iostat.h
> +++ b/fs/f2fs/iostat.h
> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> return iostat_ctx->post_read_ctx;
> }
>
> +extern void *iostat_get_bio_private(struct bio *bio);
> extern void iostat_update_and_unbind_ctx(struct bio *bio);
> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> struct bio *bio, struct bio_post_read_ctx *ctx);
> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> #else
> static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> enum iostat_type type, unsigned long long io_bytes) {}
> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> struct bio *bio, struct bio_post_read_ctx *ctx) {}
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4fd76e867e0a..4a3cf2888faf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> return err;
> stat_inc_seg_type(sbi, curseg);
> locate_dirty_segment(sbi, old_segno);
> +
> + f2fs_blkzoned_submit_merged_write(sbi, type);
> +
> return 0;
> }
>
> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> f2fs_bug_on(fio->sbi, true);
> }
>
> - if (IS_HOT(type))
> - fio->temp = HOT;
> - else if (IS_WARM(type))
> - fio->temp = WARM;
> - else
> - fio->temp = COLD;
> + fio->temp = type_to_temp(type);
> return type;
> }
>
> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> return -EINVAL;
> }
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> + struct curseg_info *curseg = CURSEG_I(sbi, type);
> + enum page_type ptype;
> + enum temp_type temp;
> +
> + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> + continue;
> +
> + if (!f2fs_blkaddr_in_seqzone(sbi,
> + START_BLOCK(sbi, curseg->segno)))
> + continue;
> +
> + ptype = PAGE_TYPE(type);
> + temp = type_to_temp(type);
> + down(&sbi->available_open_zones);
> + sbi->write_io[ptype][temp].zone_openned = true;
> + }
> + }
> +#endif
> return 0;
> }
>
> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_CURSEG_TYPE; i++) {
> kfree(array[i].sum_blk);
> kfree(array[i].journal);
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + enum page_type ptype = PAGE_TYPE(i);
> + enum temp_type temp = type_to_temp(i);
> +
> + if (sbi->write_io[ptype][temp].zone_openned) {
> + up(&sbi->available_open_zones);
> + sbi->write_io[ptype][temp].zone_openned = false;
> + }
> + }
> +#endif
> }
> kfree(array);
> }
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..855978ca869f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -24,7 +24,8 @@
>
> #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
> #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
> +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
> +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
>
> static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> unsigned short seg_type)
> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> return GET_SEGNO(sbi, FDEV(devi).start_blk);
> return 0;
> }
> +
> +static inline enum temp_type type_to_temp(int type)
> +{
> + if (IS_HOT(type))
> + return HOT;
> + else if (IS_WARM(type))
> + return WARM;
> + return COLD;
> +}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fdf358c7f808..954baa6c100d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> return -EINVAL;
> }
> +
> + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> }
>
> zone_sectors = bdev_zone_sectors(bdev);
> --
> 2.40.1
>

2024-04-12 21:13:55

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 04/07, Chao Yu wrote:
> max open zone may be larger than log header number of f2fs, for
> such case, it doesn't need to wait last IO in previous zone, let's
> introduce available_open_zone semaphore, and reduce it once we
> submit first write IO in a zone, and increase it after completion
> of last IO in the zone.
>
> Cc: Daeho Jeong <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> v3:
> - avoid race condition in between __submit_merged_bio()
> and __allocate_new_segment().
> fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
> fs/f2fs/f2fs.h | 34 ++++++++++++---
> fs/f2fs/iostat.c | 7 ++++
> fs/f2fs/iostat.h | 2 +
> fs/f2fs/segment.c | 43 ++++++++++++++++---
> fs/f2fs/segment.h | 12 +++++-
> fs/f2fs/super.c | 2 +
> 7 files changed, 156 insertions(+), 49 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 0d88649c60a5..18a4ac0a06bc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> #ifdef CONFIG_BLK_DEV_ZONED
> static void f2fs_zone_write_end_io(struct bio *bio)
> {
> - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>
> - bio->bi_private = io->bi_private;
> - complete(&io->zone_wait);
> f2fs_write_end_io(bio);
> + up(&sbi->available_open_zones);
> }
> #endif
>
> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> if (!io->bio)
> return;
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (io->open_zone) {
> + /*
> + * if there is no open zone, it will wait for last IO in
> + * previous zone before submitting new IO.
> + */
> + down(&fio->sbi->available_open_zones);
> + io->open_zone = false;
> + io->zone_openned = true;
> + }
> +
> + if (io->close_zone) {
> + io->bio->bi_end_io = f2fs_zone_write_end_io;
> + io->zone_openned = false;
> + io->close_zone = false;
> + }
> +#endif
> +
> if (is_read_io(fio->op)) {
> trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> #ifdef CONFIG_BLK_DEV_ZONED
> - init_completion(&sbi->write_io[i][j].zone_wait);
> - sbi->write_io[i][j].zone_pending_bio = NULL;
> - sbi->write_io[i][j].bi_private = NULL;
> + sbi->write_io[i][j].open_zone = false;
> + sbi->write_io[i][j].zone_openned = false;
> + sbi->write_io[i][j].close_zone = false;
> #endif
> }
> }
> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> f2fs_up_write(&io->io_rwsem);
> }
>
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> +{
> +#ifdef CONFIG_BLK_DEV_ZONED
> + struct f2fs_bio_info *io;
> +
> + if (!f2fs_sb_has_blkzoned(sbi))
> + return;
> +
> + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> +
> + f2fs_down_write(&io->io_rwsem);
> + if (io->zone_openned) {
> + if (io->bio) {
> + io->close_zone = true;
> + __submit_merged_bio(io);
> + } else if (io->zone_openned) {
> + up(&sbi->available_open_zones);
> + io->zone_openned = false;
> + }
> + }
> + f2fs_up_write(&io->io_rwsem);
> +#endif
> +
> +}
> +
> static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> struct inode *inode, struct page *page,
> nid_t ino, enum page_type type, bool force)
> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> }
>
> #ifdef CONFIG_BLK_DEV_ZONED
> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> + block_t blkaddr, bool start)
> {
> - int devi = 0;
> + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> + return false;
> +
> + if (start)
> + return (blkaddr % sbi->blocks_per_blkz) == 0;
> + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>
> - if (f2fs_is_multi_device(sbi)) {
> - devi = f2fs_target_device_index(sbi, blkaddr);
> - if (blkaddr < FDEV(devi).start_blk ||
> - blkaddr > FDEV(devi).end_blk) {
> - f2fs_err(sbi, "Invalid block %x", blkaddr);
> - return false;
> - }
> - blkaddr -= FDEV(devi).start_blk;
> - }
> - return bdev_is_zoned(FDEV(devi).bdev) &&
> - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> }
> #endif
>
> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> struct page *bio_page;
> enum count_type type;
> +#ifdef CONFIG_BLK_DEV_ZONED
> + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> +#endif
>
> f2fs_bug_on(sbi, is_read_io(fio->op));
>
> f2fs_down_write(&io->io_rwsem);
> next:
> -#ifdef CONFIG_BLK_DEV_ZONED
> - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> - wait_for_completion_io(&io->zone_wait);
> - bio_put(io->zone_pending_bio);
> - io->zone_pending_bio = NULL;
> - io->bi_private = NULL;
> - }
> -#endif
> -
> if (fio->in_list) {
> spin_lock(&io->io_lock);
> if (list_empty(&io->io_list)) {
> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> inc_page_count(sbi, type);
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> + io->open_zone = true;
> +#endif
> +
> if (io->bio &&
> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> fio->new_blkaddr) ||
> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> io->last_block_in_bio = fio->new_blkaddr;
>
> trace_f2fs_submit_page_write(fio->page, fio);
> +
> #ifdef CONFIG_BLK_DEV_ZONED
> - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> - bio_get(io->bio);
> - reinit_completion(&io->zone_wait);
> - io->bi_private = io->bio->bi_private;
> - io->bio->bi_private = io;
> - io->bio->bi_end_io = f2fs_zone_write_end_io;
> - io->zone_pending_bio = io->bio;
> + if (blkzoned &&
> + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> + io->close_zone = true;
> __submit_merged_bio(io);
> }
> #endif
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 694f8a52cb84..8a26530cf4fb 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> struct bio *bio; /* bios to merge */
> sector_t last_block_in_bio; /* last block number */
> struct f2fs_io_info fio; /* store buffered io info. */
> -#ifdef CONFIG_BLK_DEV_ZONED
> - struct completion zone_wait; /* condition value for the previous open zone to close */
> - struct bio *zone_pending_bio; /* pending bio for the previous zone */
> - void *bi_private; /* previous bi_private for pending bio */
> -#endif
> struct f2fs_rwsem io_rwsem; /* blocking op for bio */
> spinlock_t io_lock; /* serialize DATA/NODE IOs */
> struct list_head io_list; /* track fios */
> struct list_head bio_list; /* bio entry list head */
> struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
> +#ifdef CONFIG_BLK_DEV_ZONED
> + bool open_zone; /* open a zone */
> + bool zone_openned; /* zone has been openned */
> + bool close_zone; /* close a zone */
> +#endif
> };
>
> #define FDEV(i) (sbi->devs[i])
> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> #ifdef CONFIG_BLK_DEV_ZONED
> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
> unsigned int max_open_zones; /* max open zone resources of the zoned device */
> + struct semaphore available_open_zones; /* available open zones */
> #endif
>
> /* for node-related operations */
> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> enum page_type type);
> int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> struct inode *inode, struct page *page,
> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>
> return test_bit(zno, FDEV(devi).blkz_seq);
> }
> +
> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> + block_t blkaddr)
> +{
> + int devi = 0;
> +
> + if (f2fs_is_multi_device(sbi)) {
> + devi = f2fs_target_device_index(sbi, blkaddr);
> + if (blkaddr < FDEV(devi).start_blk ||
> + blkaddr > FDEV(devi).end_blk) {
> + f2fs_err(sbi, "Invalid block %x", blkaddr);
> + return false;
> + }
> + blkaddr -= FDEV(devi).start_blk;
> + }
> +
> + if (!bdev_is_zoned(FDEV(devi).bdev) ||
> + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> + return false;
> +
> + return true;

Applied as below.

@@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
blkaddr -= FDEV(devi).start_blk;
}

- if (!bdev_is_zoned(FDEV(devi).bdev) ||
- !f2fs_blkz_is_seq(sbi, devi, blkaddr))
- return false;
-
- return true;
+ return bdev_is_zoned(FDEV(devi).bdev) &&
+ f2fs_blkz_is_seq(sbi, devi, blkaddr);
}
#endif

> +}
> #endif
>
> static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index f8703038e1d8..a8626e297876 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> }
>
> +void *iostat_get_bio_private(struct bio *bio)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> + return iostat_ctx->sbi;
> +}
> +
> void iostat_update_and_unbind_ctx(struct bio *bio)
> {
> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> index eb99d05cf272..9006c3d41590 100644
> --- a/fs/f2fs/iostat.h
> +++ b/fs/f2fs/iostat.h
> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> return iostat_ctx->post_read_ctx;
> }
>
> +extern void *iostat_get_bio_private(struct bio *bio);
> extern void iostat_update_and_unbind_ctx(struct bio *bio);
> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> struct bio *bio, struct bio_post_read_ctx *ctx);
> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> #else
> static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> enum iostat_type type, unsigned long long io_bytes) {}
> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> struct bio *bio, struct bio_post_read_ctx *ctx) {}
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 4fd76e867e0a..4a3cf2888faf 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> return err;
> stat_inc_seg_type(sbi, curseg);
> locate_dirty_segment(sbi, old_segno);
> +
> + f2fs_blkzoned_submit_merged_write(sbi, type);
> +
> return 0;
> }
>
> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> f2fs_bug_on(fio->sbi, true);
> }
>
> - if (IS_HOT(type))
> - fio->temp = HOT;
> - else if (IS_WARM(type))
> - fio->temp = WARM;
> - else
> - fio->temp = COLD;
> + fio->temp = type_to_temp(type);
> return type;
> }
>
> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> return -EINVAL;
> }
>
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> + struct curseg_info *curseg = CURSEG_I(sbi, type);
> + enum page_type ptype;
> + enum temp_type temp;
> +
> + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> + continue;
> +
> + if (!f2fs_blkaddr_in_seqzone(sbi,
> + START_BLOCK(sbi, curseg->segno)))
> + continue;
> +
> + ptype = PAGE_TYPE(type);
> + temp = type_to_temp(type);
> + down(&sbi->available_open_zones);
> + sbi->write_io[ptype][temp].zone_openned = true;
> + }
> + }
> +#endif
> return 0;
> }
>
> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> for (i = 0; i < NR_CURSEG_TYPE; i++) {
> kfree(array[i].sum_blk);
> kfree(array[i].journal);
> +
> +#ifdef CONFIG_BLK_DEV_ZONED
> + if (f2fs_sb_has_blkzoned(sbi)) {
> + enum page_type ptype = PAGE_TYPE(i);
> + enum temp_type temp = type_to_temp(i);
> +
> + if (sbi->write_io[ptype][temp].zone_openned) {
> + up(&sbi->available_open_zones);
> + sbi->write_io[ptype][temp].zone_openned = false;
> + }
> + }
> +#endif
> }
> kfree(array);
> }
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index e1c0f418aa11..855978ca869f 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -24,7 +24,8 @@
>
> #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
> #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
> +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
> +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
>
> static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> unsigned short seg_type)
> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> return GET_SEGNO(sbi, FDEV(devi).start_blk);
> return 0;
> }
> +
> +static inline enum temp_type type_to_temp(int type)
> +{
> + if (IS_HOT(type))
> + return HOT;
> + else if (IS_WARM(type))
> + return WARM;
> + return COLD;
> +}
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index fdf358c7f808..954baa6c100d 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> return -EINVAL;
> }
> +
> + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> }
>
> zone_sectors = bdev_zone_sectors(bdev);
> --
> 2.40.1

2024-04-13 01:34:00

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 2024/4/13 5:11, Jaegeuk Kim wrote:
> On 04/07, Chao Yu wrote:
>> max open zone may be larger than log header number of f2fs, for
>> such case, it doesn't need to wait last IO in previous zone, let's
>> introduce available_open_zone semaphore, and reduce it once we
>> submit first write IO in a zone, and increase it after completion
>> of last IO in the zone.
>>
>> Cc: Daeho Jeong <[email protected]>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> v3:
>> - avoid race condition in between __submit_merged_bio()
>> and __allocate_new_segment().
>> fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
>> fs/f2fs/f2fs.h | 34 ++++++++++++---
>> fs/f2fs/iostat.c | 7 ++++
>> fs/f2fs/iostat.h | 2 +
>> fs/f2fs/segment.c | 43 ++++++++++++++++---
>> fs/f2fs/segment.h | 12 +++++-
>> fs/f2fs/super.c | 2 +
>> 7 files changed, 156 insertions(+), 49 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 0d88649c60a5..18a4ac0a06bc 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>> #ifdef CONFIG_BLK_DEV_ZONED
>> static void f2fs_zone_write_end_io(struct bio *bio)
>> {
>> - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>> + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>
>> - bio->bi_private = io->bi_private;
>> - complete(&io->zone_wait);
>> f2fs_write_end_io(bio);
>> + up(&sbi->available_open_zones);
>> }
>> #endif
>>
>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>> if (!io->bio)
>> return;
>>
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + if (io->open_zone) {
>> + /*
>> + * if there is no open zone, it will wait for last IO in
>> + * previous zone before submitting new IO.
>> + */
>> + down(&fio->sbi->available_open_zones);
>> + io->open_zone = false;
>> + io->zone_openned = true;
>> + }
>> +
>> + if (io->close_zone) {
>> + io->bio->bi_end_io = f2fs_zone_write_end_io;
>> + io->zone_openned = false;
>> + io->close_zone = false;
>> + }
>> +#endif
>> +
>> if (is_read_io(fio->op)) {
>> trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>> f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>> INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>> init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>> #ifdef CONFIG_BLK_DEV_ZONED
>> - init_completion(&sbi->write_io[i][j].zone_wait);
>> - sbi->write_io[i][j].zone_pending_bio = NULL;
>> - sbi->write_io[i][j].bi_private = NULL;
>> + sbi->write_io[i][j].open_zone = false;
>> + sbi->write_io[i][j].zone_openned = false;
>> + sbi->write_io[i][j].close_zone = false;
>> #endif
>> }
>> }
>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>> f2fs_up_write(&io->io_rwsem);
>> }
>>
>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>> +{
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + struct f2fs_bio_info *io;
>> +
>> + if (!f2fs_sb_has_blkzoned(sbi))
>> + return;
>> +
>> + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>> +
>> + f2fs_down_write(&io->io_rwsem);
>> + if (io->zone_openned) {
>> + if (io->bio) {
>> + io->close_zone = true;
>> + __submit_merged_bio(io);
>> + } else if (io->zone_openned) {
>> + up(&sbi->available_open_zones);
>> + io->zone_openned = false;
>> + }
>> + }
>> + f2fs_up_write(&io->io_rwsem);
>> +#endif
>> +
>> +}
>> +
>> static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>> struct inode *inode, struct page *page,
>> nid_t ino, enum page_type type, bool force)
>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>> }
>>
>> #ifdef CONFIG_BLK_DEV_ZONED
>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>> + block_t blkaddr, bool start)
>> {
>> - int devi = 0;
>> + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>> + return false;
>> +
>> + if (start)
>> + return (blkaddr % sbi->blocks_per_blkz) == 0;
>> + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>
>> - if (f2fs_is_multi_device(sbi)) {
>> - devi = f2fs_target_device_index(sbi, blkaddr);
>> - if (blkaddr < FDEV(devi).start_blk ||
>> - blkaddr > FDEV(devi).end_blk) {
>> - f2fs_err(sbi, "Invalid block %x", blkaddr);
>> - return false;
>> - }
>> - blkaddr -= FDEV(devi).start_blk;
>> - }
>> - return bdev_is_zoned(FDEV(devi).bdev) &&
>> - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>> - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>> }
>> #endif
>>
>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>> struct page *bio_page;
>> enum count_type type;
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>> +#endif
>>
>> f2fs_bug_on(sbi, is_read_io(fio->op));
>>
>> f2fs_down_write(&io->io_rwsem);
>> next:
>> -#ifdef CONFIG_BLK_DEV_ZONED
>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>> - wait_for_completion_io(&io->zone_wait);
>> - bio_put(io->zone_pending_bio);
>> - io->zone_pending_bio = NULL;
>> - io->bi_private = NULL;
>> - }
>> -#endif
>> -
>> if (fio->in_list) {
>> spin_lock(&io->io_lock);
>> if (list_empty(&io->io_list)) {
>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>> inc_page_count(sbi, type);
>>
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>> + io->open_zone = true;
>> +#endif
>> +
>> if (io->bio &&
>> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>> fio->new_blkaddr) ||
>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>> io->last_block_in_bio = fio->new_blkaddr;
>>
>> trace_f2fs_submit_page_write(fio->page, fio);
>> +
>> #ifdef CONFIG_BLK_DEV_ZONED
>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>> - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>> - bio_get(io->bio);
>> - reinit_completion(&io->zone_wait);
>> - io->bi_private = io->bio->bi_private;
>> - io->bio->bi_private = io;
>> - io->bio->bi_end_io = f2fs_zone_write_end_io;
>> - io->zone_pending_bio = io->bio;
>> + if (blkzoned &&
>> + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>> + io->close_zone = true;
>> __submit_merged_bio(io);
>> }
>> #endif
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 694f8a52cb84..8a26530cf4fb 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>> struct bio *bio; /* bios to merge */
>> sector_t last_block_in_bio; /* last block number */
>> struct f2fs_io_info fio; /* store buffered io info. */
>> -#ifdef CONFIG_BLK_DEV_ZONED
>> - struct completion zone_wait; /* condition value for the previous open zone to close */
>> - struct bio *zone_pending_bio; /* pending bio for the previous zone */
>> - void *bi_private; /* previous bi_private for pending bio */
>> -#endif
>> struct f2fs_rwsem io_rwsem; /* blocking op for bio */
>> spinlock_t io_lock; /* serialize DATA/NODE IOs */
>> struct list_head io_list; /* track fios */
>> struct list_head bio_list; /* bio entry list head */
>> struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + bool open_zone; /* open a zone */
>> + bool zone_openned; /* zone has been openned */
>> + bool close_zone; /* close a zone */
>> +#endif
>> };
>>
>> #define FDEV(i) (sbi->devs[i])
>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>> #ifdef CONFIG_BLK_DEV_ZONED
>> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
>> unsigned int max_open_zones; /* max open zone resources of the zoned device */
>> + struct semaphore available_open_zones; /* available open zones */
>> #endif
>>
>> /* for node-related operations */
>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>> enum page_type type);
>> int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>> void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>> void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>> struct inode *inode, struct page *page,
>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>
>> return test_bit(zno, FDEV(devi).blkz_seq);
>> }
>> +
>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>> + block_t blkaddr)
>> +{
>> + int devi = 0;
>> +
>> + if (f2fs_is_multi_device(sbi)) {
>> + devi = f2fs_target_device_index(sbi, blkaddr);
>> + if (blkaddr < FDEV(devi).start_blk ||
>> + blkaddr > FDEV(devi).end_blk) {
>> + f2fs_err(sbi, "Invalid block %x", blkaddr);
>> + return false;
>> + }
>> + blkaddr -= FDEV(devi).start_blk;
>> + }
>> +
>> + if (!bdev_is_zoned(FDEV(devi).bdev) ||
>> + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>> + return false;
>> +
>> + return true;
>
> Applied as below.
>
> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> blkaddr -= FDEV(devi).start_blk;
> }
>
> - if (!bdev_is_zoned(FDEV(devi).bdev) ||
> - !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> - return false;
> -
> - return true;
> + return bdev_is_zoned(FDEV(devi).bdev) &&
> + f2fs_blkz_is_seq(sbi, devi, blkaddr);

Looks good, thank you for cleanup.

Thanks,

> }
> #endif
>
>> +}
>> #endif
>>
>> static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>> index f8703038e1d8..a8626e297876 100644
>> --- a/fs/f2fs/iostat.c
>> +++ b/fs/f2fs/iostat.c
>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>> }
>>
>> +void *iostat_get_bio_private(struct bio *bio)
>> +{
>> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>> +
>> + return iostat_ctx->sbi;
>> +}
>> +
>> void iostat_update_and_unbind_ctx(struct bio *bio)
>> {
>> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>> index eb99d05cf272..9006c3d41590 100644
>> --- a/fs/f2fs/iostat.h
>> +++ b/fs/f2fs/iostat.h
>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>> return iostat_ctx->post_read_ctx;
>> }
>>
>> +extern void *iostat_get_bio_private(struct bio *bio);
>> extern void iostat_update_and_unbind_ctx(struct bio *bio);
>> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>> struct bio *bio, struct bio_post_read_ctx *ctx);
>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>> #else
>> static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>> enum iostat_type type, unsigned long long io_bytes) {}
>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>> struct bio *bio, struct bio_post_read_ctx *ctx) {}
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 4fd76e867e0a..4a3cf2888faf 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>> return err;
>> stat_inc_seg_type(sbi, curseg);
>> locate_dirty_segment(sbi, old_segno);
>> +
>> + f2fs_blkzoned_submit_merged_write(sbi, type);
>> +
>> return 0;
>> }
>>
>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>> f2fs_bug_on(fio->sbi, true);
>> }
>>
>> - if (IS_HOT(type))
>> - fio->temp = HOT;
>> - else if (IS_WARM(type))
>> - fio->temp = WARM;
>> - else
>> - fio->temp = COLD;
>> + fio->temp = type_to_temp(type);
>> return type;
>> }
>>
>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>> return -EINVAL;
>> }
>>
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + if (f2fs_sb_has_blkzoned(sbi)) {
>> + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>> + struct curseg_info *curseg = CURSEG_I(sbi, type);
>> + enum page_type ptype;
>> + enum temp_type temp;
>> +
>> + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>> + continue;
>> +
>> + if (!f2fs_blkaddr_in_seqzone(sbi,
>> + START_BLOCK(sbi, curseg->segno)))
>> + continue;
>> +
>> + ptype = PAGE_TYPE(type);
>> + temp = type_to_temp(type);
>> + down(&sbi->available_open_zones);
>> + sbi->write_io[ptype][temp].zone_openned = true;
>> + }
>> + }
>> +#endif
>> return 0;
>> }
>>
>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>> for (i = 0; i < NR_CURSEG_TYPE; i++) {
>> kfree(array[i].sum_blk);
>> kfree(array[i].journal);
>> +
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> + if (f2fs_sb_has_blkzoned(sbi)) {
>> + enum page_type ptype = PAGE_TYPE(i);
>> + enum temp_type temp = type_to_temp(i);
>> +
>> + if (sbi->write_io[ptype][temp].zone_openned) {
>> + up(&sbi->available_open_zones);
>> + sbi->write_io[ptype][temp].zone_openned = false;
>> + }
>> + }
>> +#endif
>> }
>> kfree(array);
>> }
>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>> index e1c0f418aa11..855978ca869f 100644
>> --- a/fs/f2fs/segment.h
>> +++ b/fs/f2fs/segment.h
>> @@ -24,7 +24,8 @@
>>
>> #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
>> #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>> -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
>> +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
>> +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
>>
>> static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>> unsigned short seg_type)
>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>> return GET_SEGNO(sbi, FDEV(devi).start_blk);
>> return 0;
>> }
>> +
>> +static inline enum temp_type type_to_temp(int type)
>> +{
>> + if (IS_HOT(type))
>> + return HOT;
>> + else if (IS_WARM(type))
>> + return WARM;
>> + return COLD;
>> +}
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index fdf358c7f808..954baa6c100d 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>> sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>> return -EINVAL;
>> }
>> +
>> + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>> }
>>
>> zone_sectors = bdev_zone_sectors(bdev);
>> --
>> 2.40.1

2024-04-14 15:19:33

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

It seems this caused kernel hang. Chao, have you tested this patch enough?

On 04/13, Chao Yu wrote:
> On 2024/4/13 5:11, Jaegeuk Kim wrote:
> > On 04/07, Chao Yu wrote:
> > > max open zone may be larger than log header number of f2fs, for
> > > such case, it doesn't need to wait last IO in previous zone, let's
> > > introduce available_open_zone semaphore, and reduce it once we
> > > submit first write IO in a zone, and increase it after completion
> > > of last IO in the zone.
> > >
> > > Cc: Daeho Jeong <[email protected]>
> > > Signed-off-by: Chao Yu <[email protected]>
> > > ---
> > > v3:
> > > - avoid race condition in between __submit_merged_bio()
> > > and __allocate_new_segment().
> > > fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
> > > fs/f2fs/f2fs.h | 34 ++++++++++++---
> > > fs/f2fs/iostat.c | 7 ++++
> > > fs/f2fs/iostat.h | 2 +
> > > fs/f2fs/segment.c | 43 ++++++++++++++++---
> > > fs/f2fs/segment.h | 12 +++++-
> > > fs/f2fs/super.c | 2 +
> > > 7 files changed, 156 insertions(+), 49 deletions(-)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 0d88649c60a5..18a4ac0a06bc 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> > > #ifdef CONFIG_BLK_DEV_ZONED
> > > static void f2fs_zone_write_end_io(struct bio *bio)
> > > {
> > > - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> > > + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
> > > - bio->bi_private = io->bi_private;
> > > - complete(&io->zone_wait);
> > > f2fs_write_end_io(bio);
> > > + up(&sbi->available_open_zones);
> > > }
> > > #endif
> > > @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> > > if (!io->bio)
> > > return;
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + if (io->open_zone) {
> > > + /*
> > > + * if there is no open zone, it will wait for last IO in
> > > + * previous zone before submitting new IO.
> > > + */
> > > + down(&fio->sbi->available_open_zones);
> > > + io->open_zone = false;
> > > + io->zone_openned = true;
> > > + }
> > > +
> > > + if (io->close_zone) {
> > > + io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > + io->zone_openned = false;
> > > + io->close_zone = false;
> > > + }
> > > +#endif
> > > +
> > > if (is_read_io(fio->op)) {
> > > trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> > > f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> > > @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> > > INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> > > init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> > > #ifdef CONFIG_BLK_DEV_ZONED
> > > - init_completion(&sbi->write_io[i][j].zone_wait);
> > > - sbi->write_io[i][j].zone_pending_bio = NULL;
> > > - sbi->write_io[i][j].bi_private = NULL;
> > > + sbi->write_io[i][j].open_zone = false;
> > > + sbi->write_io[i][j].zone_openned = false;
> > > + sbi->write_io[i][j].close_zone = false;
> > > #endif
> > > }
> > > }
> > > @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> > > f2fs_up_write(&io->io_rwsem);
> > > }
> > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> > > +{
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + struct f2fs_bio_info *io;
> > > +
> > > + if (!f2fs_sb_has_blkzoned(sbi))
> > > + return;
> > > +
> > > + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> > > +
> > > + f2fs_down_write(&io->io_rwsem);
> > > + if (io->zone_openned) {
> > > + if (io->bio) {
> > > + io->close_zone = true;
> > > + __submit_merged_bio(io);
> > > + } else if (io->zone_openned) {
> > > + up(&sbi->available_open_zones);
> > > + io->zone_openned = false;
> > > + }
> > > + }
> > > + f2fs_up_write(&io->io_rwsem);
> > > +#endif
> > > +
> > > +}
> > > +
> > > static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > struct inode *inode, struct page *page,
> > > nid_t ino, enum page_type type, bool force)
> > > @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> > > }
> > > #ifdef CONFIG_BLK_DEV_ZONED
> > > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> > > +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> > > + block_t blkaddr, bool start)
> > > {
> > > - int devi = 0;
> > > + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> > > + return false;
> > > +
> > > + if (start)
> > > + return (blkaddr % sbi->blocks_per_blkz) == 0;
> > > + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > - if (f2fs_is_multi_device(sbi)) {
> > > - devi = f2fs_target_device_index(sbi, blkaddr);
> > > - if (blkaddr < FDEV(devi).start_blk ||
> > > - blkaddr > FDEV(devi).end_blk) {
> > > - f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > - return false;
> > > - }
> > > - blkaddr -= FDEV(devi).start_blk;
> > > - }
> > > - return bdev_is_zoned(FDEV(devi).bdev) &&
> > > - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> > > - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > }
> > > #endif
> > > @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> > > struct page *bio_page;
> > > enum count_type type;
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> > > +#endif
> > > f2fs_bug_on(sbi, is_read_io(fio->op));
> > > f2fs_down_write(&io->io_rwsem);
> > > next:
> > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> > > - wait_for_completion_io(&io->zone_wait);
> > > - bio_put(io->zone_pending_bio);
> > > - io->zone_pending_bio = NULL;
> > > - io->bi_private = NULL;
> > > - }
> > > -#endif
> > > -
> > > if (fio->in_list) {
> > > spin_lock(&io->io_lock);
> > > if (list_empty(&io->io_list)) {
> > > @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> > > inc_page_count(sbi, type);
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> > > + io->open_zone = true;
> > > +#endif
> > > +
> > > if (io->bio &&
> > > (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> > > fio->new_blkaddr) ||
> > > @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > io->last_block_in_bio = fio->new_blkaddr;
> > > trace_f2fs_submit_page_write(fio->page, fio);
> > > +
> > > #ifdef CONFIG_BLK_DEV_ZONED
> > > - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> > > - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> > > - bio_get(io->bio);
> > > - reinit_completion(&io->zone_wait);
> > > - io->bi_private = io->bio->bi_private;
> > > - io->bio->bi_private = io;
> > > - io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > - io->zone_pending_bio = io->bio;
> > > + if (blkzoned &&
> > > + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> > > + io->close_zone = true;
> > > __submit_merged_bio(io);
> > > }
> > > #endif
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index 694f8a52cb84..8a26530cf4fb 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> > > struct bio *bio; /* bios to merge */
> > > sector_t last_block_in_bio; /* last block number */
> > > struct f2fs_io_info fio; /* store buffered io info. */
> > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > - struct completion zone_wait; /* condition value for the previous open zone to close */
> > > - struct bio *zone_pending_bio; /* pending bio for the previous zone */
> > > - void *bi_private; /* previous bi_private for pending bio */
> > > -#endif
> > > struct f2fs_rwsem io_rwsem; /* blocking op for bio */
> > > spinlock_t io_lock; /* serialize DATA/NODE IOs */
> > > struct list_head io_list; /* track fios */
> > > struct list_head bio_list; /* bio entry list head */
> > > struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + bool open_zone; /* open a zone */
> > > + bool zone_openned; /* zone has been openned */
> > > + bool close_zone; /* close a zone */
> > > +#endif
> > > };
> > > #define FDEV(i) (sbi->devs[i])
> > > @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> > > #ifdef CONFIG_BLK_DEV_ZONED
> > > unsigned int blocks_per_blkz; /* F2FS blocks per zone */
> > > unsigned int max_open_zones; /* max open zone resources of the zoned device */
> > > + struct semaphore available_open_zones; /* available open zones */
> > > #endif
> > > /* for node-related operations */
> > > @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> > > void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> > > enum page_type type);
> > > int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> > > void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> > > void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > struct inode *inode, struct page *page,
> > > @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
> > > return test_bit(zno, FDEV(devi).blkz_seq);
> > > }
> > > +
> > > +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > + block_t blkaddr)
> > > +{
> > > + int devi = 0;
> > > +
> > > + if (f2fs_is_multi_device(sbi)) {
> > > + devi = f2fs_target_device_index(sbi, blkaddr);
> > > + if (blkaddr < FDEV(devi).start_blk ||
> > > + blkaddr > FDEV(devi).end_blk) {
> > > + f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > + return false;
> > > + }
> > > + blkaddr -= FDEV(devi).start_blk;
> > > + }
> > > +
> > > + if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > + return false;
> > > +
> > > + return true;
> >
> > Applied as below.
> >
> > @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > blkaddr -= FDEV(devi).start_blk;
> > }
> >
> > - if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > - !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > - return false;
> > -
> > - return true;
> > + return bdev_is_zoned(FDEV(devi).bdev) &&
> > + f2fs_blkz_is_seq(sbi, devi, blkaddr);
>
> Looks good, thank you for cleanup.
>
> Thanks,
>
> > }
> > #endif
> >
> > > +}
> > > #endif
> > > static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> > > diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> > > index f8703038e1d8..a8626e297876 100644
> > > --- a/fs/f2fs/iostat.c
> > > +++ b/fs/f2fs/iostat.c
> > > @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> > > spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> > > }
> > > +void *iostat_get_bio_private(struct bio *bio)
> > > +{
> > > + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > +
> > > + return iostat_ctx->sbi;
> > > +}
> > > +
> > > void iostat_update_and_unbind_ctx(struct bio *bio)
> > > {
> > > struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> > > index eb99d05cf272..9006c3d41590 100644
> > > --- a/fs/f2fs/iostat.h
> > > +++ b/fs/f2fs/iostat.h
> > > @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> > > return iostat_ctx->post_read_ctx;
> > > }
> > > +extern void *iostat_get_bio_private(struct bio *bio);
> > > extern void iostat_update_and_unbind_ctx(struct bio *bio);
> > > extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > struct bio *bio, struct bio_post_read_ctx *ctx);
> > > @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> > > #else
> > > static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> > > enum iostat_type type, unsigned long long io_bytes) {}
> > > +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> > > static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> > > static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > struct bio *bio, struct bio_post_read_ctx *ctx) {}
> > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > index 4fd76e867e0a..4a3cf2888faf 100644
> > > --- a/fs/f2fs/segment.c
> > > +++ b/fs/f2fs/segment.c
> > > @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> > > return err;
> > > stat_inc_seg_type(sbi, curseg);
> > > locate_dirty_segment(sbi, old_segno);
> > > +
> > > + f2fs_blkzoned_submit_merged_write(sbi, type);
> > > +
> > > return 0;
> > > }
> > > @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> > > f2fs_bug_on(fio->sbi, true);
> > > }
> > > - if (IS_HOT(type))
> > > - fio->temp = HOT;
> > > - else if (IS_WARM(type))
> > > - fio->temp = WARM;
> > > - else
> > > - fio->temp = COLD;
> > > + fio->temp = type_to_temp(type);
> > > return type;
> > > }
> > > @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > > return -EINVAL;
> > > }
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + if (f2fs_sb_has_blkzoned(sbi)) {
> > > + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> > > + struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > + enum page_type ptype;
> > > + enum temp_type temp;
> > > +
> > > + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> > > + continue;
> > > +
> > > + if (!f2fs_blkaddr_in_seqzone(sbi,
> > > + START_BLOCK(sbi, curseg->segno)))
> > > + continue;
> > > +
> > > + ptype = PAGE_TYPE(type);
> > > + temp = type_to_temp(type);
> > > + down(&sbi->available_open_zones);
> > > + sbi->write_io[ptype][temp].zone_openned = true;
> > > + }
> > > + }
> > > +#endif
> > > return 0;
> > > }
> > > @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> > > for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > > kfree(array[i].sum_blk);
> > > kfree(array[i].journal);
> > > +
> > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > + if (f2fs_sb_has_blkzoned(sbi)) {
> > > + enum page_type ptype = PAGE_TYPE(i);
> > > + enum temp_type temp = type_to_temp(i);
> > > +
> > > + if (sbi->write_io[ptype][temp].zone_openned) {
> > > + up(&sbi->available_open_zones);
> > > + sbi->write_io[ptype][temp].zone_openned = false;
> > > + }
> > > + }
> > > +#endif
> > > }
> > > kfree(array);
> > > }
> > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > index e1c0f418aa11..855978ca869f 100644
> > > --- a/fs/f2fs/segment.h
> > > +++ b/fs/f2fs/segment.h
> > > @@ -24,7 +24,8 @@
> > > #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
> > > #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
> > > +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
> > > +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
> > > static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> > > unsigned short seg_type)
> > > @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> > > return GET_SEGNO(sbi, FDEV(devi).start_blk);
> > > return 0;
> > > }
> > > +
> > > +static inline enum temp_type type_to_temp(int type)
> > > +{
> > > + if (IS_HOT(type))
> > > + return HOT;
> > > + else if (IS_WARM(type))
> > > + return WARM;
> > > + return COLD;
> > > +}
> > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > index fdf358c7f808..954baa6c100d 100644
> > > --- a/fs/f2fs/super.c
> > > +++ b/fs/f2fs/super.c
> > > @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> > > sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> > > return -EINVAL;
> > > }
> > > +
> > > + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> > > }
> > > zone_sectors = bdev_zone_sectors(bdev);
> > > --
> > > 2.40.1

2024-04-15 03:26:43

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 2024/4/14 23:19, Jaegeuk Kim wrote:
> It seems this caused kernel hang. Chao, have you tested this patch enough?

Jaegeuk,

Oh, I've checked this patch w/ fsstress before submitting it, but missed
the SPO testcase... do you encounter kernel hang w/ SPO testcase?

Anyway, let me test it more.

Thanks,

>
> On 04/13, Chao Yu wrote:
>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>> On 04/07, Chao Yu wrote:
>>>> max open zone may be larger than log header number of f2fs, for
>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>> introduce available_open_zone semaphore, and reduce it once we
>>>> submit first write IO in a zone, and increase it after completion
>>>> of last IO in the zone.
>>>>
>>>> Cc: Daeho Jeong <[email protected]>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> ---
>>>> v3:
>>>> - avoid race condition in between __submit_merged_bio()
>>>> and __allocate_new_segment().
>>>> fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
>>>> fs/f2fs/f2fs.h | 34 ++++++++++++---
>>>> fs/f2fs/iostat.c | 7 ++++
>>>> fs/f2fs/iostat.h | 2 +
>>>> fs/f2fs/segment.c | 43 ++++++++++++++++---
>>>> fs/f2fs/segment.h | 12 +++++-
>>>> fs/f2fs/super.c | 2 +
>>>> 7 files changed, 156 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> static void f2fs_zone_write_end_io(struct bio *bio)
>>>> {
>>>> - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>> + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>> - bio->bi_private = io->bi_private;
>>>> - complete(&io->zone_wait);
>>>> f2fs_write_end_io(bio);
>>>> + up(&sbi->available_open_zones);
>>>> }
>>>> #endif
>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>> if (!io->bio)
>>>> return;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (io->open_zone) {
>>>> + /*
>>>> + * if there is no open zone, it will wait for last IO in
>>>> + * previous zone before submitting new IO.
>>>> + */
>>>> + down(&fio->sbi->available_open_zones);
>>>> + io->open_zone = false;
>>>> + io->zone_openned = true;
>>>> + }
>>>> +
>>>> + if (io->close_zone) {
>>>> + io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> + io->zone_openned = false;
>>>> + io->close_zone = false;
>>>> + }
>>>> +#endif
>>>> +
>>>> if (is_read_io(fio->op)) {
>>>> trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>> f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>> INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>> init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> - init_completion(&sbi->write_io[i][j].zone_wait);
>>>> - sbi->write_io[i][j].zone_pending_bio = NULL;
>>>> - sbi->write_io[i][j].bi_private = NULL;
>>>> + sbi->write_io[i][j].open_zone = false;
>>>> + sbi->write_io[i][j].zone_openned = false;
>>>> + sbi->write_io[i][j].close_zone = false;
>>>> #endif
>>>> }
>>>> }
>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>> f2fs_up_write(&io->io_rwsem);
>>>> }
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>> +{
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + struct f2fs_bio_info *io;
>>>> +
>>>> + if (!f2fs_sb_has_blkzoned(sbi))
>>>> + return;
>>>> +
>>>> + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>> +
>>>> + f2fs_down_write(&io->io_rwsem);
>>>> + if (io->zone_openned) {
>>>> + if (io->bio) {
>>>> + io->close_zone = true;
>>>> + __submit_merged_bio(io);
>>>> + } else if (io->zone_openned) {
>>>> + up(&sbi->available_open_zones);
>>>> + io->zone_openned = false;
>>>> + }
>>>> + }
>>>> + f2fs_up_write(&io->io_rwsem);
>>>> +#endif
>>>> +
>>>> +}
>>>> +
>>>> static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>> struct inode *inode, struct page *page,
>>>> nid_t ino, enum page_type type, bool force)
>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>> }
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>> + block_t blkaddr, bool start)
>>>> {
>>>> - int devi = 0;
>>>> + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>> + return false;
>>>> +
>>>> + if (start)
>>>> + return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>> + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>> - if (f2fs_is_multi_device(sbi)) {
>>>> - devi = f2fs_target_device_index(sbi, blkaddr);
>>>> - if (blkaddr < FDEV(devi).start_blk ||
>>>> - blkaddr > FDEV(devi).end_blk) {
>>>> - f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> - return false;
>>>> - }
>>>> - blkaddr -= FDEV(devi).start_blk;
>>>> - }
>>>> - return bdev_is_zoned(FDEV(devi).bdev) &&
>>>> - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>> - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>> }
>>>> #endif
>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>> struct page *bio_page;
>>>> enum count_type type;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>> +#endif
>>>> f2fs_bug_on(sbi, is_read_io(fio->op));
>>>> f2fs_down_write(&io->io_rwsem);
>>>> next:
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>> - wait_for_completion_io(&io->zone_wait);
>>>> - bio_put(io->zone_pending_bio);
>>>> - io->zone_pending_bio = NULL;
>>>> - io->bi_private = NULL;
>>>> - }
>>>> -#endif
>>>> -
>>>> if (fio->in_list) {
>>>> spin_lock(&io->io_lock);
>>>> if (list_empty(&io->io_list)) {
>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>> inc_page_count(sbi, type);
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>> + io->open_zone = true;
>>>> +#endif
>>>> +
>>>> if (io->bio &&
>>>> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>> fio->new_blkaddr) ||
>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> io->last_block_in_bio = fio->new_blkaddr;
>>>> trace_f2fs_submit_page_write(fio->page, fio);
>>>> +
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>> - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>> - bio_get(io->bio);
>>>> - reinit_completion(&io->zone_wait);
>>>> - io->bi_private = io->bio->bi_private;
>>>> - io->bio->bi_private = io;
>>>> - io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> - io->zone_pending_bio = io->bio;
>>>> + if (blkzoned &&
>>>> + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>> + io->close_zone = true;
>>>> __submit_merged_bio(io);
>>>> }
>>>> #endif
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>> struct bio *bio; /* bios to merge */
>>>> sector_t last_block_in_bio; /* last block number */
>>>> struct f2fs_io_info fio; /* store buffered io info. */
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> - struct completion zone_wait; /* condition value for the previous open zone to close */
>>>> - struct bio *zone_pending_bio; /* pending bio for the previous zone */
>>>> - void *bi_private; /* previous bi_private for pending bio */
>>>> -#endif
>>>> struct f2fs_rwsem io_rwsem; /* blocking op for bio */
>>>> spinlock_t io_lock; /* serialize DATA/NODE IOs */
>>>> struct list_head io_list; /* track fios */
>>>> struct list_head bio_list; /* bio entry list head */
>>>> struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + bool open_zone; /* open a zone */
>>>> + bool zone_openned; /* zone has been openned */
>>>> + bool close_zone; /* close a zone */
>>>> +#endif
>>>> };
>>>> #define FDEV(i) (sbi->devs[i])
>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
>>>> unsigned int max_open_zones; /* max open zone resources of the zoned device */
>>>> + struct semaphore available_open_zones; /* available open zones */
>>>> #endif
>>>> /* for node-related operations */
>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>> enum page_type type);
>>>> int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>> void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>> void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>> struct inode *inode, struct page *page,
>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>> return test_bit(zno, FDEV(devi).blkz_seq);
>>>> }
>>>> +
>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>> + block_t blkaddr)
>>>> +{
>>>> + int devi = 0;
>>>> +
>>>> + if (f2fs_is_multi_device(sbi)) {
>>>> + devi = f2fs_target_device_index(sbi, blkaddr);
>>>> + if (blkaddr < FDEV(devi).start_blk ||
>>>> + blkaddr > FDEV(devi).end_blk) {
>>>> + f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> + return false;
>>>> + }
>>>> + blkaddr -= FDEV(devi).start_blk;
>>>> + }
>>>> +
>>>> + if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>> + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>> + return false;
>>>> +
>>>> + return true;
>>>
>>> Applied as below.
>>>
>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>> blkaddr -= FDEV(devi).start_blk;
>>> }
>>>
>>> - if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>> - !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>> - return false;
>>> -
>>> - return true;
>>> + return bdev_is_zoned(FDEV(devi).bdev) &&
>>> + f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>
>> Looks good, thank you for cleanup.
>>
>> Thanks,
>>
>>> }
>>> #endif
>>>
>>>> +}
>>>> #endif
>>>> static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>> index f8703038e1d8..a8626e297876 100644
>>>> --- a/fs/f2fs/iostat.c
>>>> +++ b/fs/f2fs/iostat.c
>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>> }
>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>> +{
>>>> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> +
>>>> + return iostat_ctx->sbi;
>>>> +}
>>>> +
>>>> void iostat_update_and_unbind_ctx(struct bio *bio)
>>>> {
>>>> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>> index eb99d05cf272..9006c3d41590 100644
>>>> --- a/fs/f2fs/iostat.h
>>>> +++ b/fs/f2fs/iostat.h
>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>> return iostat_ctx->post_read_ctx;
>>>> }
>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>> extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>> struct bio *bio, struct bio_post_read_ctx *ctx);
>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>> #else
>>>> static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>> enum iostat_type type, unsigned long long io_bytes) {}
>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>> struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>> return err;
>>>> stat_inc_seg_type(sbi, curseg);
>>>> locate_dirty_segment(sbi, old_segno);
>>>> +
>>>> + f2fs_blkzoned_submit_merged_write(sbi, type);
>>>> +
>>>> return 0;
>>>> }
>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>> f2fs_bug_on(fio->sbi, true);
>>>> }
>>>> - if (IS_HOT(type))
>>>> - fio->temp = HOT;
>>>> - else if (IS_WARM(type))
>>>> - fio->temp = WARM;
>>>> - else
>>>> - fio->temp = COLD;
>>>> + fio->temp = type_to_temp(type);
>>>> return type;
>>>> }
>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>> return -EINVAL;
>>>> }
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (f2fs_sb_has_blkzoned(sbi)) {
>>>> + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>> + struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> + enum page_type ptype;
>>>> + enum temp_type temp;
>>>> +
>>>> + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>> + continue;
>>>> +
>>>> + if (!f2fs_blkaddr_in_seqzone(sbi,
>>>> + START_BLOCK(sbi, curseg->segno)))
>>>> + continue;
>>>> +
>>>> + ptype = PAGE_TYPE(type);
>>>> + temp = type_to_temp(type);
>>>> + down(&sbi->available_open_zones);
>>>> + sbi->write_io[ptype][temp].zone_openned = true;
>>>> + }
>>>> + }
>>>> +#endif
>>>> return 0;
>>>> }
>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>> for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>> kfree(array[i].sum_blk);
>>>> kfree(array[i].journal);
>>>> +
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (f2fs_sb_has_blkzoned(sbi)) {
>>>> + enum page_type ptype = PAGE_TYPE(i);
>>>> + enum temp_type temp = type_to_temp(i);
>>>> +
>>>> + if (sbi->write_io[ptype][temp].zone_openned) {
>>>> + up(&sbi->available_open_zones);
>>>> + sbi->write_io[ptype][temp].zone_openned = false;
>>>> + }
>>>> + }
>>>> +#endif
>>>> }
>>>> kfree(array);
>>>> }
>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>> index e1c0f418aa11..855978ca869f 100644
>>>> --- a/fs/f2fs/segment.h
>>>> +++ b/fs/f2fs/segment.h
>>>> @@ -24,7 +24,8 @@
>>>> #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
>>>> #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>> -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>> +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
>>>> +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
>>>> static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>> unsigned short seg_type)
>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>> return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>> return 0;
>>>> }
>>>> +
>>>> +static inline enum temp_type type_to_temp(int type)
>>>> +{
>>>> + if (IS_HOT(type))
>>>> + return HOT;
>>>> + else if (IS_WARM(type))
>>>> + return WARM;
>>>> + return COLD;
>>>> +}
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index fdf358c7f808..954baa6c100d 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>> sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>> return -EINVAL;
>>>> }
>>>> +
>>>> + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>> }
>>>> zone_sectors = bdev_zone_sectors(bdev);
>>>> --
>>>> 2.40.1

2024-04-15 14:01:58

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 2024/4/15 11:26, Chao Yu wrote:
> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>> It seems this caused kernel hang. Chao, have you tested this patch enough?
>
> Jaegeuk,
>
> Oh, I've checked this patch w/ fsstress before submitting it, but missed
> the SPO testcase... do you encounter kernel hang w/ SPO testcase?

I did see any hang issue w/ por_fsstress testcase, which testcase do you use?

Thanks,

>
> Anyway, let me test it more.
>
> Thanks,
>
>>
>> On 04/13, Chao Yu wrote:
>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>> On 04/07, Chao Yu wrote:
>>>>> max open zone may be larger than log header number of f2fs, for
>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>> submit first write IO in a zone, and increase it after completion
>>>>> of last IO in the zone.
>>>>>
>>>>> Cc: Daeho Jeong <[email protected]>
>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>> ---
>>>>> v3:
>>>>> - avoid race condition in between __submit_merged_bio()
>>>>> and __allocate_new_segment().
>>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>>    fs/f2fs/iostat.h  |   2 +
>>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>>    fs/f2fs/super.c   |   2 +
>>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>    {
>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>> -    bio->bi_private = io->bi_private;
>>>>> -    complete(&io->zone_wait);
>>>>>        f2fs_write_end_io(bio);
>>>>> +    up(&sbi->available_open_zones);
>>>>>    }
>>>>>    #endif
>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>        if (!io->bio)
>>>>>            return;
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (io->open_zone) {
>>>>> +        /*
>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>> +         * previous zone before submitting new IO.
>>>>> +         */
>>>>> +        down(&fio->sbi->available_open_zones);
>>>>> +        io->open_zone = false;
>>>>> +        io->zone_openned = true;
>>>>> +    }
>>>>> +
>>>>> +    if (io->close_zone) {
>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>> +        io->zone_openned = false;
>>>>> +        io->close_zone = false;
>>>>> +    }
>>>>> +#endif
>>>>> +
>>>>>        if (is_read_io(fio->op)) {
>>>>>            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>    #endif
>>>>>            }
>>>>>        }
>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>        f2fs_up_write(&io->io_rwsem);
>>>>>    }
>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>> +{
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    struct f2fs_bio_info *io;
>>>>> +
>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>> +        return;
>>>>> +
>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>> +
>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>> +    if (io->zone_openned) {
>>>>> +        if (io->bio) {
>>>>> +            io->close_zone = true;
>>>>> +            __submit_merged_bio(io);
>>>>> +        } else if (io->zone_openned) {
>>>>> +            up(&sbi->available_open_zones);
>>>>> +            io->zone_openned = false;
>>>>> +        }
>>>>> +    }
>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>> +#endif
>>>>> +
>>>>> +}
>>>>> +
>>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>                    struct inode *inode, struct page *page,
>>>>>                    nid_t ino, enum page_type type, bool force)
>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>    }
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>> +                    block_t blkaddr, bool start)
>>>>>    {
>>>>> -    int devi = 0;
>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>> +        return false;
>>>>> +
>>>>> +    if (start)
>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>> -            return false;
>>>>> -        }
>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>> -    }
>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>    }
>>>>>    #endif
>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>        struct page *bio_page;
>>>>>        enum count_type type;
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>> +#endif
>>>>>        f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>        f2fs_down_write(&io->io_rwsem);
>>>>>    next:
>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>> -        bio_put(io->zone_pending_bio);
>>>>> -        io->zone_pending_bio = NULL;
>>>>> -        io->bi_private = NULL;
>>>>> -    }
>>>>> -#endif
>>>>> -
>>>>>        if (fio->in_list) {
>>>>>            spin_lock(&io->io_lock);
>>>>>            if (list_empty(&io->io_list)) {
>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>        inc_page_count(sbi, type);
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>> +        io->open_zone = true;
>>>>> +#endif
>>>>> +
>>>>>        if (io->bio &&
>>>>>            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>                      fio->new_blkaddr) ||
>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>        io->last_block_in_bio = fio->new_blkaddr;
>>>>>        trace_f2fs_submit_page_write(fio->page, fio);
>>>>> +
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>> -        bio_get(io->bio);
>>>>> -        reinit_completion(&io->zone_wait);
>>>>> -        io->bi_private = io->bio->bi_private;
>>>>> -        io->bio->bi_private = io;
>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>> -        io->zone_pending_bio = io->bio;
>>>>> +    if (blkzoned &&
>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>> +        io->close_zone = true;
>>>>>            __submit_merged_bio(io);
>>>>>        }
>>>>>    #endif
>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>> --- a/fs/f2fs/f2fs.h
>>>>> +++ b/fs/f2fs/f2fs.h
>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>        struct bio *bio;        /* bios to merge */
>>>>>        sector_t last_block_in_bio;    /* last block number */
>>>>>        struct f2fs_io_info fio;    /* store buffered io info. */
>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>> -#endif
>>>>>        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>        struct list_head io_list;    /* track fios */
>>>>>        struct list_head bio_list;    /* bio entry list head */
>>>>>        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    bool open_zone;            /* open a zone */
>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>> +    bool close_zone;        /* close a zone */
>>>>> +#endif
>>>>>    };
>>>>>    #define FDEV(i)                (sbi->devs[i])
>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>    #endif
>>>>>        /* for node-related operations */
>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>                  enum page_type type);
>>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>                    struct inode *inode, struct page *page,
>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>        return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>    }
>>>>> +
>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>> +                            block_t blkaddr)
>>>>> +{
>>>>> +    int devi = 0;
>>>>> +
>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>> +            return false;
>>>>> +        }
>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>> +    }
>>>>> +
>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>> +        return false;
>>>>> +
>>>>> +    return true;
>>>>
>>>> Applied as below.
>>>>
>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>                   blkaddr -= FDEV(devi).start_blk;
>>>>           }
>>>>
>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>> -               return false;
>>>> -
>>>> -       return true;
>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>
>>> Looks good, thank you for cleanup.
>>>
>>> Thanks,
>>>
>>>>    }
>>>>    #endif
>>>>
>>>>> +}
>>>>>    #endif
>>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>> index f8703038e1d8..a8626e297876 100644
>>>>> --- a/fs/f2fs/iostat.c
>>>>> +++ b/fs/f2fs/iostat.c
>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>    }
>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>> +{
>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>> +
>>>>> +    return iostat_ctx->sbi;
>>>>> +}
>>>>> +
>>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>    {
>>>>>        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>> --- a/fs/f2fs/iostat.h
>>>>> +++ b/fs/f2fs/iostat.h
>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>        return iostat_ctx->post_read_ctx;
>>>>>    }
>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>    #else
>>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>            enum iostat_type type, unsigned long long io_bytes) {}
>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>> --- a/fs/f2fs/segment.c
>>>>> +++ b/fs/f2fs/segment.c
>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>            return err;
>>>>>        stat_inc_seg_type(sbi, curseg);
>>>>>        locate_dirty_segment(sbi, old_segno);
>>>>> +
>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>> +
>>>>>        return 0;
>>>>>    }
>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>            f2fs_bug_on(fio->sbi, true);
>>>>>        }
>>>>> -    if (IS_HOT(type))
>>>>> -        fio->temp = HOT;
>>>>> -    else if (IS_WARM(type))
>>>>> -        fio->temp = WARM;
>>>>> -    else
>>>>> -        fio->temp = COLD;
>>>>> +    fio->temp = type_to_temp(type);
>>>>>        return type;
>>>>>    }
>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>            return -EINVAL;
>>>>>        }
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>> +            enum page_type ptype;
>>>>> +            enum temp_type temp;
>>>>> +
>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>> +                continue;
>>>>> +
>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>> +                continue;
>>>>> +
>>>>> +            ptype = PAGE_TYPE(type);
>>>>> +            temp = type_to_temp(type);
>>>>> +            down(&sbi->available_open_zones);
>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>> +        }
>>>>> +    }
>>>>> +#endif
>>>>>        return 0;
>>>>>    }
>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>        for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>            kfree(array[i].sum_blk);
>>>>>            kfree(array[i].journal);
>>>>> +
>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>> +
>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>> +                up(&sbi->available_open_zones);
>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>> +            }
>>>>> +        }
>>>>> +#endif
>>>>>        }
>>>>>        kfree(array);
>>>>>    }
>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>> --- a/fs/f2fs/segment.h
>>>>> +++ b/fs/f2fs/segment.h
>>>>> @@ -24,7 +24,8 @@
>>>>>    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>                            unsigned short seg_type)
>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>                return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>        return 0;
>>>>>    }
>>>>> +
>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>> +{
>>>>> +    if (IS_HOT(type))
>>>>> +        return HOT;
>>>>> +    else if (IS_WARM(type))
>>>>> +        return WARM;
>>>>> +    return COLD;
>>>>> +}
>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>> --- a/fs/f2fs/super.c
>>>>> +++ b/fs/f2fs/super.c
>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>                return -EINVAL;
>>>>>            }
>>>>> +
>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>        }
>>>>>        zone_sectors = bdev_zone_sectors(bdev);
>>>>> --
>>>>> 2.40.1
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-04-16 00:59:15

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 2024/4/15 22:01, Chao Yu wrote:
> On 2024/4/15 11:26, Chao Yu wrote:
>> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>>> It seems this caused kernel hang. Chao, have you tested this patch enough?
>>
>> Jaegeuk,
>>
>> Oh, I've checked this patch w/ fsstress before submitting it, but missed
>> the SPO testcase... do you encounter kernel hang w/ SPO testcase?
>
> I did see any hang issue w/ por_fsstress testcase, which testcase do you use?

Sorry, I mean I haven't reproduced it yet...

Thanks,

>
> Thanks,
>
>>
>> Anyway, let me test it more.
>>
>> Thanks,
>>
>>>
>>> On 04/13, Chao Yu wrote:
>>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>>> On 04/07, Chao Yu wrote:
>>>>>> max open zone may be larger than log header number of f2fs, for
>>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>>> submit first write IO in a zone, and increase it after completion
>>>>>> of last IO in the zone.
>>>>>>
>>>>>> Cc: Daeho Jeong <[email protected]>
>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>> ---
>>>>>> v3:
>>>>>> - avoid race condition in between __submit_merged_bio()
>>>>>> and __allocate_new_segment().
>>>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>>>    fs/f2fs/iostat.h  |   2 +
>>>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>>>    fs/f2fs/super.c   |   2 +
>>>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>>    {
>>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>>> -    bio->bi_private = io->bi_private;
>>>>>> -    complete(&io->zone_wait);
>>>>>>        f2fs_write_end_io(bio);
>>>>>> +    up(&sbi->available_open_zones);
>>>>>>    }
>>>>>>    #endif
>>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>>        if (!io->bio)
>>>>>>            return;
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (io->open_zone) {
>>>>>> +        /*
>>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>>> +         * previous zone before submitting new IO.
>>>>>> +         */
>>>>>> +        down(&fio->sbi->available_open_zones);
>>>>>> +        io->open_zone = false;
>>>>>> +        io->zone_openned = true;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (io->close_zone) {
>>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>> +        io->zone_openned = false;
>>>>>> +        io->close_zone = false;
>>>>>> +    }
>>>>>> +#endif
>>>>>> +
>>>>>>        if (is_read_io(fio->op)) {
>>>>>>            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>>            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>>                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>>                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>>    #endif
>>>>>>            }
>>>>>>        }
>>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>>        f2fs_up_write(&io->io_rwsem);
>>>>>>    }
>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>>> +{
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    struct f2fs_bio_info *io;
>>>>>> +
>>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>>> +        return;
>>>>>> +
>>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>>> +
>>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>>> +    if (io->zone_openned) {
>>>>>> +        if (io->bio) {
>>>>>> +            io->close_zone = true;
>>>>>> +            __submit_merged_bio(io);
>>>>>> +        } else if (io->zone_openned) {
>>>>>> +            up(&sbi->available_open_zones);
>>>>>> +            io->zone_openned = false;
>>>>>> +        }
>>>>>> +    }
>>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>>> +#endif
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>                    struct inode *inode, struct page *page,
>>>>>>                    nid_t ino, enum page_type type, bool force)
>>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>>    }
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>>> +                    block_t blkaddr, bool start)
>>>>>>    {
>>>>>> -    int devi = 0;
>>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    if (start)
>>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>> -            return false;
>>>>>> -        }
>>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>>> -    }
>>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>    }
>>>>>>    #endif
>>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>>        struct page *bio_page;
>>>>>>        enum count_type type;
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>>> +#endif
>>>>>>        f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>>        f2fs_down_write(&io->io_rwsem);
>>>>>>    next:
>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>>> -        bio_put(io->zone_pending_bio);
>>>>>> -        io->zone_pending_bio = NULL;
>>>>>> -        io->bi_private = NULL;
>>>>>> -    }
>>>>>> -#endif
>>>>>> -
>>>>>>        if (fio->in_list) {
>>>>>>            spin_lock(&io->io_lock);
>>>>>>            if (list_empty(&io->io_list)) {
>>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>>        inc_page_count(sbi, type);
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>>> +        io->open_zone = true;
>>>>>> +#endif
>>>>>> +
>>>>>>        if (io->bio &&
>>>>>>            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>>                      fio->new_blkaddr) ||
>>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>        io->last_block_in_bio = fio->new_blkaddr;
>>>>>>        trace_f2fs_submit_page_write(fio->page, fio);
>>>>>> +
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>>> -        bio_get(io->bio);
>>>>>> -        reinit_completion(&io->zone_wait);
>>>>>> -        io->bi_private = io->bio->bi_private;
>>>>>> -        io->bio->bi_private = io;
>>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>> -        io->zone_pending_bio = io->bio;
>>>>>> +    if (blkzoned &&
>>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>>> +        io->close_zone = true;
>>>>>>            __submit_merged_bio(io);
>>>>>>        }
>>>>>>    #endif
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>>        struct bio *bio;        /* bios to merge */
>>>>>>        sector_t last_block_in_bio;    /* last block number */
>>>>>>        struct f2fs_io_info fio;    /* store buffered io info. */
>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>>> -#endif
>>>>>>        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>>        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>>        struct list_head io_list;    /* track fios */
>>>>>>        struct list_head bio_list;    /* bio entry list head */
>>>>>>        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    bool open_zone;            /* open a zone */
>>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>>> +    bool close_zone;        /* close a zone */
>>>>>> +#endif
>>>>>>    };
>>>>>>    #define FDEV(i)                (sbi->devs[i])
>>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>>        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>>    #endif
>>>>>>        /* for node-related operations */
>>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>>                  enum page_type type);
>>>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>                    struct inode *inode, struct page *page,
>>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>>        return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>>    }
>>>>>> +
>>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>> +                            block_t blkaddr)
>>>>>> +{
>>>>>> +    int devi = 0;
>>>>>> +
>>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>> +            return false;
>>>>>> +        }
>>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>> +        return false;
>>>>>> +
>>>>>> +    return true;
>>>>>
>>>>> Applied as below.
>>>>>
>>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>                   blkaddr -= FDEV(devi).start_blk;
>>>>>           }
>>>>>
>>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>> -               return false;
>>>>> -
>>>>> -       return true;
>>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>>
>>>> Looks good, thank you for cleanup.
>>>>
>>>> Thanks,
>>>>
>>>>>    }
>>>>>    #endif
>>>>>
>>>>>> +}
>>>>>>    #endif
>>>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>>> index f8703038e1d8..a8626e297876 100644
>>>>>> --- a/fs/f2fs/iostat.c
>>>>>> +++ b/fs/f2fs/iostat.c
>>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>>        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>>    }
>>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>>> +{
>>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>> +
>>>>>> +    return iostat_ctx->sbi;
>>>>>> +}
>>>>>> +
>>>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>>    {
>>>>>>        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>>> --- a/fs/f2fs/iostat.h
>>>>>> +++ b/fs/f2fs/iostat.h
>>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>>        return iostat_ctx->post_read_ctx;
>>>>>>    }
>>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>>    #else
>>>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>>            enum iostat_type type, unsigned long long io_bytes) {}
>>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>>> --- a/fs/f2fs/segment.c
>>>>>> +++ b/fs/f2fs/segment.c
>>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>>            return err;
>>>>>>        stat_inc_seg_type(sbi, curseg);
>>>>>>        locate_dirty_segment(sbi, old_segno);
>>>>>> +
>>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>>> +
>>>>>>        return 0;
>>>>>>    }
>>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>>            f2fs_bug_on(fio->sbi, true);
>>>>>>        }
>>>>>> -    if (IS_HOT(type))
>>>>>> -        fio->temp = HOT;
>>>>>> -    else if (IS_WARM(type))
>>>>>> -        fio->temp = WARM;
>>>>>> -    else
>>>>>> -        fio->temp = COLD;
>>>>>> +    fio->temp = type_to_temp(type);
>>>>>>        return type;
>>>>>>    }
>>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>            return -EINVAL;
>>>>>>        }
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>> +            enum page_type ptype;
>>>>>> +            enum temp_type temp;
>>>>>> +
>>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>>> +                continue;
>>>>>> +
>>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>>> +                continue;
>>>>>> +
>>>>>> +            ptype = PAGE_TYPE(type);
>>>>>> +            temp = type_to_temp(type);
>>>>>> +            down(&sbi->available_open_zones);
>>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>>> +        }
>>>>>> +    }
>>>>>> +#endif
>>>>>>        return 0;
>>>>>>    }
>>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>>        for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>            kfree(array[i].sum_blk);
>>>>>>            kfree(array[i].journal);
>>>>>> +
>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>>> +
>>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>>> +                up(&sbi->available_open_zones);
>>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>>> +            }
>>>>>> +        }
>>>>>> +#endif
>>>>>>        }
>>>>>>        kfree(array);
>>>>>>    }
>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>>> --- a/fs/f2fs/segment.h
>>>>>> +++ b/fs/f2fs/segment.h
>>>>>> @@ -24,7 +24,8 @@
>>>>>>    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>>    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>>                            unsigned short seg_type)
>>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>>                return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>>        return 0;
>>>>>>    }
>>>>>> +
>>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>>> +{
>>>>>> +    if (IS_HOT(type))
>>>>>> +        return HOT;
>>>>>> +    else if (IS_WARM(type))
>>>>>> +        return WARM;
>>>>>> +    return COLD;
>>>>>> +}
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>>                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>>                return -EINVAL;
>>>>>>            }
>>>>>> +
>>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>>        }
>>>>>>        zone_sectors = bdev_zone_sectors(bdev);
>>>>>> --
>>>>>> 2.40.1
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-04-16 16:53:22

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 04/16, Chao Yu wrote:
> On 2024/4/15 22:01, Chao Yu wrote:
> > On 2024/4/15 11:26, Chao Yu wrote:
> > > On 2024/4/14 23:19, Jaegeuk Kim wrote:
> > > > It seems this caused kernel hang. Chao, have you tested this patch enough?
> > >
> > > Jaegeuk,
> > >
> > > Oh, I've checked this patch w/ fsstress before submitting it, but missed
> > > the SPO testcase... do you encounter kernel hang w/ SPO testcase?
> >
> > I did see any hang issue w/ por_fsstress testcase, which testcase do you use?
>
> Sorry, I mean I haven't reproduced it yet...

I'd prefer to check this patch later. Have you tested on Zoned device with
nullblk?

>
> Thanks,
>
> >
> > Thanks,
> >
> > >
> > > Anyway, let me test it more.
> > >
> > > Thanks,
> > >
> > > >
> > > > On 04/13, Chao Yu wrote:
> > > > > On 2024/4/13 5:11, Jaegeuk Kim wrote:
> > > > > > On 04/07, Chao Yu wrote:
> > > > > > > max open zone may be larger than log header number of f2fs, for
> > > > > > > such case, it doesn't need to wait last IO in previous zone, let's
> > > > > > > introduce available_open_zone semaphore, and reduce it once we
> > > > > > > submit first write IO in a zone, and increase it after completion
> > > > > > > of last IO in the zone.
> > > > > > >
> > > > > > > Cc: Daeho Jeong <[email protected]>
> > > > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > > > ---
> > > > > > > v3:
> > > > > > > - avoid race condition in between __submit_merged_bio()
> > > > > > > and __allocate_new_segment().
> > > > > > > ?? fs/f2fs/data.c??? | 105 ++++++++++++++++++++++++++++++----------------
> > > > > > > ?? fs/f2fs/f2fs.h??? |? 34 ++++++++++++---
> > > > > > > ?? fs/f2fs/iostat.c? |?? 7 ++++
> > > > > > > ?? fs/f2fs/iostat.h? |?? 2 +
> > > > > > > ?? fs/f2fs/segment.c |? 43 ++++++++++++++++---
> > > > > > > ?? fs/f2fs/segment.h |? 12 +++++-
> > > > > > > ?? fs/f2fs/super.c?? |?? 2 +
> > > > > > > ?? 7 files changed, 156 insertions(+), 49 deletions(-)
> > > > > > >
> > > > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > > > index 0d88649c60a5..18a4ac0a06bc 100644
> > > > > > > --- a/fs/f2fs/data.c
> > > > > > > +++ b/fs/f2fs/data.c
> > > > > > > @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
> > > > > > > ?? #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > ?? static void f2fs_zone_write_end_io(struct bio *bio)
> > > > > > > ?? {
> > > > > > > -??? struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
> > > > > > > +??? struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
> > > > > > > -??? bio->bi_private = io->bi_private;
> > > > > > > -??? complete(&io->zone_wait);
> > > > > > > ?????? f2fs_write_end_io(bio);
> > > > > > > +??? up(&sbi->available_open_zones);
> > > > > > > ?? }
> > > > > > > ?? #endif
> > > > > > > @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
> > > > > > > ?????? if (!io->bio)
> > > > > > > ?????????? return;
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? if (io->open_zone) {
> > > > > > > +??????? /*
> > > > > > > +???????? * if there is no open zone, it will wait for last IO in
> > > > > > > +???????? * previous zone before submitting new IO.
> > > > > > > +???????? */
> > > > > > > +??????? down(&fio->sbi->available_open_zones);
> > > > > > > +??????? io->open_zone = false;
> > > > > > > +??????? io->zone_openned = true;
> > > > > > > +??? }
> > > > > > > +
> > > > > > > +??? if (io->close_zone) {
> > > > > > > +??????? io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > > > > > +??????? io->zone_openned = false;
> > > > > > > +??????? io->close_zone = false;
> > > > > > > +??? }
> > > > > > > +#endif
> > > > > > > +
> > > > > > > ?????? if (is_read_io(fio->op)) {
> > > > > > > ?????????? trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
> > > > > > > ?????????? f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
> > > > > > > @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
> > > > > > > ?????????????? INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
> > > > > > > ?????????????? init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
> > > > > > > ?? #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -??????????? init_completion(&sbi->write_io[i][j].zone_wait);
> > > > > > > -??????????? sbi->write_io[i][j].zone_pending_bio = NULL;
> > > > > > > -??????????? sbi->write_io[i][j].bi_private = NULL;
> > > > > > > +??????????? sbi->write_io[i][j].open_zone = false;
> > > > > > > +??????????? sbi->write_io[i][j].zone_openned = false;
> > > > > > > +??????????? sbi->write_io[i][j].close_zone = false;
> > > > > > > ?? #endif
> > > > > > > ?????????? }
> > > > > > > ?????? }
> > > > > > > @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
> > > > > > > ?????? f2fs_up_write(&io->io_rwsem);
> > > > > > > ?? }
> > > > > > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
> > > > > > > +{
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? struct f2fs_bio_info *io;
> > > > > > > +
> > > > > > > +??? if (!f2fs_sb_has_blkzoned(sbi))
> > > > > > > +??????? return;
> > > > > > > +
> > > > > > > +??? io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
> > > > > > > +
> > > > > > > +??? f2fs_down_write(&io->io_rwsem);
> > > > > > > +??? if (io->zone_openned) {
> > > > > > > +??????? if (io->bio) {
> > > > > > > +??????????? io->close_zone = true;
> > > > > > > +??????????? __submit_merged_bio(io);
> > > > > > > +??????? } else if (io->zone_openned) {
> > > > > > > +??????????? up(&sbi->available_open_zones);
> > > > > > > +??????????? io->zone_openned = false;
> > > > > > > +??????? }
> > > > > > > +??? }
> > > > > > > +??? f2fs_up_write(&io->io_rwsem);
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +}
> > > > > > > +
> > > > > > > ?? static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > > > > > ?????????????????? struct inode *inode, struct page *page,
> > > > > > > ?????????????????? nid_t ino, enum page_type type, bool force)
> > > > > > > @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
> > > > > > > ?? }
> > > > > > > ?? #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
> > > > > > > +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
> > > > > > > +??????????????????? block_t blkaddr, bool start)
> > > > > > > ?? {
> > > > > > > -??? int devi = 0;
> > > > > > > +??? if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
> > > > > > > +??????? return false;
> > > > > > > +
> > > > > > > +??? if (start)
> > > > > > > +??????? return (blkaddr % sbi->blocks_per_blkz) == 0;
> > > > > > > +??? return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > > > > > -??? if (f2fs_is_multi_device(sbi)) {
> > > > > > > -??????? devi = f2fs_target_device_index(sbi, blkaddr);
> > > > > > > -??????? if (blkaddr < FDEV(devi).start_blk ||
> > > > > > > -??????????? blkaddr > FDEV(devi).end_blk) {
> > > > > > > -??????????? f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > > > > > -??????????? return false;
> > > > > > > -??????? }
> > > > > > > -??????? blkaddr -= FDEV(devi).start_blk;
> > > > > > > -??? }
> > > > > > > -??? return bdev_is_zoned(FDEV(devi).bdev) &&
> > > > > > > -??????? f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
> > > > > > > -??????? (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
> > > > > > > ?? }
> > > > > > > ?? #endif
> > > > > > > @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > > ?????? struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
> > > > > > > ?????? struct page *bio_page;
> > > > > > > ?????? enum count_type type;
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
> > > > > > > +#endif
> > > > > > > ?????? f2fs_bug_on(sbi, is_read_io(fio->op));
> > > > > > > ?????? f2fs_down_write(&io->io_rwsem);
> > > > > > > ?? next:
> > > > > > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -??? if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
> > > > > > > -??????? wait_for_completion_io(&io->zone_wait);
> > > > > > > -??????? bio_put(io->zone_pending_bio);
> > > > > > > -??????? io->zone_pending_bio = NULL;
> > > > > > > -??????? io->bi_private = NULL;
> > > > > > > -??? }
> > > > > > > -#endif
> > > > > > > -
> > > > > > > ?????? if (fio->in_list) {
> > > > > > > ?????????? spin_lock(&io->io_lock);
> > > > > > > ?????????? if (list_empty(&io->io_list)) {
> > > > > > > @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > > ?????? type = WB_DATA_TYPE(bio_page, fio->compressed_page);
> > > > > > > ?????? inc_page_count(sbi, type);
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
> > > > > > > +??????? io->open_zone = true;
> > > > > > > +#endif
> > > > > > > +
> > > > > > > ?????? if (io->bio &&
> > > > > > > ?????????? (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
> > > > > > > ???????????????????? fio->new_blkaddr) ||
> > > > > > > @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > > > > > > ?????? io->last_block_in_bio = fio->new_blkaddr;
> > > > > > > ?????? trace_f2fs_submit_page_write(fio->page, fio);
> > > > > > > +
> > > > > > > ?? #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -??? if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> > > > > > > -??????????? is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> > > > > > > -??????? bio_get(io->bio);
> > > > > > > -??????? reinit_completion(&io->zone_wait);
> > > > > > > -??????? io->bi_private = io->bio->bi_private;
> > > > > > > -??????? io->bio->bi_private = io;
> > > > > > > -??????? io->bio->bi_end_io = f2fs_zone_write_end_io;
> > > > > > > -??????? io->zone_pending_bio = io->bio;
> > > > > > > +??? if (blkzoned &&
> > > > > > > +??????? is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
> > > > > > > +??????? io->close_zone = true;
> > > > > > > ?????????? __submit_merged_bio(io);
> > > > > > > ?????? }
> > > > > > > ?? #endif
> > > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > > index 694f8a52cb84..8a26530cf4fb 100644
> > > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > > @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
> > > > > > > ?????? struct bio *bio;??????? /* bios to merge */
> > > > > > > ?????? sector_t last_block_in_bio;??? /* last block number */
> > > > > > > ?????? struct f2fs_io_info fio;??? /* store buffered io info. */
> > > > > > > -#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > -??? struct completion zone_wait;??? /* condition value for the previous open zone to close */
> > > > > > > -??? struct bio *zone_pending_bio;??? /* pending bio for the previous zone */
> > > > > > > -??? void *bi_private;??????? /* previous bi_private for pending bio */
> > > > > > > -#endif
> > > > > > > ?????? struct f2fs_rwsem io_rwsem;??? /* blocking op for bio */
> > > > > > > ?????? spinlock_t io_lock;??????? /* serialize DATA/NODE IOs */
> > > > > > > ?????? struct list_head io_list;??? /* track fios */
> > > > > > > ?????? struct list_head bio_list;??? /* bio entry list head */
> > > > > > > ?????? struct f2fs_rwsem bio_list_lock;??? /* lock to protect bio entry list */
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? bool open_zone;??????????? /* open a zone */
> > > > > > > +??? bool zone_openned;??????? /* zone has been openned */
> > > > > > > +??? bool close_zone;??????? /* close a zone */
> > > > > > > +#endif
> > > > > > > ?? };
> > > > > > > ?? #define FDEV(i)??????????????? (sbi->devs[i])
> > > > > > > @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
> > > > > > > ?? #ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > ?????? unsigned int blocks_per_blkz;??????? /* F2FS blocks per zone */
> > > > > > > ?????? unsigned int max_open_zones;??????? /* max open zone resources of the zoned device */
> > > > > > > +??? struct semaphore available_open_zones;??? /* available open zones */
> > > > > > > ?? #endif
> > > > > > > ?????? /* for node-related operations */
> > > > > > > @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
> > > > > > > ?? void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
> > > > > > > ???????????????? enum page_type type);
> > > > > > > ?? int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
> > > > > > > +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
> > > > > > > ?? void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
> > > > > > > ?? void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
> > > > > > > ?????????????????? struct inode *inode, struct page *page,
> > > > > > > @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
> > > > > > > ?????? return test_bit(zno, FDEV(devi).blkz_seq);
> > > > > > > ?? }
> > > > > > > +
> > > > > > > +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > > > > > +??????????????????????????? block_t blkaddr)
> > > > > > > +{
> > > > > > > +??? int devi = 0;
> > > > > > > +
> > > > > > > +??? if (f2fs_is_multi_device(sbi)) {
> > > > > > > +??????? devi = f2fs_target_device_index(sbi, blkaddr);
> > > > > > > +??????? if (blkaddr < FDEV(devi).start_blk ||
> > > > > > > +??????????? blkaddr > FDEV(devi).end_blk) {
> > > > > > > +??????????? f2fs_err(sbi, "Invalid block %x", blkaddr);
> > > > > > > +??????????? return false;
> > > > > > > +??????? }
> > > > > > > +??????? blkaddr -= FDEV(devi).start_blk;
> > > > > > > +??? }
> > > > > > > +
> > > > > > > +??? if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > > > > > +??????? !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > > > > > +??????? return false;
> > > > > > > +
> > > > > > > +??? return true;
> > > > > >
> > > > > > Applied as below.
> > > > > >
> > > > > > @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
> > > > > > ????????????????? blkaddr -= FDEV(devi).start_blk;
> > > > > > ????????? }
> > > > > >
> > > > > > -?????? if (!bdev_is_zoned(FDEV(devi).bdev) ||
> > > > > > -?????????????? !f2fs_blkz_is_seq(sbi, devi, blkaddr))
> > > > > > -?????????????? return false;
> > > > > > -
> > > > > > -?????? return true;
> > > > > > +?????? return bdev_is_zoned(FDEV(devi).bdev) &&
> > > > > > +?????????????? f2fs_blkz_is_seq(sbi, devi, blkaddr);
> > > > >
> > > > > Looks good, thank you for cleanup.
> > > > >
> > > > > Thanks,
> > > > >
> > > > > > ?? }
> > > > > > ?? #endif
> > > > > >
> > > > > > > +}
> > > > > > > ?? #endif
> > > > > > > ?? static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
> > > > > > > diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> > > > > > > index f8703038e1d8..a8626e297876 100644
> > > > > > > --- a/fs/f2fs/iostat.c
> > > > > > > +++ b/fs/f2fs/iostat.c
> > > > > > > @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> > > > > > > ?????? spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> > > > > > > ?? }
> > > > > > > +void *iostat_get_bio_private(struct bio *bio)
> > > > > > > +{
> > > > > > > +??? struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > > > > > +
> > > > > > > +??? return iostat_ctx->sbi;
> > > > > > > +}
> > > > > > > +
> > > > > > > ?? void iostat_update_and_unbind_ctx(struct bio *bio)
> > > > > > > ?? {
> > > > > > > ?????? struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > > > > > > diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
> > > > > > > index eb99d05cf272..9006c3d41590 100644
> > > > > > > --- a/fs/f2fs/iostat.h
> > > > > > > +++ b/fs/f2fs/iostat.h
> > > > > > > @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
> > > > > > > ?????? return iostat_ctx->post_read_ctx;
> > > > > > > ?? }
> > > > > > > +extern void *iostat_get_bio_private(struct bio *bio);
> > > > > > > ?? extern void iostat_update_and_unbind_ctx(struct bio *bio);
> > > > > > > ?? extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > > > > > ?????????? struct bio *bio, struct bio_post_read_ctx *ctx);
> > > > > > > @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
> > > > > > > ?? #else
> > > > > > > ?? static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
> > > > > > > ?????????? enum iostat_type type, unsigned long long io_bytes) {}
> > > > > > > +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
> > > > > > > ?? static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
> > > > > > > ?? static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
> > > > > > > ?????????? struct bio *bio, struct bio_post_read_ctx *ctx) {}
> > > > > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > > > > index 4fd76e867e0a..4a3cf2888faf 100644
> > > > > > > --- a/fs/f2fs/segment.c
> > > > > > > +++ b/fs/f2fs/segment.c
> > > > > > > @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
> > > > > > > ?????????? return err;
> > > > > > > ?????? stat_inc_seg_type(sbi, curseg);
> > > > > > > ?????? locate_dirty_segment(sbi, old_segno);
> > > > > > > +
> > > > > > > +??? f2fs_blkzoned_submit_merged_write(sbi, type);
> > > > > > > +
> > > > > > > ?????? return 0;
> > > > > > > ?? }
> > > > > > > @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
> > > > > > > ?????????? f2fs_bug_on(fio->sbi, true);
> > > > > > > ?????? }
> > > > > > > -??? if (IS_HOT(type))
> > > > > > > -??????? fio->temp = HOT;
> > > > > > > -??? else if (IS_WARM(type))
> > > > > > > -??????? fio->temp = WARM;
> > > > > > > -??? else
> > > > > > > -??????? fio->temp = COLD;
> > > > > > > +??? fio->temp = type_to_temp(type);
> > > > > > > ?????? return type;
> > > > > > > ?? }
> > > > > > > @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
> > > > > > > ?????????? return -EINVAL;
> > > > > > > ?????? }
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??? if (f2fs_sb_has_blkzoned(sbi)) {
> > > > > > > +??????? for (type = 0; type < NR_PERSISTENT_LOG; type++) {
> > > > > > > +??????????? struct curseg_info *curseg = CURSEG_I(sbi, type);
> > > > > > > +??????????? enum page_type ptype;
> > > > > > > +??????????? enum temp_type temp;
> > > > > > > +
> > > > > > > +??????????? if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
> > > > > > > +??????????????? continue;
> > > > > > > +
> > > > > > > +??????????? if (!f2fs_blkaddr_in_seqzone(sbi,
> > > > > > > +??????????????????? START_BLOCK(sbi, curseg->segno)))
> > > > > > > +??????????????? continue;
> > > > > > > +
> > > > > > > +??????????? ptype = PAGE_TYPE(type);
> > > > > > > +??????????? temp = type_to_temp(type);
> > > > > > > +??????????? down(&sbi->available_open_zones);
> > > > > > > +??????????? sbi->write_io[ptype][temp].zone_openned = true;
> > > > > > > +??????? }
> > > > > > > +??? }
> > > > > > > +#endif
> > > > > > > ?????? return 0;
> > > > > > > ?? }
> > > > > > > @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
> > > > > > > ?????? for (i = 0; i < NR_CURSEG_TYPE; i++) {
> > > > > > > ?????????? kfree(array[i].sum_blk);
> > > > > > > ?????????? kfree(array[i].journal);
> > > > > > > +
> > > > > > > +#ifdef CONFIG_BLK_DEV_ZONED
> > > > > > > +??????? if (f2fs_sb_has_blkzoned(sbi)) {
> > > > > > > +??????????? enum page_type ptype = PAGE_TYPE(i);
> > > > > > > +??????????? enum temp_type temp = type_to_temp(i);
> > > > > > > +
> > > > > > > +??????????? if (sbi->write_io[ptype][temp].zone_openned) {
> > > > > > > +??????????????? up(&sbi->available_open_zones);
> > > > > > > +??????????????? sbi->write_io[ptype][temp].zone_openned = false;
> > > > > > > +??????????? }
> > > > > > > +??????? }
> > > > > > > +#endif
> > > > > > > ?????? }
> > > > > > > ?????? kfree(array);
> > > > > > > ?? }
> > > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > > index e1c0f418aa11..855978ca869f 100644
> > > > > > > --- a/fs/f2fs/segment.h
> > > > > > > +++ b/fs/f2fs/segment.h
> > > > > > > @@ -24,7 +24,8 @@
> > > > > > > ?? #define IS_DATASEG(t)??? ((t) <= CURSEG_COLD_DATA)
> > > > > > > ?? #define IS_NODESEG(t)??? ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
> > > > > > > -#define SE_PAGETYPE(se)??? ((IS_NODESEG((se)->type) ? NODE : DATA))
> > > > > > > +#define PAGE_TYPE(t)??? (IS_NODESEG(t) ? NODE : DATA)
> > > > > > > +#define SE_PAGETYPE(se)??? (PAGE_TYPE((se)->type))
> > > > > > > ?? static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
> > > > > > > ?????????????????????????? unsigned short seg_type)
> > > > > > > @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
> > > > > > > ?????????????? return GET_SEGNO(sbi, FDEV(devi).start_blk);
> > > > > > > ?????? return 0;
> > > > > > > ?? }
> > > > > > > +
> > > > > > > +static inline enum temp_type type_to_temp(int type)
> > > > > > > +{
> > > > > > > +??? if (IS_HOT(type))
> > > > > > > +??????? return HOT;
> > > > > > > +??? else if (IS_WARM(type))
> > > > > > > +??????? return WARM;
> > > > > > > +??? return COLD;
> > > > > > > +}
> > > > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > > > index fdf358c7f808..954baa6c100d 100644
> > > > > > > --- a/fs/f2fs/super.c
> > > > > > > +++ b/fs/f2fs/super.c
> > > > > > > @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> > > > > > > ?????????????????? sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
> > > > > > > ?????????????? return -EINVAL;
> > > > > > > ?????????? }
> > > > > > > +
> > > > > > > +??????? sema_init(&sbi->available_open_zones, sbi->max_open_zones);
> > > > > > > ?????? }
> > > > > > > ?????? zone_sectors = bdev_zone_sectors(bdev);
> > > > > > > --
> > > > > > > 2.40.1
> > >
> > >
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> >
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2024-04-17 01:05:25

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH v3] f2fs: zone: don't block IO if there is remained open zone

On 2024/4/17 0:51, Jaegeuk Kim wrote:
> On 04/16, Chao Yu wrote:
>> On 2024/4/15 22:01, Chao Yu wrote:
>>> On 2024/4/15 11:26, Chao Yu wrote:
>>>> On 2024/4/14 23:19, Jaegeuk Kim wrote:
>>>>> It seems this caused kernel hang. Chao, have you tested this patch enough?
>>>>
>>>> Jaegeuk,
>>>>
>>>> Oh, I've checked this patch w/ fsstress before submitting it, but missed
>>>> the SPO testcase... do you encounter kernel hang w/ SPO testcase?
>>>
>>> I did see any hang issue w/ por_fsstress testcase, which testcase do you use?
>>
>> Sorry, I mean I haven't reproduced it yet...
>
> I'd prefer to check this patch later. Have you tested on Zoned device with
> nullblk?

Yes, I enabled blkzoned feature w/ nullblk device, and set
/sys/kernel/config/nullb/nullb0/zone_max_open to six, so that it can
emulate ZUFS' configuration.

Thanks,

>
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Anyway, let me test it more.
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> On 04/13, Chao Yu wrote:
>>>>>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>>>>>> On 04/07, Chao Yu wrote:
>>>>>>>> max open zone may be larger than log header number of f2fs, for
>>>>>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>>>>>> introduce available_open_zone semaphore, and reduce it once we
>>>>>>>> submit first write IO in a zone, and increase it after completion
>>>>>>>> of last IO in the zone.
>>>>>>>>
>>>>>>>> Cc: Daeho Jeong <[email protected]>
>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>> ---
>>>>>>>> v3:
>>>>>>>> - avoid race condition in between __submit_merged_bio()
>>>>>>>> and __allocate_new_segment().
>>>>>>>>    fs/f2fs/data.c    | 105 ++++++++++++++++++++++++++++++----------------
>>>>>>>>    fs/f2fs/f2fs.h    |  34 ++++++++++++---
>>>>>>>>    fs/f2fs/iostat.c  |   7 ++++
>>>>>>>>    fs/f2fs/iostat.h  |   2 +
>>>>>>>>    fs/f2fs/segment.c |  43 ++++++++++++++++---
>>>>>>>>    fs/f2fs/segment.h |  12 +++++-
>>>>>>>>    fs/f2fs/super.c   |   2 +
>>>>>>>>    7 files changed, 156 insertions(+), 49 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>    static void f2fs_zone_write_end_io(struct bio *bio)
>>>>>>>>    {
>>>>>>>> -    struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>>>>>> +    struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>>>>>> -    bio->bi_private = io->bi_private;
>>>>>>>> -    complete(&io->zone_wait);
>>>>>>>>        f2fs_write_end_io(bio);
>>>>>>>> +    up(&sbi->available_open_zones);
>>>>>>>>    }
>>>>>>>>    #endif
>>>>>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>>>>>>        if (!io->bio)
>>>>>>>>            return;
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (io->open_zone) {
>>>>>>>> +        /*
>>>>>>>> +         * if there is no open zone, it will wait for last IO in
>>>>>>>> +         * previous zone before submitting new IO.
>>>>>>>> +         */
>>>>>>>> +        down(&fio->sbi->available_open_zones);
>>>>>>>> +        io->open_zone = false;
>>>>>>>> +        io->zone_openned = true;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (io->close_zone) {
>>>>>>>> +        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>>>> +        io->zone_openned = false;
>>>>>>>> +        io->close_zone = false;
>>>>>>>> +    }
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>>        if (is_read_io(fio->op)) {
>>>>>>>>            trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>>>>>>            f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>>>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>>>>>>                INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>>>>>>                init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -            init_completion(&sbi->write_io[i][j].zone_wait);
>>>>>>>> -            sbi->write_io[i][j].zone_pending_bio = NULL;
>>>>>>>> -            sbi->write_io[i][j].bi_private = NULL;
>>>>>>>> +            sbi->write_io[i][j].open_zone = false;
>>>>>>>> +            sbi->write_io[i][j].zone_openned = false;
>>>>>>>> +            sbi->write_io[i][j].close_zone = false;
>>>>>>>>    #endif
>>>>>>>>            }
>>>>>>>>        }
>>>>>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>>>>>>        f2fs_up_write(&io->io_rwsem);
>>>>>>>>    }
>>>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>>>>>> +{
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    struct f2fs_bio_info *io;
>>>>>>>> +
>>>>>>>> +    if (!f2fs_sb_has_blkzoned(sbi))
>>>>>>>> +        return;
>>>>>>>> +
>>>>>>>> +    io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>>>>>> +
>>>>>>>> +    f2fs_down_write(&io->io_rwsem);
>>>>>>>> +    if (io->zone_openned) {
>>>>>>>> +        if (io->bio) {
>>>>>>>> +            io->close_zone = true;
>>>>>>>> +            __submit_merged_bio(io);
>>>>>>>> +        } else if (io->zone_openned) {
>>>>>>>> +            up(&sbi->available_open_zones);
>>>>>>>> +            io->zone_openned = false;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +    f2fs_up_write(&io->io_rwsem);
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>    static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>>>                    struct inode *inode, struct page *page,
>>>>>>>>                    nid_t ino, enum page_type type, bool force)
>>>>>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>>>>>>    }
>>>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>>>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>>>>>> +                    block_t blkaddr, bool start)
>>>>>>>>    {
>>>>>>>> -    int devi = 0;
>>>>>>>> +    if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>>>>>> +        return false;
>>>>>>>> +
>>>>>>>> +    if (start)
>>>>>>>> +        return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>>>>>> +    return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>>> -    if (f2fs_is_multi_device(sbi)) {
>>>>>>>> -        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>>>> -        if (blkaddr < FDEV(devi).start_blk ||
>>>>>>>> -            blkaddr > FDEV(devi).end_blk) {
>>>>>>>> -            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>>>> -            return false;
>>>>>>>> -        }
>>>>>>>> -        blkaddr -= FDEV(devi).start_blk;
>>>>>>>> -    }
>>>>>>>> -    return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>>>> -        f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>>>>>> -        (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>>>>>>    }
>>>>>>>>    #endif
>>>>>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>>>>>>        struct page *bio_page;
>>>>>>>>        enum count_type type;
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>>>>>> +#endif
>>>>>>>>        f2fs_bug_on(sbi, is_read_io(fio->op));
>>>>>>>>        f2fs_down_write(&io->io_rwsem);
>>>>>>>>    next:
>>>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>>>>>> -        wait_for_completion_io(&io->zone_wait);
>>>>>>>> -        bio_put(io->zone_pending_bio);
>>>>>>>> -        io->zone_pending_bio = NULL;
>>>>>>>> -        io->bi_private = NULL;
>>>>>>>> -    }
>>>>>>>> -#endif
>>>>>>>> -
>>>>>>>>        if (fio->in_list) {
>>>>>>>>            spin_lock(&io->io_lock);
>>>>>>>>            if (list_empty(&io->io_list)) {
>>>>>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>        type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>>>>>>        inc_page_count(sbi, type);
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>>>>>> +        io->open_zone = true;
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>>        if (io->bio &&
>>>>>>>>            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>>>>>>                      fio->new_blkaddr) ||
>>>>>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>>>>>>        io->last_block_in_bio = fio->new_blkaddr;
>>>>>>>>        trace_f2fs_submit_page_write(fio->page, fio);
>>>>>>>> +
>>>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>>>>>> -            is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>>>>>> -        bio_get(io->bio);
>>>>>>>> -        reinit_completion(&io->zone_wait);
>>>>>>>> -        io->bi_private = io->bio->bi_private;
>>>>>>>> -        io->bio->bi_private = io;
>>>>>>>> -        io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>>>>>> -        io->zone_pending_bio = io->bio;
>>>>>>>> +    if (blkzoned &&
>>>>>>>> +        is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>>>>>> +        io->close_zone = true;
>>>>>>>>            __submit_merged_bio(io);
>>>>>>>>        }
>>>>>>>>    #endif
>>>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>>>>>>        struct bio *bio;        /* bios to merge */
>>>>>>>>        sector_t last_block_in_bio;    /* last block number */
>>>>>>>>        struct f2fs_io_info fio;    /* store buffered io info. */
>>>>>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> -    struct completion zone_wait;    /* condition value for the previous open zone to close */
>>>>>>>> -    struct bio *zone_pending_bio;    /* pending bio for the previous zone */
>>>>>>>> -    void *bi_private;        /* previous bi_private for pending bio */
>>>>>>>> -#endif
>>>>>>>>        struct f2fs_rwsem io_rwsem;    /* blocking op for bio */
>>>>>>>>        spinlock_t io_lock;        /* serialize DATA/NODE IOs */
>>>>>>>>        struct list_head io_list;    /* track fios */
>>>>>>>>        struct list_head bio_list;    /* bio entry list head */
>>>>>>>>        struct f2fs_rwsem bio_list_lock;    /* lock to protect bio entry list */
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    bool open_zone;            /* open a zone */
>>>>>>>> +    bool zone_openned;        /* zone has been openned */
>>>>>>>> +    bool close_zone;        /* close a zone */
>>>>>>>> +#endif
>>>>>>>>    };
>>>>>>>>    #define FDEV(i)                (sbi->devs[i])
>>>>>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>>>>>>    #ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>>        unsigned int blocks_per_blkz;        /* F2FS blocks per zone */
>>>>>>>>        unsigned int max_open_zones;        /* max open zone resources of the zoned device */
>>>>>>>> +    struct semaphore available_open_zones;    /* available open zones */
>>>>>>>>    #endif
>>>>>>>>        /* for node-related operations */
>>>>>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>>>>>>    void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>>>>>>                  enum page_type type);
>>>>>>>>    int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>>>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>>>>>>    void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>>>>>>    void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>>>>>>                    struct inode *inode, struct page *page,
>>>>>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>>>>>>        return test_bit(zno, FDEV(devi).blkz_seq);
>>>>>>>>    }
>>>>>>>> +
>>>>>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>>>> +                            block_t blkaddr)
>>>>>>>> +{
>>>>>>>> +    int devi = 0;
>>>>>>>> +
>>>>>>>> +    if (f2fs_is_multi_device(sbi)) {
>>>>>>>> +        devi = f2fs_target_device_index(sbi, blkaddr);
>>>>>>>> +        if (blkaddr < FDEV(devi).start_blk ||
>>>>>>>> +            blkaddr > FDEV(devi).end_blk) {
>>>>>>>> +            f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>>>>>> +            return false;
>>>>>>>> +        }
>>>>>>>> +        blkaddr -= FDEV(devi).start_blk;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>>>> +        !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>>>> +        return false;
>>>>>>>> +
>>>>>>>> +    return true;
>>>>>>>
>>>>>>> Applied as below.
>>>>>>>
>>>>>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>>>>>                   blkaddr -= FDEV(devi).start_blk;
>>>>>>>           }
>>>>>>>
>>>>>>> -       if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>>>>> -               !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>>>>> -               return false;
>>>>>>> -
>>>>>>> -       return true;
>>>>>>> +       return bdev_is_zoned(FDEV(devi).bdev) &&
>>>>>>> +               f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>>>>>
>>>>>> Looks good, thank you for cleanup.
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>    }
>>>>>>>    #endif
>>>>>>>
>>>>>>>> +}
>>>>>>>>    #endif
>>>>>>>>    static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>>>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>>>>>> index f8703038e1d8..a8626e297876 100644
>>>>>>>> --- a/fs/f2fs/iostat.c
>>>>>>>> +++ b/fs/f2fs/iostat.c
>>>>>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>>>>>>        spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>>>>>>    }
>>>>>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>>>>>> +{
>>>>>>>> +    struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>>>> +
>>>>>>>> +    return iostat_ctx->sbi;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>    void iostat_update_and_unbind_ctx(struct bio *bio)
>>>>>>>>    {
>>>>>>>>        struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>>>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>>>>>> index eb99d05cf272..9006c3d41590 100644
>>>>>>>> --- a/fs/f2fs/iostat.h
>>>>>>>> +++ b/fs/f2fs/iostat.h
>>>>>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>>>>>>        return iostat_ctx->post_read_ctx;
>>>>>>>>    }
>>>>>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>>>>>>    extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>>>>>>    extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx);
>>>>>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>>>>>>    #else
>>>>>>>>    static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>>>>>>            enum iostat_type type, unsigned long long io_bytes) {}
>>>>>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>>>>>>    static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>>>>>>    static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>>>>>>            struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>>>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>>>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>>>>>> --- a/fs/f2fs/segment.c
>>>>>>>> +++ b/fs/f2fs/segment.c
>>>>>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>>>>>>            return err;
>>>>>>>>        stat_inc_seg_type(sbi, curseg);
>>>>>>>>        locate_dirty_segment(sbi, old_segno);
>>>>>>>> +
>>>>>>>> +    f2fs_blkzoned_submit_merged_write(sbi, type);
>>>>>>>> +
>>>>>>>>        return 0;
>>>>>>>>    }
>>>>>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>>>>>>            f2fs_bug_on(fio->sbi, true);
>>>>>>>>        }
>>>>>>>> -    if (IS_HOT(type))
>>>>>>>> -        fio->temp = HOT;
>>>>>>>> -    else if (IS_WARM(type))
>>>>>>>> -        fio->temp = WARM;
>>>>>>>> -    else
>>>>>>>> -        fio->temp = COLD;
>>>>>>>> +    fio->temp = type_to_temp(type);
>>>>>>>>        return type;
>>>>>>>>    }
>>>>>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>>>>>>            return -EINVAL;
>>>>>>>>        }
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +    if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>>>> +        for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>>>>>> +            struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>>>>>> +            enum page_type ptype;
>>>>>>>> +            enum temp_type temp;
>>>>>>>> +
>>>>>>>> +            if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>>>>>> +                continue;
>>>>>>>> +
>>>>>>>> +            if (!f2fs_blkaddr_in_seqzone(sbi,
>>>>>>>> +                    START_BLOCK(sbi, curseg->segno)))
>>>>>>>> +                continue;
>>>>>>>> +
>>>>>>>> +            ptype = PAGE_TYPE(type);
>>>>>>>> +            temp = type_to_temp(type);
>>>>>>>> +            down(&sbi->available_open_zones);
>>>>>>>> +            sbi->write_io[ptype][temp].zone_openned = true;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>>> +#endif
>>>>>>>>        return 0;
>>>>>>>>    }
>>>>>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>>>>>>        for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>>>>>>            kfree(array[i].sum_blk);
>>>>>>>>            kfree(array[i].journal);
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>>>>>> +        if (f2fs_sb_has_blkzoned(sbi)) {
>>>>>>>> +            enum page_type ptype = PAGE_TYPE(i);
>>>>>>>> +            enum temp_type temp = type_to_temp(i);
>>>>>>>> +
>>>>>>>> +            if (sbi->write_io[ptype][temp].zone_openned) {
>>>>>>>> +                up(&sbi->available_open_zones);
>>>>>>>> +                sbi->write_io[ptype][temp].zone_openned = false;
>>>>>>>> +            }
>>>>>>>> +        }
>>>>>>>> +#endif
>>>>>>>>        }
>>>>>>>>        kfree(array);
>>>>>>>>    }
>>>>>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>>>>>> index e1c0f418aa11..855978ca869f 100644
>>>>>>>> --- a/fs/f2fs/segment.h
>>>>>>>> +++ b/fs/f2fs/segment.h
>>>>>>>> @@ -24,7 +24,8 @@
>>>>>>>>    #define IS_DATASEG(t)    ((t) <= CURSEG_COLD_DATA)
>>>>>>>>    #define IS_NODESEG(t)    ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>>>>>> -#define SE_PAGETYPE(se)    ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>>>>>> +#define PAGE_TYPE(t)    (IS_NODESEG(t) ? NODE : DATA)
>>>>>>>> +#define SE_PAGETYPE(se)    (PAGE_TYPE((se)->type))
>>>>>>>>    static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>>>>>>                            unsigned short seg_type)
>>>>>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>>>>>>                return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>>>>>>        return 0;
>>>>>>>>    }
>>>>>>>> +
>>>>>>>> +static inline enum temp_type type_to_temp(int type)
>>>>>>>> +{
>>>>>>>> +    if (IS_HOT(type))
>>>>>>>> +        return HOT;
>>>>>>>> +    else if (IS_WARM(type))
>>>>>>>> +        return WARM;
>>>>>>>> +    return COLD;
>>>>>>>> +}
>>>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>>>> index fdf358c7f808..954baa6c100d 100644
>>>>>>>> --- a/fs/f2fs/super.c
>>>>>>>> +++ b/fs/f2fs/super.c
>>>>>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>>>>>>                    sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>>>>>>                return -EINVAL;
>>>>>>>>            }
>>>>>>>> +
>>>>>>>> +        sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>>>>>>        }
>>>>>>>>        zone_sectors = bdev_zone_sectors(bdev);
>>>>>>>> --
>>>>>>>> 2.40.1
>>>>
>>>>
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> [email protected]
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>>
>>> _______________________________________________
>>> Linux-f2fs-devel mailing list
>>> [email protected]
>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel