From: Wenjie Qi <[email protected]>
1. If the max active zones of zoned devices are less than
the active logs of F2FS, the device may error due to
insufficient zone resources when multiple active logs are
being written at the same time.
2. We can get the number of remaining available zone
resources by subtracting the number of active logs from
the number of max active zones of zoned devices. We can
use these available zone resources to reduce the number
of pending bio when switching zones.
3. The original code for determining zone end was
after "out":, which would have missed some fio's
where is_end_zone_blkaddr(sbi, fio->new_blkaddr)
was true. I've moved this code before "skip:" to
make sure it's done for each fio.
Signed-off-by: Wenjie Qi <[email protected]>
---
fs/f2fs/data.c | 38 ++++++++++++++++++++++++++++----------
fs/f2fs/f2fs.h | 2 ++
fs/f2fs/super.c | 9 +++++++++
3 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dce8defdf4c7..6b11364e94b8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -392,6 +392,19 @@ static void f2fs_zone_write_end_io(struct bio *bio)
complete(&io->zone_wait);
f2fs_write_end_io(bio);
}
+
+static void f2fs_zone_write_end_io_nowait(struct bio *bio)
+{
+#ifdef CONFIG_F2FS_IOSTAT
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+ struct f2fs_sb_info *sbi = iostat_ctx->sbi;
+#else
+ struct f2fs_sb_info *sbi = (struct f2fs_sb_info *)bio->bi_private;
+#endif
+
+ atomic_inc(&sbi->available_active_zones);
+ f2fs_write_end_io(bio);
+}
#endif
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
@@ -1080,22 +1093,27 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
io->last_block_in_bio = fio->new_blkaddr;
trace_f2fs_submit_page_write(fio->page, fio);
-skip:
- if (fio->in_list)
- goto next;
-out:
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
- bio_get(io->bio);
- reinit_completion(&io->zone_wait);
- io->bi_private = io->bio->bi_private;
- io->bio->bi_private = io;
- io->bio->bi_end_io = f2fs_zone_write_end_io;
- io->zone_pending_bio = io->bio;
+ if (!atomic_add_negative(-1, &sbi->available_active_zones)) {
+ io->bio->bi_end_io = f2fs_zone_write_end_io_nowait;
+ } else {
+ atomic_inc(&sbi->available_active_zones);
+ bio_get(io->bio);
+ reinit_completion(&io->zone_wait);
+ io->bi_private = io->bio->bi_private;
+ io->bio->bi_private = io;
+ io->bio->bi_end_io = f2fs_zone_write_end_io;
+ io->zone_pending_bio = io->bio;
+ }
__submit_merged_bio(io);
}
#endif
+skip:
+ if (fio->in_list)
+ goto next;
+out:
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..1b1833e1d10e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1551,6 +1551,8 @@ struct f2fs_sb_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
+ unsigned int max_active_zones; /* max zone resources of the zoned device */
+ atomic_t available_active_zones; /* remaining zone resources */
#endif
/* for node-related operations */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 206d03c82d96..c79919425d63 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3932,6 +3932,15 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
+ sbi->max_active_zones = bdev_max_active_zones(bdev);
+ if (sbi->max_active_zones && (sbi->max_active_zones < F2FS_OPTION(sbi).active_logs)) {
+ f2fs_err(sbi,
+ "zoned: max active zones %u is too small, need at least %u active zones",
+ sbi->max_active_zones, F2FS_OPTION(sbi).active_logs);
+ return -EINVAL;
+ }
+ atomic_set(&sbi->available_active_zones, sbi->max_active_zones - F2FS_OPTION(sbi).active_logs);
+
zone_sectors = bdev_zone_sectors(bdev);
if (!is_power_of_2(zone_sectors)) {
f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
--
2.34.1
On 01/09, Wenjie wrote:
> From: Wenjie Qi <[email protected]>
>
> 1. If the max active zones of zoned devices are less than
> the active logs of F2FS, the device may error due to
> insufficient zone resources when multiple active logs are
> being written at the same time.
>
> 2. We can get the number of remaining available zone
> resources by subtracting the number of active logs from
> the number of max active zones of zoned devices. We can
> use these available zone resources to reduce the number
> of pending bio when switching zones.
>
> 3. The original code for determining zone end was
> after "out":, which would have missed some fio's
> where is_end_zone_blkaddr(sbi, fio->new_blkaddr)
> was true. I've moved this code before "skip:" to
> make sure it's done for each fio.
Could you please keep #3 as a separate patch?
>
> Signed-off-by: Wenjie Qi <[email protected]>
> ---
> fs/f2fs/data.c | 38 ++++++++++++++++++++++++++++----------
> fs/f2fs/f2fs.h | 2 ++
> fs/f2fs/super.c | 9 +++++++++
> 3 files changed, 39 insertions(+), 10 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index dce8defdf4c7..6b11364e94b8 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -392,6 +392,19 @@ static void f2fs_zone_write_end_io(struct bio *bio)
> complete(&io->zone_wait);
> f2fs_write_end_io(bio);
> }
> +
> +static void f2fs_zone_write_end_io_nowait(struct bio *bio)
> +{
> +#ifdef CONFIG_F2FS_IOSTAT
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> + struct f2fs_sb_info *sbi = iostat_ctx->sbi;
> +#else
> + struct f2fs_sb_info *sbi = (struct f2fs_sb_info *)bio->bi_private;
> +#endif
> +
> + atomic_inc(&sbi->available_active_zones);
> + f2fs_write_end_io(bio);
> +}
> #endif
>
> struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
> @@ -1080,22 +1093,27 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> io->last_block_in_bio = fio->new_blkaddr;
>
> trace_f2fs_submit_page_write(fio->page, fio);
> -skip:
> - if (fio->in_list)
> - goto next;
> -out:
> #ifdef CONFIG_BLK_DEV_ZONED
> if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> - bio_get(io->bio);
> - reinit_completion(&io->zone_wait);
> - io->bi_private = io->bio->bi_private;
> - io->bio->bi_private = io;
> - io->bio->bi_end_io = f2fs_zone_write_end_io;
> - io->zone_pending_bio = io->bio;
> + if (!atomic_add_negative(-1, &sbi->available_active_zones)) {
> + io->bio->bi_end_io = f2fs_zone_write_end_io_nowait;
> + } else {
> + atomic_inc(&sbi->available_active_zones);
> + bio_get(io->bio);
> + reinit_completion(&io->zone_wait);
> + io->bi_private = io->bio->bi_private;
> + io->bio->bi_private = io;
> + io->bio->bi_end_io = f2fs_zone_write_end_io;
> + io->zone_pending_bio = io->bio;
> + }
> __submit_merged_bio(io);
> }
> #endif
> +skip:
> + if (fio->in_list)
> + goto next;
> +out:
> if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
> !f2fs_is_checkpoint_ready(sbi))
> __submit_merged_bio(io);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 65294e3b0bef..1b1833e1d10e 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1551,6 +1551,8 @@ struct f2fs_sb_info {
>
> #ifdef CONFIG_BLK_DEV_ZONED
> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
> + unsigned int max_active_zones; /* max zone resources of the zoned device */
> + atomic_t available_active_zones; /* remaining zone resources */
> #endif
>
> /* for node-related operations */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 206d03c82d96..c79919425d63 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3932,6 +3932,15 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> if (!f2fs_sb_has_blkzoned(sbi))
> return 0;
>
> + sbi->max_active_zones = bdev_max_active_zones(bdev);
> + if (sbi->max_active_zones && (sbi->max_active_zones < F2FS_OPTION(sbi).active_logs)) {
> + f2fs_err(sbi,
> + "zoned: max active zones %u is too small, need at least %u active zones",
> + sbi->max_active_zones, F2FS_OPTION(sbi).active_logs);
> + return -EINVAL;
> + }
> + atomic_set(&sbi->available_active_zones, sbi->max_active_zones - F2FS_OPTION(sbi).active_logs);
> +
> zone_sectors = bdev_zone_sectors(bdev);
> if (!is_power_of_2(zone_sectors)) {
> f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
> --
> 2.34.1
Hello Jaegeuk,
Thank you for your suggestions. I have split this patch into three patches.
Here is a link to the latest email list.
The [1] of these links is the patch for the third point above.
[1] https://lore.kernel.org/linux-f2fs-devel/CAGFpFsT6XyTNPRxrTg+=F_eQ_-cryhdYd-k7rXxy1oJn4F4oLA@mail.gmail.com/T/#t
[2] https://lore.kernel.org/linux-f2fs-devel/CAGFpFsTJAJXjrkRT2n9oFHCMW-V3BgVmouR_8-GD4ZqxC2n_nw@mail.gmail.com/T/#t
[3] https://lore.kernel.org/linux-f2fs-devel/[email protected]/T/#t
On Sat, Jan 27, 2024 at 7:08 AM Jaegeuk Kim <[email protected]> wrote:
>
> On 01/09, Wenjie wrote:
> > From: Wenjie Qi <[email protected]>
> >
> > 1. If the max active zones of zoned devices are less than
> > the active logs of F2FS, the device may error due to
> > insufficient zone resources when multiple active logs are
> > being written at the same time.
> >
> > 2. We can get the number of remaining available zone
> > resources by subtracting the number of active logs from
> > the number of max active zones of zoned devices. We can
> > use these available zone resources to reduce the number
> > of pending bio when switching zones.
> >
> > 3. The original code for determining zone end was
> > after "out":, which would have missed some fio's
> > where is_end_zone_blkaddr(sbi, fio->new_blkaddr)
> > was true. I've moved this code before "skip:" to
> > make sure it's done for each fio.
>
> Could you please keep #3 as a separate patch?
>
> >
> > Signed-off-by: Wenjie Qi <[email protected]>
> > ---
> > fs/f2fs/data.c | 38 ++++++++++++++++++++++++++++----------
> > fs/f2fs/f2fs.h | 2 ++
> > fs/f2fs/super.c | 9 +++++++++
> > 3 files changed, 39 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index dce8defdf4c7..6b11364e94b8 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -392,6 +392,19 @@ static void f2fs_zone_write_end_io(struct bio *bio)
> > complete(&io->zone_wait);
> > f2fs_write_end_io(bio);
> > }
> > +
> > +static void f2fs_zone_write_end_io_nowait(struct bio *bio)
> > +{
> > +#ifdef CONFIG_F2FS_IOSTAT
> > + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > + struct f2fs_sb_info *sbi = iostat_ctx->sbi;
> > +#else
> > + struct f2fs_sb_info *sbi = (struct f2fs_sb_info *)bio->bi_private;
> > +#endif
> > +
> > + atomic_inc(&sbi->available_active_zones);
> > + f2fs_write_end_io(bio);
> > +}
> > #endif
> >
> > struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
> > @@ -1080,22 +1093,27 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
> > io->last_block_in_bio = fio->new_blkaddr;
> >
> > trace_f2fs_submit_page_write(fio->page, fio);
> > -skip:
> > - if (fio->in_list)
> > - goto next;
> > -out:
> > #ifdef CONFIG_BLK_DEV_ZONED
> > if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
> > is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
> > - bio_get(io->bio);
> > - reinit_completion(&io->zone_wait);
> > - io->bi_private = io->bio->bi_private;
> > - io->bio->bi_private = io;
> > - io->bio->bi_end_io = f2fs_zone_write_end_io;
> > - io->zone_pending_bio = io->bio;
> > + if (!atomic_add_negative(-1, &sbi->available_active_zones)) {
> > + io->bio->bi_end_io = f2fs_zone_write_end_io_nowait;
> > + } else {
> > + atomic_inc(&sbi->available_active_zones);
> > + bio_get(io->bio);
> > + reinit_completion(&io->zone_wait);
> > + io->bi_private = io->bio->bi_private;
> > + io->bio->bi_private = io;
> > + io->bio->bi_end_io = f2fs_zone_write_end_io;
> > + io->zone_pending_bio = io->bio;
> > + }
> > __submit_merged_bio(io);
> > }
> > #endif
> > +skip:
> > + if (fio->in_list)
> > + goto next;
> > +out:
> > if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
> > !f2fs_is_checkpoint_ready(sbi))
> > __submit_merged_bio(io);
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 65294e3b0bef..1b1833e1d10e 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1551,6 +1551,8 @@ struct f2fs_sb_info {
> >
> > #ifdef CONFIG_BLK_DEV_ZONED
> > unsigned int blocks_per_blkz; /* F2FS blocks per zone */
> > + unsigned int max_active_zones; /* max zone resources of the zoned device */
> > + atomic_t available_active_zones; /* remaining zone resources */
> > #endif
> >
> > /* for node-related operations */
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 206d03c82d96..c79919425d63 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -3932,6 +3932,15 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
> > if (!f2fs_sb_has_blkzoned(sbi))
> > return 0;
> >
> > + sbi->max_active_zones = bdev_max_active_zones(bdev);
> > + if (sbi->max_active_zones && (sbi->max_active_zones < F2FS_OPTION(sbi).active_logs)) {
> > + f2fs_err(sbi,
> > + "zoned: max active zones %u is too small, need at least %u active zones",
> > + sbi->max_active_zones, F2FS_OPTION(sbi).active_logs);
> > + return -EINVAL;
> > + }
> > + atomic_set(&sbi->available_active_zones, sbi->max_active_zones - F2FS_OPTION(sbi).active_logs);
> > +
> > zone_sectors = bdev_zone_sectors(bdev);
> > if (!is_power_of_2(zone_sectors)) {
> > f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
> > --
> > 2.34.1