2024-01-25 17:03:18

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 0/5] block: remove gfp_mask for blkdev_zone_mgmt()

Fueled by the LSFMM discussion on removing GFP_NOFS initiated by Willy,
I've looked into the sole GFP_NOFS allocation in zonefs. As it turned out,
it is only done for zone management commands and can be removed.

After digging into more callers of blkdev_zone_mgmt() I came to the
conclusion that the gfp_mask parameter can be removed alltogether.

So this series switches all callers of blkdev_zone_mgmt() to either use
GFP_KERNEL where possible or grab a memalloc_no{fs,io} context.

The final patch in this series is getting rid of the gfp_mask parameter.

Link: https://lore.kernel.org/all/[email protected]/

---
Changes in v2:
- guard blkdev_zone_mgmt in dm-zoned-metadata.c with memalloc_noio context
- Link to v1: https://lore.kernel.org/r/[email protected]

---
Johannes Thumshirn (5):
zonefs: pass GFP_KERNEL to blkdev_zone_mgmt() call
dm: dm-zoned: guard blkdev_zone_mgmt with noio scope
btrfs: zoned: call blkdev_zone_mgmt in nofs scope
f2fs: guard blkdev_zone_mgmt with nofs scope
block: remove gfp_flags from blkdev_zone_mgmt

block/blk-zoned.c | 19 ++++++++-----------
drivers/md/dm-zoned-metadata.c | 5 ++++-
drivers/nvme/target/zns.c | 5 ++---
fs/btrfs/zoned.c | 35 +++++++++++++++++++++++++----------
fs/f2fs/segment.c | 15 ++++++++++++---
fs/zonefs/super.c | 2 +-
include/linux/blkdev.h | 2 +-
7 files changed, 53 insertions(+), 30 deletions(-)
---
base-commit: 615d300648869c774bd1fe54b4627bb0c20faed4
change-id: 20240110-zonefs_nofs-dd1e22b2e046

Best regards,
--
Johannes Thumshirn <[email protected]>



2024-01-25 17:03:37

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 1/5] zonefs: pass GFP_KERNEL to blkdev_zone_mgmt() call

Pass GFP_KERNEL instead of GFP_NOFS to the blkdev_zone_mgmt() call in
zonefs_zone_mgmt().

As as zonefs_zone_mgmt() and zonefs_inode_zone_mgmt() are never called
from a place that can recurse back into the filesystem on memory reclaim,
it is save to call blkdev_zone_mgmt() with GFP_KERNEL.

Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/zonefs/super.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 93971742613a..63fbac018c04 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -113,7 +113,7 @@ static int zonefs_zone_mgmt(struct super_block *sb,

trace_zonefs_zone_mgmt(sb, z, op);
ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
- z->z_size >> SECTOR_SHIFT, GFP_NOFS);
+ z->z_size >> SECTOR_SHIFT, GFP_KERNEL);
if (ret) {
zonefs_err(sb,
"Zone management operation %s at %llu failed %d\n",

--
2.43.0


2024-01-25 17:04:01

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 2/5] dm: dm-zoned: guard blkdev_zone_mgmt with noio scope

Guard the calls to blkdev_zone_mgmt() with a memalloc_noio scope.
This helps us getting rid of the GFP_NOIO argument to blkdev_zone_mgmt();

Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/md/dm-zoned-metadata.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index fdfe30f7b697..165996cc966c 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1655,10 +1655,13 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)

if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zone->dev;
+ unsigned int noio_flag;

+ noio_flag = memalloc_noio_save();
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
dmz_start_sect(zmd, zone),
- zmd->zone_nr_sectors, GFP_NOIO);
+ zmd->zone_nr_sectors, GFP_KERNEL);
+ memalloc_noio_restore(noio_flag);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
zone->id, ret);

--
2.43.0


2024-01-25 17:04:29

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 3/5] btrfs: zoned: call blkdev_zone_mgmt in nofs scope

Add a memalloc_nofs scope around all calls to blkdev_zone_mgmt(). This
allows us to further get rid of the GFP_NOFS argument for
blkdev_zone_mgmt().

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/btrfs/zoned.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 168af9d000d1..05640d61e435 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -824,11 +824,15 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
reset = &zones[1];

if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
+ unsigned int nofs_flags;
+
ASSERT(sb_zone_is_full(reset));

+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
reset->start, reset->len,
- GFP_NOFS);
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;

@@ -974,11 +978,14 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
* explicit ZONE_FINISH is not necessary.
*/
if (zone->wp != zone->start + zone->capacity) {
+ unsigned int nofs_flags;
int ret;

+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev,
REQ_OP_ZONE_FINISH, zone->start,
- zone->len, GFP_NOFS);
+ zone->len, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
}
@@ -996,11 +1003,13 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)

int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
{
+ unsigned int nofs_flags;
sector_t zone_sectors;
sector_t nr_sectors;
u8 zone_sectors_shift;
u32 sb_zone;
u32 nr_zones;
+ int ret;

zone_sectors = bdev_zone_sectors(bdev);
zone_sectors_shift = ilog2(zone_sectors);
@@ -1011,9 +1020,13 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
if (sb_zone + 1 >= nr_zones)
return -ENOENT;

- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- zone_start_sector(sb_zone, bdev),
- zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ zone_start_sector(sb_zone, bdev),
+ zone_sectors * BTRFS_NR_SB_LOG_ZONES,
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
+ return ret;
}

/*
@@ -1124,12 +1137,15 @@ static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes)
{
+ unsigned int nofs_flags;
int ret;

*bytes = 0;
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET,
physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT,
- GFP_NOFS);
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;

@@ -2234,14 +2250,17 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
struct btrfs_device *device = map->stripes[i].dev;
const u64 physical = map->stripes[i].physical;
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ unsigned int nofs_flags;

if (zinfo->max_active_zones == 0)
continue;

+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
zinfo->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);

if (ret)
return ret;

--
2.43.0


2024-01-25 17:04:59

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 4/5] f2fs: guard blkdev_zone_mgmt with nofs scope

Guard the calls to blkdev_zone_mgmt() with a memalloc_nofs scope.
This helps us getting rid of the GFP_NOFS argument to blkdev_zone_mgmt();

Signed-off-by: Johannes Thumshirn <[email protected]>
---
fs/f2fs/segment.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4c8836ded90f..0094fe491364 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1971,9 +1971,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
}

if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
+ unsigned int nofs_flags;
+ int ret;
+
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sector, nr_sects, GFP_NOFS);
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
+ return ret;
}

__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
@@ -4865,6 +4871,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
block_t zone_block, valid_block_cnt;
unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
int ret;
+ unsigned int nofs_flags;

if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
@@ -4912,8 +4919,10 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
"pointer: valid block[0x%x,0x%x] cond[0x%x]",
zone_segno, valid_block_cnt, zone->cond);

+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
- zone->start, zone->len, GFP_NOFS);
+ zone->start, zone->len, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flags);
if (ret == -EOPNOTSUPP) {
ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
zone->len - (zone->wp - zone->start),

--
2.43.0


2024-01-25 17:05:24

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 5/5] block: remove gfp_flags from blkdev_zone_mgmt

Now that all callers pass in GFP_KERNEL to blkdev_zone_mgmt() and use
memalloc_no{io,fs}_{save,restore}() to define the allocation scope, we can
drop the gfp_mask parameter from blkdev_zone_mgmt() as well as
blkdev_zone_reset_all() and blkdev_zone_reset_all_emulated().

Signed-off-by: Johannes Thumshirn <[email protected]>
---
block/blk-zoned.c | 19 ++++++++-----------
drivers/nvme/target/zns.c | 5 ++---
fs/btrfs/zoned.c | 14 +++++---------
fs/f2fs/segment.c | 4 ++--
fs/zonefs/super.c | 2 +-
include/linux/blkdev.h | 2 +-
6 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index d343e5756a9c..d4f4f8325eff 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -177,8 +177,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
}
}

-static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
- gfp_t gfp_mask)
+static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
sector_t capacity = bdev_nr_sectors(bdev);
@@ -205,7 +204,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
}

bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
- gfp_mask);
+ GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;

@@ -223,7 +222,7 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
return ret;
}

-static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
+static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;

@@ -238,7 +237,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
* @sector: Start sector of the first zone to operate on
* @nr_sectors: Number of sectors, should be at least the length of one zone and
* must be zone size aligned.
- * @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
* Perform the specified operation on the range of zones specified by
@@ -248,7 +246,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
* or finish request.
*/
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
- sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
+ sector_t sector, sector_t nr_sectors)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
@@ -285,12 +283,12 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
*/
if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
if (!blk_queue_zone_resetall(q))
- return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
- return blkdev_zone_reset_all(bdev, gfp_mask);
+ return blkdev_zone_reset_all_emulated(bdev);
+ return blkdev_zone_reset_all(bdev);
}

while (sector < end_sector) {
- bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;

@@ -419,8 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
return -ENOTTY;
}

- ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
- GFP_KERNEL);
+ ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);

fail:
if (cmd == BLKRESETZONE)
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 5b5c1e481722..3148d9f1bde6 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -456,8 +456,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
switch (zsa_req_op(req->cmd->zms.zsa)) {
case REQ_OP_ZONE_RESET:
ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0,
- get_capacity(req->ns->bdev->bd_disk),
- GFP_KERNEL);
+ get_capacity(req->ns->bdev->bd_disk));
if (ret < 0)
return blkdev_zone_mgmt_errno_to_nvme_status(ret);
break;
@@ -508,7 +507,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
goto out;
}

- ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL);
+ ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors);
if (ret < 0)
status = blkdev_zone_mgmt_errno_to_nvme_status(ret);

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 05640d61e435..cf2e779d8ef4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -830,8 +830,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,

nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- reset->start, reset->len,
- GFP_KERNEL);
+ reset->start, reset->len);
memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
@@ -984,7 +983,7 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev,
REQ_OP_ZONE_FINISH, zone->start,
- zone->len, GFP_KERNEL);
+ zone->len);
memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
@@ -1023,8 +1022,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
zone_start_sector(sb_zone, bdev),
- zone_sectors * BTRFS_NR_SB_LOG_ZONES,
- GFP_KERNEL);
+ zone_sectors * BTRFS_NR_SB_LOG_ZONES);
memalloc_nofs_restore(nofs_flags);
return ret;
}
@@ -1143,8 +1141,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
*bytes = 0;
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET,
- physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT,
- GFP_KERNEL);
+ physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT);
memalloc_nofs_restore(nofs_flags);
if (ret)
return ret;
@@ -2258,8 +2255,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
- zinfo->zone_size >> SECTOR_SHIFT,
- GFP_KERNEL);
+ zinfo->zone_size >> SECTOR_SHIFT);
memalloc_nofs_restore(nofs_flags);

if (ret)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0094fe491364..e1065ba70207 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1977,7 +1977,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
trace_f2fs_issue_reset_zone(bdev, blkstart);
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sector, nr_sects, GFP_KERNEL);
+ sector, nr_sects);
memalloc_nofs_restore(nofs_flags);
return ret;
}
@@ -4921,7 +4921,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,

nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
- zone->start, zone->len, GFP_KERNEL);
+ zone->start, zone->len);
memalloc_nofs_restore(nofs_flags);
if (ret == -EOPNOTSUPP) {
ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 63fbac018c04..cadb1364f951 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -113,7 +113,7 @@ static int zonefs_zone_mgmt(struct super_block *sb,

trace_zonefs_zone_mgmt(sb, z, op);
ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
- z->z_size >> SECTOR_SHIFT, GFP_KERNEL);
+ z->z_size >> SECTOR_SHIFT);
if (ret) {
zonefs_err(sb,
"Zone management operation %s at %llu failed %d\n",
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99e4f5e72213..8467c1910404 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -325,7 +325,7 @@ void disk_set_zoned(struct gendisk *disk);
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
- sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask);
+ sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk,
void (*update_driver_data)(struct gendisk *disk));


--
2.43.0