Hi,
This patch serie adds support for explicit control of zone transitions.
To test it, one can use an updated blkzone version that is available
here:
https://github.com/MatiasBjorling/util-linux.git zonemgmt
blkzone can be compiled with:
./autogen.sh
./configure
make blkzone
After that, the binary is available in the compile directory.
Regards,
Matias
Ajay Joshi (4):
block: add zone open, close and finish support
null_blk: add zone open, close, and finish support
scsi: sd_zbc: add zone open, close, and finish support
dm: add zone open, close and finish support
block/blk-core.c | 3 ++
block/blk-zoned.c | 51 +++++++++++++++++++++---------
block/ioctl.c | 5 ++-
drivers/block/null_blk.h | 4 +--
drivers/block/null_blk_main.c | 13 ++++++--
drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++--
drivers/md/dm-flakey.c | 7 ++---
drivers/md/dm-linear.c | 2 +-
drivers/md/dm.c | 5 +--
drivers/scsi/sd.c | 15 ++++++++-
drivers/scsi/sd.h | 6 ++--
drivers/scsi/sd_zbc.c | 18 ++++++++---
include/linux/blk_types.h | 35 +++++++++++++++++++--
include/linux/blkdev.h | 57 +++++++++++++++++++++++++++++-----
include/uapi/linux/blkzoned.h | 17 ++++++++--
15 files changed, 221 insertions(+), 50 deletions(-)
--
2.19.1
From: Ajay Joshi <[email protected]>
Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
support to allow explicit control of zone states.
Signed-off-by: Ajay Joshi <[email protected]>
---
drivers/scsi/sd.c | 15 ++++++++++++++-
drivers/scsi/sd.h | 6 ++++--
drivers/scsi/sd_zbc.c | 18 +++++++++++++-----
3 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a3406bd62391..89f955a01d44 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1292,7 +1292,17 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
case REQ_OP_WRITE:
return sd_setup_read_write_cmnd(cmd);
case REQ_OP_ZONE_RESET:
- return sd_zbc_setup_reset_cmnd(cmd);
+ return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
+ ZO_RESET_WRITE_POINTER);
+ case REQ_OP_ZONE_OPEN:
+ return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
+ ZO_OPEN_ZONE);
+ case REQ_OP_ZONE_CLOSE:
+ return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
+ ZO_CLOSE_ZONE);
+ case REQ_OP_ZONE_FINISH:
+ return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
+ ZO_FINISH_ZONE);
default:
WARN_ON_ONCE(1);
return BLK_STS_NOTSUPP;
@@ -1958,6 +1968,9 @@ static int sd_done(struct scsi_cmnd *SCpnt)
case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE_SAME:
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!result) {
good_bytes = blk_rq_bytes(req);
scsi_set_resid(SCpnt, 0);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5796ace76225..9a20633caefa 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -209,7 +209,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
+extern blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op);
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr);
extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
@@ -226,7 +227,8 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
-static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
+static inline blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op)
{
return BLK_STS_TARGET;
}
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7334024b64f1..41020db5353a 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -168,12 +168,17 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
}
/**
- * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command.
- * @cmd: the command to setup
+ * sd_zbc_setup_zone_mgmt_op_cmnd - Prepare a zone ZBC_OUT command. The
+ * operations can be RESET WRITE POINTER,
+ * OPEN, CLOSE or FINISH.
+ * @cmd: The command to setup
+ * @op: Operation to be performed
*
- * Called from sd_init_command() for a REQ_OP_ZONE_RESET request.
+ * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_OPEN,
+ * REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
*/
-blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
+blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op)
{
struct request *rq = cmd->request;
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
@@ -194,7 +199,7 @@ blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
cmd->cmd_len = 16;
memset(cmd->cmnd, 0, cmd->cmd_len);
cmd->cmnd[0] = ZBC_OUT;
- cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
+ cmd->cmnd[1] = op;
put_unaligned_be64(block, &cmd->cmnd[2]);
rq->timeout = SD_TIMEOUT;
@@ -222,6 +227,9 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
switch (req_op(rq)) {
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (result &&
sshdr->sense_key == ILLEGAL_REQUEST &&
--
2.19.1
From: Ajay Joshi <[email protected]>
Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
support to allow explicit control of zone states.
Signed-off-by: Ajay Joshi <[email protected]>
---
drivers/md/dm-flakey.c | 7 +++----
drivers/md/dm-linear.c | 2 +-
drivers/md/dm.c | 5 +++--
include/linux/blk_types.h | 8 ++++++++
4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index a9bc518156f2..fff529c0732c 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
struct flakey_c *fc = ti->private;
bio_set_dev(bio, fc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || bio_is_zone_mgmt_op(bio))
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
pb->bio_submitted = false;
- /* Do not fail reset zone */
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_is_zone_mgmt_op(bio))
goto map_bio;
/* Are we alive ? */
@@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_is_zone_mgmt_op(bio))
return DM_ENDIO_DONE;
if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..217a1dee8197 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio_set_dev(bio, lc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || bio_is_zone_mgmt_op(bio))
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5475081dcbd6..f4507ec20a57 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1176,7 +1176,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
- * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
+ * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
*
* dm_accept_partial_bio informs the dm that the target only wants to process
* additional n_sectors sectors of the bio and the rest of the data should be
@@ -1629,7 +1630,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
- } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
+ } else if (bio_is_zone_mgmt_op(bio)) {
ci.bio = bio;
ci.sector_count = 0;
error = __split_and_process_non_flush(&ci);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 067ef9242275..fd2458cd1a49 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -398,6 +398,14 @@ static inline bool op_is_zone_mgmt_op(enum req_opf op)
}
}
+/*
+ * Check if the bio is zoned operation.
+ */
+static inline bool bio_is_zone_mgmt_op(struct bio *bio)
+{
+ return op_is_zone_mgmt_op(bio_op(bio));
+}
+
static inline bool op_is_write(unsigned int op)
{
return (op & 1);
--
2.19.1
From: Ajay Joshi <[email protected]>
Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
support to allow explicit control of zone states.
Signed-off-by: Ajay Joshi <[email protected]>
Signed-off-by: Matias Bjørling <[email protected]>
---
drivers/block/null_blk.h | 4 ++--
drivers/block/null_blk_main.c | 13 ++++++++++---
drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++++++++++++++---
3 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34b22d6523ba..62ef65cb0f3e 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -93,7 +93,7 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
gfp_t gfp_mask);
void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors);
-void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
+void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector);
#else
static inline int null_zone_init(struct nullb_device *dev)
{
@@ -111,6 +111,6 @@ static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors)
{
}
-static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
+static inline void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector) {}
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 447d635c79a2..5058fb980c9c 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1209,10 +1209,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
nr_sectors = blk_rq_sectors(cmd->rq);
}
- if (op == REQ_OP_WRITE)
+ switch (op) {
+ case REQ_OP_WRITE:
null_zone_write(cmd, sector, nr_sectors);
- else if (op == REQ_OP_ZONE_RESET)
- null_zone_reset(cmd, sector);
+ break;
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ null_zone_mgmt_op(cmd, sector);
+ break;
+ }
}
out:
/* Complete IO by inline, softirq or timer */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index fca0c97ff1aa..47d956b2e148 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -121,17 +121,44 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
}
}
-void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
+void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
+ enum req_opf op = req_op(cmd->rq);
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
cmd->error = BLK_STS_IOERR;
return;
}
- zone->cond = BLK_ZONE_COND_EMPTY;
- zone->wp = zone->start;
+ switch (op) {
+ case REQ_OP_ZONE_RESET:
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ return;
+ case REQ_OP_ZONE_OPEN:
+ if (zone->cond == BLK_ZONE_COND_FULL) {
+ cmd->error = BLK_STS_IOERR;
+ return;
+ }
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+ return;
+ case REQ_OP_ZONE_CLOSE:
+ if (zone->cond == BLK_ZONE_COND_FULL) {
+ cmd->error = BLK_STS_IOERR;
+ return;
+ }
+ zone->cond = BLK_ZONE_COND_CLOSED;
+ return;
+ case REQ_OP_ZONE_FINISH:
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->len;
+ return;
+ default:
+ /* Invalid zone condition */
+ cmd->error = BLK_STS_IOERR;
+ return;
+ }
}
--
2.19.1
From: Ajay Joshi <[email protected]>
Zoned block devices allows one to control zone transitions by using
explicit commands. The available transitions are:
* Open zone: Transition a zone to open state.
* Close zone: Transition a zone to closed state.
* Finish zone: Transition a zone to full state.
Allow kernel to issue these transitions by introducing
blkdev_zones_mgmt_op() and add three new request opcodes:
* REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
Allow user-space to issue the transitions through the following ioctls:
* BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
Signed-off-by: Ajay Joshi <[email protected]>
Signed-off-by: Aravind Ramesh <[email protected]>
Signed-off-by: Matias Bjørling <[email protected]>
---
block/blk-core.c | 3 ++
block/blk-zoned.c | 51 ++++++++++++++++++++++---------
block/ioctl.c | 5 ++-
include/linux/blk_types.h | 27 +++++++++++++++--
include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++-----
include/uapi/linux/blkzoned.h | 17 +++++++++--
6 files changed, 133 insertions(+), 27 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 8340f69670d8..c0f0dbad548d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index ae7e91bd0618..d0c933593b93 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL_GPL(blkdev_report_zones);
/**
- * blkdev_reset_zones - Reset zones write pointer
+ * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
* @bdev: Target block device
- * @sector: Start sector of the first zone to reset
- * @nr_sectors: Number of sectors, at least the length of one zone
+ * @op: Operation to be performed on the zone(s)
+ * @sector: Start sector of the first zone to operate on
+ * @nr_sectors: Number of sectors, at least the length of one zone and
+ * must be zone size aligned.
* @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
- * Reset the write pointer of the zones contained in the range
+ * Perform the specified operation contained in the range
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
* is valid, but the specified range should not contain conventional zones.
*/
-int blkdev_reset_zones(struct block_device *bdev,
- sector_t sector, sector_t nr_sectors,
- gfp_t gfp_mask)
+int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors;
@@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;
+ if (!op_is_zone_mgmt_op(op))
+ return -EOPNOTSUPP;
+
if (bdev_read_only(bdev))
return -EPERM;
@@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
+ bio_set_op_attrs(bio, op, 0);
sector += zone_sectors;
@@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
return ret;
}
-EXPORT_SYMBOL_GPL(blkdev_reset_zones);
+EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
/*
* BLKREPORTZONE ioctl processing.
@@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
}
/*
- * BLKRESETZONE ioctl processing.
+ * Zone operation (open, close, finish or reset) ioctl processing.
* Called from blkdev_ioctl.
*/
-int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
+ enum req_opf op;
if (!argp)
return -EINVAL;
@@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
return -EFAULT;
- return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
- GFP_KERNEL);
+ switch (cmd) {
+ case BLKRESETZONE:
+ op = REQ_OP_ZONE_RESET;
+ break;
+ case BLKOPENZONE:
+ op = REQ_OP_ZONE_OPEN;
+ break;
+ case BLKCLOSEZONE:
+ op = REQ_OP_ZONE_CLOSE;
+ break;
+ case BLKFINISHZONE:
+ op = REQ_OP_ZONE_FINISH;
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
+ GFP_KERNEL);
}
static inline unsigned long *blk_alloc_zone_bitmap(int node,
diff --git a/block/ioctl.c b/block/ioctl.c
index 15a0eb80ada9..df7fe54db158 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
- return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+ case BLKOPENZONE:
+ case BLKCLOSEZONE:
+ case BLKFINISHZONE:
+ return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 95202f80676c..067ef9242275 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -284,13 +284,20 @@ enum req_opf {
REQ_OP_DISCARD = 3,
/* securely erase sectors */
REQ_OP_SECURE_ERASE = 5,
- /* reset a zone write pointer */
- REQ_OP_ZONE_RESET = 6,
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
+ /* reset a zone write pointer */
+ REQ_OP_ZONE_RESET = 16,
+ /* Open zone(s) */
+ REQ_OP_ZONE_OPEN = 17,
+ /* Close zone(s) */
+ REQ_OP_ZONE_CLOSE = 18,
+ /* Finish zone(s) */
+ REQ_OP_ZONE_FINISH = 19,
+
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
REQ_OP_SCSI_OUT = 33,
@@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
bio->bi_opf = op | op_flags;
}
+/*
+ * Check if the op is zoned operation.
+ */
+static inline bool op_is_zone_mgmt_op(enum req_opf op)
+{
+ switch (op) {
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline bool op_is_write(unsigned int op)
{
return (op & 1);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 592669bcc536..943084f9dc9c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones, gfp_t gfp_mask);
-extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
- sector_t nr_sectors, gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
-extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
+extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask);
#else /* CONFIG_BLK_DEV_ZONED */
@@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
return -ENOTTY;
}
-static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
- fmode_t mode, unsigned int cmd,
- unsigned long arg)
+static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
+ fmode_t mode, unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+
+static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
+ enum req_opf op,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
{
return -ENOTTY;
}
#endif /* CONFIG_BLK_DEV_ZONED */
+static inline int blkdev_reset_zones(struct block_device *bdev,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
+{
+ return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sectors, gfp_mask);
+}
+
+static inline int blkdev_open_zones(struct block_device *bdev,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
+{
+ return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
+ sector, nr_sectors, gfp_mask);
+}
+
+static inline int blkdev_close_zones(struct block_device *bdev,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
+{
+ return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
+ sector, nr_sectors,
+ gfp_mask);
+}
+
+static inline int blkdev_finish_zones(struct block_device *bdev,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
+{
+ return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
+ sector, nr_sectors,
+ gfp_mask);
+}
+
struct request_queue {
/*
* Together with queue_head for cacheline sharing
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index 498eec813494..701e0692b8d3 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -120,9 +120,11 @@ struct blk_zone_report {
};
/**
- * struct blk_zone_range - BLKRESETZONE ioctl request
- * @sector: starting sector of the first zone to issue reset write pointer
- * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
+ * BLKCLOSEZONE/BLKFINISHZONE ioctl
+ * request
+ * @sector: starting sector of the first zone to operate on
+ * @nr_sectors: Total number of sectors of all zones to operate on
*/
struct blk_zone_range {
__u64 sector;
@@ -139,10 +141,19 @@ struct blk_zone_range {
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
* @BLKGETNRZONES: Get the total number of zones of the device.
+ * @BLKOPENZONE: Open the zones in the specified sector range. The
+ * sector range must be zone aligned.
+ * @BLKCLOSEZONE: Close the zones in the specified sector range. The
+ * sector range must be zone aligned.
+ * @BLKFINISHZONE: Finish the zones in the specified sector range. The
+ * sector range must be zone aligned.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ _IOR(0x12, 132, __u32)
#define BLKGETNRZONES _IOR(0x12, 133, __u32)
+#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
+#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
+#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
#endif /* _UAPI_BLKZONED_H */
--
2.19.1
Matias,
Some comments inline below.
On 2019/06/21 22:07, Matias Bj?rling wrote:
> From: Ajay Joshi <[email protected]>
>
> Zoned block devices allows one to control zone transitions by using
> explicit commands. The available transitions are:
>
> * Open zone: Transition a zone to open state.
> * Close zone: Transition a zone to closed state.
> * Finish zone: Transition a zone to full state.
>
> Allow kernel to issue these transitions by introducing
> blkdev_zones_mgmt_op() and add three new request opcodes:
>
> * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
>
> Allow user-space to issue the transitions through the following ioctls:
>
> * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
>
> Signed-off-by: Ajay Joshi <[email protected]>
> Signed-off-by: Aravind Ramesh <[email protected]>
> Signed-off-by: Matias Bj?rling <[email protected]>
> ---
> block/blk-core.c | 3 ++
> block/blk-zoned.c | 51 ++++++++++++++++++++++---------
> block/ioctl.c | 5 ++-
> include/linux/blk_types.h | 27 +++++++++++++++--
> include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++-----
> include/uapi/linux/blkzoned.h | 17 +++++++++--
> 6 files changed, 133 insertions(+), 27 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 8340f69670d8..c0f0dbad548d 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
> goto not_supported;
> break;
> case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> if (!blk_queue_is_zoned(q))
> goto not_supported;
> break;
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index ae7e91bd0618..d0c933593b93 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>
> /**
> - * blkdev_reset_zones - Reset zones write pointer
> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
> * @bdev: Target block device
> - * @sector: Start sector of the first zone to reset
> - * @nr_sectors: Number of sectors, at least the length of one zone
> + * @op: Operation to be performed on the zone(s)
> + * @sector: Start sector of the first zone to operate on
> + * @nr_sectors: Number of sectors, at least the length of one zone and
> + * must be zone size aligned.
> * @gfp_mask: Memory allocation flags (for bio_alloc)
> *
> * Description:
> - * Reset the write pointer of the zones contained in the range
> + * Perform the specified operation contained in the range
Perform the specified operation over the sector range
> * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
> * is valid, but the specified range should not contain conventional zones.
> */
> -int blkdev_reset_zones(struct block_device *bdev,
> - sector_t sector, sector_t nr_sectors,
> - gfp_t gfp_mask)
> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> {
> struct request_queue *q = bdev_get_queue(bdev);
> sector_t zone_sectors;
> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
> if (!blk_queue_is_zoned(q))
> return -EOPNOTSUPP;
>
> + if (!op_is_zone_mgmt_op(op))
> + return -EOPNOTSUPP;
EINVAL may be better here.
> +
> if (bdev_read_only(bdev))
> return -EPERM;
>
> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
> bio = blk_next_bio(bio, 0, gfp_mask);
> bio->bi_iter.bi_sector = sector;
> bio_set_dev(bio, bdev);
> - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
> + bio_set_op_attrs(bio, op, 0);
>
> sector += zone_sectors;
>
> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>
> return ret;
> }
> -EXPORT_SYMBOL_GPL(blkdev_reset_zones);
> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
>
> /*
> * BLKREPORTZONE ioctl processing.
> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> }
>
> /*
> - * BLKRESETZONE ioctl processing.
> + * Zone operation (open, close, finish or reset) ioctl processing.
> * Called from blkdev_ioctl.
> */
> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> - unsigned int cmd, unsigned long arg)
> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> + unsigned int cmd, unsigned long arg)
> {
> void __user *argp = (void __user *)arg;
> struct request_queue *q;
> struct blk_zone_range zrange;
> + enum req_opf op;
>
> if (!argp)
> return -EINVAL;
> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
> return -EFAULT;
>
> - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
> - GFP_KERNEL);
> + switch (cmd) {
> + case BLKRESETZONE:
> + op = REQ_OP_ZONE_RESET;
> + break;
> + case BLKOPENZONE:
> + op = REQ_OP_ZONE_OPEN;
> + break;
> + case BLKCLOSEZONE:
> + op = REQ_OP_ZONE_CLOSE;
> + break;
> + case BLKFINISHZONE:
> + op = REQ_OP_ZONE_FINISH;
> + break;
> + default:
> + return -ENOTTY;
> + }
> +
> + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
> + GFP_KERNEL);
> }
>
> static inline unsigned long *blk_alloc_zone_bitmap(int node,
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 15a0eb80ada9..df7fe54db158 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
> case BLKREPORTZONE:
> return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
> case BLKRESETZONE:
> - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
> + case BLKOPENZONE:
> + case BLKCLOSEZONE:
> + case BLKFINISHZONE:
> + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
> case BLKGETZONESZ:
> return put_uint(arg, bdev_zone_sectors(bdev));
> case BLKGETNRZONES:
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 95202f80676c..067ef9242275 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -284,13 +284,20 @@ enum req_opf {
> REQ_OP_DISCARD = 3,
> /* securely erase sectors */
> REQ_OP_SECURE_ERASE = 5,
> - /* reset a zone write pointer */
> - REQ_OP_ZONE_RESET = 6,
> /* write the same sector many times */
> REQ_OP_WRITE_SAME = 7,
> /* write the zero filled sector many times */
> REQ_OP_WRITE_ZEROES = 9,
>
> + /* reset a zone write pointer */
> + REQ_OP_ZONE_RESET = 16,
> + /* Open zone(s) */
> + REQ_OP_ZONE_OPEN = 17,
> + /* Close zone(s) */
> + REQ_OP_ZONE_CLOSE = 18,
> + /* Finish zone(s) */
> + REQ_OP_ZONE_FINISH = 19,
> +
> /* SCSI passthrough using struct scsi_request */
> REQ_OP_SCSI_IN = 32,
> REQ_OP_SCSI_OUT = 33,
> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
> bio->bi_opf = op | op_flags;
> }
>
> +/*
> + * Check if the op is zoned operation.
Check if op is a zone management operation.
> + */
> +static inline bool op_is_zone_mgmt_op(enum req_opf op)
Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name.
> +{
> + switch (op) {
> + case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> static inline bool op_is_write(unsigned int op)
> {
> return (op & 1);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 592669bcc536..943084f9dc9c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
> extern int blkdev_report_zones(struct block_device *bdev,
> sector_t sector, struct blk_zone *zones,
> unsigned int *nr_zones, gfp_t gfp_mask);
> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
> - sector_t nr_sectors, gfp_t gfp_mask);
> extern int blk_revalidate_disk_zones(struct gendisk *disk);
>
> extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> unsigned int cmd, unsigned long arg);
> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> - unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> + unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask);
To keep the grouping of declarations, may be declare this one where
blkdev_reset_zones() was ?
>
> #else /* CONFIG_BLK_DEV_ZONED */
>
> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
> return -ENOTTY;
> }
>
> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
> - fmode_t mode, unsigned int cmd,
> - unsigned long arg)
> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
> + fmode_t mode, unsigned int cmd,
> + unsigned long arg)
> +{
> + return -ENOTTY;
> +}
> +
> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
> + enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> {
> return -ENOTTY;
That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one.
> }
>
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> +static inline int blkdev_reset_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
> + sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_open_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
> + sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_close_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
> + sector, nr_sectors,
> + gfp_mask);
> +}
> +
> +static inline int blkdev_finish_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
> + sector, nr_sectors,
> + gfp_mask);
> +}
> +
> struct request_queue {
> /*
> * Together with queue_head for cacheline sharing
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index 498eec813494..701e0692b8d3 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -120,9 +120,11 @@ struct blk_zone_report {
> };
>
> /**
> - * struct blk_zone_range - BLKRESETZONE ioctl request
> - * @sector: starting sector of the first zone to issue reset write pointer
> - * @nr_sectors: Total number of sectors of 1 or more zones to reset
> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
> + * BLKCLOSEZONE/BLKFINISHZONE ioctl
> + * request
> + * @sector: starting sector of the first zone to operate on
> + * @nr_sectors: Total number of sectors of all zones to operate on
> */
> struct blk_zone_range {
> __u64 sector;
> @@ -139,10 +141,19 @@ struct blk_zone_range {
> * sector range. The sector range must be zone aligned.
> * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
> * @BLKGETNRZONES: Get the total number of zones of the device.
> + * @BLKOPENZONE: Open the zones in the specified sector range. The
> + * sector range must be zone aligned.
> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The
> + * sector range must be zone aligned.
> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The
> + * sector range must be zone aligned.
> */
> #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
> #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
> #define BLKGETZONESZ _IOR(0x12, 132, __u32)
> #define BLKGETNRZONES _IOR(0x12, 133, __u32)
> +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
> +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
> +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
>
> #endif /* _UAPI_BLKZONED_H */
>
--
Damien Le Moal
Western Digital Research
On 2019/06/21 22:07, Matias Bj?rling wrote:
> From: Ajay Joshi <[email protected]>
>
> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
> support to allow explicit control of zone states.
>
> Signed-off-by: Ajay Joshi <[email protected]>
> Signed-off-by: Matias Bj?rling <[email protected]>
> ---
> drivers/block/null_blk.h | 4 ++--
> drivers/block/null_blk_main.c | 13 ++++++++++---
> drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++++++++++++++---
> 3 files changed, 42 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
> index 34b22d6523ba..62ef65cb0f3e 100644
> --- a/drivers/block/null_blk.h
> +++ b/drivers/block/null_blk.h
> @@ -93,7 +93,7 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
> gfp_t gfp_mask);
> void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> unsigned int nr_sectors);
> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector);
> #else
> static inline int null_zone_init(struct nullb_device *dev)
> {
> @@ -111,6 +111,6 @@ static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> unsigned int nr_sectors)
> {
> }
> -static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
> +static inline void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector) {}
> #endif /* CONFIG_BLK_DEV_ZONED */
> #endif /* __NULL_BLK_H */
> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
> index 447d635c79a2..5058fb980c9c 100644
> --- a/drivers/block/null_blk_main.c
> +++ b/drivers/block/null_blk_main.c
> @@ -1209,10 +1209,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
> nr_sectors = blk_rq_sectors(cmd->rq);
> }
>
> - if (op == REQ_OP_WRITE)
> + switch (op) {
> + case REQ_OP_WRITE:
> null_zone_write(cmd, sector, nr_sectors);
> - else if (op == REQ_OP_ZONE_RESET)
> - null_zone_reset(cmd, sector);
> + break;
> + case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> + null_zone_mgmt_op(cmd, sector);
> + break;
> + }
> }
> out:
> /* Complete IO by inline, softirq or timer */
> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index fca0c97ff1aa..47d956b2e148 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -121,17 +121,44 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> }
> }
>
> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector)
> {
> struct nullb_device *dev = cmd->nq->dev;
> unsigned int zno = null_zone_no(dev, sector);
> struct blk_zone *zone = &dev->zones[zno];
> + enum req_opf op = req_op(cmd->rq);
>
> if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
> cmd->error = BLK_STS_IOERR;
> return;
> }
>
> - zone->cond = BLK_ZONE_COND_EMPTY;
> - zone->wp = zone->start;
> + switch (op) {
> + case REQ_OP_ZONE_RESET:
> + zone->cond = BLK_ZONE_COND_EMPTY;
> + zone->wp = zone->start;
> + return;
> + case REQ_OP_ZONE_OPEN:
> + if (zone->cond == BLK_ZONE_COND_FULL) {
> + cmd->error = BLK_STS_IOERR;
> + return;
> + }
> + zone->cond = BLK_ZONE_COND_EXP_OPEN;
With ZBC, open of a full zone is a "nop". No error. So I would rather have this as:
if (zone->cond != BLK_ZONE_COND_FULL)
zone->cond = BLK_ZONE_COND_EXP_OPEN;
> + return;
> + case REQ_OP_ZONE_CLOSE:
> + if (zone->cond == BLK_ZONE_COND_FULL) {
> + cmd->error = BLK_STS_IOERR;
> + return;
> + }
> + zone->cond = BLK_ZONE_COND_CLOSED;
Sam as for open. Closing a full zone on ZBC is a nop. And the code above would
also set an empty zone to closed. Finally, if the zone is open but nothing was
written to it, it must be returned to empty condition, not closed. So something
like this is needed.
switch (zone->cond) {
case BLK_ZONE_COND_FULL:
case BLK_ZONE_COND_EMPTY:
break;
case BLK_ZONE_COND_EXP_OPEN:
if (zone->wp == zone->start) {
zone->cond = BLK_ZONE_COND_EMPTY;
break;
}
/* fallthrough */
default:
zone->cond = BLK_ZONE_COND_CLOSED;
}
> + return;
> + case REQ_OP_ZONE_FINISH:
> + zone->cond = BLK_ZONE_COND_FULL;
> + zone->wp = zone->start + zone->len;
> + return;
> + default:
> + /* Invalid zone condition */
> + cmd->error = BLK_STS_IOERR;
> + return;
> + }
> }
>
--
Damien Le Moal
Western Digital Research
On 2019/06/21 22:07, Matias Bj?rling wrote:
> From: Ajay Joshi <[email protected]>
>
> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
> support to allow explicit control of zone states.
>
> Signed-off-by: Ajay Joshi <[email protected]>
> ---
> drivers/scsi/sd.c | 15 ++++++++++++++-
> drivers/scsi/sd.h | 6 ++++--
> drivers/scsi/sd_zbc.c | 18 +++++++++++++-----
> 3 files changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index a3406bd62391..89f955a01d44 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1292,7 +1292,17 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
> case REQ_OP_WRITE:
> return sd_setup_read_write_cmnd(cmd);
> case REQ_OP_ZONE_RESET:
> - return sd_zbc_setup_reset_cmnd(cmd);
> + return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
> + ZO_RESET_WRITE_POINTER);
> + case REQ_OP_ZONE_OPEN:
> + return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
> + ZO_OPEN_ZONE);
> + case REQ_OP_ZONE_CLOSE:
> + return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
> + ZO_CLOSE_ZONE);
> + case REQ_OP_ZONE_FINISH:
> + return sd_zbc_setup_zone_mgmt_op_cmnd(cmd,
> + ZO_FINISH_ZONE);
> default:
> WARN_ON_ONCE(1);
> return BLK_STS_NOTSUPP;
> @@ -1958,6 +1968,9 @@ static int sd_done(struct scsi_cmnd *SCpnt)
> case REQ_OP_WRITE_ZEROES:
> case REQ_OP_WRITE_SAME:
> case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> if (!result) {
> good_bytes = blk_rq_bytes(req);
> scsi_set_resid(SCpnt, 0);
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 5796ace76225..9a20633caefa 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -209,7 +209,8 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
>
> extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
> extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
> -extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
> +extern blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
> + unsigned char op);
In ZBC specs, open, close, finish and reset command are all ZBC_OUT (94h)
commands with a different servie action. So may be renaming this function to
sd_zbc_setup_zbc_out_cmnd() is better.
> extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
> struct scsi_sense_hdr *sshdr);
> extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
> @@ -226,7 +227,8 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
>
> static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
>
> -static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
> +static inline blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
> + unsigned char op)
> {
> return BLK_STS_TARGET;
> }
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 7334024b64f1..41020db5353a 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -168,12 +168,17 @@ static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
> }
>
> /**
> - * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command.
> - * @cmd: the command to setup
> + * sd_zbc_setup_zone_mgmt_op_cmnd - Prepare a zone ZBC_OUT command. The
> + * operations can be RESET WRITE POINTER,
> + * OPEN, CLOSE or FINISH.
> + * @cmd: The command to setup
> + * @op: Operation to be performed
> *
> - * Called from sd_init_command() for a REQ_OP_ZONE_RESET request.
> + * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_OPEN,
> + * REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
> */
> -blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
> +blk_status_t sd_zbc_setup_zone_mgmt_op_cmnd(struct scsi_cmnd *cmd,
> + unsigned char op)
> {
> struct request *rq = cmd->request;
> struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
> @@ -194,7 +199,7 @@ blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
> cmd->cmd_len = 16;
> memset(cmd->cmnd, 0, cmd->cmd_len);
> cmd->cmnd[0] = ZBC_OUT;
> - cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
> + cmd->cmnd[1] = op;
> put_unaligned_be64(block, &cmd->cmnd[2]);
>
> rq->timeout = SD_TIMEOUT;
> @@ -222,6 +227,9 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
>
> switch (req_op(rq)) {
> case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
>
> if (result &&
> sshdr->sense_key == ILLEGAL_REQUEST &&
The comment after this code references only the reset operation. So it needs to
be updated. The same comment applies to all operations as they all have the same
error return behavior.
--
Damien Le Moal
Western Digital Research
On 2019/06/21 22:07, Matias Bj?rling wrote:
> From: Ajay Joshi <[email protected]>
>
> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
> support to allow explicit control of zone states.
>
> Signed-off-by: Ajay Joshi <[email protected]>
> ---
> drivers/md/dm-flakey.c | 7 +++----
> drivers/md/dm-linear.c | 2 +-
> drivers/md/dm.c | 5 +++--
> include/linux/blk_types.h | 8 ++++++++
> 4 files changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
> index a9bc518156f2..fff529c0732c 100644
> --- a/drivers/md/dm-flakey.c
> +++ b/drivers/md/dm-flakey.c
> @@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
> struct flakey_c *fc = ti->private;
>
> bio_set_dev(bio, fc->dev->bdev);
> - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
> + if (bio_sectors(bio) || bio_is_zone_mgmt_op(bio))
> bio->bi_iter.bi_sector =
> flakey_map_sector(ti, bio->bi_iter.bi_sector);
> }
> @@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
> struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
> pb->bio_submitted = false;
>
> - /* Do not fail reset zone */
> - if (bio_op(bio) == REQ_OP_ZONE_RESET)
> + if (bio_is_zone_mgmt_op(bio))
> goto map_bio;
>
> /* Are we alive ? */
> @@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
> struct flakey_c *fc = ti->private;
> struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
>
> - if (bio_op(bio) == REQ_OP_ZONE_RESET)
> + if (bio_is_zone_mgmt_op(bio))
> return DM_ENDIO_DONE;
>
> if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
> diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
> index ad980a38fb1e..217a1dee8197 100644
> --- a/drivers/md/dm-linear.c
> +++ b/drivers/md/dm-linear.c
> @@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
> struct linear_c *lc = ti->private;
>
> bio_set_dev(bio, lc->dev->bdev);
> - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
> + if (bio_sectors(bio) || bio_is_zone_mgmt_op(bio))
> bio->bi_iter.bi_sector =
> linear_map_sector(ti, bio->bi_iter.bi_sector);
> }
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 5475081dcbd6..f4507ec20a57 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1176,7 +1176,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
>
> /*
> * A target may call dm_accept_partial_bio only from the map routine. It is
> - * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
> + * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
> + * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
> *
> * dm_accept_partial_bio informs the dm that the target only wants to process
> * additional n_sectors sectors of the bio and the rest of the data should be
> @@ -1629,7 +1630,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
> ci.sector_count = 0;
> error = __send_empty_flush(&ci);
> /* dec_pending submits any data associated with flush */
> - } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
> + } else if (bio_is_zone_mgmt_op(bio)) {
> ci.bio = bio;
> ci.sector_count = 0;
> error = __split_and_process_non_flush(&ci);
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 067ef9242275..fd2458cd1a49 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -398,6 +398,14 @@ static inline bool op_is_zone_mgmt_op(enum req_opf op)
> }
> }
>
> +/*
> + * Check if the bio is zoned operation.
> + */
> +static inline bool bio_is_zone_mgmt_op(struct bio *bio)
> +{
> + return op_is_zone_mgmt_op(bio_op(bio));
> +}
> +
> static inline bool op_is_write(unsigned int op)
> {
> return (op & 1);
>
Looks good.
Reviewed-by: Damien Le Moal <[email protected]>
--
Damien Le Moal
Western Digital Research
On 19-06-21 15:07:08, Matias Bjørling wrote:
> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
> if (!blk_queue_is_zoned(q))
> return -EOPNOTSUPP;
>
> + if (!op_is_zone_mgmt_op(op))
> + return -EOPNOTSUPP;
> +
nitpick: -EINVAL looks better to return as Damien pointed out.
Otherwise it looks good to me:
Reviewed-by: Minwoo Im <[email protected]>
On 6/22/19 2:51 AM, Damien Le Moal wrote:
> Matias,
>
> Some comments inline below.
>
> On 2019/06/21 22:07, Matias Bjørling wrote:
>> From: Ajay Joshi <[email protected]>
>>
>> Zoned block devices allows one to control zone transitions by using
>> explicit commands. The available transitions are:
>>
>> * Open zone: Transition a zone to open state.
>> * Close zone: Transition a zone to closed state.
>> * Finish zone: Transition a zone to full state.
>>
>> Allow kernel to issue these transitions by introducing
>> blkdev_zones_mgmt_op() and add three new request opcodes:
>>
>> * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
>>
>> Allow user-space to issue the transitions through the following ioctls:
>>
>> * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
>>
>> Signed-off-by: Ajay Joshi <[email protected]>
>> Signed-off-by: Aravind Ramesh <[email protected]>
>> Signed-off-by: Matias Bjørling <[email protected]>
>> ---
>> block/blk-core.c | 3 ++
>> block/blk-zoned.c | 51 ++++++++++++++++++++++---------
>> block/ioctl.c | 5 ++-
>> include/linux/blk_types.h | 27 +++++++++++++++--
>> include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++-----
>> include/uapi/linux/blkzoned.h | 17 +++++++++--
>> 6 files changed, 133 insertions(+), 27 deletions(-)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 8340f69670d8..c0f0dbad548d 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
>> goto not_supported;
>> break;
>> case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_OPEN:
>> + case REQ_OP_ZONE_CLOSE:
>> + case REQ_OP_ZONE_FINISH:
>> if (!blk_queue_is_zoned(q))
>> goto not_supported;
>> break;
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index ae7e91bd0618..d0c933593b93 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>
>> /**
>> - * blkdev_reset_zones - Reset zones write pointer
>> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
>> * @bdev: Target block device
>> - * @sector: Start sector of the first zone to reset
>> - * @nr_sectors: Number of sectors, at least the length of one zone
>> + * @op: Operation to be performed on the zone(s)
>> + * @sector: Start sector of the first zone to operate on
>> + * @nr_sectors: Number of sectors, at least the length of one zone and
>> + * must be zone size aligned.
>> * @gfp_mask: Memory allocation flags (for bio_alloc)
>> *
>> * Description:
>> - * Reset the write pointer of the zones contained in the range
>> + * Perform the specified operation contained in the range
> Perform the specified operation over the sector range
>> * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
>> * is valid, but the specified range should not contain conventional zones.
>> */
>> -int blkdev_reset_zones(struct block_device *bdev,
>> - sector_t sector, sector_t nr_sectors,
>> - gfp_t gfp_mask)
>> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> {
>> struct request_queue *q = bdev_get_queue(bdev);
>> sector_t zone_sectors;
>> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
>> if (!blk_queue_is_zoned(q))
>> return -EOPNOTSUPP;
>>
>> + if (!op_is_zone_mgmt_op(op))
>> + return -EOPNOTSUPP;
>
> EINVAL may be better here.
>
>> +
>> if (bdev_read_only(bdev))
>> return -EPERM;
>>
>> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>> bio = blk_next_bio(bio, 0, gfp_mask);
>> bio->bi_iter.bi_sector = sector;
>> bio_set_dev(bio, bdev);
>> - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
>> + bio_set_op_attrs(bio, op, 0);
>>
>> sector += zone_sectors;
>>
>> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>>
>> return ret;
>> }
>> -EXPORT_SYMBOL_GPL(blkdev_reset_zones);
>> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
>>
>> /*
>> * BLKREPORTZONE ioctl processing.
>> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> }
>>
>> /*
>> - * BLKRESETZONE ioctl processing.
>> + * Zone operation (open, close, finish or reset) ioctl processing.
>> * Called from blkdev_ioctl.
>> */
>> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> - unsigned int cmd, unsigned long arg)
>> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
>> + unsigned int cmd, unsigned long arg)
>> {
>> void __user *argp = (void __user *)arg;
>> struct request_queue *q;
>> struct blk_zone_range zrange;
>> + enum req_opf op;
>>
>> if (!argp)
>> return -EINVAL;
>> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
>> return -EFAULT;
>>
>> - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
>> - GFP_KERNEL);
>> + switch (cmd) {
>> + case BLKRESETZONE:
>> + op = REQ_OP_ZONE_RESET;
>> + break;
>> + case BLKOPENZONE:
>> + op = REQ_OP_ZONE_OPEN;
>> + break;
>> + case BLKCLOSEZONE:
>> + op = REQ_OP_ZONE_CLOSE;
>> + break;
>> + case BLKFINISHZONE:
>> + op = REQ_OP_ZONE_FINISH;
>> + break;
>> + default:
>> + return -ENOTTY;
>> + }
>> +
>> + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
>> + GFP_KERNEL);
>> }
>>
>> static inline unsigned long *blk_alloc_zone_bitmap(int node,
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 15a0eb80ada9..df7fe54db158 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
>> case BLKREPORTZONE:
>> return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
>> case BLKRESETZONE:
>> - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
>> + case BLKOPENZONE:
>> + case BLKCLOSEZONE:
>> + case BLKFINISHZONE:
>> + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
>> case BLKGETZONESZ:
>> return put_uint(arg, bdev_zone_sectors(bdev));
>> case BLKGETNRZONES:
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index 95202f80676c..067ef9242275 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -284,13 +284,20 @@ enum req_opf {
>> REQ_OP_DISCARD = 3,
>> /* securely erase sectors */
>> REQ_OP_SECURE_ERASE = 5,
>> - /* reset a zone write pointer */
>> - REQ_OP_ZONE_RESET = 6,
>> /* write the same sector many times */
>> REQ_OP_WRITE_SAME = 7,
>> /* write the zero filled sector many times */
>> REQ_OP_WRITE_ZEROES = 9,
>>
>> + /* reset a zone write pointer */
>> + REQ_OP_ZONE_RESET = 16,
>> + /* Open zone(s) */
>> + REQ_OP_ZONE_OPEN = 17,
>> + /* Close zone(s) */
>> + REQ_OP_ZONE_CLOSE = 18,
>> + /* Finish zone(s) */
>> + REQ_OP_ZONE_FINISH = 19,
>> +
>> /* SCSI passthrough using struct scsi_request */
>> REQ_OP_SCSI_IN = 32,
>> REQ_OP_SCSI_OUT = 33,
>> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
>> bio->bi_opf = op | op_flags;
>> }
>>
>> +/*
>> + * Check if the op is zoned operation.
> Check if op is a zone management operation.
>> + */
>> +static inline bool op_is_zone_mgmt_op(enum req_opf op)
>
> Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name.
>
>> +{
>> + switch (op) {
>> + case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_OPEN:
>> + case REQ_OP_ZONE_CLOSE:
>> + case REQ_OP_ZONE_FINISH:
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> static inline bool op_is_write(unsigned int op)
>> {
>> return (op & 1);
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 592669bcc536..943084f9dc9c 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
>> extern int blkdev_report_zones(struct block_device *bdev,
>> sector_t sector, struct blk_zone *zones,
>> unsigned int *nr_zones, gfp_t gfp_mask);
>> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
>> - sector_t nr_sectors, gfp_t gfp_mask);
>> extern int blk_revalidate_disk_zones(struct gendisk *disk);
>>
>> extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> unsigned int cmd, unsigned long arg);
>> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> - unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
>> + unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask);
>
> To keep the grouping of declarations, may be declare this one where
> blkdev_reset_zones() was ?
>
>>
>> #else /* CONFIG_BLK_DEV_ZONED */
>>
>> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
>> return -ENOTTY;
>> }
>>
>> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
>> - fmode_t mode, unsigned int cmd,
>> - unsigned long arg)
>> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
>> + fmode_t mode, unsigned int cmd,
>> + unsigned long arg)
>> +{
>> + return -ENOTTY;
>> +}
>> +
>> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
>> + enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> {
>> return -ENOTTY;
>
> That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one.
>
>> }
>>
>> #endif /* CONFIG_BLK_DEV_ZONED */
>>
>> +static inline int blkdev_reset_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
>> + sector, nr_sectors, gfp_mask);
>> +}
>> +
>> +static inline int blkdev_open_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
>> + sector, nr_sectors, gfp_mask);
>> +}
>> +
>> +static inline int blkdev_close_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
>> + sector, nr_sectors,
>> + gfp_mask);
>> +}
>> +
>> +static inline int blkdev_finish_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
>> + sector, nr_sectors,
>> + gfp_mask);
>> +}
>> +
>> struct request_queue {
>> /*
>> * Together with queue_head for cacheline sharing
>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>> index 498eec813494..701e0692b8d3 100644
>> --- a/include/uapi/linux/blkzoned.h
>> +++ b/include/uapi/linux/blkzoned.h
>> @@ -120,9 +120,11 @@ struct blk_zone_report {
>> };
>>
>> /**
>> - * struct blk_zone_range - BLKRESETZONE ioctl request
>> - * @sector: starting sector of the first zone to issue reset write pointer
>> - * @nr_sectors: Total number of sectors of 1 or more zones to reset
>> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
>> + * BLKCLOSEZONE/BLKFINISHZONE ioctl
>> + * request
>> + * @sector: starting sector of the first zone to operate on
>> + * @nr_sectors: Total number of sectors of all zones to operate on
>> */
>> struct blk_zone_range {
>> __u64 sector;
>> @@ -139,10 +141,19 @@ struct blk_zone_range {
>> * sector range. The sector range must be zone aligned.
>> * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
>> * @BLKGETNRZONES: Get the total number of zones of the device.
>> + * @BLKOPENZONE: Open the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> */
>> #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
>> #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
>> #define BLKGETZONESZ _IOR(0x12, 132, __u32)
>> #define BLKGETNRZONES _IOR(0x12, 133, __u32)
>> +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
>> +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
>> +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
>>
>> #endif /* _UAPI_BLKZONED_H */
>>
>
>
Thanks Damien.
I was wondering if we should, instead of introducing three new ioctls,
make a v2 of the zone mgmt API?
Something like the following data structure being passed from user-space:
struct blk_zone_mgmt {
__u8 opcode;
__u8 resv[3];
__u32 flags;
__u64 sector;
__u64 nr_sectors;
__u64 resv1; /* to make room if we where do pass a data
data pointer through this API */
};
-Matias
On 6/21/19 6:07 AM, Matias Bjørling wrote:
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 95202f80676c..067ef9242275 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -284,13 +284,20 @@ enum req_opf {
> REQ_OP_DISCARD = 3,
> /* securely erase sectors */
> REQ_OP_SECURE_ERASE = 5,
> - /* reset a zone write pointer */
> - REQ_OP_ZONE_RESET = 6,
> /* write the same sector many times */
> REQ_OP_WRITE_SAME = 7,
> /* write the zero filled sector many times */
> REQ_OP_WRITE_ZEROES = 9,
>
> + /* reset a zone write pointer */
> + REQ_OP_ZONE_RESET = 16,
> + /* Open zone(s) */
> + REQ_OP_ZONE_OPEN = 17,
> + /* Close zone(s) */
> + REQ_OP_ZONE_CLOSE = 18,
> + /* Finish zone(s) */
> + REQ_OP_ZONE_FINISH = 19,
> +
> /* SCSI passthrough using struct scsi_request */
> REQ_OP_SCSI_IN = 32,
> REQ_OP_SCSI_OUT = 33,
> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
> bio->bi_opf = op | op_flags;
> }
Are the new operation types ever passed to op_is_write()? The definition
of that function is as follows:
static inline bool op_is_write(unsigned int op)
{
return (op & 1);
}
On 6/21/19 6:07 AM, Matias Bjørling wrote:
> + * @op: Operation to be performed
This description could be more clear.
Thanks,
Bart.
On 6/24/19 12:43 PM, Bart Van Assche wrote:
> On 6/21/19 6:07 AM, Matias Bj?rling wrote:
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index 95202f80676c..067ef9242275 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -284,13 +284,20 @@ enum req_opf {
>> REQ_OP_DISCARD = 3,
>> /* securely erase sectors */
>> REQ_OP_SECURE_ERASE = 5,
>> - /* reset a zone write pointer */
>> - REQ_OP_ZONE_RESET = 6,
>> /* write the same sector many times */
>> REQ_OP_WRITE_SAME = 7,
>> /* write the zero filled sector many times */
>> REQ_OP_WRITE_ZEROES = 9,
>>
>> + /* reset a zone write pointer */
>> + REQ_OP_ZONE_RESET = 16,
>> + /* Open zone(s) */
>> + REQ_OP_ZONE_OPEN = 17,
>> + /* Close zone(s) */
>> + REQ_OP_ZONE_CLOSE = 18,
>> + /* Finish zone(s) */
>> + REQ_OP_ZONE_FINISH = 19,
>> +
>> /* SCSI passthrough using struct scsi_request */
>> REQ_OP_SCSI_IN = 32,
>> REQ_OP_SCSI_OUT = 33,
>> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
>> bio->bi_opf = op | op_flags;
>> }
>
> Are the new operation types ever passed to op_is_write()? The definition
> of that function is as follows:
>
May be we should change numbering since blktrace also relies on the
having on_is_write() without looking at the rq_ops().
197 * Data direction bit lookup
198 */
199 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
200 BLK_TC_ACT(BLK_TC_WRITE) }; <----
201
202 #define BLK_TC_RAHEAD BLK_TC_AHEAD
203 #define BLK_TC_PREFLUSH BLK_TC_FLUSH
204
205 /* The ilog2() calls fall out because they're constant */
206 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
207 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ##
__name))
208
209 /*
210 * The worker for the various blk_add_trace*() types. Fills out a
211 * blk_io_trace structure and places it in a per-cpu subbuffer.
212 */
213 static void __blk_add_trace(struct blk_trace *bt, sector_t sector,
int bytes,
214 int op, int op_flags, u32 what, int error,
int pdu_len,
215 void *pdu_data, union kernfs_node_id *cgid)
216 {
217 struct task_struct *tsk = current;
218 struct ring_buffer_event *event = NULL;
219 struct ring_buffer *buffer = NULL;
220 struct blk_io_trace *t;
221 unsigned long flags = 0;
222 unsigned long *sequence;
223 pid_t pid;
224 int cpu, pc = 0;
225 bool blk_tracer = blk_tracer_enabled;
226 ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
227
228 if (unlikely(bt->trace_state != Blktrace_running &&
!blk_tracer))
229 return;
230
231 what |= ddir_act[op_is_write(op) ? WRITE : READ]; <---
> static inline bool op_is_write(unsigned int op)
> {
> return (op & 1);
> }
>
On 6/24/19 9:46 PM, Bart Van Assche wrote:
> On 6/21/19 6:07 AM, Matias Bjørling wrote:
>> + * @op: Operation to be performed
>
> This description could be more clear.
>
> Thanks,
>
> Bart.
Thanks Bart. I'll update it.
On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>> On 6/21/19 6:07 AM, Matias Bjørling wrote:
>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>> index 95202f80676c..067ef9242275 100644
>>> --- a/include/linux/blk_types.h
>>> +++ b/include/linux/blk_types.h
>>> @@ -284,13 +284,20 @@ enum req_opf {
>>> REQ_OP_DISCARD = 3,
>>> /* securely erase sectors */
>>> REQ_OP_SECURE_ERASE = 5,
>>> - /* reset a zone write pointer */
>>> - REQ_OP_ZONE_RESET = 6,
>>> /* write the same sector many times */
>>> REQ_OP_WRITE_SAME = 7,
>>> /* write the zero filled sector many times */
>>> REQ_OP_WRITE_ZEROES = 9,
>>>
>>> + /* reset a zone write pointer */
>>> + REQ_OP_ZONE_RESET = 16,
>>> + /* Open zone(s) */
>>> + REQ_OP_ZONE_OPEN = 17,
>>> + /* Close zone(s) */
>>> + REQ_OP_ZONE_CLOSE = 18,
>>> + /* Finish zone(s) */
>>> + REQ_OP_ZONE_FINISH = 19,
>>> +
>>> /* SCSI passthrough using struct scsi_request */
>>> REQ_OP_SCSI_IN = 32,
>>> REQ_OP_SCSI_OUT = 33,
>>> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
>>> bio->bi_opf = op | op_flags;
>>> }
>>
>> Are the new operation types ever passed to op_is_write()? The definition
>> of that function is as follows:
>>
> May be we should change numbering since blktrace also relies on the
> having on_is_write() without looking at the rq_ops().
>
> 197 * Data direction bit lookup
> 198 */
> 199 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
> 200 BLK_TC_ACT(BLK_TC_WRITE) }; <----
> 201
> 202 #define BLK_TC_RAHEAD BLK_TC_AHEAD
> 203 #define BLK_TC_PREFLUSH BLK_TC_FLUSH
> 204
> 205 /* The ilog2() calls fall out because they're constant */
> 206 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
> 207 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ##
> __name))
> 208
> 209 /*
> 210 * The worker for the various blk_add_trace*() types. Fills out a
> 211 * blk_io_trace structure and places it in a per-cpu subbuffer.
> 212 */
> 213 static void __blk_add_trace(struct blk_trace *bt, sector_t sector,
> int bytes,
> 214 int op, int op_flags, u32 what, int error,
> int pdu_len,
> 215 void *pdu_data, union kernfs_node_id *cgid)
> 216 {
> 217 struct task_struct *tsk = current;
> 218 struct ring_buffer_event *event = NULL;
> 219 struct ring_buffer *buffer = NULL;
> 220 struct blk_io_trace *t;
> 221 unsigned long flags = 0;
> 222 unsigned long *sequence;
> 223 pid_t pid;
> 224 int cpu, pc = 0;
> 225 bool blk_tracer = blk_tracer_enabled;
> 226 ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
> 227
> 228 if (unlikely(bt->trace_state != Blktrace_running &&
> !blk_tracer))
> 229 return;
> 230
> 231 what |= ddir_act[op_is_write(op) ? WRITE : READ]; <---
>
>
>> static inline bool op_is_write(unsigned int op)
>> {
>> return (op & 1);
>> }
>>
>
The zone mgmt commands are neither write nor reads commands. I guess,
one could characterize them as write commands, but they don't write any
data, they update a state of a zone on a drive. One should keep it as
is? and make sure the zone mgmt commands don't get categorized as either
read/write.
On 6/22/19 3:02 AM, Damien Le Moal wrote:
> On 2019/06/21 22:07, Matias Bjørling wrote:
>> From: Ajay Joshi <[email protected]>
>>
>> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
>> support to allow explicit control of zone states.
>>
>> Signed-off-by: Ajay Joshi <[email protected]>
>> Signed-off-by: Matias Bjørling <[email protected]>
>> ---
>> drivers/block/null_blk.h | 4 ++--
>> drivers/block/null_blk_main.c | 13 ++++++++++---
>> drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++++++++++++++---
>> 3 files changed, 42 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
>> index 34b22d6523ba..62ef65cb0f3e 100644
>> --- a/drivers/block/null_blk.h
>> +++ b/drivers/block/null_blk.h
>> @@ -93,7 +93,7 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
>> gfp_t gfp_mask);
>> void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>> unsigned int nr_sectors);
>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector);
>> #else
>> static inline int null_zone_init(struct nullb_device *dev)
>> {
>> @@ -111,6 +111,6 @@ static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>> unsigned int nr_sectors)
>> {
>> }
>> -static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
>> +static inline void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector) {}
>> #endif /* CONFIG_BLK_DEV_ZONED */
>> #endif /* __NULL_BLK_H */
>> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
>> index 447d635c79a2..5058fb980c9c 100644
>> --- a/drivers/block/null_blk_main.c
>> +++ b/drivers/block/null_blk_main.c
>> @@ -1209,10 +1209,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
>> nr_sectors = blk_rq_sectors(cmd->rq);
>> }
>>
>> - if (op == REQ_OP_WRITE)
>> + switch (op) {
>> + case REQ_OP_WRITE:
>> null_zone_write(cmd, sector, nr_sectors);
>> - else if (op == REQ_OP_ZONE_RESET)
>> - null_zone_reset(cmd, sector);
>> + break;
>> + case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_OPEN:
>> + case REQ_OP_ZONE_CLOSE:
>> + case REQ_OP_ZONE_FINISH:
>> + null_zone_mgmt_op(cmd, sector);
>> + break;
>> + }
>> }
>> out:
>> /* Complete IO by inline, softirq or timer */
>> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
>> index fca0c97ff1aa..47d956b2e148 100644
>> --- a/drivers/block/null_blk_zoned.c
>> +++ b/drivers/block/null_blk_zoned.c
>> @@ -121,17 +121,44 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>> }
>> }
>>
>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector)
>> {
>> struct nullb_device *dev = cmd->nq->dev;
>> unsigned int zno = null_zone_no(dev, sector);
>> struct blk_zone *zone = &dev->zones[zno];
>> + enum req_opf op = req_op(cmd->rq);
>>
>> if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
>> cmd->error = BLK_STS_IOERR;
>> return;
>> }
>>
>> - zone->cond = BLK_ZONE_COND_EMPTY;
>> - zone->wp = zone->start;
>> + switch (op) {
>> + case REQ_OP_ZONE_RESET:
>> + zone->cond = BLK_ZONE_COND_EMPTY;
>> + zone->wp = zone->start;
>> + return;
>> + case REQ_OP_ZONE_OPEN:
>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>> + cmd->error = BLK_STS_IOERR;
>> + return;
>> + }
>> + zone->cond = BLK_ZONE_COND_EXP_OPEN;
>
>
> With ZBC, open of a full zone is a "nop". No error. So I would rather have this as:
>
> if (zone->cond != BLK_ZONE_COND_FULL)
> zone->cond = BLK_ZONE_COND_EXP_OPEN;
>
Is this only ZBC? I can't find a reference to it in ZAC. I think it
should fail. One is trying to open a zone that is full, one can't open
it again. It's done for this round.
>
>> + return;
>> + case REQ_OP_ZONE_CLOSE:
>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>> + cmd->error = BLK_STS_IOERR;
>> + return;
>> + }
>> + zone->cond = BLK_ZONE_COND_CLOSED;
>
> Sam as for open. Closing a full zone on ZBC is a nop.
I think this should cause error.
And the code above would
> also set an empty zone to closed. Finally, if the zone is open but nothing was
> written to it, it must be returned to empty condition, not closed.
Only on a reset event right? In general, if I do a expl. open, close it,
it should not go to empty.
So something
> like this is needed.
>
> switch (zone->cond) {
> case BLK_ZONE_COND_FULL:
> case BLK_ZONE_COND_EMPTY:
> break;
> case BLK_ZONE_COND_EXP_OPEN:
> if (zone->wp == zone->start) {
> zone->cond = BLK_ZONE_COND_EMPTY;
> break;
> }
> /* fallthrough */
> default:
> zone->cond = BLK_ZONE_COND_CLOSED;
> }
>
>> + return;
>> + case REQ_OP_ZONE_FINISH:
>> + zone->cond = BLK_ZONE_COND_FULL;
>> + zone->wp = zone->start + zone->len;
>> + return;
>> + default:
>> + /* Invalid zone condition */
>> + cmd->error = BLK_STS_IOERR;
>> + return;
>> + }
>> }
>>
>
>
On 2019/06/25 20:06, Matias Bj?rling wrote:
> On 6/22/19 3:02 AM, Damien Le Moal wrote:
>> On 2019/06/21 22:07, Matias Bj?rling wrote:
>>> From: Ajay Joshi <[email protected]>
>>>
>>> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
>>> support to allow explicit control of zone states.
>>>
>>> Signed-off-by: Ajay Joshi <[email protected]>
>>> Signed-off-by: Matias Bj?rling <[email protected]>
>>> ---
>>> drivers/block/null_blk.h | 4 ++--
>>> drivers/block/null_blk_main.c | 13 ++++++++++---
>>> drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++++++++++++++---
>>> 3 files changed, 42 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
>>> index 34b22d6523ba..62ef65cb0f3e 100644
>>> --- a/drivers/block/null_blk.h
>>> +++ b/drivers/block/null_blk.h
>>> @@ -93,7 +93,7 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
>>> gfp_t gfp_mask);
>>> void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>> unsigned int nr_sectors);
>>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
>>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector);
>>> #else
>>> static inline int null_zone_init(struct nullb_device *dev)
>>> {
>>> @@ -111,6 +111,6 @@ static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>> unsigned int nr_sectors)
>>> {
>>> }
>>> -static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
>>> +static inline void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector) {}
>>> #endif /* CONFIG_BLK_DEV_ZONED */
>>> #endif /* __NULL_BLK_H */
>>> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
>>> index 447d635c79a2..5058fb980c9c 100644
>>> --- a/drivers/block/null_blk_main.c
>>> +++ b/drivers/block/null_blk_main.c
>>> @@ -1209,10 +1209,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
>>> nr_sectors = blk_rq_sectors(cmd->rq);
>>> }
>>>
>>> - if (op == REQ_OP_WRITE)
>>> + switch (op) {
>>> + case REQ_OP_WRITE:
>>> null_zone_write(cmd, sector, nr_sectors);
>>> - else if (op == REQ_OP_ZONE_RESET)
>>> - null_zone_reset(cmd, sector);
>>> + break;
>>> + case REQ_OP_ZONE_RESET:
>>> + case REQ_OP_ZONE_OPEN:
>>> + case REQ_OP_ZONE_CLOSE:
>>> + case REQ_OP_ZONE_FINISH:
>>> + null_zone_mgmt_op(cmd, sector);
>>> + break;
>>> + }
>>> }
>>> out:
>>> /* Complete IO by inline, softirq or timer */
>>> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
>>> index fca0c97ff1aa..47d956b2e148 100644
>>> --- a/drivers/block/null_blk_zoned.c
>>> +++ b/drivers/block/null_blk_zoned.c
>>> @@ -121,17 +121,44 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>> }
>>> }
>>>
>>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
>>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector)
>>> {
>>> struct nullb_device *dev = cmd->nq->dev;
>>> unsigned int zno = null_zone_no(dev, sector);
>>> struct blk_zone *zone = &dev->zones[zno];
>>> + enum req_opf op = req_op(cmd->rq);
>>>
>>> if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
>>> cmd->error = BLK_STS_IOERR;
>>> return;
>>> }
>>>
>>> - zone->cond = BLK_ZONE_COND_EMPTY;
>>> - zone->wp = zone->start;
>>> + switch (op) {
>>> + case REQ_OP_ZONE_RESET:
>>> + zone->cond = BLK_ZONE_COND_EMPTY;
>>> + zone->wp = zone->start;
>>> + return;
>>> + case REQ_OP_ZONE_OPEN:
>>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>>> + cmd->error = BLK_STS_IOERR;
>>> + return;
>>> + }
>>> + zone->cond = BLK_ZONE_COND_EXP_OPEN;
>>
>>
>> With ZBC, open of a full zone is a "nop". No error. So I would rather have this as:
>>
>> if (zone->cond != BLK_ZONE_COND_FULL)
>> zone->cond = BLK_ZONE_COND_EXP_OPEN;
>>
> Is this only ZBC? I can't find a reference to it in ZAC. I think it
> should fail. One is trying to open a zone that is full, one can't open
> it again. It's done for this round.
Page 52/53, section 5.2.6.3.2:
If the OPEN ALL bit is cleared to zero and the zone specified by the ZONE ID
field (see 5.2.4.3.3) is in Zone Condition:
a) EMPTY, IMPLICITLY OPENED, or CLOSED, then the device shall process an
Explicitly Open Zone function
(see 4.6.3.4.10) for the zone specified by the ZONE ID field;
b) EXPLICITLY OPENED or FULL, then the device shall:
A) not change the zone's state; and
B) return successful command completion;
>>
>>> + return;
>>> + case REQ_OP_ZONE_CLOSE:
>>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>>> + cmd->error = BLK_STS_IOERR;
>>> + return;
>>> + }
>>> + zone->cond = BLK_ZONE_COND_CLOSED;
>>
>> Sam as for open. Closing a full zone on ZBC is a nop.
>
> I think this should cause error.
See ZAB/ZAC close command description. Same text as above, almost. Not an error.
It is a nop. ZAC page 48, section 5.2.4.3.2:
If the CLOSE ALL bit is cleared to zero and the zone specified by the ZONE ID
field (see 5.2.4.3.3) is in Zone Condition:
a) IMPLICITLY OPENED, or EXPLICITLY OPENED, then the device shall process a
Close Zone function
(see 4.6.3.4.11) for the zone specified by the ZONE ID field;
b) EMPTY, CLOSED, or FULL, then the device shall:
A) not change the zone's state; and
B) return successful command completion;
>
> And the code above would
>> also set an empty zone to closed. Finally, if the zone is open but nothing was
>> written to it, it must be returned to empty condition, not closed.
>
> Only on a reset event right? In general, if I do a expl. open, close it,
> it should not go to empty.
See the zone state machine. It does return to empty from expl open if nothing
was written, that is, if the WP is still at zone start. This text is in ZAC
section 4.6.3.4.11 as noted above:
For the specified zone, the Zone Condition state machine processing of this
function (e.g., as shown in the ZC2: Implicit_Open state (see 4.6.3.4.3))
results in the Zone Condition for the specified zone becoming:
a) EMPTY, if the write pointer indicates the lowest LBA in the zone and Non
Sequential Write Resources Active is false; or
b) CLOSED, if the write pointer does not indicate the lowest LBA in the zone or
Non-Sequential Write Resources Active is true.
>
> So something
>> like this is needed.
>>
>> switch (zone->cond) {
>> case BLK_ZONE_COND_FULL:
>> case BLK_ZONE_COND_EMPTY:
>> break;
>> case BLK_ZONE_COND_EXP_OPEN:
>> if (zone->wp == zone->start) {
>> zone->cond = BLK_ZONE_COND_EMPTY;
>> break;
>> }
>> /* fallthrough */
>> default:
>> zone->cond = BLK_ZONE_COND_CLOSED;
>> }
>>
>>> + return;
>>> + case REQ_OP_ZONE_FINISH:
>>> + zone->cond = BLK_ZONE_COND_FULL;
>>> + zone->wp = zone->start + zone->len;
>>> + return;
>>> + default:
>>> + /* Invalid zone condition */
>>> + cmd->error = BLK_STS_IOERR;
>>> + return;
>>> + }
>>> }
>>>
>>
>>
>
>
--
Damien Le Moal
Western Digital Research
On 6/25/19 2:36 PM, Damien Le Moal wrote:
> On 2019/06/25 20:06, Matias Bjørling wrote:
>> On 6/22/19 3:02 AM, Damien Le Moal wrote:
>>> On 2019/06/21 22:07, Matias Bjørling wrote:
>>>> From: Ajay Joshi <[email protected]>
>>>>
>>>> Implement REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH
>>>> support to allow explicit control of zone states.
>>>>
>>>> Signed-off-by: Ajay Joshi <[email protected]>
>>>> Signed-off-by: Matias Bjørling <[email protected]>
>>>> ---
>>>> drivers/block/null_blk.h | 4 ++--
>>>> drivers/block/null_blk_main.c | 13 ++++++++++---
>>>> drivers/block/null_blk_zoned.c | 33 ++++++++++++++++++++++++++++++---
>>>> 3 files changed, 42 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
>>>> index 34b22d6523ba..62ef65cb0f3e 100644
>>>> --- a/drivers/block/null_blk.h
>>>> +++ b/drivers/block/null_blk.h
>>>> @@ -93,7 +93,7 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
>>>> gfp_t gfp_mask);
>>>> void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>>> unsigned int nr_sectors);
>>>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
>>>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector);
>>>> #else
>>>> static inline int null_zone_init(struct nullb_device *dev)
>>>> {
>>>> @@ -111,6 +111,6 @@ static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>>> unsigned int nr_sectors)
>>>> {
>>>> }
>>>> -static inline void null_zone_reset(struct nullb_cmd *cmd, sector_t sector) {}
>>>> +static inline void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector) {}
>>>> #endif /* CONFIG_BLK_DEV_ZONED */
>>>> #endif /* __NULL_BLK_H */
>>>> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
>>>> index 447d635c79a2..5058fb980c9c 100644
>>>> --- a/drivers/block/null_blk_main.c
>>>> +++ b/drivers/block/null_blk_main.c
>>>> @@ -1209,10 +1209,17 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
>>>> nr_sectors = blk_rq_sectors(cmd->rq);
>>>> }
>>>>
>>>> - if (op == REQ_OP_WRITE)
>>>> + switch (op) {
>>>> + case REQ_OP_WRITE:
>>>> null_zone_write(cmd, sector, nr_sectors);
>>>> - else if (op == REQ_OP_ZONE_RESET)
>>>> - null_zone_reset(cmd, sector);
>>>> + break;
>>>> + case REQ_OP_ZONE_RESET:
>>>> + case REQ_OP_ZONE_OPEN:
>>>> + case REQ_OP_ZONE_CLOSE:
>>>> + case REQ_OP_ZONE_FINISH:
>>>> + null_zone_mgmt_op(cmd, sector);
>>>> + break;
>>>> + }
>>>> }
>>>> out:
>>>> /* Complete IO by inline, softirq or timer */
>>>> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
>>>> index fca0c97ff1aa..47d956b2e148 100644
>>>> --- a/drivers/block/null_blk_zoned.c
>>>> +++ b/drivers/block/null_blk_zoned.c
>>>> @@ -121,17 +121,44 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>>>> }
>>>> }
>>>>
>>>> -void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
>>>> +void null_zone_mgmt_op(struct nullb_cmd *cmd, sector_t sector)
>>>> {
>>>> struct nullb_device *dev = cmd->nq->dev;
>>>> unsigned int zno = null_zone_no(dev, sector);
>>>> struct blk_zone *zone = &dev->zones[zno];
>>>> + enum req_opf op = req_op(cmd->rq);
>>>>
>>>> if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
>>>> cmd->error = BLK_STS_IOERR;
>>>> return;
>>>> }
>>>>
>>>> - zone->cond = BLK_ZONE_COND_EMPTY;
>>>> - zone->wp = zone->start;
>>>> + switch (op) {
>>>> + case REQ_OP_ZONE_RESET:
>>>> + zone->cond = BLK_ZONE_COND_EMPTY;
>>>> + zone->wp = zone->start;
>>>> + return;
>>>> + case REQ_OP_ZONE_OPEN:
>>>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>>>> + cmd->error = BLK_STS_IOERR;
>>>> + return;
>>>> + }
>>>> + zone->cond = BLK_ZONE_COND_EXP_OPEN;
>>>
>>>
>>> With ZBC, open of a full zone is a "nop". No error. So I would rather have this as:
>>>
>>> if (zone->cond != BLK_ZONE_COND_FULL)
>>> zone->cond = BLK_ZONE_COND_EXP_OPEN;
>>>
>> Is this only ZBC? I can't find a reference to it in ZAC. I think it
>> should fail. One is trying to open a zone that is full, one can't open
>> it again. It's done for this round.
>
> Page 52/53, section 5.2.6.3.2:
>
> If the OPEN ALL bit is cleared to zero and the zone specified by the ZONE ID
> field (see 5.2.4.3.3) is in Zone Condition:
> a) EMPTY, IMPLICITLY OPENED, or CLOSED, then the device shall process an
> Explicitly Open Zone function
> (see 4.6.3.4.10) for the zone specified by the ZONE ID field;
> b) EXPLICITLY OPENED or FULL, then the device shall:
> A) not change the zone's state; and
> B) return successful command completion;
>
>>>
>>>> + return;
>>>> + case REQ_OP_ZONE_CLOSE:
>>>> + if (zone->cond == BLK_ZONE_COND_FULL) {
>>>> + cmd->error = BLK_STS_IOERR;
>>>> + return;
>>>> + }
>>>> + zone->cond = BLK_ZONE_COND_CLOSED;
>>>
>>> Sam as for open. Closing a full zone on ZBC is a nop.
>>
>> I think this should cause error.
>
> See ZAB/ZAC close command description. Same text as above, almost. Not an error.
> It is a nop. ZAC page 48, section 5.2.4.3.2:
>
> If the CLOSE ALL bit is cleared to zero and the zone specified by the ZONE ID
> field (see 5.2.4.3.3) is in Zone Condition:
> a) IMPLICITLY OPENED, or EXPLICITLY OPENED, then the device shall process a
> Close Zone function
> (see 4.6.3.4.11) for the zone specified by the ZONE ID field;
> b) EMPTY, CLOSED, or FULL, then the device shall:
> A) not change the zone's state; and
> B) return successful command completion;
>
>>
>> And the code above would
>>> also set an empty zone to closed. Finally, if the zone is open but nothing was
>>> written to it, it must be returned to empty condition, not closed.
>>
>> Only on a reset event right? In general, if I do a expl. open, close it,
>> it should not go to empty.
>
> See the zone state machine. It does return to empty from expl open if nothing
> was written, that is, if the WP is still at zone start. This text is in ZAC
> section 4.6.3.4.11 as noted above:
>
> For the specified zone, the Zone Condition state machine processing of this
> function (e.g., as shown in the ZC2: Implicit_Open state (see 4.6.3.4.3))
> results in the Zone Condition for the specified zone becoming:
> a) EMPTY, if the write pointer indicates the lowest LBA in the zone and Non
> Sequential Write Resources Active is false; or
> b) CLOSED, if the write pointer does not indicate the lowest LBA in the zone or
> Non-Sequential Write Resources Active is true.
>
Schooled! That is what one gets from having the spec in paper form on
the table ;)
>>
>> So something
>>> like this is needed.
>>>
>>> switch (zone->cond) {
>>> case BLK_ZONE_COND_FULL:
>>> case BLK_ZONE_COND_EMPTY:
>>> break;
>>> case BLK_ZONE_COND_EXP_OPEN:
>>> if (zone->wp == zone->start) {
>>> zone->cond = BLK_ZONE_COND_EMPTY;
>>> break;
>>> }
>>> /* fallthrough */
>>> default:
>>> zone->cond = BLK_ZONE_COND_CLOSED;
>>> }
>>>
>>>> + return;
>>>> + case REQ_OP_ZONE_FINISH:
>>>> + zone->cond = BLK_ZONE_COND_FULL;
>>>> + zone->wp = zone->start + zone->len;
>>>> + return;
>>>> + default:
>>>> + /* Invalid zone condition */
>>>> + cmd->error = BLK_STS_IOERR;
>>>> + return;
>>>> + }
>>>> }
>>>>
>>>
>>>
>>
>>
>
>
On 6/25/19 3:35 AM, Matias Bjørling wrote:
> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>>> static inline bool op_is_write(unsigned int op)
>>> {
>>> return (op & 1);
>>> }
>>>
>>
>
> The zone mgmt commands are neither write nor reads commands. I guess,
> one could characterize them as write commands, but they don't write any
> data, they update a state of a zone on a drive. One should keep it as
> is? and make sure the zone mgmt commands don't get categorized as either
> read/write.
Since the open, close and finish operations support modifying zone data
I propose to characterize these as write commands. How about the
following additional changes:
- Make bio_check_ro() refuse open/close/flush/reset zone operations for
read-only partitions (see also commit a32e236eb93e ("Partially revert
"block: fail op_is_write() requests to read-only partitions"") # v4.18).
- In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ"
into something that uses blk_op_str().
- Add open/close/flush zone support be added in blk_partition_remap().
Thanks,
Bart.
> On 25 Jun 2019, at 17.55, Bart Van Assche <[email protected]> wrote:
>
> On 6/25/19 3:35 AM, Matias Bjørling wrote:
>> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
>>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>>>> static inline bool op_is_write(unsigned int op)
>>>> {
>>>> return (op & 1);
>>>> }
>> The zone mgmt commands are neither write nor reads commands. I guess, one could characterize them as write commands, but they don't write any data, they update a state of a zone on a drive. One should keep it as is? and make sure the zone mgmt commands don't get categorized as either read/write.
>
> Since the open, close and finish operations support modifying zone data I propose to characterize these as write commands. How about the following additional changes:
> - Make bio_check_ro() refuse open/close/flush/reset zone operations for read-only partitions (see also commit a32e236eb93e ("Partially revert "block: fail op_is_write() requests to read-only partitions"") # v4.18).
> - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ" into something that uses blk_op_str().
> - Add open/close/flush zone support be added in blk_partition_remap().
>
> Thanks,
>
> Bart.
Agreed. This is also what we do with REQ_OP_DISCARD and it makes things
easier in the driver.
Apart from the return comment from Damien and Minwoo, the patch looks good to me.
Reviewed-by: Javier González <[email protected]>
On 06/25/2019 08:56 AM, Bart Van Assche wrote:
> On 6/25/19 3:35 AM, Matias Bj?rling wrote:
>> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
>>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>>>> static inline bool op_is_write(unsigned int op)
>>>> {
>>>> return (op & 1);
>>>> }
>>>>
>>>
>>
>> The zone mgmt commands are neither write nor reads commands. I guess,
>> one could characterize them as write commands, but they don't write any
>> data, they update a state of a zone on a drive. One should keep it as
>> is? and make sure the zone mgmt commands don't get categorized as either
>> read/write.
>
> Since the open, close and finish operations support modifying zone data
> I propose to characterize these as write commands. How about the
> following additional changes:
> - Make bio_check_ro() refuse open/close/flush/reset zone operations for
^
Since finish also listed above which supports modifying data do we need
to add finish here with flush in above line ?
> read-only partitions (see also commit a32e236eb93e ("Partially revert
> "block: fail op_is_write() requests to read-only partitions"") # v4.18).
> - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ"
> into something that uses blk_op_str().
Good idea, I've a patch for blk_op_str() and debugfs just waiting for
this to merge. Does it make sense to add that patch in this series ?
> - Add open/close/flush zone support be added in blk_partition_remap().
same here for finish ?
>
> Thanks,
>
> Bart.
>
On Tue, Jun 25, 2019 at 6:51 PM Chaitanya Kulkarni
<[email protected]> wrote:
>
> On 06/25/2019 08:56 AM, Bart Van Assche wrote:
> > On 6/25/19 3:35 AM, Matias Bjørling wrote:
> >> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
> >>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
> >>>> static inline bool op_is_write(unsigned int op)
> >>>> {
> >>>> return (op & 1);
> >>>> }
> >>>>
> >>>
> >>
> >> The zone mgmt commands are neither write nor reads commands. I guess,
> >> one could characterize them as write commands, but they don't write any
> >> data, they update a state of a zone on a drive. One should keep it as
> >> is? and make sure the zone mgmt commands don't get categorized as either
> >> read/write.
> >
> > Since the open, close and finish operations support modifying zone data
> > I propose to characterize these as write commands. How about the
> > following additional changes:
> > - Make bio_check_ro() refuse open/close/flush/reset zone operations for
> ^
> Since finish also listed above which supports modifying data do we need
> to add finish here with flush in above line ?
>
> > read-only partitions (see also commit a32e236eb93e ("Partially revert
> > "block: fail op_is_write() requests to read-only partitions"") # v4.18).
> > - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ"
> > into something that uses blk_op_str().
> Good idea, I've a patch for blk_op_str() and debugfs just waiting for
> this to merge. Does it make sense to add that patch in this series ?
Ship it off separately. Your patches can go in first.
> > - Add open/close/flush zone support be added in blk_partition_remap().
> same here for finish ?
> >
> > Thanks,
> >
> > Bart.
> >
>
On 2019/06/26 0:56, Bart Van Assche wrote:
> On 6/25/19 3:35 AM, Matias Bj?rling wrote:
>> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
>>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>>>> static inline bool op_is_write(unsigned int op)
>>>> {
>>>> ????return (op & 1);
>>>> }
>>>>
>>>
>>
>> The zone mgmt commands are neither write nor reads commands. I guess,
>> one could characterize them as write commands, but they don't write any
>> data, they update a state of a zone on a drive. One should keep it as
>> is? and make sure the zone mgmt commands don't get categorized as either
>> read/write.
>
> Since the open, close and finish operations support modifying zone data
> I propose to characterize these as write commands. How about the
> following additional changes:
> - Make bio_check_ro() refuse open/close/flush/reset zone operations for
> read-only partitions (see also commit a32e236eb93e ("Partially revert
> "block: fail op_is_write() requests to read-only partitions"") # v4.18).
> - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ"
> into something that uses blk_op_str().
> - Add open/close/flush zone support be added in blk_partition_remap().
Sounds good to me. And indeed, all operations should be failed for RO
devices/partitions. Of note though is that only reset and finish operations have
an effect on the zone data. Open and close have no effect at all on the zone
data and only change the zone condition/state. But since they do change
something on the drive, we can still consider them as "write" operations.
Best regards.
--
Damien Le Moal
Western Digital Research
On 2019/06/26 1:51, Chaitanya Kulkarni wrote:
> On 06/25/2019 08:56 AM, Bart Van Assche wrote:
>> On 6/25/19 3:35 AM, Matias Bj?rling wrote:
>>> On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
>>>> On 6/24/19 12:43 PM, Bart Van Assche wrote:
>>>>> static inline bool op_is_write(unsigned int op)
>>>>> {
>>>>> return (op & 1);
>>>>> }
>>>>>
>>>>
>>>
>>> The zone mgmt commands are neither write nor reads commands. I guess,
>>> one could characterize them as write commands, but they don't write any
>>> data, they update a state of a zone on a drive. One should keep it as
>>> is? and make sure the zone mgmt commands don't get categorized as either
>>> read/write.
>>
>> Since the open, close and finish operations support modifying zone data
>> I propose to characterize these as write commands. How about the
>> following additional changes:
>> - Make bio_check_ro() refuse open/close/flush/reset zone operations for
> ^
> Since finish also listed above which supports modifying data do we need
> to add finish here with flush in above line ?
There is no "zone flush" operation. I guess Bart made a typo here and meant
"finish". "flush" on a zoned disk is not different from regular disks.
>
>> read-only partitions (see also commit a32e236eb93e ("Partially revert
>> "block: fail op_is_write() requests to read-only partitions"") # v4.18).
>> - In submit_bio(), change op_is_write(bio_op(bio)) ? "WRITE" : "READ"
>> into something that uses blk_op_str().
> Good idea, I've a patch for blk_op_str() and debugfs just waiting for
> this to merge. Does it make sense to add that patch in this series ?
>> - Add open/close/flush zone support be added in blk_partition_remap().
> same here for finish ?
>>
>> Thanks,
>>
>> Bart.
>>
>
>
--
Damien Le Moal
Western Digital Research