2023-12-06 08:13:10

by Daniel Wagner

Subject: [PATCH v3 0/4] nvme: add csi, ms and nuse to sysfs

I've dropped the RFC status and tested the series a lot more, which
uncovered an obvious bug: when initializing the disk in nvme_alloc_ns,
the ns->head pointer was still NULL. This is why I added the
'nvme: initialize head before namespace' patch.

I can't really explain why my testing didn't catch this earlier.
Anyway, I rebuilt my test setup completely to make sure the most
obvious things are covered. The blktests with loop and nvme-tcp pass;
nvme-rdma breaks, but so does the baseline test, so this is not a
regression introduced by this series.

Thanks,
Daniel

libnvme changes:
https://github.com/igaw/libnvme/tree/tree-no-cmd

changes:
v3:
- shortened overlong lines
- fixed disk (queuedata) initialization order
- more testing with blktests
- added nuse ratelimit
- added Reviewed-by tags

v2:
- moved ns id data to nvme_ns_head
- dropped ds, nsze
- https://lore.kernel.org/linux-nvme/[email protected]/

v1:
- initial version
- https://lore.kernel.org/linux-nvme/[email protected]/

Daniel Wagner (4):
nvme: lookup ctrl from request instead from namespace
nvme: initialize head before namespace
nvme: move ns id info to struct nvme_ns_head
nvme: add csi, ms and nuse to sysfs

drivers/nvme/host/apple.c | 4 +-
drivers/nvme/host/core.c | 279 +++++++++++++++++++--------------
drivers/nvme/host/fc.c | 4 +-
drivers/nvme/host/ioctl.c | 20 +--
drivers/nvme/host/multipath.c | 31 ++--
drivers/nvme/host/nvme.h | 62 ++++----
drivers/nvme/host/rdma.c | 10 +-
drivers/nvme/host/sysfs.c | 31 ++++
drivers/nvme/host/tcp.c | 8 +-
drivers/nvme/host/zns.c | 34 ++--
drivers/nvme/target/loop.c | 4 +-
drivers/nvme/target/passthru.c | 8 +-
12 files changed, 291 insertions(+), 204 deletions(-)

--
2.43.0


2023-12-06 08:13:32

by Daniel Wagner

Subject: [PATCH v3 2/4] nvme: initialize head before namespace

In preparation for using struct nvme_ns_head pointers instead of struct
nvme_ns pointers, initialize the head pointer before we create the disk.
This allows us to attach the head as private data to the disk object.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/core.c | 46 ++++++++++++++++++++++------------------
1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 900c045fcae0..1fabe1b81de0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3474,10 +3474,11 @@ static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
return ret;
}

-static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
+static int nvme_init_ns_head(struct nvme_ctrl *ctrl,
+ struct nvme_ns_info *info,
+ struct nvme_ns_head **head)
{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_ns_head *head = NULL;
+ struct nvme_ns_head *h = NULL;
int ret;

ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
@@ -3499,8 +3500,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
* up at any time.
*/
nvme_print_device_info(ctrl);
- if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
- ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
+ if ((ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
+ ((ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
info->is_shared)) {
dev_err(ctrl->device,
"ignoring nsid %d because of duplicate IDs\n",
@@ -3519,8 +3520,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
}

mutex_lock(&ctrl->subsys->lock);
- head = nvme_find_ns_head(ctrl, info->nsid);
- if (!head) {
+ h = nvme_find_ns_head(ctrl, info->nsid);
+ if (!h) {
ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &info->ids);
if (ret) {
dev_err(ctrl->device,
@@ -3528,20 +3529,20 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
info->nsid);
goto out_unlock;
}
- head = nvme_alloc_ns_head(ctrl, info);
- if (IS_ERR(head)) {
- ret = PTR_ERR(head);
+ h = nvme_alloc_ns_head(ctrl, info);
+ if (IS_ERR(h)) {
+ ret = PTR_ERR(h);
goto out_unlock;
}
} else {
ret = -EINVAL;
- if (!info->is_shared || !head->shared) {
+ if (!info->is_shared || !h->shared) {
dev_err(ctrl->device,
"Duplicate unshared namespace %d\n",
info->nsid);
goto out_put_ns_head;
}
- if (!nvme_ns_ids_equal(&head->ids, &info->ids)) {
+ if (!nvme_ns_ids_equal(&h->ids, &info->ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
info->nsid);
@@ -3557,13 +3558,12 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
}
}

- list_add_tail_rcu(&ns->siblings, &head->list);
- ns->head = head;
+ *head = h;
mutex_unlock(&ctrl->subsys->lock);
return 0;

out_put_ns_head:
- nvme_put_ns_head(head);
+ nvme_put_ns_head(h);
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
return ret;
@@ -3615,15 +3615,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (!ns)
return;

+ if (nvme_init_ns_head(ctrl, info, &ns->head))
+ goto out_free_ns;
+
disk = blk_mq_alloc_disk(ctrl->tagset, ns);
if (IS_ERR(disk))
- goto out_free_ns;
+ goto out_free_head;
disk->fops = &nvme_bdev_ops;
disk->private_data = ns;

ns->disk = disk;
ns->queue = disk->queue;

+ mutex_lock(&ctrl->subsys->lock);
+ list_add_tail_rcu(&ns->siblings, &ns->head->list);
+ mutex_unlock(&ctrl->subsys->lock);
+
if (ctrl->opts && ctrl->opts->data_digest)
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);

@@ -3635,9 +3642,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
ns->ctrl = ctrl;
kref_init(&ns->kref);

- if (nvme_init_ns_head(ns, info))
- goto out_cleanup_disk;
-
/*
* If multipathing is enabled, the device name for all disks and not
* just those that represent shared namespaces needs to be based on the
@@ -3691,9 +3695,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (list_empty(&ns->head->list))
list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
- nvme_put_ns_head(ns->head);
- out_cleanup_disk:
put_disk(disk);
+ out_free_head:
+ nvme_put_ns_head(ns->head);
out_free_ns:
kfree(ns);
}
--
2.43.0

2023-12-06 08:13:33

by Daniel Wagner

Subject: [PATCH v3 4/4] nvme: add csi, ms and nuse to sysfs

libnvme uses sysfs for enumerating the NVMe resources, though a few
attributes are missing from sysfs; for these, libnvme issues commands
during discovery.

As the kernel already knows all these attributes and we would like to
avoid libnvme having to issue commands all the time, expose the missing
attributes.

The nuse value is updated on request because nuse is a volatile value.
Since any user can read the sysfs attribute, a very simple rate limit
is added (update at most every 5 seconds). A more sophisticated update
strategy can be added later if there is actually a need for it.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/core.c | 28 ++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/sysfs.c | 31 +++++++++++++++++++++++++++++++
3 files changed, 61 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index caa52c2f57c8..e7dd64ee1653 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1663,6 +1663,33 @@ static void nvme_ns_release(struct nvme_ns *ns)
nvme_put_ns(ns);
}

+int nvme_ns_update_nuse(struct nvme_ns_head *head)
+{
+ static DEFINE_RATELIMIT_STATE(_rs, 5 * HZ, 1);
+ struct nvme_id_ns *id;
+ struct nvme_ns *ns;
+ int srcu_idx, ret = -EWOULDBLOCK;
+
+ if (!__ratelimit(&_rs))
+ return 0;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (!ns)
+ goto out_unlock;
+
+ ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
+ if (ret)
+ goto out_unlock;
+
+ head->nuse = le64_to_cpu(id->nuse);
+ kfree(id);
+
+out_unlock:
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
static int nvme_open(struct gendisk *disk, blk_mode_t mode)
{
return nvme_ns_open(disk->private_data);
@@ -2068,6 +2095,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
blk_mq_freeze_queue(ns->disk->queue);
lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
+ ns->head->nuse = le64_to_cpu(id->nuse);
nvme_set_queue_limits(ns->ctrl, ns->queue);

ret = nvme_configure_metadata(ns, id);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e6c7890b14c9..6a928646dc09 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -456,6 +456,7 @@ struct nvme_ns_head {
u16 pi_size;
u16 sgs;
u32 sws;
+ u64 nuse;
u8 pi_type;
u8 guard_type;
#ifdef CONFIG_BLK_DEV_ZONED
@@ -867,6 +868,7 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
+int nvme_ns_update_nuse(struct nvme_ns_head *head);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);

diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index c6b7fbd4d34d..c24854eca496 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -114,12 +114,43 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(nsid);

+static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ids.csi);
+}
+static DEVICE_ATTR_RO(csi);
+
+static ssize_t metadata_bytes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ms);
+}
+static DEVICE_ATTR_RO(metadata_bytes);
+
+static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns_head *head = dev_to_ns_head(dev);
+ int ret;
+
+ ret = nvme_ns_update_nuse(head);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%llu\n", head->nuse);
+}
+static DEVICE_ATTR_RO(nuse);
+
static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
+ &dev_attr_csi.attr,
&dev_attr_nsid.attr,
+ &dev_attr_metadata_bytes.attr,
+ &dev_attr_nuse.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
--
2.43.0

2023-12-06 08:13:40

by Daniel Wagner

Subject: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

Move the namespace info to struct nvme_ns_head, because it's the same
for all associated namespaces.

The head pointer is accessible from the ns pointer, so we could simply
change all ns->x accesses to ns->head->x. While this is okay for the
slow path, it's not for the fast path. Thus we store the head pointer
as private data in the request_queue.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/apple.c | 4 +-
drivers/nvme/host/core.c | 192 +++++++++++++++++----------------
drivers/nvme/host/fc.c | 4 +-
drivers/nvme/host/ioctl.c | 20 ++--
drivers/nvme/host/multipath.c | 31 +++---
drivers/nvme/host/nvme.h | 60 ++++++-----
drivers/nvme/host/rdma.c | 10 +-
drivers/nvme/host/tcp.c | 8 +-
drivers/nvme/host/zns.c | 34 +++---
drivers/nvme/target/loop.c | 4 +-
drivers/nvme/target/passthru.c | 8 +-
11 files changed, 197 insertions(+), 178 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 596bb11eeba5..04c1c5fbd4d1 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -732,7 +732,7 @@ static int apple_nvme_remove_sq(struct apple_nvme *anv)
static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_ns_head *head = hctx->queue->queuedata;
struct apple_nvme_queue *q = hctx->driver_data;
struct apple_nvme *anv = queue_to_apple_nvme(q);
struct request *req = bd->rq;
@@ -753,7 +753,7 @@ static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvme_check_ready(&anv->ctrl, req, true))
return nvme_fail_nonready_command(&anv->ctrl, req);

- ret = nvme_setup_cmd(ns, req);
+ ret = nvme_setup_cmd(head, req);
if (ret)
return ret;

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1fabe1b81de0..caa52c2f57c8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -308,16 +308,16 @@ static void nvme_retry_req(struct request *req)

static void nvme_log_error(struct request *req)
{
- struct nvme_ns *ns = req->q->queuedata;
+ struct nvme_ns_head *head = req->q->queuedata;
struct nvme_request *nr = nvme_req(req);

- if (ns) {
- pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
- ns->disk ? ns->disk->disk_name : "?",
+ if (head) {
+ pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
+ head->disk ? head->disk->disk_name : "?",
nvme_get_opcode_str(nr->cmd->common.opcode),
nr->cmd->common.opcode,
- (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
- (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
+ nvme_sect_to_lba(head, blk_rq_pos(req)),
+ blk_rq_bytes(req) >> head->lba_shift,
nvme_get_error_status_str(nr->status),
nr->status >> 8 & 7, /* Status Code Type */
nr->status & 0xff, /* Status Code */
@@ -757,16 +757,16 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
}
EXPORT_SYMBOL_GPL(__nvme_check_ready);

-static inline void nvme_setup_flush(struct nvme_ns *ns,
+static inline void nvme_setup_flush(struct nvme_ns_head *head,
struct nvme_command *cmnd)
{
memset(cmnd, 0, sizeof(*cmnd));
cmnd->common.opcode = nvme_cmd_flush;
- cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->common.nsid = cpu_to_le32(head->ns_id);
}

-static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmnd)
+static blk_status_t nvme_setup_discard(struct nvme_ns_head *head,
+ struct request *req, struct nvme_command *cmnd)
{
unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
@@ -794,8 +794,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
}

if (queue_max_discard_segments(req->q) == 1) {
- u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
- u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
+ u64 slba = nvme_sect_to_lba(head, blk_rq_pos(req));
+ u32 nlb = blk_rq_sectors(req) >> (head->lba_shift - 9);

range[0].cattr = cpu_to_le32(0);
range[0].nlb = cpu_to_le32(nlb);
@@ -803,8 +803,9 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
n = 1;
} else {
__rq_for_each_bio(bio, req) {
- u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
- u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+ u64 slba = nvme_sect_to_lba(head,
+ bio->bi_iter.bi_sector);
+ u32 nlb = bio->bi_iter.bi_size >> head->lba_shift;

if (n < segments) {
range[n].cattr = cpu_to_le32(0);
@@ -825,7 +826,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,

memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
- cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->dsm.nsid = cpu_to_le32(head->ns_id);
cmnd->dsm.nr = cpu_to_le32(segments - 1);
cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);

@@ -835,14 +836,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
return BLK_STS_OK;
}

-static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
- struct request *req)
+static void nvme_set_ref_tag(struct nvme_ns_head *head,
+ struct nvme_command *cmnd, struct request *req)
{
u32 upper, lower;
u64 ref48;

/* both rw and write zeroes share the same reftag format */
- switch (ns->guard_type) {
+ switch (head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
break;
@@ -859,7 +860,7 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
}
}

-static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
+static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns_head *head,
struct request *req, struct nvme_command *cmnd)
{
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
@@ -867,25 +868,25 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
memset(cmnd, 0, sizeof(*cmnd));

if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
- return nvme_setup_discard(ns, req, cmnd);
+ return nvme_setup_discard(head, req, cmnd);

cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
- cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->write_zeroes.nsid = cpu_to_le32(head->ns_id);
cmnd->write_zeroes.slba =
- cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
+ cpu_to_le64(nvme_sect_to_lba(head, blk_rq_pos(req)));
cmnd->write_zeroes.length =
- cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ cpu_to_le16((blk_rq_bytes(req) >> head->lba_shift) - 1);

- if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+ if (!(req->cmd_flags & REQ_NOUNMAP) && (head->features & NVME_NS_DEAC))
cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);

- if (nvme_ns_has_pi(ns)) {
+ if (nvme_ns_has_pi(head)) {
cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);

- switch (ns->pi_type) {
+ switch (head->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
- nvme_set_ref_tag(ns, cmnd, req);
+ nvme_set_ref_tag(head, cmnd, req);
break;
}
}
@@ -893,7 +894,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
return BLK_STS_OK;
}

-static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+static inline blk_status_t nvme_setup_rw(struct nvme_ns_head *head,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
{
@@ -910,17 +911,19 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,

cmnd->rw.opcode = op;
cmnd->rw.flags = 0;
- cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->rw.nsid = cpu_to_le32(head->ns_id);
cmnd->rw.cdw2 = 0;
cmnd->rw.cdw3 = 0;
cmnd->rw.metadata = 0;
- cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
- cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ cmnd->rw.slba =
+ cpu_to_le64(nvme_sect_to_lba(head, blk_rq_pos(req)));
+ cmnd->rw.length =
+ cpu_to_le16((blk_rq_bytes(req) >> head->lba_shift) - 1);
cmnd->rw.reftag = 0;
cmnd->rw.apptag = 0;
cmnd->rw.appmask = 0;

- if (ns->ms) {
+ if (head->ms) {
/*
* If formated with metadata, the block layer always provides a
* metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else
@@ -928,12 +931,12 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
* namespace capacity to zero to prevent any I/O.
*/
if (!blk_integrity_rq(req)) {
- if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
+ if (WARN_ON_ONCE(!nvme_ns_has_pi(head)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
}

- switch (ns->pi_type) {
+ switch (head->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
control |= NVME_RW_PRINFO_PRCHK_GUARD;
break;
@@ -943,7 +946,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
NVME_RW_PRINFO_PRCHK_REF;
if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
- nvme_set_ref_tag(ns, cmnd, req);
+ nvme_set_ref_tag(head, cmnd, req);
break;
}
}
@@ -966,7 +969,7 @@ void nvme_cleanup_cmd(struct request *req)
}
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);

-blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
+blk_status_t nvme_setup_cmd(struct nvme_ns_head *head, struct request *req)
{
struct nvme_command *cmd = nvme_req(req)->cmd;
blk_status_t ret = BLK_STS_OK;
@@ -980,35 +983,39 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
/* these are setup prior to execution in nvme_init_request() */
break;
case REQ_OP_FLUSH:
- nvme_setup_flush(ns, cmd);
+ nvme_setup_flush(head, cmd);
break;
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_RESET:
- ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET);
+ ret = nvme_setup_zone_mgmt_send(head, req,
+ cmd, NVME_ZONE_RESET);
break;
case REQ_OP_ZONE_OPEN:
- ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN);
+ ret = nvme_setup_zone_mgmt_send(head, req,
+ cmd, NVME_ZONE_OPEN);
break;
case REQ_OP_ZONE_CLOSE:
- ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE);
+ ret = nvme_setup_zone_mgmt_send(head, req,
+ cmd, NVME_ZONE_CLOSE);
break;
case REQ_OP_ZONE_FINISH:
- ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
+ ret = nvme_setup_zone_mgmt_send(head, req,
+ cmd, NVME_ZONE_FINISH);
break;
case REQ_OP_WRITE_ZEROES:
- ret = nvme_setup_write_zeroes(ns, req, cmd);
+ ret = nvme_setup_write_zeroes(head, req, cmd);
break;
case REQ_OP_DISCARD:
- ret = nvme_setup_discard(ns, req, cmd);
+ ret = nvme_setup_discard(head, req, cmd);
break;
case REQ_OP_READ:
- ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
+ ret = nvme_setup_rw(head, req, cmd, nvme_cmd_read);
break;
case REQ_OP_WRITE:
- ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+ ret = nvme_setup_rw(head, req, cmd, nvme_cmd_write);
break;
case REQ_OP_ZONE_APPEND:
- ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
+ ret = nvme_setup_rw(head, req, cmd, nvme_cmd_zone_append);
break;
default:
WARN_ON_ONCE(1);
@@ -1084,12 +1091,13 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);

-u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ u8 opcode)
{
u32 effects = 0;

- if (ns) {
- effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
+ if (head) {
+ effects = le32_to_cpu(head->effects->iocs[opcode]);
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
dev_warn_once(ctrl->device,
"IO command:%02x has unusual effects:%08x\n",
@@ -1109,9 +1117,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);

-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ u8 opcode)
{
- u32 effects = nvme_command_effects(ctrl, ns, opcode);
+ u32 effects = nvme_command_effects(ctrl, head, opcode);

/*
* For simplicity, IO to all namespaces is quiesced even if the command
@@ -1129,8 +1138,8 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);

-void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
- struct nvme_command *cmd, int status)
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ u32 effects, struct nvme_command *cmd, int status)
{
if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
nvme_unfreeze(ctrl);
@@ -1149,7 +1158,7 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
- if (ns)
+ if (head)
return;

switch (cmd->common.opcode) {
@@ -1679,9 +1688,9 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
{
struct blk_integrity integrity = { };

- switch (ns->pi_type) {
+ switch (ns->head->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
- switch (ns->guard_type) {
+ switch (ns->head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
integrity.profile = &t10_pi_type3_crc;
integrity.tag_size = sizeof(u16) + sizeof(u32);
@@ -1699,7 +1708,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
break;
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
- switch (ns->guard_type) {
+ switch (ns->head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
integrity.profile = &t10_pi_type1_crc;
integrity.tag_size = sizeof(u16);
@@ -1720,7 +1729,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
break;
}

- integrity.tuple_size = ns->ms;
+ integrity.tuple_size = ns->head->ms;
blk_integrity_register(disk, &integrity);
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
@@ -1737,8 +1746,10 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
struct request_queue *queue = disk->queue;
u32 size = queue_logical_block_size(queue);

- if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
- ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+ if (ctrl->dmrsl &&
+ ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
+ ctrl->max_discard_sectors =
+ nvme_lba_to_sect(ns->head, ctrl->dmrsl);

if (ctrl->max_discard_sectors == 0) {
blk_queue_max_discard_sectors(queue, 0);
@@ -1779,11 +1790,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
int ret = 0;
u32 elbaf;

- ns->pi_size = 0;
- ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->head->pi_size = 0;
+ ns->head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
- ns->pi_size = sizeof(struct t10_pi_tuple);
- ns->guard_type = NVME_NVM_NS_16B_GUARD;
+ ns->head->pi_size = sizeof(struct t10_pi_tuple);
+ ns->head->guard_type = NVME_NVM_NS_16B_GUARD;
goto set_pi;
}

@@ -1806,13 +1817,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
if (nvme_elbaf_sts(elbaf))
goto free_data;

- ns->guard_type = nvme_elbaf_guard_type(elbaf);
- switch (ns->guard_type) {
+ ns->head->guard_type = nvme_elbaf_guard_type(elbaf);
+ switch (ns->head->guard_type) {
case NVME_NVM_NS_64B_GUARD:
- ns->pi_size = sizeof(struct crc64_pi_tuple);
+ ns->head->pi_size = sizeof(struct crc64_pi_tuple);
break;
case NVME_NVM_NS_16B_GUARD:
- ns->pi_size = sizeof(struct t10_pi_tuple);
+ ns->head->pi_size = sizeof(struct t10_pi_tuple);
break;
default:
break;
@@ -1821,10 +1832,10 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
free_data:
kfree(nvm);
set_pi:
- if (ns->pi_size && (first || ns->ms == ns->pi_size))
- ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+ if (ns->head->pi_size && (first || ns->head->ms == ns->head->pi_size))
+ ns->head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
- ns->pi_type = 0;
+ ns->head->pi_type = 0;

return ret;
}
@@ -1838,8 +1849,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
if (ret)
return ret;

- ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
- if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+ ns->head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+ if (!ns->head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
return 0;

if (ctrl->ops->flags & NVME_F_FABRICS) {
@@ -1851,7 +1862,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
return 0;

- ns->features |= NVME_NS_EXT_LBAS;
+ ns->head->features |= NVME_NS_EXT_LBAS;

/*
* The current fabrics transport drivers support namespace
@@ -1862,8 +1873,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
* Note, this check will need to be modified if any drivers
* gain the ability to use other metadata formats.
*/
- if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
- ns->features |= NVME_NS_METADATA_SUPPORTED;
+ if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns->head))
+ ns->head->features |= NVME_NS_METADATA_SUPPORTED;
} else {
/*
* For PCIe controllers, we can't easily remap the separate
@@ -1872,9 +1883,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
* We allow extended LBAs for the passthrough interface, though.
*/
if (id->flbas & NVME_NS_FLBAS_META_EXT)
- ns->features |= NVME_NS_EXT_LBAS;
+ ns->head->features |= NVME_NS_EXT_LBAS;
else
- ns->features |= NVME_NS_METADATA_SUPPORTED;
+ ns->head->features |= NVME_NS_METADATA_SUPPORTED;
}
return 0;
}
@@ -1900,8 +1911,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
- sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
- u32 bs = 1U << ns->lba_shift;
+ sector_t capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
+ u32 bs = 1U << ns->head->lba_shift;
u32 atomic_bs, phys_bs, io_opt = 0;

/*
@@ -1909,7 +1920,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
* or smaller than a sector size yet, so catch this early and don't
* allow block I/O.
*/
- if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
+ if (ns->head->lba_shift > PAGE_SHIFT ||
+ ns->head->lba_shift < SECTOR_SHIFT) {
capacity = 0;
bs = (1 << 9);
}
@@ -1952,12 +1964,12 @@ static void nvme_update_disk_info(struct gendisk *disk,
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
- if (ns->ms) {
+ if (ns->head->ms) {
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
- (ns->features & NVME_NS_METADATA_SUPPORTED))
+ (ns->head->features & NVME_NS_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns,
ns->ctrl->max_integrity_segments);
- else if (!nvme_ns_has_pi(ns))
+ else if (!nvme_ns_has_pi(ns->head))
capacity = 0;
}

@@ -1988,7 +2000,7 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
- iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+ iob = nvme_lba_to_sect(ns->head, le16_to_cpu(id->noiob));

if (!iob)
return;
@@ -2021,7 +2033,7 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
+ nvme_mpath_revalidate_paths(ns->head);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
ns->head->disk->flags |= GENHD_FL_HIDDEN;
@@ -2055,7 +2067,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,

blk_mq_freeze_queue(ns->disk->queue);
lbaf = nvme_lbaf_index(id->flbas);
- ns->lba_shift = id->lbaf[lbaf].ds;
+ ns->head->lba_shift = id->lbaf[lbaf].ds;
nvme_set_queue_limits(ns->ctrl, ns->queue);

ret = nvme_configure_metadata(ns, id);
@@ -2081,7 +2093,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
* do not return zeroes.
*/
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
- ns->features |= NVME_NS_DEAC;
+ ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2096,7 +2108,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->head->disk, ns, id);
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
+ nvme_mpath_revalidate_paths(ns->head);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
disk_update_readahead(ns->head->disk);
@@ -3618,7 +3630,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (nvme_init_ns_head(ctrl, info, &ns->head))
goto out_free_ns;

- disk = blk_mq_alloc_disk(ctrl->tagset, ns);
+ disk = blk_mq_alloc_disk(ctrl->tagset, ns->head);
if (IS_ERR(disk))
goto out_free_head;
disk->fops = &nvme_bdev_ops;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index fb22976a36a8..41f9bacae6f4 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2799,7 +2799,7 @@ static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_ns_head *head = hctx->queue->queuedata;
struct nvme_fc_queue *queue = hctx->driver_data;
struct nvme_fc_ctrl *ctrl = queue->ctrl;
struct request *rq = bd->rq;
@@ -2813,7 +2813,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);

- ret = nvme_setup_cmd(ns, rq);
+ ret = nvme_setup_cmd(head, rq);
if (ret)
return ret;

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 529b9954d2b8..29263009bb6e 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -61,7 +61,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
* and marks this command as supported. If not reject unprivileged
* passthrough.
*/
- effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
+ effects = nvme_command_effects(ns->ctrl, ns->head, c->common.opcode);
if (!(effects & NVME_CMD_EFFECTS_CSUPP))
return false;

@@ -168,8 +168,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned int flags)
{
struct request_queue *q = req->q;
- struct nvme_ns *ns = q->queuedata;
- struct block_device *bdev = ns ? ns->disk->part0 : NULL;
+ struct nvme_ns_head *head = q->queuedata;
+ struct block_device *bdev = head ? head->disk->part0 : NULL;
struct bio *bio = NULL;
void *meta = NULL;
int ret;
@@ -222,7 +222,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u64 *result, unsigned timeout, unsigned int flags)
{
- struct nvme_ns *ns = q->queuedata;
+ struct nvme_ns_head *head = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
void *meta = NULL;
@@ -245,7 +245,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
bio = req->bio;
ctrl = nvme_req(req)->ctrl;

- effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+ effects = nvme_passthru_start(ctrl, head, cmd->common.opcode);
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
@@ -257,7 +257,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
blk_mq_free_request(req);

if (effects)
- nvme_passthru_end(ctrl, ns, effects, cmd, ret);
+ nvme_passthru_end(ctrl, head, effects, cmd, ret);

return ret;
}
@@ -283,10 +283,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return -EINVAL;
}

- length = (io.nblocks + 1) << ns->lba_shift;
+ length = (io.nblocks + 1) << ns->head->lba_shift;

if ((io.control & NVME_RW_PRINFO_PRACT) &&
- ns->ms == sizeof(struct t10_pi_tuple)) {
+ ns->head->ms == sizeof(struct t10_pi_tuple)) {
/*
* Protection information is stripped/inserted by the
* controller.
@@ -296,11 +296,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = 0;
metadata = NULL;
} else {
- meta_len = (io.nblocks + 1) * ns->ms;
+ meta_len = (io.nblocks + 1) * ns->head->ms;
metadata = nvme_to_user_ptr(io.metadata);
}

- if (ns->features & NVME_NS_EXT_LBAS) {
+ if (ns->head->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0a88d7bdc5e3..24ccd5b9c350 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -82,11 +82,14 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)

void nvme_failover_req(struct request *req)
{
- struct nvme_ns *ns = req->q->queuedata;
+ struct nvme_ns_head *head = req->q->queuedata;
+ struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
+ struct nvme_ns *ns;
u16 status = nvme_req(req)->status & 0x7ff;
unsigned long flags;
struct bio *bio;

+ ns = nvme_find_get_ns(ctrl, head->ns_id);
nvme_mpath_clear_current_path(ns);

/*
@@ -94,14 +97,14 @@ void nvme_failover_req(struct request *req)
* ready to serve this namespace. Kick of a re-read of the ANA
* information page, and just try any other available path for now.
*/
- if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
+ if (nvme_is_ana_error(status) && ctrl->ana_log_buf) {
set_bit(NVME_NS_ANA_PENDING, &ns->flags);
- queue_work(nvme_wq, &ns->ctrl->ana_work);
+ queue_work(nvme_wq, &ctrl->ana_work);
}

- spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ spin_lock_irqsave(&head->requeue_lock, flags);
for (bio = req->bio; bio; bio = bio->bi_next) {
- bio_set_dev(bio, ns->head->disk->part0);
+ bio_set_dev(bio, head->disk->part0);
if (bio->bi_opf & REQ_POLLED) {
bio->bi_opf &= ~REQ_POLLED;
bio->bi_cookie = BLK_QC_T_NONE;
@@ -115,17 +118,17 @@ void nvme_failover_req(struct request *req)
*/
bio->bi_opf &= ~REQ_NOWAIT;
}
- blk_steal_bios(&ns->head->requeue_list, req);
- spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+ blk_steal_bios(&head->requeue_list, req);
+ spin_unlock_irqrestore(&head->requeue_lock, flags);

blk_mq_end_request(req, 0);
- kblockd_schedule_work(&ns->head->requeue_work);
+ kblockd_schedule_work(&head->requeue_work);
}

void nvme_mpath_start_request(struct request *rq)
{
- struct nvme_ns *ns = rq->q->queuedata;
- struct gendisk *disk = ns->head->disk;
+ struct nvme_ns_head *head = rq->q->queuedata;
+ struct gendisk *disk = head->disk;

if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
return;
@@ -138,11 +141,11 @@ EXPORT_SYMBOL_GPL(nvme_mpath_start_request);

void nvme_mpath_end_request(struct request *rq)
{
- struct nvme_ns *ns = rq->q->queuedata;
+ struct nvme_ns_head *head = rq->q->queuedata;

if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
- bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
+ bdev_end_io_acct(head->disk->part0, req_op(rq),
blk_rq_bytes(rq) >> SECTOR_SHIFT,
nvme_req(rq)->start_time);
}
@@ -202,10 +205,10 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
up_read(&ctrl->namespaces_rwsem);
}

-void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+void nvme_mpath_revalidate_paths(struct nvme_ns_head *head)
{
- struct nvme_ns_head *head = ns->head;
sector_t capacity = get_capacity(head->disk);
+ struct nvme_ns *ns;
int node;
int srcu_idx;

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 578e6d311bc9..e6c7890b14c9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -451,6 +451,17 @@ struct nvme_ns_head {
bool shared;
int instance;
struct nvme_effects_log *effects;
+ int lba_shift;
+ u16 ms;
+ u16 pi_size;
+ u16 sgs;
+ u32 sws;
+ u8 pi_type;
+ u8 guard_type;
+#ifdef CONFIG_BLK_DEV_ZONED
+ u64 zsze;
+#endif
+ unsigned long features;

struct cdev cdev;
struct device cdev_device;
@@ -492,17 +503,6 @@ struct nvme_ns {
struct kref kref;
struct nvme_ns_head *head;

- int lba_shift;
- u16 ms;
- u16 pi_size;
- u16 sgs;
- u32 sws;
- u8 pi_type;
- u8 guard_type;
-#ifdef CONFIG_BLK_DEV_ZONED
- u64 zsze;
-#endif
- unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_ANA_PENDING 2
@@ -517,9 +517,9 @@ struct nvme_ns {
};

/* NVMe ns supports metadata actions by the controller (generate/strip) */
-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+static inline bool nvme_ns_has_pi(struct nvme_ns_head *head)
{
- return ns->pi_type && ns->ms == ns->pi_size;
+ return head->pi_type && head->ms == head->pi_size;
}

struct nvme_ctrl_ops {
@@ -651,17 +651,17 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
/*
* Convert a 512B sector number to a device logical block number.
*/
-static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector)
{
- return sector >> (ns->lba_shift - SECTOR_SHIFT);
+ return sector >> (head->lba_shift - SECTOR_SHIFT);
}

/*
* Convert a device logical block number to a 512B sector number.
*/
-static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
+static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba)
{
- return lba << (ns->lba_shift - SECTOR_SHIFT);
+ return lba << (head->lba_shift - SECTOR_SHIFT);
}

/*
@@ -792,7 +792,7 @@ static inline enum req_op nvme_req_op(struct nvme_command *cmd)
#define NVME_QID_ANY -1
void nvme_init_request(struct request *req, struct nvme_command *cmd);
void nvme_cleanup_cmd(struct request *req);
-blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
+blk_status_t nvme_setup_cmd(struct nvme_ns_head *head, struct request *req);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *req);
bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
@@ -901,7 +901,7 @@ void nvme_mpath_update(struct nvme_ctrl *ctrl);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
-void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
+void nvme_mpath_revalidate_paths(struct nvme_ns_head *head);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
void nvme_mpath_start_request(struct request *rq);
@@ -909,10 +909,10 @@ void nvme_mpath_end_request(struct request *rq);

static inline void nvme_trace_bio_complete(struct request *req)
{
- struct nvme_ns *ns = req->q->queuedata;
+ struct nvme_ns_head *head = req->q->queuedata;

if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio)
- trace_block_bio_complete(ns->head->disk->queue, req->bio);
+ trace_block_bio_complete(head->disk->queue, req->bio);
}

extern bool multipath;
@@ -1004,13 +1004,14 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
#ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
-blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns_head *head,
+ struct request *req,
struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action);
#else
-static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
- struct request *req, struct nvme_command *cmnd,
- enum nvme_zone_mgmt_action action)
+static inline blk_status_t nvme_setup_zone_mgmt_send(
+ struct nvme_ns_head *head, struct request *req,
+ struct nvme_command *cmnd, enum nvme_zone_mgmt_action action)
{
return BLK_STS_NOTSUPP;
}
@@ -1086,12 +1087,13 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
#endif

-u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
u8 opcode);
-u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
+u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ u8 opcode);
int nvme_execute_rq(struct request *rq, bool at_head);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
- struct nvme_command *cmd, int status);
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+ u32 effects, struct nvme_command *cmd, int status);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
void nvme_put_ns(struct nvme_ns *ns);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 81e2621169e5..0e967413ee43 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1407,7 +1407,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
struct nvme_rdma_sgl *sgl = &req->data_sgl;
struct ib_reg_wr *wr = &req->reg_wr;
struct request *rq = blk_mq_rq_from_pdu(req);
- struct nvme_ns *ns = rq->q->queuedata;
+ struct nvme_ns_head *head = rq->q->queuedata;
struct bio *bio = rq->bio;
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
int nr;
@@ -1423,7 +1423,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
goto mr_put;

nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
- req->mr->sig_attrs, ns->pi_type);
+ req->mr->sig_attrs, head->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);

ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -1979,7 +1979,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_ns_head *head = hctx->queue->queuedata;
struct nvme_rdma_queue *queue = hctx->driver_data;
struct request *rq = bd->rq;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
@@ -2007,7 +2007,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);

- ret = nvme_setup_cmd(ns, rq);
+ ret = nvme_setup_cmd(head, rq);
if (ret)
goto unmap_qe;

@@ -2017,7 +2017,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
queue->pi_support &&
(c->common.opcode == nvme_cmd_write ||
c->common.opcode == nvme_cmd_read) &&
- nvme_ns_has_pi(ns))
+ nvme_ns_has_pi(head))
req->use_sig_mr = true;
else
req->use_sig_mr = false;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 08805f027810..169462faad47 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2483,7 +2483,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
return 0;
}

-static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
+static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns_head *head,
struct request *rq)
{
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
@@ -2492,7 +2492,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
blk_status_t ret;

- ret = nvme_setup_cmd(ns, rq);
+ ret = nvme_setup_cmd(head, rq);
if (ret)
return ret;

@@ -2548,7 +2548,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_ns_head *head = hctx->queue->queuedata;
struct nvme_tcp_queue *queue = hctx->driver_data;
struct request *rq = bd->rq;
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
@@ -2558,7 +2558,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);

- ret = nvme_tcp_setup_cmd_pdu(ns, rq);
+ ret = nvme_tcp_setup_cmd_pdu(head, rq);
if (unlikely(ret))
return ret;

diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index ec8557810c21..51ac3a61ce3f 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -11,7 +11,7 @@ int nvme_revalidate_zones(struct nvme_ns *ns)
{
struct request_queue *q = ns->queue;

- blk_queue_chunk_sectors(q, ns->zsze);
+ blk_queue_chunk_sectors(q, ns->head->zsze);
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);

return blk_revalidate_disk_zones(ns->disk, NULL);
@@ -99,11 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
goto free_data;
}

- ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
- if (!is_power_of_2(ns->zsze)) {
+ ns->head->zsze =
+ nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
+ if (!is_power_of_2(ns->head->zsze)) {
dev_warn(ns->ctrl->device,
"invalid zone size:%llu for namespace:%u\n",
- ns->zsze, ns->head->ns_id);
+ ns->head->zsze, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
@@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
sizeof(struct nvme_zone_descriptor);

nr_zones = min_t(unsigned int, nr_zones,
- get_capacity(ns->disk) >> ilog2(ns->zsze));
+ get_capacity(ns->head->disk) >> ilog2(ns->head->zsze));

bufsize = sizeof(struct nvme_zone_report) +
nr_zones * sizeof(struct nvme_zone_descriptor);
@@ -162,13 +163,13 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,

zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
zone.cond = entry->zs >> 4;
- zone.len = ns->zsze;
- zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
- zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
+ zone.len = ns->head->zsze;
+ zone.capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zcap));
+ zone.start = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->zslba));
if (zone.cond == BLK_ZONE_COND_FULL)
zone.wp = zone.start + zone.len;
else
- zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
+ zone.wp = nvme_lba_to_sect(ns->head, le64_to_cpu(entry->wp));

return cb(&zone, idx, data);
}
@@ -196,11 +197,11 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

- sector &= ~(ns->zsze - 1);
+ sector &= ~(ns->head->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);

- c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
+ c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret) {
if (ret > 0)
@@ -220,7 +221,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
zone_idx++;
}

- sector += ns->zsze * nz;
+ sector += ns->head->zsze * nz;
}

if (zone_idx > 0)
@@ -232,14 +233,15 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
return ret;
}

-blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
- struct nvme_command *c, enum nvme_zone_mgmt_action action)
+blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns_head *head,
+ struct request *req, struct nvme_command *c,
+ enum nvme_zone_mgmt_action action)
{
memset(c, 0, sizeof(*c));

c->zms.opcode = nvme_cmd_zone_mgmt_send;
- c->zms.nsid = cpu_to_le32(ns->head->ns_id);
- c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
+ c->zms.nsid = cpu_to_le32(head->ns_id);
+ c->zms.slba = cpu_to_le64(nvme_sect_to_lba(head, blk_rq_pos(req)));
c->zms.zsa = action;

if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9cb434c58075..1c3abadecaa7 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -131,7 +131,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
+ struct nvme_ns_head *head = hctx->queue->queuedata;
struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
@@ -141,7 +141,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);

- ret = nvme_setup_cmd(ns, req);
+ ret = nvme_setup_cmd(head, req);
if (ret)
return ret;

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 9fe07d7efa96..7c13084a8f87 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -216,11 +216,11 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
struct request *rq = req->p.rq;
struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
- struct nvme_ns *ns = rq->q->queuedata;
+ struct nvme_ns_head *head = rq->q->queuedata;
u32 effects;
int status;

- effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
+ effects = nvme_passthru_start(ctrl, head, req->cmd->common.opcode);
status = nvme_execute_rq(rq, false);
if (status == NVME_SC_SUCCESS &&
req->cmd->common.opcode == nvme_admin_identify) {
@@ -243,7 +243,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
blk_mq_free_request(rq);

if (effects)
- nvme_passthru_end(ctrl, ns, effects, req->cmd, status);
+ nvme_passthru_end(ctrl, head, effects, req->cmd, status);
}

static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq,
@@ -339,7 +339,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
* non-trivial effects, make sure to execute the command synchronously
* in a workqueue so that nvme_passthru_end gets called.
*/
- effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
+ effects = nvme_command_effects(ctrl, ns->head, req->cmd->common.opcode);
if (req->p.use_workqueue ||
(effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
--
2.43.0

2023-12-06 08:51:08

by Christoph Hellwig

Subject: Re: [PATCH v3 2/4] nvme: initialize head before namespace

On Wed, Dec 06, 2023 at 09:12:42AM +0100, Daniel Wagner wrote:
> In preparation for using struct nvme_ns_head pointers instead of struct
> nvme_ns pointers, initialize the head pointer before we create the disk.
> This allows us to attach the head as private data to the disk object.
>
> Signed-off-by: Daniel Wagner <[email protected]>
> ---
> drivers/nvme/host/core.c | 46 ++++++++++++++++++++++------------------
> 1 file changed, 25 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 900c045fcae0..1fabe1b81de0 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3474,10 +3474,11 @@ static int nvme_global_check_duplicate_ids(struct nvme_subsystem *this,
> return ret;
> }
>
> -static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
> +static int nvme_init_ns_head(struct nvme_ctrl *ctrl,
> + struct nvme_ns_info *info,
> + struct nvme_ns_head **head)

Can we just return the head or an ERR_PTR here instead of an additional
argument? That would also remove the need for the variable renaming
below.

I'd also rename the function to nvme_find_or_alloc_ns_head while
you're at it.
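
For illustration, the calling convention would then be roughly
(untested sketch, using the name suggested above):

static struct nvme_ns_head *nvme_find_or_alloc_ns_head(struct nvme_ctrl *ctrl,
		struct nvme_ns_info *info);

	/* in nvme_alloc_ns(): */
	ns->head = nvme_find_or_alloc_ns_head(ctrl, info);
	if (IS_ERR(ns->head))
		goto out_free_ns;

i.e. the function returns the head on success and an ERR_PTR() on
failure, so there is no output argument and no h/head renaming.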

> + mutex_lock(&ctrl->subsys->lock);
> + list_add_tail_rcu(&ns->siblings, &ns->head->list);
> + mutex_unlock(&ctrl->subsys->lock);

This can't race with someone else adding the ns, as all scanning is
done from the scan work item. Maybe add a comment on why this pattern
is safe? Because I think it wasn't safe when the code was originally
added...
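
Something along these lines, maybe (the wording is an assumption based
on the rationale above):

	mutex_lock(&ctrl->subsys->lock);
	/*
	 * Adding the namespace to the head's list cannot race with
	 * other additions: namespaces are only added from the
	 * controller's scan_work, and only one instance of it runs
	 * at a time.
	 */
	list_add_tail_rcu(&ns->siblings, &ns->head->list);
	mutex_unlock(&ctrl->subsys->lock);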

Otherwise this looks good to me.

2023-12-06 08:55:00

by Christoph Hellwig

Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

On Wed, Dec 06, 2023 at 09:12:43AM +0100, Daniel Wagner wrote:
> Move the namespace info to struct nvme_ns_head, because it's the same
> for all associated namespaces.
>
> The head pointer is accessible from the ns pointer, so we could simply
> change all ns->x accesses to ns->head->x. While this is okay for the
> slow path,

Do you have any data to show that it matters? All the I/O command
setup functions already access the ns_head for ->ns_id, so looking
at more fields can't really make any difference.

If we have a good argument for reducing the pointer chasing I'm all
for it, but please make that a separate, well-documented commit that
also explains the tradeoffs of the new lookups this adds in a few
places.
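
One example of such a new lookup, from the failover path in this
patch: the ns used to come straight from the queue and now has to be
looked up again by nsid:

	/* before: */
	struct nvme_ns *ns = req->q->queuedata;

	/* after: */
	struct nvme_ns_head *head = req->q->queuedata;
	struct nvme_ns *ns = nvme_find_get_ns(ctrl, head->ns_id);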

2023-12-06 09:00:51

by Christoph Hellwig

Subject: Re: [PATCH v3 4/4] nvme: add csi, ms and nuse to sysfs

On Wed, Dec 06, 2023 at 09:12:44AM +0100, Daniel Wagner wrote:
> libnvme uses sysfs for enumerating the NVMe resources, though a few
> attributes are missing from sysfs; for these, libnvme issues commands
> during discovery.
>
> As the kernel already knows all these attributes and we would like to
> avoid libnvme having to issue commands all the time, expose the missing
> attributes.
>
> The nuse value is updated on request because nuse is a volatile value.
> Since any user can read the sysfs attribute, a very simple rate limit
> is added (update at most every 5 seconds). A more sophisticated update
> strategy can be added later if there is actually a need for it.
>
> Signed-off-by: Daniel Wagner <[email protected]>
> ---
> drivers/nvme/host/core.c | 28 ++++++++++++++++++++++++++++
> drivers/nvme/host/nvme.h | 2 ++
> drivers/nvme/host/sysfs.c | 31 +++++++++++++++++++++++++++++++
> 3 files changed, 61 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index caa52c2f57c8..e7dd64ee1653 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1663,6 +1663,33 @@ static void nvme_ns_release(struct nvme_ns *ns)
> nvme_put_ns(ns);
> }
>
> +int nvme_ns_update_nuse(struct nvme_ns_head *head)
> +{
> + static DEFINE_RATELIMIT_STATE(_rs, 5 * HZ, 1);
> + struct nvme_id_ns *id;
> + struct nvme_ns *ns;
> + int srcu_idx, ret = -EWOULDBLOCK;
> +
> + if (!__ratelimit(&_rs))
> + return 0;

Can you add a comment on the ratelimiting here?
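
E.g. something like this (a sketch based on the commit message, not a
proposed final wording):

	/*
	 * nuse is volatile, but reading it fresh costs an Identify
	 * Namespace round trip.  Refresh the cached value at most once
	 * every 5 seconds, otherwise report the cached value.
	 */
	if (!__ratelimit(&_rs))
		return 0;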

> +
> + srcu_idx = srcu_read_lock(&head->srcu);
> + ns = nvme_find_path(head);
> + if (!ns)
> + goto out_unlock;
> +
> + ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
> + if (ret)
> + goto out_unlock;
> +
> + head->nuse = le64_to_cpu(id->nuse);

This looks like the wrong thing to do for the non-multipath nodes,
which should be able to go straight to the ns.

I'd move this to sysfs.c, and then do a similar trick to, say,
nvme_send_pr_command: directly use the ns for the non-multipath
nodes, and do what you're doing here for the multipath nodes.
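
A rough sketch of that split inside the show handler
(ns_head_update_nuse and ns_update_nuse are made-up helper names):

	struct gendisk *disk = dev_to_disk(dev);

	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
	    disk->fops == &nvme_ns_head_ops)
		/* head node: pick a current path under srcu */
		ret = ns_head_update_nuse(head);
	else
		/* plain namespace node: go straight to the ns */
		ret = ns_update_nuse(disk->private_data);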

> static struct attribute *nvme_ns_id_attrs[] = {

And I guess the _id is not correct now, I'd just drop it.

2023-12-06 17:39:46

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

Hi Daniel,

kernel test robot noticed the following build errors:

[auto build test ERROR on v6.7-rc4]
[also build test ERROR on linus/master next-20231206]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvme-lookup-ctrl-from-request-instead-from-namespace/20231206-161455
base: v6.7-rc4
patch link: https://lore.kernel.org/r/20231206081244.32733-4-dwagner%40suse.de
patch subject: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head
config: arm64-defconfig (https://download.01.org/0day-ci/archive/20231207/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231207/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

drivers/nvme/host/core.c: In function 'nvme_update_ns_info_generic':
>> drivers/nvme/host/core.c:2034:47: error: passing argument 1 of 'nvme_mpath_revalidate_paths' from incompatible pointer type [-Werror=incompatible-pointer-types]
2034 | nvme_mpath_revalidate_paths(ns->head);
| ~~^~~~~~
| |
| struct nvme_ns_head *
In file included from drivers/nvme/host/core.c:25:
drivers/nvme/host/nvme.h:945:64: note: expected 'struct nvme_ns *' but argument is of type 'struct nvme_ns_head *'
945 | static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
| ~~~~~~~~~~~~~~~~^~
drivers/nvme/host/core.c: In function 'nvme_update_ns_info_block':
drivers/nvme/host/core.c:2109:47: error: passing argument 1 of 'nvme_mpath_revalidate_paths' from incompatible pointer type [-Werror=incompatible-pointer-types]
2109 | nvme_mpath_revalidate_paths(ns->head);
| ~~^~~~~~
| |
| struct nvme_ns_head *
drivers/nvme/host/nvme.h:945:64: note: expected 'struct nvme_ns *' but argument is of type 'struct nvme_ns_head *'
945 | static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
| ~~~~~~~~~~~~~~~~^~
cc1: some warnings being treated as errors


vim +/nvme_mpath_revalidate_paths +2034 drivers/nvme/host/core.c

2022
2023 static int nvme_update_ns_info_generic(struct nvme_ns *ns,
2024 struct nvme_ns_info *info)
2025 {
2026 blk_mq_freeze_queue(ns->disk->queue);
2027 nvme_set_queue_limits(ns->ctrl, ns->queue);
2028 set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
2029 blk_mq_unfreeze_queue(ns->disk->queue);
2030
2031 if (nvme_ns_head_multipath(ns->head)) {
2032 blk_mq_freeze_queue(ns->head->disk->queue);
2033 set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
> 2034 nvme_mpath_revalidate_paths(ns->head);
2035 blk_stack_limits(&ns->head->disk->queue->limits,
2036 &ns->queue->limits, 0);
2037 ns->head->disk->flags |= GENHD_FL_HIDDEN;
2038 blk_mq_unfreeze_queue(ns->head->disk->queue);
2039 }
2040
2041 /* Hide the block-interface for these devices */
2042 ns->disk->flags |= GENHD_FL_HIDDEN;
2043 set_bit(NVME_NS_READY, &ns->flags);
2044
2045 return 0;
2046 }
2047
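
The report points at the !CONFIG_NVME_MULTIPATH stub in nvme.h, which
was not converted along with its callers. A minimal sketch of one
obvious resolution (an assumption, not the fix actually posted):

	/* nvme.h, !CONFIG_NVME_MULTIPATH case: take the head, matching
	 * the converted callers (the CONFIG_NVME_MULTIPATH prototype
	 * and implementation would need the same change) */
	static inline void nvme_mpath_revalidate_paths(struct nvme_ns_head *head)
	{
	}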

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-06 17:39:46

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

Hi Daniel,

kernel test robot noticed the following build errors:

[auto build test ERROR on v6.7-rc4]
[also build test ERROR on linus/master next-20231206]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvme-lookup-ctrl-from-request-instead-from-namespace/20231206-161455
base: v6.7-rc4
patch link: https://lore.kernel.org/r/20231206081244.32733-4-dwagner%40suse.de
patch subject: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head
config: i386-buildonly-randconfig-003-20231206 (https://download.01.org/0day-ci/archive/20231207/[email protected]/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231207/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> drivers/nvme/host/core.c:2034:31: error: incompatible pointer types passing 'struct nvme_ns_head *' to parameter of type 'struct nvme_ns *' [-Werror,-Wincompatible-pointer-types]
nvme_mpath_revalidate_paths(ns->head);
^~~~~~~~
drivers/nvme/host/nvme.h:945:64: note: passing argument to parameter 'ns' here
static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
^
drivers/nvme/host/core.c:2109:31: error: incompatible pointer types passing 'struct nvme_ns_head *' to parameter of type 'struct nvme_ns *' [-Werror,-Wincompatible-pointer-types]
nvme_mpath_revalidate_paths(ns->head);
^~~~~~~~
drivers/nvme/host/nvme.h:945:64: note: passing argument to parameter 'ns' here
static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
^
2 errors generated.


vim +2034 drivers/nvme/host/core.c

2022
2023 static int nvme_update_ns_info_generic(struct nvme_ns *ns,
2024 struct nvme_ns_info *info)
2025 {
2026 blk_mq_freeze_queue(ns->disk->queue);
2027 nvme_set_queue_limits(ns->ctrl, ns->queue);
2028 set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
2029 blk_mq_unfreeze_queue(ns->disk->queue);
2030
2031 if (nvme_ns_head_multipath(ns->head)) {
2032 blk_mq_freeze_queue(ns->head->disk->queue);
2033 set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
> 2034 nvme_mpath_revalidate_paths(ns->head);
2035 blk_stack_limits(&ns->head->disk->queue->limits,
2036 &ns->queue->limits, 0);
2037 ns->head->disk->flags |= GENHD_FL_HIDDEN;
2038 blk_mq_unfreeze_queue(ns->head->disk->queue);
2039 }
2040
2041 /* Hide the block-interface for these devices */
2042 ns->disk->flags |= GENHD_FL_HIDDEN;
2043 set_bit(NVME_NS_READY, &ns->flags);
2044
2045 return 0;
2046 }
2047

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-07 05:36:52

by Dan Carpenter

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

Hi Daniel,

kernel test robot noticed the following build warnings:

[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/nvme-lookup-ctrl-from-request-instead-from-namespace/20231206-161455
base: v6.7-rc4
patch link: https://lore.kernel.org/r/20231206081244.32733-4-dwagner%40suse.de
patch subject: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head
config: i386-randconfig-141-20231207 (https://download.01.org/0day-ci/archive/20231207/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce: (https://download.01.org/0day-ci/archive/20231207/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Reported-by: Dan Carpenter <[email protected]>
| Closes: https://lore.kernel.org/r/[email protected]/

smatch warnings:
drivers/nvme/target/passthru.c:354 nvmet_passthru_execute_cmd() warn: variable dereferenced before check 'ns' (see line 342)

vim +/ns +354 drivers/nvme/target/passthru.c

c1fef73f793b7f Logan Gunthorpe 2020-07-24 292 static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
c1fef73f793b7f Logan Gunthorpe 2020-07-24 293 {
ab7a2737ac5acd Christoph Hellwig 2021-08-27 294 struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 295 struct request_queue *q = ctrl->admin_q;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 296 struct nvme_ns *ns = NULL;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 297 struct request *rq = NULL;
47e9730c26a4a5 Chaitanya Kulkarni 2020-11-09 298 unsigned int timeout;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 299 u32 effects;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 300 u16 status;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 301 int ret;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 302
c1fef73f793b7f Logan Gunthorpe 2020-07-24 303 if (likely(req->sq->qid != 0)) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 304 u32 nsid = le32_to_cpu(req->cmd->common.nsid);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 305
c1fef73f793b7f Logan Gunthorpe 2020-07-24 306 ns = nvme_find_get_ns(ctrl, nsid);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 307 if (unlikely(!ns)) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 308 pr_err("failed to get passthru ns nsid:%u\n", nsid);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 309 status = NVME_SC_INVALID_NS | NVME_SC_DNR;
4db69a3d7cfe31 Chaitanya Kulkarni 2020-08-06 310 goto out;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 311 }
c1fef73f793b7f Logan Gunthorpe 2020-07-24 312
c1fef73f793b7f Logan Gunthorpe 2020-07-24 313 q = ns->queue;
20c2c3bb83f26c Chaitanya Kulkarni 2021-02-09 314 timeout = nvmet_req_subsys(req)->io_timeout;
a2f6a2b8ce43db Chaitanya Kulkarni 2020-11-09 315 } else {
20c2c3bb83f26c Chaitanya Kulkarni 2021-02-09 316 timeout = nvmet_req_subsys(req)->admin_timeout;

ns is NULL here

c1fef73f793b7f Logan Gunthorpe 2020-07-24 317 }
c1fef73f793b7f Logan Gunthorpe 2020-07-24 318
e559398f47e090 Christoph Hellwig 2022-03-15 319 rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 320 if (IS_ERR(rq)) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 321 status = NVME_SC_INTERNAL;
4db69a3d7cfe31 Chaitanya Kulkarni 2020-08-06 322 goto out_put_ns;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 323 }
e559398f47e090 Christoph Hellwig 2022-03-15 324 nvme_init_request(rq, req->cmd);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 325
a2f6a2b8ce43db Chaitanya Kulkarni 2020-11-09 326 if (timeout)
a2f6a2b8ce43db Chaitanya Kulkarni 2020-11-09 327 rq->timeout = timeout;
a2f6a2b8ce43db Chaitanya Kulkarni 2020-11-09 328
c1fef73f793b7f Logan Gunthorpe 2020-07-24 329 if (req->sg_cnt) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 330 ret = nvmet_passthru_map_sg(req, rq);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 331 if (unlikely(ret)) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 332 status = NVME_SC_INTERNAL;
a2138fd49467d0 Chaitanya Kulkarni 2020-08-06 333 goto out_put_req;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 334 }
c1fef73f793b7f Logan Gunthorpe 2020-07-24 335 }
c1fef73f793b7f Logan Gunthorpe 2020-07-24 336
c1fef73f793b7f Logan Gunthorpe 2020-07-24 337 /*
2a459f6933e1c4 Christoph Hellwig 2022-12-21 338 * If a command needs post-execution fixups, or there are any
2a459f6933e1c4 Christoph Hellwig 2022-12-21 339 * non-trivial effects, make sure to execute the command synchronously
2a459f6933e1c4 Christoph Hellwig 2022-12-21 340 * in a workqueue so that nvme_passthru_end gets called.
c1fef73f793b7f Logan Gunthorpe 2020-07-24 341 */
a754bb00c0d393 Daniel Wagner 2023-12-06 @342 effects = nvme_command_effects(ctrl, ns->head, req->cmd->common.opcode);
^^^^^^^^
Unchecked dereference

2a459f6933e1c4 Christoph Hellwig 2022-12-21 343 if (req->p.use_workqueue ||
2a459f6933e1c4 Christoph Hellwig 2022-12-21 344 (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
c1fef73f793b7f Logan Gunthorpe 2020-07-24 345 INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 346 req->p.rq = rq;
8832cf922151e9 Sagi Grimberg 2022-03-21 347 queue_work(nvmet_wq, &req->p.work);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 348 } else {
e2e530867245d0 Christoph Hellwig 2022-05-24 349 rq->end_io = nvmet_passthru_req_done;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 350 rq->end_io_data = req;
e2e530867245d0 Christoph Hellwig 2022-05-24 351 blk_execute_rq_nowait(rq, false);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 352 }
c1fef73f793b7f Logan Gunthorpe 2020-07-24 353
c1fef73f793b7f Logan Gunthorpe 2020-07-24 @354 if (ns)

The rest of the code checks

c1fef73f793b7f Logan Gunthorpe 2020-07-24 355 nvme_put_ns(ns);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 356
c1fef73f793b7f Logan Gunthorpe 2020-07-24 357 return;
c1fef73f793b7f Logan Gunthorpe 2020-07-24 358
a2138fd49467d0 Chaitanya Kulkarni 2020-08-06 359 out_put_req:
7ee51cf60a90c2 Chaitanya Kulkarni 2020-08-06 360 blk_mq_free_request(rq);
4db69a3d7cfe31 Chaitanya Kulkarni 2020-08-06 361 out_put_ns:
c1fef73f793b7f Logan Gunthorpe 2020-07-24 362 if (ns)
c1fef73f793b7f Logan Gunthorpe 2020-07-24 363 nvme_put_ns(ns);
4db69a3d7cfe31 Chaitanya Kulkarni 2020-08-06 364 out:
c1fef73f793b7f Logan Gunthorpe 2020-07-24 365 nvmet_req_complete(req, status);
c1fef73f793b7f Logan Gunthorpe 2020-07-24 366 }
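
A minimal sketch of one way to resolve the warning (an assumption, not
a fix posted in this thread): admin commands legitimately arrive here
with ns == NULL, so only dereference the head when a namespace was
actually found.

	effects = nvme_command_effects(ctrl, ns ? ns->head : NULL,
				       req->cmd->common.opcode);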

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-07 10:53:25

by Daniel Wagner

[permalink] [raw]
Subject: Re: [PATCH v3 3/4] nvme: move ns id info to struct nvme_ns_head

On Wed, Dec 06, 2023 at 09:54:36AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 06, 2023 at 09:12:43AM +0100, Daniel Wagner wrote:
> > Move the namespace info to struct nvme_ns_head, because it's the same
> > for all associated namespaces.
> >
> > The head pointer is accessible from the ns pointer, so we could just
> > update all places from ns->x to ns->head->x. While this is okay for the
> > slow path,
>
> Do you have any data to show that it matters? All the I/O command
> setup functions already access the ns_head for ->ns_id, so looking
> at more fields can't really make any difference.

I've split the patch so that the first patch just moves the variables
around and changes ns->x to ns->head->x ('patched'). Then I changed the
layout of nvme_ns_head so that all variables used in nvme_setup_rw()
sit in one cacheline ('cache line optimized'), and the last change
passes the nvme_ns_head pointer around ('use nvme_ns_head directly').

I assume nvme_setup_rw() is the most frequently used of these
functions, so I tried to benchmark it by issuing 4k reads. I'm sure my
benchmark setup is not perfect, but that's what I have.

Anyway, the results suggest that moving the variables to nvme_ns_head
has a slight performance cost, but that this can be more than mitigated
by optimizing the cacheline accesses. The change to use nvme_ns_head
directly seems to eat up all of the cacheline optimization gains again.
The struct layouts below are pahole output.

'patched' layout:

struct nvme_ns_head {
struct list_head list; /* 0 16 */
struct srcu_struct srcu; /* 16 72 */
/* --- cacheline 1 boundary (64 bytes) was 24 bytes ago --- */
struct nvme_subsystem * subsys; /* 88 8 */
unsigned int ns_id; /* 96 4 */
struct nvme_ns_ids ids; /* 100 41 */

/* XXX 3 bytes hole, try to pack */

/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
struct list_head entry; /* 144 16 */
struct kref ref; /* 160 4 */
bool shared; /* 164 1 */

/* XXX 3 bytes hole, try to pack */

int instance; /* 168 4 */

/* XXX 4 bytes hole, try to pack */

struct nvme_effects_log * effects; /* 176 8 */
int lba_shift; /* 184 4 */
u16 ms; /* 188 2 */
u16 pi_size; /* 190 2 */
/* --- cacheline 3 boundary (192 bytes) --- */
u16 sgs; /* 192 2 */

/* XXX 2 bytes hole, try to pack */

u32 sws; /* 196 4 */
u64 nuse; /* 200 8 */
u8 pi_type; /* 208 1 */
u8 guard_type; /* 209 1 */

/* XXX 6 bytes hole, try to pack */

u64 zsze; /* 216 8 */
unsigned long features; /* 224 8 */
struct ratelimit_state rs_nuse; /* 232 104 */
/* --- cacheline 5 boundary (320 bytes) was 16 bytes ago --- */

[...]
}


'cacheline optimized' layout:

struct nvme_ns_head {
struct list_head list; /* 0 16 */
struct srcu_struct srcu; /* 16 72 */
/* --- cacheline 1 boundary (64 bytes) was 24 bytes ago --- */
struct nvme_subsystem * subsys; /* 88 8 */
struct nvme_ns_ids ids; /* 96 41 */

/* XXX 7 bytes hole, try to pack */

/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
struct list_head entry; /* 144 16 */
struct kref ref; /* 160 4 */
bool shared; /* 164 1 */

/* XXX 3 bytes hole, try to pack */

int instance; /* 168 4 */

/* XXX 4 bytes hole, try to pack */

struct nvme_effects_log * effects; /* 176 8 */
u64 nuse; /* 184 8 */
/* --- cacheline 3 boundary (192 bytes) --- */
unsigned int ns_id; /* 192 4 */
int lba_shift; /* 196 4 */
u16 ms; /* 200 2 */
u16 pi_size; /* 202 2 */
u8 pi_type; /* 204 1 */
u8 guard_type; /* 205 1 */
u16 sgs; /* 206 2 */
u32 sws; /* 208 4 */
[...]
}

fio test job:
[global]
name=nvme-read
time_based
ramp_time=30
runtime=120
readwrite=read
bs=4k
ioengine=io_uring
direct=1
numjobs=4
iodepth=64
group_reporting=1

[nvme0]
new_group
filename=/dev/nvme0n1
cpus_allowed=1-4
cpus_allowed_policy=split


bandwidth:
         'baseline'  'patched'   'cache line optimized'  'use nvme_ns_head directly'
         1608        1632        1613                    1618
         1608        1610        1634                    1618
         1623        1639        1642                    1646
         1638        1610        1640                    1619
         1637        1611        1642                    1620
  avg    1622.8      1620.4      1634.2                  1624.2
  stdev  14.75       14.01       12.29                   12.21

ios:
         'baseline'  'patched'   'cache line optimized'  'use nvme_ns_head directly'
         65626946    66735998    66268893                66458877
         65641469    66041634    66888910                66384526
         66012335    66904002    67132768                67329550
         66589757    66013222    67132053                66491121
         66569213    66033040    67132075                66474708
  avg    66087944    66345579.2  66910939.8              66627756.4
  stdev  474608.24   437260.67   374068.50               394426.34
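
For scale (arithmetic from the tables above): the 'cache line
optimized' bandwidth average is (1634.2 - 1622.8) / 1622.8 = +0.7% over
baseline, while the run-to-run stdev is roughly 0.8-0.9% of the mean,
so the measured effects sit right at the edge of the noise.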