2024-04-26 10:55:21

by kwb

[permalink] [raw]
Subject: [PATCH] bug-fix: nvme connect deadlock, bug report: https://bugzilla.kernel.org/show_bug.cgi?id=218777

---
drivers/nvme/host/core.c | 16 ++++++++--------
drivers/nvme/host/fabrics.c | 12 ++++++------
drivers/nvme/host/fabrics.h | 16 +++++++---------
drivers/nvme/host/fc.c | 4 ++--
drivers/nvme/host/nvme.h | 8 ++++----
drivers/nvme/host/pci.c | 6 +++---
drivers/nvme/host/rdma.c | 4 ++--
drivers/nvme/host/tcp.c | 4 ++--
drivers/nvme/target/loop.c | 4 ++--
9 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6aaecf2ecf97..761bc44527a3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2138,7 +2138,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
int ret;

- while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts, BLK_MQ_REQ_RESERVED)) == 0) {
if (csts == ~0)
return -ENODEV;
if ((csts & NVME_CSTS_RDY) == bit)
@@ -2171,7 +2171,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
ctrl->ctrl_config &= ~NVME_CC_ENABLE;

- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config, BLK_MQ_REQ_RESERVED);
if (ret)
return ret;

@@ -2187,7 +2187,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
unsigned dev_page_min;
int ret;

- ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap, BLK_MQ_REQ_RESERVED);
if (ret) {
dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
return ret;
@@ -2210,7 +2210,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;

- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config, BLK_MQ_REQ_RESERVED);
if (ret)
return ret;
return nvme_wait_ready(ctrl, ctrl->cap, true);
@@ -2226,11 +2226,11 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;

- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config, BLK_MQ_REQ_RESERVED);
if (ret)
return ret;

- while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts, BLK_MQ_REQ_RESERVED)) == 0) {
if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
break;

@@ -3070,7 +3070,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
{
int ret;

- ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs, 0);
if (ret) {
dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
return ret;
@@ -4331,7 +4331,7 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)

u32 csts;

- if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts, 0))
return false;

if (csts == ~0)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 668c6bb7a567..5d18822edd0a 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -142,7 +142,7 @@ EXPORT_SYMBOL_GPL(nvmf_get_address);
* > 0: NVMe error status code
* < 0: Linux errno error code
*/
-int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val, blk_mq_req_flags_t flags)
{
struct nvme_command cmd;
union nvme_result res;
@@ -154,7 +154,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
cmd.prop_get.offset = cpu_to_le32(off);

ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, flags);

if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -188,7 +188,7 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read32);
* > 0: NVMe error status code
* < 0: Linux errno error code
*/
-int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val, blk_mq_req_flags_t flags)
{
struct nvme_command cmd = { };
union nvme_result res;
@@ -200,7 +200,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
cmd.prop_get.offset = cpu_to_le32(off);

ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, flags);

if (ret >= 0)
*val = le64_to_cpu(res.u64);
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read64);
* > 0: NVMe error status code
* < 0: Linux errno error code
*/
-int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val, blk_mq_req_flags_t flags)
{
struct nvme_command cmd = { };
int ret;
@@ -245,7 +245,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
cmd.prop_set.value = cpu_to_le64(val);

ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0,
- NVME_QID_ANY, 0, 0);
+ NVME_QID_ANY, 0, flags);
if (unlikely(ret))
dev_err(ctrl->device,
"Property Set error: %d, offset %#x\n",
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 561c2abd3892..3f9ed5392e36 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -18,12 +18,10 @@
/* default is -1: the fail fast mechanism is disabled */
#define NVMF_DEF_FAIL_FAST_TMO -1

-/*
- * Reserved one command for internal usage. This command is used for sending
- * the connect command, as well as for the keep alive command on the admin
- * queue once live.
- */
-#define NVMF_RESERVED_TAGS 1
+/* Reserved for connect */
+#define NVMF_IO_RESERVED_TAGS 1
+/* Reserved for connect and keep alive and reset/delete */
+#define NVMF_ADMIN_RESERVED_TAGS 3

/*
* Define a host as seen by the target. We allocate one at boot, but also
@@ -179,9 +177,9 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
return true;
}

-int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
-int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
-int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
+int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val, blk_mq_req_flags_t flags);
+int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val, blk_mq_req_flags_t falgs);
+int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val, blk_mq_req_flags_t flags);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index aa14ad963d91..36834408caf0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2876,7 +2876,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
ctrl->tag_set.ops = &nvme_fc_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
- ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
+ ctrl->tag_set.reserved_tags = NVMF_IO_RESERVED_TAGS;
ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size =
@@ -3510,7 +3510,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
+ ctrl->admin_tag_set.reserved_tags = NVMF_ADMIN_RESERVED_TAGS;
ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size =
struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d94774cc52bc..b7577156ed80 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -493,9 +493,9 @@ struct nvme_ctrl_ops {
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
#define NVME_F_PCI_P2PDMA (1 << 2)
- int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
- int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
- int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+ int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val, blk_mq_req_flags_t flags);
+ int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val, blk_mq_req_flags_t flags);
+ int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val, blk_mq_req_flags_t flags);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
@@ -566,7 +566,7 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
if (!ctrl->subsystem)
return -ENOTTY;
- return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
+ return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65, BLK_MQ_REQ_RESERVED);
}

/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d820131d39b2..b614273029fc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2850,19 +2850,19 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
nvme_put_ctrl(&dev->ctrl);
}

-static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val, blk_mq_req_flags_t flags)
{
*val = readl(to_nvme_dev(ctrl)->bar + off);
return 0;
}

-static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val, blk_mq_req_flags_t flags)
{
writel(val, to_nvme_dev(ctrl)->bar + off);
return 0;
}

-static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val, blk_mq_req_flags_t flags)
{
*val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
return 0;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a49061f2afce..20fedf17166b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -801,7 +801,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
memset(set, 0, sizeof(*set));
set->ops = &nvme_rdma_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ set->reserved_tags = NVMF_ADMIN_RESERVED_TAGS;
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE;
@@ -814,7 +814,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
memset(set, 0, sizeof(*set));
set->ops = &nvme_rdma_mq_ops;
set->queue_depth = nctrl->sqsize + 1;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ set->reserved_tags = NVMF_IO_RESERVED_TAGS;
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2d81db71aaa1..df7acb44b20c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1667,7 +1667,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
memset(set, 0, sizeof(*set));
set->ops = &nvme_tcp_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ set->reserved_tags = NVMF_ADMIN_RESERVED_TAGS;
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1679,7 +1679,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
memset(set, 0, sizeof(*set));
set->ops = &nvme_tcp_mq_ops;
set->queue_depth = nctrl->sqsize + 1;
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ set->reserved_tags = NVMF_IO_RESERVED_TAGS;
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 2553f487c9f2..9a591ab4c810 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -353,7 +353,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
- ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
+ ctrl->admin_tag_set.reserved_tags = NVMF_ADMIN_RESERVED_TAGS;
ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
@@ -527,7 +527,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
- ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
+ ctrl->tag_set.reserved_tags = NVMF_IO_RESERVED_TAGS;
ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
--
2.36.0