2023-05-04 09:15:21

by Daniel Wagner

Subject: [RFC v3 0/9] Unifying fabrics drivers

I've rebased the series on nvme-6.4 and given it a bit of testing. Up to the last patch it
seems to work fine; at least blktests doesn't trigger any errors.

I've tried to separate the parts that purely move code around from the parts where the
code actually needs to change. As a result, the last two patches highlight the problems I
ran into with this attempt to implement the setup_transport() callback.
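
For reference, patch 5 introduces the per-transport callback table below
(copied from that patch); the last two patches are about collapsing the
tag-set and queue-count helpers at the bottom into a single
setup_transport() callback, which is where the problems show up:

struct nvme_fabrics_ops {
        int (*alloc_admin_queue)(struct nvme_ctrl *ctrl);
        int (*start_admin_queue)(struct nvme_ctrl *ctrl);
        void (*stop_admin_queue)(struct nvme_ctrl *ctrl);
        void (*free_admin_queue)(struct nvme_ctrl *ctrl);
        int (*alloc_io_queue)(struct nvme_ctrl *ctrl, int qid);
        int (*start_io_queue)(struct nvme_ctrl *ctrl, int qid);
        void (*stop_io_queue)(struct nvme_ctrl *ctrl, int qid);
        void (*free_io_queue)(struct nvme_ctrl *ctrl, int qid);

        /* these should be replaced with a single setup_transport() */
        int (*alloc_admin_tag_set)(struct nvme_ctrl *ctrl);
        int (*alloc_tag_set)(struct nvme_ctrl *ctrl);
        unsigned int (*nr_io_queues)(struct nvme_ctrl *ctrl);
        void (*set_io_queues)(struct nvme_ctrl *ctrl, unsigned int nr_io_queues);
};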

changes:
v2:
- move the complete ctrl state machine to fabrics.c
- https://lore.kernel.org/linux-nvme/[email protected]/

v1:
- https://lore.kernel.org/linux-nvme/[email protected]/


Daniel Wagner (9):
nvme-rdma: streamline queue function arguments
nvme-rdma: factor rdma specific queue init code out
nvme-tcp: move error and connect work to nvme_ctrl
nvme-rdma: use error and connect work from nvme_ctrl
nvme-fabrics: add fabric state machine
nvme-tcp: replace state machine with generic one
nvme-rdma: replace state machine with generic one
nvme: move queue flags to middle layer
nvme: introduce setup_transport()

drivers/nvme/host/fabrics.c | 513 +++++++++++++++++++++++++
drivers/nvme/host/fabrics.h | 24 ++
drivers/nvme/host/nvme.h | 19 +
drivers/nvme/host/rdma.c | 720 ++++++++++--------------------------
drivers/nvme/host/tcp.c | 706 ++++++++---------------------------
5 files changed, 913 insertions(+), 1069 deletions(-)

--
2.40.0


2023-05-04 09:15:33

by Daniel Wagner

Subject: [RFC v3 1/9] nvme-rdma: streamline queue function arguments

In preparation for moving common code from the fabrics drivers to fabrics.c,
streamline the low-level functions. This allows common code to pass in
NVMe subsystem-wide types such as 'struct nvme_ctrl' instead of
driver-specific types such as 'struct nvme_rdma_ctrl'.
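
The resulting calling convention, as a condensed sketch of the hunks
below: callers pass the generic struct nvme_ctrl plus a queue index,
and the transport recovers its private state via container_of():

static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
{
        return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}

static void nvme_rdma_stop_queue(struct nvme_ctrl *nctrl, int qid)
{
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
        struct nvme_rdma_queue *queue = &ctrl->queues[qid];

        /* ... operate on the transport-specific queue ... */
}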

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/rdma.c | 62 ++++++++++++++++++++++++++--------------
1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0eb79696fb73..92e5d0ccf3a9 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -132,6 +132,11 @@ static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}

+static inline int nvme_rdma_queue_id(struct nvme_rdma_queue *queue)
+{
+ return queue - queue->ctrl->queues;
+}
+
static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);

@@ -566,13 +571,19 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
return ret;
}

-static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
- int idx, size_t queue_size)
+static int nvme_rdma_alloc_queue(struct nvme_ctrl *nctrl, int idx)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct nvme_rdma_queue *queue;
struct sockaddr *src_addr = NULL;
+ size_t queue_size;
int ret;

+ if (idx == 0)
+ queue_size = NVME_AQ_DEPTH;
+ else
+ queue_size = ctrl->ctrl.sqsize + 1;
+
queue = &ctrl->queues[idx];
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
@@ -636,16 +647,22 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
ib_drain_qp(queue->qp);
}

-static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+static void nvme_rdma_stop_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+
mutex_lock(&queue->queue_lock);
if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
__nvme_rdma_stop_queue(queue);
mutex_unlock(&queue->queue_lock);
}

-static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
+static void nvme_rdma_free_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+
if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
return;

@@ -659,7 +676,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
int i;

for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_free_queue(&ctrl->queues[i]);
+ nvme_rdma_free_queue(&ctrl->ctrl, i);
}

static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
@@ -667,18 +684,19 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
int i;

for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_stop_queue(&ctrl->queues[i]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, i);
}

-static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
+static int nvme_rdma_start_queue(struct nvme_ctrl *nctrl, int idx)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct nvme_rdma_queue *queue = &ctrl->queues[idx];
int ret;

if (idx)
- ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
+ ret = nvmf_connect_io_queue(nctrl, idx);
else
- ret = nvmf_connect_admin_queue(&ctrl->ctrl);
+ ret = nvmf_connect_admin_queue(nctrl);

if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
@@ -697,7 +715,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
int i, ret = 0;

for (i = first; i < last; i++) {
- ret = nvme_rdma_start_queue(ctrl, i);
+ ret = nvme_rdma_start_queue(&ctrl->ctrl, i);
if (ret)
goto out_stop_queues;
}
@@ -706,7 +724,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,

out_stop_queues:
for (i--; i >= first; i--)
- nvme_rdma_stop_queue(&ctrl->queues[i]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, i);
return ret;
}

@@ -768,8 +786,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
}

for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvme_rdma_alloc_queue(ctrl, i,
- ctrl->ctrl.sqsize + 1);
+ ret = nvme_rdma_alloc_queue(&ctrl->ctrl, i);
if (ret)
goto out_free_queues;
}
@@ -778,7 +795,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)

out_free_queues:
for (i--; i >= 1; i--)
- nvme_rdma_free_queue(&ctrl->queues[i]);
+ nvme_rdma_free_queue(&ctrl->ctrl, i);

return ret;
}
@@ -806,7 +823,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
sizeof(struct nvme_command), DMA_TO_DEVICE);
ctrl->async_event_sqe.data = NULL;
}
- nvme_rdma_free_queue(&ctrl->queues[0]);
+ nvme_rdma_free_queue(&ctrl->ctrl, 0);
}

static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -815,7 +832,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool pi_capable = false;
int error;

- error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
+ error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
if (error)
return error;

@@ -850,7 +867,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,

}

- error = nvme_rdma_start_queue(ctrl, 0);
+ error = nvme_rdma_start_queue(&ctrl->ctrl, 0);
if (error)
goto out_remove_admin_tag_set;

@@ -877,7 +894,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue:
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
out_remove_admin_tag_set:
if (new)
@@ -889,7 +906,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->async_event_sqe.data = NULL;
}
out_free_queue:
- nvme_rdma_free_queue(&ctrl->queues[0]);
+ nvme_rdma_free_queue(&ctrl->ctrl, 0);
return error;
}

@@ -962,7 +979,7 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
{
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (remove) {
nvme_unquiesce_admin_queue(&ctrl->ctrl);
@@ -1113,7 +1130,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
destroy_admin:
nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->queues[0]);
+ nvme_rdma_stop_queue(&ctrl->ctrl, 0);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (new)
nvme_remove_admin_tag_set(&ctrl->ctrl);
@@ -1960,9 +1977,10 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
static void nvme_rdma_complete_timed_out(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_rdma_queue *queue = req->queue;

- nvme_rdma_stop_queue(queue);
+ nvme_rdma_stop_queue(ctrl, nvme_rdma_queue_id(queue));
nvmf_complete_timed_out_request(rq);
}

--
2.40.0

2023-05-04 09:15:58

by Daniel Wagner

Subject: [RFC v3 3/9] nvme-tcp: move error and connect work to nvme_ctrl

Move the common fabrics data structures into struct nvme_ctrl so that they
can be used in fabrics.c later.
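
With err_work and connect_work living in struct nvme_ctrl, the
transport (and later the common code in fabrics.c) can queue them
without knowing the transport-specific container. Sketch based on
the hunks below:

static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
{
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
                return;

        dev_warn(ctrl->device, "starting error recovery\n");
        /* no to_tcp_ctrl() needed anymore */
        queue_work(nvme_reset_wq, &ctrl->err_work);
}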

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/nvme.h | 3 +++
drivers/nvme/host/tcp.c | 24 ++++++++++--------------
2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bf46f122e9e1..5aa30b00dd17 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -339,6 +339,9 @@ struct nvme_ctrl {
struct work_struct ana_work;
#endif

+ struct work_struct err_work;
+ struct delayed_work connect_work;
+
#ifdef CONFIG_NVME_AUTH
struct work_struct dhchap_auth_work;
struct mutex dhchap_auth_mutex;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 273c1f2760a4..74ccc84d244a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -166,8 +166,6 @@ struct nvme_tcp_ctrl {
struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl;

- struct work_struct err_work;
- struct delayed_work connect_work;
struct nvme_tcp_request async_req;
u32 io_queues[HCTX_MAX_TYPES];
};
@@ -527,7 +525,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
return;

dev_warn(ctrl->device, "starting error recovery\n");
- queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
+ queue_work(nvme_reset_wq, &ctrl->err_work);
}

static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
@@ -2025,7 +2023,7 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
if (nvmf_should_reconnect(ctrl)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
- queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
+ queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->device, "Removing controller...\n");
@@ -2107,9 +2105,8 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)

static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
{
- struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
- struct nvme_tcp_ctrl, connect_work);
- struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, connect_work);

++ctrl->nr_reconnects;

@@ -2131,9 +2128,8 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)

static void nvme_tcp_error_recovery_work(struct work_struct *work)
{
- struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
- struct nvme_tcp_ctrl, err_work);
- struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ struct nvme_ctrl *ctrl = container_of(work,
+ struct nvme_ctrl, err_work);

nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
@@ -2194,8 +2190,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work)

static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
{
- flush_work(&to_tcp_ctrl(ctrl)->err_work);
- cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+ flush_work(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->connect_work);
}

static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
@@ -2581,9 +2577,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;

- INIT_DELAYED_WORK(&ctrl->connect_work,
+ INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
nvme_tcp_reconnect_ctrl_work);
- INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
+ INIT_WORK(&ctrl->ctrl.err_work, nvme_tcp_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);

if (!(opts->mask & NVMF_OPT_TRSVCID)) {
--
2.40.0

2023-05-04 09:16:03

by Daniel Wagner

Subject: [RFC v3 4/9] nvme-rdma: use error and connect work from nvme_ctrl

Use the common data structures from struct nvme_ctrl so that they can be
used in fabrics.c later.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/rdma.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a78c66278b19..b0ab5a9d5fe0 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -106,12 +106,9 @@ struct nvme_rdma_ctrl {

/* other member variables */
struct blk_mq_tag_set tag_set;
- struct work_struct err_work;

struct nvme_rdma_qe async_event_sqe;

- struct delayed_work reconnect_work;
-
struct list_head list;

struct blk_mq_tag_set admin_tag_set;
@@ -1036,8 +1033,8 @@ static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

- flush_work(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->reconnect_work);
+ flush_work(&ctrl->ctrl.err_work);
+ cancel_delayed_work_sync(&ctrl->ctrl.connect_work);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -1069,7 +1066,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
+ queue_delayed_work(nvme_wq, &ctrl->ctrl.connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
nvme_delete_ctrl(&ctrl->ctrl);
@@ -1167,8 +1164,9 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)

static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
- struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
- struct nvme_rdma_ctrl, reconnect_work);
+ struct nvme_ctrl *nctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, connect_work);
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

++ctrl->ctrl.nr_reconnects;

@@ -1190,8 +1188,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)

static void nvme_rdma_error_recovery_work(struct work_struct *work)
{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, err_work);
+ struct nvme_ctrl *nctrl = container_of(work,
+ struct nvme_ctrl, err_work);
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

nvme_stop_keep_alive(&ctrl->ctrl);
flush_work(&ctrl->ctrl.async_event_work);
@@ -1217,7 +1216,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
return;

dev_warn(ctrl->ctrl.device, "starting error recovery\n");
- queue_work(nvme_reset_wq, &ctrl->err_work);
+ queue_work(nvme_reset_wq, &ctrl->ctrl.err_work);
}

static void nvme_rdma_end_request(struct nvme_rdma_request *req)
@@ -2369,9 +2368,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
goto out_free_ctrl;
}

- INIT_DELAYED_WORK(&ctrl->reconnect_work,
+ INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
nvme_rdma_reconnect_ctrl_work);
- INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
+ INIT_WORK(&ctrl->ctrl.err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);

ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
--
2.40.0

2023-05-04 09:16:15

by Daniel Wagner

Subject: [RFC v3 5/9] nvme-fabrics: add fabric state machine

The transports share a lot of common code for the state machine.
Add a generic state machine based on the TCP transport.

In this first step, additional callbacks such as alloc_admin_tag_set()
are part of the callback API. These will be removed again later. This is
just to make the areas where more than moving code around is necessary
a bit easier to review.

This approach is just for discussion purposes and the proper series
won't have these intermediate steps. I suppose the later steps in this
series would go in before the main state machine.
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/fabrics.c | 426 ++++++++++++++++++++++++++++++++++++
drivers/nvme/host/fabrics.h | 25 +++
drivers/nvme/host/nvme.h | 3 +
3 files changed, 454 insertions(+)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index bbaa04a0c502..3d2cde17338d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1134,6 +1134,432 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
return ERR_PTR(ret);
}

+static int nvmf_start_io_queues(struct nvme_ctrl *ctrl,
+ int first, int last)
+{
+ int i, ret;
+
+ for (i = first; i < last; i++) {
+ ret = ctrl->fabrics_ops->start_io_queue(ctrl, i);
+ if (ret)
+ goto out_stop_queues;
+ }
+
+ return 0;
+
+out_stop_queues:
+ for (i--; i >= first; i--)
+ ctrl->fabrics_ops->stop_io_queue(ctrl, i);
+ return ret;
+}
+
+static void nvmf_stop_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i;
+
+ for (i = 1; i < ctrl->queue_count; i++)
+ ctrl->fabrics_ops->stop_io_queue(ctrl, i);
+}
+
+static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i, ret;
+
+ for (i = 1; i < ctrl->queue_count; i++) {
+ ret = ctrl->fabrics_ops->alloc_io_queue(ctrl, i);
+ if (ret)
+ goto out_free_queues;
+ }
+
+ return 0;
+
+out_free_queues:
+ for (i--; i >= 1; i--)
+ ctrl->fabrics_ops->free_io_queue(ctrl, i);
+
+ return ret;
+}
+
+static int nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
+{
+ unsigned int nr_io_queues;
+ int ret;
+
+ nr_io_queues = ctrl->fabrics_ops->nr_io_queues(ctrl);
+ ret = nvme_set_queue_count(ctrl, &nr_io_queues);
+ if (ret)
+ return ret;
+
+ if (nr_io_queues == 0) {
+ dev_err(ctrl->device,
+ "unable to set any I/O queues\n");
+ return -ENOMEM;
+ }
+
+ ctrl->queue_count = nr_io_queues + 1;
+ dev_info(ctrl->device,
+ "creating %d I/O queues.\n", nr_io_queues);
+
+ ctrl->fabrics_ops->set_io_queues(ctrl, nr_io_queues);
+
+ return __nvmf_alloc_io_queues(ctrl);
+}
+
+static void nvmf_free_io_queues(struct nvme_ctrl *ctrl)
+{
+ int i;
+
+ for (i = 1; i < ctrl->queue_count; i++)
+ ctrl->fabrics_ops->free_io_queue(ctrl, i);
+}
+
+static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
+{
+ int ret, nr_queues;
+
+ ret = nvmf_alloc_io_queues(ctrl);
+ if (ret)
+ return ret;
+
+ if (new) {
+ ret = ctrl->fabrics_ops->alloc_tag_set(ctrl);
+ if (ret)
+ goto out_free_io_queues;
+ }
+
+ /*
+ * Only start IO queues for which we have allocated the tagset
+ * and limitted it to the available queues. On reconnects, the
+ * queue number might have changed.
+ */
+ nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count);
+ ret = nvmf_start_io_queues(ctrl, 1, nr_queues);
+ if (ret)
+ goto out_cleanup_connect_q;
+
+ if (!new) {
+ nvme_unquiesce_io_queues(ctrl);
+ if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
+ /*
+ * If we timed out waiting for freeze we are likely to
+ * be stuck. Fail the controller initialization just
+ * to be safe.
+ */
+ ret = -ENODEV;
+ goto out_wait_freeze_timed_out;
+ }
+ blk_mq_update_nr_hw_queues(ctrl->tagset,
+ ctrl->queue_count - 1);
+ nvme_unfreeze(ctrl);
+ }
+
+ /*
+ * If the number of queues has increased (reconnect case)
+ * start all new queues now.
+ */
+ ret = nvmf_start_io_queues(ctrl, nr_queues,
+ ctrl->tagset->nr_hw_queues + 1);
+ if (ret)
+ goto out_wait_freeze_timed_out;
+
+ return 0;
+
+out_wait_freeze_timed_out:
+ nvme_quiesce_io_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvmf_stop_io_queues(ctrl);
+out_cleanup_connect_q:
+ nvme_cancel_tagset(ctrl);
+ if (new)
+ nvme_remove_io_tag_set(ctrl);
+out_free_io_queues:
+ nvmf_free_io_queues(ctrl);
+ return ret;
+}
+
+static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
+{
+ int error;
+
+ error = ctrl->fabrics_ops->alloc_admin_queue(ctrl);
+ if (error)
+ return error;
+
+ if (new) {
+ error = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
+ if (error)
+ goto out_free_admin_queue;
+
+ }
+
+ error = ctrl->fabrics_ops->start_admin_queue(ctrl);
+ if (error)
+ goto out_remove_admin_tag_set;
+
+ error = nvme_enable_ctrl(ctrl);
+ if (error)
+ goto out_stop_queue;
+
+ nvme_unquiesce_admin_queue(ctrl);
+
+ error = nvme_init_ctrl_finish(ctrl, false);
+ if (error)
+ goto out_quiesce_queue;
+
+ return 0;
+
+out_quiesce_queue:
+ nvme_quiesce_admin_queue(ctrl);
+ blk_sync_queue(ctrl->admin_q);
+out_stop_queue:
+ ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ nvme_cancel_admin_tagset(ctrl);
+out_remove_admin_tag_set:
+ if (new)
+ nvme_remove_admin_tag_set(ctrl);
+out_free_admin_queue:
+ ctrl->fabrics_ops->free_admin_queue(ctrl);
+ return error;
+}
+
+static void nvmf_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
+{
+ nvmf_stop_io_queues(ctrl);
+ if (remove)
+ nvme_remove_io_tag_set(ctrl);
+ nvmf_free_io_queues(ctrl);
+}
+
+static void nvmf_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
+{
+ ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ if (remove)
+ nvme_remove_admin_tag_set(ctrl);
+
+ ctrl->fabrics_ops->free_admin_queue(ctrl);
+}
+
+static void nvmf_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
+{
+ nvme_quiesce_admin_queue(ctrl);
+ blk_sync_queue(ctrl->admin_q);
+ ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ nvme_cancel_admin_tagset(ctrl);
+ if (remove)
+ nvme_unquiesce_admin_queue(ctrl);
+ nvmf_destroy_admin_queue(ctrl, remove);
+}
+
+static void nvmf_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove)
+{
+ if (ctrl->queue_count <= 1)
+ return;
+ nvme_quiesce_admin_queue(ctrl);
+ nvme_start_freeze(ctrl);
+ nvme_quiesce_io_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvmf_stop_io_queues(ctrl);
+ nvme_cancel_tagset(ctrl);
+ if (remove)
+ nvme_unquiesce_io_queues(ctrl);
+ nvmf_destroy_io_queues(ctrl, remove);
+}
+
+void nvmf_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
+{
+ nvmf_teardown_io_queues(ctrl, shutdown);
+ nvme_quiesce_admin_queue(ctrl);
+ nvme_disable_ctrl(ctrl, shutdown);
+ nvmf_teardown_admin_queue(ctrl, shutdown);
+}
+EXPORT_SYMBOL_GPL(nvmf_teardown_ctrl);
+
+void nvmf_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+ flush_work(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->connect_work);
+}
+EXPORT_SYMBOL_GPL(nvmf_stop_ctrl);
+
+int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
+{
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int ret;
+
+ ret = nvmf_configure_admin_queue(ctrl, new);
+ if (ret)
+ return ret;
+
+ if (ctrl->icdoff) {
+ ret = -EOPNOTSUPP;
+ dev_err(ctrl->device, "icdoff is not supported!\n");
+ goto destroy_admin;
+ }
+
+ if (!nvme_ctrl_sgl_supported(ctrl)) {
+ ret = -EOPNOTSUPP;
+ dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
+ goto destroy_admin;
+ }
+
+ if (opts->queue_size > ctrl->sqsize + 1)
+ dev_warn(ctrl->device,
+ "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ opts->queue_size, ctrl->sqsize + 1);
+
+ if (ctrl->sqsize + 1 > ctrl->maxcmd) {
+ dev_warn(ctrl->device,
+ "sqsize %u > ctrl maxcmd %u, clamping down\n",
+ ctrl->sqsize + 1, ctrl->maxcmd);
+ ctrl->sqsize = ctrl->maxcmd - 1;
+ }
+
+ if (ctrl->queue_count > 1) {
+ ret = nvmf_configure_io_queues(ctrl, new);
+ if (ret)
+ goto destroy_admin;
+ }
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
+ /*
+ * state change failure is ok if we started ctrl delete,
+ * unless we're during creation of a new controller to
+ * avoid races with teardown flow.
+ */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DELETING_NOIO);
+ WARN_ON_ONCE(new);
+ ret = -EINVAL;
+ goto destroy_io;
+ }
+
+ nvme_start_ctrl(ctrl);
+ return 0;
+
+destroy_io:
+ if (ctrl->queue_count > 1) {
+ nvme_quiesce_io_queues(ctrl);
+ nvme_sync_io_queues(ctrl);
+ nvmf_stop_io_queues(ctrl);
+ nvme_cancel_tagset(ctrl);
+ nvmf_destroy_io_queues(ctrl, new);
+ }
+destroy_admin:
+ nvme_quiesce_admin_queue(ctrl);
+ blk_sync_queue(ctrl->admin_q);
+ ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ nvme_cancel_admin_tagset(ctrl);
+ nvmf_destroy_admin_queue(ctrl, new);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvmf_setup_ctrl);
+
+void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
+{
+ /* If we are resetting/deleting then do nothing */
+ if (ctrl->state != NVME_CTRL_CONNECTING) {
+ WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
+ ctrl->state == NVME_CTRL_LIVE);
+ return;
+ }
+
+ if (nvmf_should_reconnect(ctrl)) {
+ dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
+ ctrl->opts->reconnect_delay);
+ queue_delayed_work(nvme_wq, &ctrl->connect_work,
+ ctrl->opts->reconnect_delay * HZ);
+ } else {
+ dev_info(ctrl->device, "Removing controller...\n");
+ nvme_delete_ctrl(ctrl);
+ }
+}
+EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
+
+void nvmf_error_recovery_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(work,
+ struct nvme_ctrl, err_work);
+
+ nvme_stop_keep_alive(ctrl);
+ flush_work(&ctrl->async_event_work);
+ nvmf_teardown_io_queues(ctrl, false);
+ /* unquiesce to fail fast pending requests */
+ nvme_unquiesce_io_queues(ctrl);
+ nvmf_teardown_admin_queue(ctrl, false);
+ nvme_unquiesce_admin_queue(ctrl);
+ nvme_auth_stop(ctrl);
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure is ok if we started ctrl delete */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DELETING_NOIO);
+ return;
+ }
+
+ nvmf_reconnect_or_remove(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_error_recovery_work);
+
+void nvmf_reset_ctrl_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl =
+ container_of(work, struct nvme_ctrl, reset_work);
+
+ nvme_stop_ctrl(ctrl);
+ nvmf_teardown_ctrl(ctrl, false);
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure is ok if we started ctrl delete */
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DELETING_NOIO);
+ return;
+ }
+
+ if (nvmf_setup_ctrl(ctrl, false))
+ goto out_fail;
+
+ return;
+
+out_fail:
+ ++ctrl->nr_reconnects;
+ nvmf_reconnect_or_remove(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_reset_ctrl_work);
+
+void nvmf_reconnect_ctrl_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, connect_work);
+
+ ++ctrl->nr_reconnects;
+
+ if (nvmf_setup_ctrl(ctrl, false))
+ goto requeue;
+
+ dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
+ ctrl->nr_reconnects);
+
+ ctrl->nr_reconnects = 0;
+
+ return;
+
+requeue:
+ dev_info(ctrl->device, "Failed reconnect attempt %d\n",
+ ctrl->nr_reconnects);
+ nvmf_reconnect_or_remove(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_reconnect_ctrl_work);
+
+void nvmf_error_recovery(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ return;
+
+ dev_warn(ctrl->device, "starting error recovery\n");
+ queue_work(nvme_reset_wq, &ctrl->err_work);
+}
+EXPORT_SYMBOL_GPL(nvmf_error_recovery);
+
static struct class *nvmf_class;
static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index dcac3df8a5f7..345d6de6bc86 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -172,6 +172,23 @@ struct nvmf_transport_ops {
struct nvmf_ctrl_options *opts);
};

+struct nvme_fabrics_ops {
+ int (*alloc_admin_queue)(struct nvme_ctrl *ctrl);
+ int (*start_admin_queue)(struct nvme_ctrl *ctrl);
+ void (*stop_admin_queue)(struct nvme_ctrl *ctrl);
+ void (*free_admin_queue)(struct nvme_ctrl *ctrl);
+ int (*alloc_io_queue)(struct nvme_ctrl *ctrl, int qid);
+ int (*start_io_queue)(struct nvme_ctrl *ctrl, int qid);
+ void (*stop_io_queue)(struct nvme_ctrl *ctrl, int qid);
+ void (*free_io_queue)(struct nvme_ctrl *ctrl, int qid);
+
+ /* these should be replaced with a single one setup_transport() */
+ int (*alloc_admin_tag_set)(struct nvme_ctrl *ctrl);
+ int (*alloc_tag_set)(struct nvme_ctrl *ctrl);
+ unsigned int (*nr_io_queues)(struct nvme_ctrl *ctrl);
+ void (*set_io_queues)(struct nvme_ctrl *ctrl, unsigned int nr_io_queues);
+};
+
static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
@@ -215,5 +232,13 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
+int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new);
+void nvmf_stop_ctrl(struct nvme_ctrl *ctrl);
+void nvmf_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
+void nvmf_reset_ctrl_work(struct work_struct *work);
+void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl);
+void nvmf_error_recovery_work(struct work_struct *work);
+void nvmf_reconnect_ctrl_work(struct work_struct *work);
+void nvmf_error_recovery(struct nvme_ctrl *ctrl);

#endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5aa30b00dd17..fcea2678094c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -244,6 +244,8 @@ enum nvme_ctrl_flags {
NVME_CTRL_STOPPED = 3,
};

+struct nvme_fabrics_ops;
+
struct nvme_ctrl {
bool comp_seen;
enum nvme_ctrl_state state;
@@ -251,6 +253,7 @@ struct nvme_ctrl {
spinlock_t lock;
struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
+ const struct nvme_fabrics_ops *fabrics_ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
struct request_queue *fabrics_q;
--
2.40.0

2023-05-04 09:16:16

by Daniel Wagner

Subject: [RFC v3 2/9] nvme-rdma: factor rdma specific queue init code out

In preparation for moving common code from the fabrics drivers to fabrics.c,
move the RDMA queue specific initialization code into a separate
function.
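
After this patch the admin queue setup is split into a transport-neutral
allocation step and an RDMA-only initialization step, roughly (sketch of
the hunks below):

        error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
        if (error)
                return error;

        /* RDMA-only part: device selection, async SQE, transfer limits */
        error = nvme_rdma_init_queue(&ctrl->ctrl, 0);
        if (error)
                goto out_free_queue;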

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/rdma.c | 65 ++++++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 92e5d0ccf3a9..a78c66278b19 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -826,15 +826,16 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_queue(&ctrl->ctrl, 0);
}

-static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
- bool new)
+static int nvme_rdma_init_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
bool pi_capable = false;
int error;

- error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
- if (error)
- return error;
+ if (qid != 0)
+ /* only admin queue needs additional work. */
+ return 0;
+

ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
@@ -854,6 +855,43 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
*/
error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
+ if (error)
+ return error;
+
+ ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+ ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
+
+ return 0;
+}
+
+static void nvme_rdma_deinit_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ if (qid != 0)
+ return;
+
+ if (ctrl->async_event_sqe.data) {
+ nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ ctrl->async_event_sqe.data = NULL;
+ }
+}
+
+static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
+ bool new)
+{
+ int error;
+
+ error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
+ if (error)
+ return error;
+
+ error = nvme_rdma_init_queue(&ctrl->ctrl, 0);
if (error)
goto out_free_queue;

@@ -863,7 +901,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE);
if (error)
- goto out_free_async_qe;
+ goto out_deinit_admin_queue;

}

@@ -875,13 +913,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error)
goto out_stop_queue;

- ctrl->ctrl.max_segments = ctrl->max_fr_pages;
- ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
- if (pi_capable)
- ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
- else
- ctrl->ctrl.max_integrity_segments = 0;
-
nvme_unquiesce_admin_queue(&ctrl->ctrl);

error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
@@ -899,12 +930,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
out_remove_admin_tag_set:
if (new)
nvme_remove_admin_tag_set(&ctrl->ctrl);
-out_free_async_qe:
- if (ctrl->async_event_sqe.data) {
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
- }
+out_deinit_admin_queue:
+ nvme_rdma_deinit_queue(&ctrl->ctrl, 0);
out_free_queue:
nvme_rdma_free_queue(&ctrl->ctrl, 0);
return error;
--
2.40.0

2023-05-04 09:16:17

by Daniel Wagner

Subject: [RFC v3 6/9] nvme-tcp: replace state machine with generic one

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/tcp.c | 698 ++++++++++------------------------------
1 file changed, 175 insertions(+), 523 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 74ccc84d244a..32c4346b7322 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -519,15 +519,6 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
queue->ddgst_remaining = 0;
}

-static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
-{
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
- return;
-
- dev_warn(ctrl->device, "starting error recovery\n");
- queue_work(nvme_reset_wq, &ctrl->err_work);
-}
-
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
struct nvme_completion *cqe)
{
@@ -539,7 +530,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
dev_err(queue->ctrl->ctrl.device,
"got bad cqe.command_id %#x on queue %d\n",
cqe->command_id, nvme_tcp_queue_id(queue));
- nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
return -EINVAL;
}

@@ -581,7 +572,7 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
dev_err(queue->ctrl->ctrl.device,
"queue %d tag %#x SUCCESS set but not last PDU\n",
nvme_tcp_queue_id(queue), rq->tag);
- nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
return -EPROTO;
}

@@ -895,7 +886,7 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
dev_err(queue->ctrl->ctrl.device,
"receive failed: %d\n", result);
queue->rd_enabled = false;
- nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
return result;
}
}
@@ -945,7 +936,7 @@ static void nvme_tcp_state_change(struct sock *sk)
case TCP_LAST_ACK:
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
- nvme_tcp_error_recovery(&queue->ctrl->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
break;
default:
dev_info(queue->ctrl->ctrl.device,
@@ -1299,34 +1290,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
return 0;
}

-static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
-{
- struct page *page;
- struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- unsigned int noreclaim_flag;
-
- if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- return;
-
- if (queue->hdr_digest || queue->data_digest)
- nvme_tcp_free_crypto(queue);
-
- if (queue->pf_cache.va) {
- page = virt_to_head_page(queue->pf_cache.va);
- __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
- queue->pf_cache.va = NULL;
- }
-
- noreclaim_flag = memalloc_noreclaim_save();
- sock_release(queue->sock);
- memalloc_noreclaim_restore(noreclaim_flag);
-
- kfree(queue->pdu);
- mutex_destroy(&queue->send_mutex);
- mutex_destroy(&queue->queue_lock);
-}
-
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_icreq_pdu *icreq;
@@ -1488,10 +1451,9 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}

-static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
+static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
+ struct nvme_tcp_queue *queue)
{
- struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- struct nvme_tcp_queue *queue = &ctrl->queues[qid];
int ret, rcv_pdu_size;

mutex_init(&queue->queue_lock);
@@ -1501,16 +1463,10 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work);

- if (qid > 0)
- queue->cmnd_capsule_len = nctrl->ioccsz * 16;
- else
- queue->cmnd_capsule_len = sizeof(struct nvme_command) +
- NVME_TCP_ADMIN_CCSZ;
-
ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
- dev_err(nctrl->device,
+ dev_err(ctrl->ctrl.device,
"failed to create socket: %d\n", ret);
goto err_destroy_mutex;
}
@@ -1534,8 +1490,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
sock_set_priority(queue->sock->sk, so_priority);

/* Set socket type of service */
- if (nctrl->opts->tos >= 0)
- ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
+ if (ctrl->ctrl.opts->tos >= 0)
+ ip_sock_set_tos(queue->sock->sk, ctrl->ctrl.opts->tos);

/* Set 10 seconds timeout for icresp recvmsg */
queue->sock->sk->sk_rcvtimeo = 10 * HZ;
@@ -1550,38 +1506,39 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
queue->pdu_offset = 0;
sk_set_memalloc(queue->sock->sk);

- if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
+ if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
sizeof(ctrl->src_addr));
if (ret) {
- dev_err(nctrl->device,
+ dev_err(ctrl->ctrl.device,
"failed to bind queue %d socket %d\n",
- qid, ret);
+ nvme_tcp_queue_id(queue), ret);
goto err_sock;
}
}

- if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) {
- char *iface = nctrl->opts->host_iface;
+ if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_IFACE) {
+ char *iface = ctrl->ctrl.opts->host_iface;
sockptr_t optval = KERNEL_SOCKPTR(iface);

ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
optval, strlen(iface));
if (ret) {
- dev_err(nctrl->device,
+ dev_err(ctrl->ctrl.device,
"failed to bind to interface %s queue %d err %d\n",
- iface, qid, ret);
+ iface, nvme_tcp_queue_id(queue), ret);
goto err_sock;
}
}

- queue->hdr_digest = nctrl->opts->hdr_digest;
- queue->data_digest = nctrl->opts->data_digest;
+ queue->hdr_digest = ctrl->ctrl.opts->hdr_digest;
+ queue->data_digest = ctrl->ctrl.opts->data_digest;
if (queue->hdr_digest || queue->data_digest) {
ret = nvme_tcp_alloc_crypto(queue);
if (ret) {
- dev_err(nctrl->device,
- "failed to allocate queue %d crypto\n", qid);
+ dev_err(ctrl->ctrl.device,
+ "failed to allocate queue %d crypto\n",
+ nvme_tcp_queue_id(queue));
goto err_sock;
}
}
@@ -1594,13 +1551,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
goto err_crypto;
}

- dev_dbg(nctrl->device, "connecting queue %d\n",
- nvme_tcp_queue_id(queue));
+ dev_dbg(ctrl->ctrl.device, "connecting queue %d\n",
+ nvme_tcp_queue_id(queue));

ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
- sizeof(ctrl->addr), 0);
+ sizeof(ctrl->addr), 0);
if (ret) {
- dev_err(nctrl->device,
+ dev_err(ctrl->ctrl.device,
"failed to connect socket: %d\n", ret);
goto err_rcv_pdu;
}
@@ -1644,142 +1601,182 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
return ret;
}

-static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+static void __nvme_tcp_free_queue(struct nvme_tcp_ctrl *ctrl,
+ struct nvme_tcp_queue *queue)
{
- struct socket *sock = queue->sock;
+ struct page *page;
+ unsigned int noreclaim_flag;

- write_lock_bh(&sock->sk->sk_callback_lock);
- sock->sk->sk_user_data = NULL;
- sock->sk->sk_data_ready = queue->data_ready;
- sock->sk->sk_state_change = queue->state_change;
- sock->sk->sk_write_space = queue->write_space;
- write_unlock_bh(&sock->sk->sk_callback_lock);
+ if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ return;
+
+ if (queue->hdr_digest || queue->data_digest)
+ nvme_tcp_free_crypto(queue);
+
+ if (queue->pf_cache.va) {
+ page = virt_to_head_page(queue->pf_cache.va);
+ __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
+ queue->pf_cache.va = NULL;
+ }
+
+ noreclaim_flag = memalloc_noreclaim_save();
+ sock_release(queue->sock);
+ memalloc_noreclaim_restore(noreclaim_flag);
+
+ kfree(queue->pdu);
+ mutex_destroy(&queue->send_mutex);
+ mutex_destroy(&queue->queue_lock);
}

-static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
+
+static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *nctrl)
{
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- nvme_tcp_restore_sock_calls(queue);
- cancel_work_sync(&queue->io_work);
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];
+ int err;
+
+ queue->cmnd_capsule_len = sizeof(struct nvme_command) +
+ NVME_TCP_ADMIN_CCSZ;
+
+ err = __nvme_tcp_alloc_queue(ctrl, queue);
+ if (err)
+ return err;
+
+ err = nvme_tcp_alloc_async_req(ctrl);
+ if (err)
+ __nvme_tcp_free_queue(ctrl, queue);
+
+ return err;
}

-static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+static int nvme_tcp_alloc_io_queue(struct nvme_ctrl *nctrl, int qid)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];

- if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- return;
+ queue->cmnd_capsule_len = nctrl->ioccsz * 16;

- mutex_lock(&queue->queue_lock);
- if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
- __nvme_tcp_stop_queue(queue);
- mutex_unlock(&queue->queue_lock);
+ return __nvme_tcp_alloc_queue(ctrl, queue);
}

-static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
+static void nvme_tcp_free_admin_queue(struct nvme_ctrl *nctrl)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- int ret;
-
- if (idx)
- ret = nvmf_connect_io_queue(nctrl, idx);
- else
- ret = nvmf_connect_admin_queue(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];

- if (!ret) {
- set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
- } else {
- if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
- __nvme_tcp_stop_queue(&ctrl->queues[idx]);
- dev_err(nctrl->device,
- "failed to connect queue: %d ret=%d\n", idx, ret);
+ if (ctrl->async_req.pdu) {
+ cancel_work_sync(&nctrl->async_event_work);
+ nvme_tcp_free_async_req(ctrl);
+ ctrl->async_req.pdu = NULL;
}
- return ret;
+ __nvme_tcp_free_queue(ctrl, queue);
}

-static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
+static void nvme_tcp_free_io_queue(struct nvme_ctrl *nctrl, int qid)
{
- if (to_tcp_ctrl(ctrl)->async_req.pdu) {
- cancel_work_sync(&ctrl->async_event_work);
- nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
- to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
- }
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];

- nvme_tcp_free_queue(ctrl, 0);
+ __nvme_tcp_free_queue(ctrl, queue);
}

-static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
+static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
{
- int i;
+ struct socket *sock = queue->sock;

- for (i = 1; i < ctrl->queue_count; i++)
- nvme_tcp_free_queue(ctrl, i);
+ write_lock_bh(&sock->sk->sk_callback_lock);
+ sock->sk->sk_user_data = NULL;
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
}

-static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
+static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
- int i;
-
- for (i = 1; i < ctrl->queue_count; i++)
- nvme_tcp_stop_queue(ctrl, i);
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ nvme_tcp_restore_sock_calls(queue);
+ cancel_work_sync(&queue->io_work);
}

-static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
- int first, int last)
+static int nvme_tcp_start_admin_queue(struct nvme_ctrl *nctrl)
{
- int i, ret;
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];
+ int ret;

- for (i = first; i < last; i++) {
- ret = nvme_tcp_start_queue(ctrl, i);
- if (ret)
- goto out_stop_queues;
+ ret = nvmf_connect_admin_queue(nctrl);
+ if (!ret) {
+ set_bit(NVME_TCP_Q_LIVE, &queue->flags);
+ } else {
+ if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
+ dev_err(nctrl->device,
+ "failed to connect queue: %d ret=%d\n", 0, ret);
}
-
- return 0;
-
-out_stop_queues:
- for (i--; i >= first; i--)
- nvme_tcp_stop_queue(ctrl, i);
return ret;
}

-static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
+static int nvme_tcp_start_io_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];
int ret;

- ret = nvme_tcp_alloc_queue(ctrl, 0);
- if (ret)
- return ret;
+ ret = nvmf_connect_io_queue(nctrl, qid);
+ if (!ret) {
+ set_bit(NVME_TCP_Q_LIVE, &queue->flags);
+ } else {
+ if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
+ dev_err(nctrl->device,
+ "failed to connect queue: %d ret=%d\n", qid, ret);
+ }
+ return ret;
+}

- ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
- if (ret)
- goto out_free_queue;
+static void nvme_tcp_stop_admin_queue(struct nvme_ctrl *nctrl)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[0];

- return 0;
+ if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ return;

-out_free_queue:
- nvme_tcp_free_queue(ctrl, 0);
- return ret;
+ mutex_lock(&queue->queue_lock);
+ if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
+ mutex_unlock(&queue->queue_lock);
}

-static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
+static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
{
- int i, ret;
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];

- for (i = 1; i < ctrl->queue_count; i++) {
- ret = nvme_tcp_alloc_queue(ctrl, i);
- if (ret)
- goto out_free_queues;
- }
+ if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+ return;

- return 0;
+ mutex_lock(&queue->queue_lock);
+ if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ __nvme_tcp_stop_queue(queue);
+ mutex_unlock(&queue->queue_lock);
+}

-out_free_queues:
- for (i--; i >= 1; i--)
- nvme_tcp_free_queue(ctrl, i);
+static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
+{
+ return nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
+ &nvme_tcp_admin_mq_ops,
+ sizeof(struct nvme_tcp_request));
+}
+
+static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *ctrl)
+{
+ return nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
+ &nvme_tcp_mq_ops,
+ ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
+ sizeof(struct nvme_tcp_request));

- return ret;
}

static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
@@ -1828,370 +1825,9 @@ static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
}
}

-static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
-{
- unsigned int nr_io_queues;
- int ret;
-
- nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
- ret = nvme_set_queue_count(ctrl, &nr_io_queues);
- if (ret)
- return ret;
-
- if (nr_io_queues == 0) {
- dev_err(ctrl->device,
- "unable to set any I/O queues\n");
- return -ENOMEM;
- }
-
- ctrl->queue_count = nr_io_queues + 1;
- dev_info(ctrl->device,
- "creating %d I/O queues.\n", nr_io_queues);
-
- nvme_tcp_set_io_queues(ctrl, nr_io_queues);
-
- return __nvme_tcp_alloc_io_queues(ctrl);
-}
-
-static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
-{
- nvme_tcp_stop_io_queues(ctrl);
- if (remove)
- nvme_remove_io_tag_set(ctrl);
- nvme_tcp_free_io_queues(ctrl);
-}
-
-static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
-{
- int ret, nr_queues;
-
- ret = nvme_tcp_alloc_io_queues(ctrl);
- if (ret)
- return ret;
-
- if (new) {
- ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
- &nvme_tcp_mq_ops,
- ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
- sizeof(struct nvme_tcp_request));
- if (ret)
- goto out_free_io_queues;
- }
-
- /*
- * Only start IO queues for which we have allocated the tagset
- * and limitted it to the available queues. On reconnects, the
- * queue number might have changed.
- */
- nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count);
- ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues);
- if (ret)
- goto out_cleanup_connect_q;
-
- if (!new) {
- nvme_unquiesce_io_queues(ctrl);
- if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
- /*
- * If we timed out waiting for freeze we are likely to
- * be stuck. Fail the controller initialization just
- * to be safe.
- */
- ret = -ENODEV;
- goto out_wait_freeze_timed_out;
- }
- blk_mq_update_nr_hw_queues(ctrl->tagset,
- ctrl->queue_count - 1);
- nvme_unfreeze(ctrl);
- }
-
- /*
- * If the number of queues has increased (reconnect case)
- * start all new queues now.
- */
- ret = nvme_tcp_start_io_queues(ctrl, nr_queues,
- ctrl->tagset->nr_hw_queues + 1);
- if (ret)
- goto out_wait_freeze_timed_out;
-
- return 0;
-
-out_wait_freeze_timed_out:
- nvme_quiesce_io_queues(ctrl);
- nvme_sync_io_queues(ctrl);
- nvme_tcp_stop_io_queues(ctrl);
-out_cleanup_connect_q:
- nvme_cancel_tagset(ctrl);
- if (new)
- nvme_remove_io_tag_set(ctrl);
-out_free_io_queues:
- nvme_tcp_free_io_queues(ctrl);
- return ret;
-}
-
-static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
-{
- nvme_tcp_stop_queue(ctrl, 0);
- if (remove)
- nvme_remove_admin_tag_set(ctrl);
- nvme_tcp_free_admin_queue(ctrl);
-}
-
-static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
-{
- int error;
-
- error = nvme_tcp_alloc_admin_queue(ctrl);
- if (error)
- return error;
-
- if (new) {
- error = nvme_alloc_admin_tag_set(ctrl,
- &to_tcp_ctrl(ctrl)->admin_tag_set,
- &nvme_tcp_admin_mq_ops,
- sizeof(struct nvme_tcp_request));
- if (error)
- goto out_free_queue;
- }
-
- error = nvme_tcp_start_queue(ctrl, 0);
- if (error)
- goto out_cleanup_tagset;
-
- error = nvme_enable_ctrl(ctrl);
- if (error)
- goto out_stop_queue;
-
- nvme_unquiesce_admin_queue(ctrl);
-
- error = nvme_init_ctrl_finish(ctrl, false);
- if (error)
- goto out_quiesce_queue;
-
- return 0;
-
-out_quiesce_queue:
- nvme_quiesce_admin_queue(ctrl);
- blk_sync_queue(ctrl->admin_q);
-out_stop_queue:
- nvme_tcp_stop_queue(ctrl, 0);
- nvme_cancel_admin_tagset(ctrl);
-out_cleanup_tagset:
- if (new)
- nvme_remove_admin_tag_set(ctrl);
-out_free_queue:
- nvme_tcp_free_admin_queue(ctrl);
- return error;
-}
-
-static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
- bool remove)
-{
- nvme_quiesce_admin_queue(ctrl);
- blk_sync_queue(ctrl->admin_q);
- nvme_tcp_stop_queue(ctrl, 0);
- nvme_cancel_admin_tagset(ctrl);
- if (remove)
- nvme_unquiesce_admin_queue(ctrl);
- nvme_tcp_destroy_admin_queue(ctrl, remove);
-}
-
-static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
- bool remove)
-{
- if (ctrl->queue_count <= 1)
- return;
- nvme_quiesce_admin_queue(ctrl);
- nvme_start_freeze(ctrl);
- nvme_quiesce_io_queues(ctrl);
- nvme_sync_io_queues(ctrl);
- nvme_tcp_stop_io_queues(ctrl);
- nvme_cancel_tagset(ctrl);
- if (remove)
- nvme_unquiesce_io_queues(ctrl);
- nvme_tcp_destroy_io_queues(ctrl, remove);
-}
-
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
-{
- /* If we are resetting/deleting then do nothing */
- if (ctrl->state != NVME_CTRL_CONNECTING) {
- WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
- ctrl->state == NVME_CTRL_LIVE);
- return;
- }
-
- if (nvmf_should_reconnect(ctrl)) {
- dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
- ctrl->opts->reconnect_delay);
- queue_delayed_work(nvme_wq, &ctrl->connect_work,
- ctrl->opts->reconnect_delay * HZ);
- } else {
- dev_info(ctrl->device, "Removing controller...\n");
- nvme_delete_ctrl(ctrl);
- }
-}
-
-static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
-{
- struct nvmf_ctrl_options *opts = ctrl->opts;
- int ret;
-
- ret = nvme_tcp_configure_admin_queue(ctrl, new);
- if (ret)
- return ret;
-
- if (ctrl->icdoff) {
- ret = -EOPNOTSUPP;
- dev_err(ctrl->device, "icdoff is not supported!\n");
- goto destroy_admin;
- }
-
- if (!nvme_ctrl_sgl_supported(ctrl)) {
- ret = -EOPNOTSUPP;
- dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
- goto destroy_admin;
- }
-
- if (opts->queue_size > ctrl->sqsize + 1)
- dev_warn(ctrl->device,
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
- opts->queue_size, ctrl->sqsize + 1);
-
- if (ctrl->sqsize + 1 > ctrl->maxcmd) {
- dev_warn(ctrl->device,
- "sqsize %u > ctrl maxcmd %u, clamping down\n",
- ctrl->sqsize + 1, ctrl->maxcmd);
- ctrl->sqsize = ctrl->maxcmd - 1;
- }
-
- if (ctrl->queue_count > 1) {
- ret = nvme_tcp_configure_io_queues(ctrl, new);
- if (ret)
- goto destroy_admin;
- }
-
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
- /*
- * state change failure is ok if we started ctrl delete,
- * unless we're during creation of a new controller to
- * avoid races with teardown flow.
- */
- WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
- ctrl->state != NVME_CTRL_DELETING_NOIO);
- WARN_ON_ONCE(new);
- ret = -EINVAL;
- goto destroy_io;
- }
-
- nvme_start_ctrl(ctrl);
- return 0;
-
-destroy_io:
- if (ctrl->queue_count > 1) {
- nvme_quiesce_io_queues(ctrl);
- nvme_sync_io_queues(ctrl);
- nvme_tcp_stop_io_queues(ctrl);
- nvme_cancel_tagset(ctrl);
- nvme_tcp_destroy_io_queues(ctrl, new);
- }
-destroy_admin:
- nvme_quiesce_admin_queue(ctrl);
- blk_sync_queue(ctrl->admin_q);
- nvme_tcp_stop_queue(ctrl, 0);
- nvme_cancel_admin_tagset(ctrl);
- nvme_tcp_destroy_admin_queue(ctrl, new);
- return ret;
-}
-
-static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
-{
- struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
- struct nvme_ctrl, connect_work);
-
- ++ctrl->nr_reconnects;
-
- if (nvme_tcp_setup_ctrl(ctrl, false))
- goto requeue;
-
- dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
- ctrl->nr_reconnects);
-
- ctrl->nr_reconnects = 0;
-
- return;
-
-requeue:
- dev_info(ctrl->device, "Failed reconnect attempt %d\n",
- ctrl->nr_reconnects);
- nvme_tcp_reconnect_or_remove(ctrl);
-}
-
-static void nvme_tcp_error_recovery_work(struct work_struct *work)
-{
- struct nvme_ctrl *ctrl = container_of(work,
- struct nvme_ctrl, err_work);
-
- nvme_stop_keep_alive(ctrl);
- flush_work(&ctrl->async_event_work);
- nvme_tcp_teardown_io_queues(ctrl, false);
- /* unquiesce to fail fast pending requests */
- nvme_unquiesce_io_queues(ctrl);
- nvme_tcp_teardown_admin_queue(ctrl, false);
- nvme_unquiesce_admin_queue(ctrl);
- nvme_auth_stop(ctrl);
-
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure is ok if we started ctrl delete */
- WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
- ctrl->state != NVME_CTRL_DELETING_NOIO);
- return;
- }
-
- nvme_tcp_reconnect_or_remove(ctrl);
-}
-
-static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
-{
- nvme_tcp_teardown_io_queues(ctrl, shutdown);
- nvme_quiesce_admin_queue(ctrl);
- nvme_disable_ctrl(ctrl, shutdown);
- nvme_tcp_teardown_admin_queue(ctrl, shutdown);
-}
-
static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
{
- nvme_tcp_teardown_ctrl(ctrl, true);
-}
-
-static void nvme_reset_ctrl_work(struct work_struct *work)
-{
- struct nvme_ctrl *ctrl =
- container_of(work, struct nvme_ctrl, reset_work);
-
- nvme_stop_ctrl(ctrl);
- nvme_tcp_teardown_ctrl(ctrl, false);
-
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure is ok if we started ctrl delete */
- WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
- ctrl->state != NVME_CTRL_DELETING_NOIO);
- return;
- }
-
- if (nvme_tcp_setup_ctrl(ctrl, false))
- goto out_fail;
-
- return;
-
-out_fail:
- ++ctrl->nr_reconnects;
- nvme_tcp_reconnect_or_remove(ctrl);
-}
-
-static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
-{
- flush_work(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->connect_work);
+ nvmf_teardown_ctrl(ctrl, true);
}

static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
@@ -2275,7 +1911,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;

- nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
+ nvme_tcp_stop_io_queue(ctrl, nvme_tcp_queue_id(req->queue));
nvmf_complete_timed_out_request(rq);
}

@@ -2314,7 +1950,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
* LIVE state should trigger the normal error recovery which will
* handle completing this request.
*/
- nvme_tcp_error_recovery(ctrl);
+ nvmf_error_recovery(ctrl);
return BLK_EH_RESET_TIMER;
}

@@ -2540,7 +2176,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
.get_address = nvme_tcp_get_address,
- .stop_ctrl = nvme_tcp_stop_ctrl,
+ .stop_ctrl = nvmf_stop_ctrl,
};

static bool
@@ -2560,6 +2196,21 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}

+static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
+ .alloc_admin_queue = nvme_tcp_alloc_admin_queue,
+ .free_admin_queue = nvme_tcp_free_admin_queue,
+ .start_admin_queue = nvme_tcp_start_admin_queue,
+ .stop_admin_queue = nvme_tcp_stop_admin_queue,
+ .alloc_io_queue = nvme_tcp_alloc_io_queue,
+ .free_io_queue = nvme_tcp_free_io_queue,
+ .start_io_queue = nvme_tcp_start_io_queue,
+ .stop_io_queue = nvme_tcp_stop_io_queue,
+ .alloc_admin_tag_set = nvme_tcp_alloc_admin_tag_set,
+ .alloc_tag_set = nvme_tcp_alloc_tag_set,
+ .nr_io_queues = nvme_tcp_nr_io_queues,
+ .set_io_queues = nvme_tcp_set_io_queues,
+};
+
static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -2572,15 +2223,16 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,

INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
+ ctrl->ctrl.fabrics_ops = &nvme_tcp_fabrics_ops;
ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;

INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
- nvme_tcp_reconnect_ctrl_work);
- INIT_WORK(&ctrl->ctrl.err_work, nvme_tcp_error_recovery_work);
- INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
+ nvmf_reconnect_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.err_work, nvmf_error_recovery_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvmf_reset_ctrl_work);

if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =
@@ -2641,7 +2293,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
goto out_uninit_ctrl;
}

- ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
+ ret = nvmf_setup_ctrl(&ctrl->ctrl, true);
if (ret)
goto out_uninit_ctrl;

--
2.40.0

2023-05-04 09:16:19

by Daniel Wagner

[permalink] [raw]
Subject: [RFC v3 7/9] nvme-rdma: replace state machine with generic one

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/rdma.c | 703 ++++++++++-----------------------------
1 file changed, 173 insertions(+), 530 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b0ab5a9d5fe0..1fde65e8c2b5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -568,35 +568,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
return ret;
}

-static int nvme_rdma_alloc_queue(struct nvme_ctrl *nctrl, int idx)
+static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
+ struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct nvme_rdma_queue *queue;
struct sockaddr *src_addr = NULL;
- size_t queue_size;
int ret;

- if (idx == 0)
- queue_size = NVME_AQ_DEPTH;
- else
- queue_size = ctrl->ctrl.sqsize + 1;
-
- queue = &ctrl->queues[idx];
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
- if (idx && ctrl->ctrl.max_integrity_segments)
- queue->pi_support = true;
- else
- queue->pi_support = false;
init_completion(&queue->cm_done);

- if (idx > 0)
- queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
- else
- queue->cmnd_capsule_len = sizeof(struct nvme_command);
-
- queue->queue_size = queue_size;
-
queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(queue->cm_id)) {
@@ -638,62 +619,120 @@ static int nvme_rdma_alloc_queue(struct nvme_ctrl *nctrl, int idx)
return ret;
}

-static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+static void __nvme_rdma_free_queue(struct nvme_rdma_ctrl *ctrl,
+ struct nvme_rdma_queue *queue)
{
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
+ if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+ return;
+
+ rdma_destroy_id(queue->cm_id);
+ nvme_rdma_destroy_queue_ib(queue);
+ mutex_destroy(&queue->queue_lock);
}

-static void nvme_rdma_stop_queue(struct nvme_ctrl *nctrl, int qid)
+static int nvme_rdma_alloc_admin_queue(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+ struct nvme_rdma_queue *queue = &ctrl->queues[0];
+ bool pi_capable = false;
+ int ret;

- mutex_lock(&queue->queue_lock);
- if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
- __nvme_rdma_stop_queue(queue);
- mutex_unlock(&queue->queue_lock);
+ queue->cmnd_capsule_len = sizeof(struct nvme_command);
+ queue->queue_size = NVME_AQ_DEPTH;
+ queue->pi_support = false;
+
+ ret = __nvme_rdma_alloc_queue(ctrl, queue);
+ if (ret)
+ return ret;
+
+ ctrl->device = queue->device;
+ nctrl->numa_node = ibdev_to_node(ctrl->device->dev);
+
+ /* T10-PI support */
+ if (ctrl->device->dev->attrs.kernel_cap_flags &
+ IBK_INTEGRITY_HANDOVER)
+ pi_capable = true;
+
+ ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+ pi_capable);
+
+ /*
+ * Bind the async event SQE DMA mapping to the admin queue lifetime.
+ * It's safe, since any change in the underlying RDMA device will issue
+ * error recovery and queue re-creation.
+ */
+ ret = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ if (ret) {
+ __nvme_rdma_free_queue(ctrl, queue);
+ return ret;
+ }
+
+ ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+ ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
+
+ return 0;
}

-static void nvme_rdma_free_queue(struct nvme_ctrl *nctrl, int qid)
+static int nvme_rdma_alloc_io_queue(struct nvme_ctrl *nctrl, int qid)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct nvme_rdma_queue *queue = &ctrl->queues[qid];

- if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
- return;
+ queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
+ queue->queue_size = ctrl->ctrl.sqsize + 1;
+ if (ctrl->ctrl.max_integrity_segments)
+ queue->pi_support = true;
+ else
+ queue->pi_support = false;

- rdma_destroy_id(queue->cm_id);
- nvme_rdma_destroy_queue_ib(queue);
- mutex_destroy(&queue->queue_lock);
+ return __nvme_rdma_alloc_queue(ctrl, queue);
}

-static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_free_admin_queue(struct nvme_ctrl *nctrl)
{
- int i;
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[0];
+
+ if (ctrl->async_event_sqe.data) {
+ cancel_work_sync(&ctrl->ctrl.async_event_work);
+ nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+ sizeof(struct nvme_command), DMA_TO_DEVICE);
+ ctrl->async_event_sqe.data = NULL;
+ }

- for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_free_queue(&ctrl->ctrl, i);
+ __nvme_rdma_free_queue(ctrl, queue);
}

-static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_free_io_queue(struct nvme_ctrl *nctrl, int qid)
{
- int i;
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];

- for (i = 1; i < ctrl->ctrl.queue_count; i++)
- nvme_rdma_stop_queue(&ctrl->ctrl, i);
+ __nvme_rdma_free_queue(ctrl, queue);
+}
+
+static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+{
+ mutex_lock(&queue->queue_lock);
+ if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) {
+ rdma_disconnect(queue->cm_id);
+ ib_drain_qp(queue->qp);
+ }
+ mutex_unlock(&queue->queue_lock);
}

-static int nvme_rdma_start_queue(struct nvme_ctrl *nctrl, int idx)
+static int nvme_rdma_start_admin_queue(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct nvme_rdma_queue *queue = &ctrl->queues[idx];
+ struct nvme_rdma_queue *queue = &ctrl->queues[0];
int ret;

- if (idx)
- ret = nvmf_connect_io_queue(nctrl, idx);
- else
- ret = nvmf_connect_admin_queue(nctrl);
+ ret = nvmf_connect_admin_queue(nctrl);

if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
@@ -701,58 +740,74 @@ static int nvme_rdma_start_queue(struct nvme_ctrl *nctrl, int idx)
if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
__nvme_rdma_stop_queue(queue);
dev_info(ctrl->ctrl.device,
- "failed to connect queue: %d ret=%d\n", idx, ret);
+ "failed to connect queue: %d ret=%d\n", 0, ret);
}
return ret;
}

-static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
- int first, int last)
+static int nvme_rdma_start_io_queue(struct nvme_ctrl *nctrl, int idx)
{
- int i, ret = 0;
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[idx];
+ int ret;

- for (i = first; i < last; i++) {
- ret = nvme_rdma_start_queue(&ctrl->ctrl, i);
- if (ret)
- goto out_stop_queues;
+ ret = nvmf_connect_io_queue(nctrl, idx);
+ if (!ret) {
+ set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
+ } else {
+ if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+ __nvme_rdma_stop_queue(queue);
+ dev_info(ctrl->ctrl.device,
+ "failed to connect queue: %d ret=%d\n", idx, ret);
}
+ return ret;
+}

- return 0;
+static void nvme_rdma_stop_admin_queue(struct nvme_ctrl *nctrl)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[0];

-out_stop_queues:
- for (i--; i >= first; i--)
- nvme_rdma_stop_queue(&ctrl->ctrl, i);
- return ret;
+ __nvme_rdma_stop_queue(queue);
}

-static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
{
- struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+ struct nvme_rdma_queue *queue = &ctrl->queues[qid];
+
+ __nvme_rdma_stop_queue(queue);
+}
+
+static unsigned int nvme_rdma_nr_io_queues(struct nvme_ctrl *ctrl)
+{
+ struct ib_device *ibdev = to_rdma_ctrl(ctrl)->device->dev;
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ unsigned int nr_io_queues;
+
+ nr_io_queues = min_t(unsigned int, ibdev->num_comp_vectors,
+ min(opts->nr_io_queues, num_online_cpus()));
+ nr_io_queues += min_t(unsigned int, ibdev->num_comp_vectors,
+ min(opts->nr_write_queues, num_online_cpus()));
+ nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
+
+ return nr_io_queues;
+}
+
+static void nvme_rdma_set_io_queues(struct nvme_ctrl *nctrl,
+ unsigned int nr_io_queues)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct ib_device *ibdev = ctrl->device->dev;
- unsigned int nr_io_queues, nr_default_queues;
+ struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ unsigned int nr_default_queues;
unsigned int nr_read_queues, nr_poll_queues;
- int i, ret;

nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
min(opts->nr_io_queues, num_online_cpus()));
nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors,
min(opts->nr_write_queues, num_online_cpus()));
nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
- nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
-
- ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
- if (ret)
- return ret;
-
- if (nr_io_queues == 0) {
- dev_err(ctrl->ctrl.device,
- "unable to set any I/O queues\n");
- return -ENOMEM;
- }
-
- ctrl->ctrl.queue_count = nr_io_queues + 1;
- dev_info(ctrl->ctrl.device,
- "creating %d I/O queues.\n", nr_io_queues);

if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
/*
@@ -781,20 +836,6 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
ctrl->io_queues[HCTX_TYPE_POLL] =
min(nr_poll_queues, nr_io_queues);
}
-
- for (i = 1; i < ctrl->ctrl.queue_count; i++) {
- ret = nvme_rdma_alloc_queue(&ctrl->ctrl, i);
- if (ret)
- goto out_free_queues;
- }
-
- return 0;
-
-out_free_queues:
- for (i--; i >= 1; i--)
- nvme_rdma_free_queue(&ctrl->ctrl, i);
-
- return ret;
}

static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
@@ -812,231 +853,6 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
cmd_size);
}

-static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
-{
- if (ctrl->async_event_sqe.data) {
- cancel_work_sync(&ctrl->ctrl.async_event_work);
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
- }
- nvme_rdma_free_queue(&ctrl->ctrl, 0);
-}
-
-static int nvme_rdma_init_queue(struct nvme_ctrl *nctrl, int qid)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- bool pi_capable = false;
- int error;
-
- if (qid != 0)
- /* only admin queue needs additional work. */
- return 0;
-
-
- ctrl->device = ctrl->queues[0].device;
- ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
-
- /* T10-PI support */
- if (ctrl->device->dev->attrs.kernel_cap_flags &
- IBK_INTEGRITY_HANDOVER)
- pi_capable = true;
-
- ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
- pi_capable);
-
- /*
- * Bind the async event SQE DMA mapping to the admin queue lifetime.
- * It's safe, since any chage in the underlying RDMA device will issue
- * error recovery and queue re-creation.
- */
- error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- if (error)
- return error;
-
- ctrl->ctrl.max_segments = ctrl->max_fr_pages;
- ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
- if (pi_capable)
- ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
- else
- ctrl->ctrl.max_integrity_segments = 0;
-
- return 0;
-}
-
-static void nvme_rdma_deinit_queue(struct nvme_ctrl *nctrl, int qid)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- if (qid != 0)
- return;
-
- if (ctrl->async_event_sqe.data) {
- nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
- sizeof(struct nvme_command), DMA_TO_DEVICE);
- ctrl->async_event_sqe.data = NULL;
- }
-}
-
-static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
- bool new)
-{
- int error;
-
- error = nvme_rdma_alloc_queue(&ctrl->ctrl, 0);
- if (error)
- return error;
-
- error = nvme_rdma_init_queue(&ctrl->ctrl, 0);
- if (error)
- goto out_free_queue;
-
- if (new) {
- error = nvme_alloc_admin_tag_set(&ctrl->ctrl,
- &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops,
- sizeof(struct nvme_rdma_request) +
- NVME_RDMA_DATA_SGL_SIZE);
- if (error)
- goto out_deinit_admin_queue;
-
- }
-
- error = nvme_rdma_start_queue(&ctrl->ctrl, 0);
- if (error)
- goto out_remove_admin_tag_set;
-
- error = nvme_enable_ctrl(&ctrl->ctrl);
- if (error)
- goto out_stop_queue;
-
- nvme_unquiesce_admin_queue(&ctrl->ctrl);
-
- error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
- if (error)
- goto out_quiesce_queue;
-
- return 0;
-
-out_quiesce_queue:
- nvme_quiesce_admin_queue(&ctrl->ctrl);
- blk_sync_queue(ctrl->ctrl.admin_q);
-out_stop_queue:
- nvme_rdma_stop_queue(&ctrl->ctrl, 0);
- nvme_cancel_admin_tagset(&ctrl->ctrl);
-out_remove_admin_tag_set:
- if (new)
- nvme_remove_admin_tag_set(&ctrl->ctrl);
-out_deinit_admin_queue:
- nvme_rdma_deinit_queue(&ctrl->ctrl, 0);
-out_free_queue:
- nvme_rdma_free_queue(&ctrl->ctrl, 0);
- return error;
-}
-
-static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
-{
- int ret, nr_queues;
-
- ret = nvme_rdma_alloc_io_queues(ctrl);
- if (ret)
- return ret;
-
- if (new) {
- ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl);
- if (ret)
- goto out_free_io_queues;
- }
-
- /*
- * Only start IO queues for which we have allocated the tagset
- * and limitted it to the available queues. On reconnects, the
- * queue number might have changed.
- */
- nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count);
- ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues);
- if (ret)
- goto out_cleanup_tagset;
-
- if (!new) {
- nvme_unquiesce_io_queues(&ctrl->ctrl);
- if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
- /*
- * If we timed out waiting for freeze we are likely to
- * be stuck. Fail the controller initialization just
- * to be safe.
- */
- ret = -ENODEV;
- goto out_wait_freeze_timed_out;
- }
- blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
- ctrl->ctrl.queue_count - 1);
- nvme_unfreeze(&ctrl->ctrl);
- }
-
- /*
- * If the number of queues has increased (reconnect case)
- * start all new queues now.
- */
- ret = nvme_rdma_start_io_queues(ctrl, nr_queues,
- ctrl->tag_set.nr_hw_queues + 1);
- if (ret)
- goto out_wait_freeze_timed_out;
-
- return 0;
-
-out_wait_freeze_timed_out:
- nvme_quiesce_io_queues(&ctrl->ctrl);
- nvme_sync_io_queues(&ctrl->ctrl);
- nvme_rdma_stop_io_queues(ctrl);
-out_cleanup_tagset:
- nvme_cancel_tagset(&ctrl->ctrl);
- if (new)
- nvme_remove_io_tag_set(&ctrl->ctrl);
-out_free_io_queues:
- nvme_rdma_free_io_queues(ctrl);
- return ret;
-}
-
-static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
- bool remove)
-{
- nvme_quiesce_admin_queue(&ctrl->ctrl);
- blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->ctrl, 0);
- nvme_cancel_admin_tagset(&ctrl->ctrl);
- if (remove) {
- nvme_unquiesce_admin_queue(&ctrl->ctrl);
- nvme_remove_admin_tag_set(&ctrl->ctrl);
- }
- nvme_rdma_destroy_admin_queue(ctrl);
-}
-
-static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
- bool remove)
-{
- if (ctrl->ctrl.queue_count > 1) {
- nvme_start_freeze(&ctrl->ctrl);
- nvme_quiesce_io_queues(&ctrl->ctrl);
- nvme_sync_io_queues(&ctrl->ctrl);
- nvme_rdma_stop_io_queues(ctrl);
- nvme_cancel_tagset(&ctrl->ctrl);
- if (remove) {
- nvme_unquiesce_io_queues(&ctrl->ctrl);
- nvme_remove_io_tag_set(&ctrl->ctrl);
- }
- nvme_rdma_free_io_queues(ctrl);
- }
-}
-
-static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- flush_work(&ctrl->ctrl.err_work);
- cancel_delayed_work_sync(&ctrl->ctrl.connect_work);
-}
-
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -1054,169 +870,13 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}

-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
-{
- /* If we are resetting/deleting then do nothing */
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
- WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_LIVE);
- return;
- }
-
- if (nvmf_should_reconnect(&ctrl->ctrl)) {
- dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
- ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_wq, &ctrl->ctrl.connect_work,
- ctrl->ctrl.opts->reconnect_delay * HZ);
- } else {
- nvme_delete_ctrl(&ctrl->ctrl);
- }
-}
-
-static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
-{
- int ret;
- bool changed;
-
- ret = nvme_rdma_configure_admin_queue(ctrl, new);
- if (ret)
- return ret;
-
- if (ctrl->ctrl.icdoff) {
- ret = -EOPNOTSUPP;
- dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
- goto destroy_admin;
- }
-
- if (!(ctrl->ctrl.sgls & (1 << 2))) {
- ret = -EOPNOTSUPP;
- dev_err(ctrl->ctrl.device,
- "Mandatory keyed sgls are not supported!\n");
- goto destroy_admin;
- }
-
- if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
- dev_warn(ctrl->ctrl.device,
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
- ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
- }
-
- if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
- dev_warn(ctrl->ctrl.device,
- "ctrl sqsize %u > max queue size %u, clamping down\n",
- ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
- ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
- }
-
- if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
- dev_warn(ctrl->ctrl.device,
- "sqsize %u > ctrl maxcmd %u, clamping down\n",
- ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
- ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
- }
-
- if (ctrl->ctrl.sgls & (1 << 20))
- ctrl->use_inline_data = true;
-
- if (ctrl->ctrl.queue_count > 1) {
- ret = nvme_rdma_configure_io_queues(ctrl, new);
- if (ret)
- goto destroy_admin;
- }
-
- changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
- if (!changed) {
- /*
- * state change failure is ok if we started ctrl delete,
- * unless we're during creation of a new controller to
- * avoid races with teardown flow.
- */
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
- WARN_ON_ONCE(new);
- ret = -EINVAL;
- goto destroy_io;
- }
-
- nvme_start_ctrl(&ctrl->ctrl);
- return 0;
-
-destroy_io:
- if (ctrl->ctrl.queue_count > 1) {
- nvme_quiesce_io_queues(&ctrl->ctrl);
- nvme_sync_io_queues(&ctrl->ctrl);
- nvme_rdma_stop_io_queues(ctrl);
- nvme_cancel_tagset(&ctrl->ctrl);
- if (new)
- nvme_remove_io_tag_set(&ctrl->ctrl);
- nvme_rdma_free_io_queues(ctrl);
- }
-destroy_admin:
- nvme_quiesce_admin_queue(&ctrl->ctrl);
- blk_sync_queue(ctrl->ctrl.admin_q);
- nvme_rdma_stop_queue(&ctrl->ctrl, 0);
- nvme_cancel_admin_tagset(&ctrl->ctrl);
- if (new)
- nvme_remove_admin_tag_set(&ctrl->ctrl);
- nvme_rdma_destroy_admin_queue(ctrl);
- return ret;
-}
-
-static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
-{
- struct nvme_ctrl *nctrl = container_of(to_delayed_work(work),
- struct nvme_ctrl, connect_work);
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- ++ctrl->ctrl.nr_reconnects;
-
- if (nvme_rdma_setup_ctrl(ctrl, false))
- goto requeue;
-
- dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
- ctrl->ctrl.nr_reconnects);
-
- ctrl->ctrl.nr_reconnects = 0;
-
- return;
-
-requeue:
- dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
- ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
-}
-
-static void nvme_rdma_error_recovery_work(struct work_struct *work)
+static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
{
- struct nvme_ctrl *nctrl = container_of(work,
- struct nvme_ctrl, err_work);
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

- nvme_stop_keep_alive(&ctrl->ctrl);
- flush_work(&ctrl->ctrl.async_event_work);
- nvme_rdma_teardown_io_queues(ctrl, false);
- nvme_unquiesce_io_queues(&ctrl->ctrl);
- nvme_rdma_teardown_admin_queue(ctrl, false);
- nvme_unquiesce_admin_queue(&ctrl->ctrl);
- nvme_auth_stop(&ctrl->ctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure is ok if we started ctrl delete */
- WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
- ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
- return;
- }
-
- nvme_rdma_reconnect_or_remove(ctrl);
-}
-
-static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
-{
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return;
-
- dev_warn(ctrl->ctrl.device, "starting error recovery\n");
- queue_work(nvme_reset_wq, &ctrl->ctrl.err_work);
+ return nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
+ &nvme_rdma_admin_mq_ops,
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE);
}

static void nvme_rdma_end_request(struct nvme_rdma_request *req)
@@ -1240,7 +900,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
"%s for CQE 0x%p failed with status %s (%d)\n",
op, wc->wr_cqe,
ib_wc_status_msg(wc->status), wc->status);
- nvme_rdma_error_recovery(ctrl);
+ nvmf_error_recovery(&ctrl->ctrl);
}

static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1759,7 +1419,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
dev_err(queue->ctrl->ctrl.device,
"got bad command_id %#x on QP %#x\n",
cqe->command_id, queue->qp->qp_num);
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
return;
}
req = blk_mq_rq_to_pdu(rq);
@@ -1773,7 +1433,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
dev_err(queue->ctrl->ctrl.device,
"Bogus remote invalidation for rkey %#x\n",
req->mr ? req->mr->rkey : 0);
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
}
} else if (req->mr) {
int ret;
@@ -1783,7 +1443,7 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
dev_err(queue->ctrl->ctrl.device,
"Queueing INV WR for rkey %#x failed (%d)\n",
req->mr->rkey, ret);
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
}
/* the local invalidation completion will end the request */
return;
@@ -1810,7 +1470,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->byte_len < len)) {
dev_err(queue->ctrl->ctrl.device,
"Unexpected nvme completion length(%d)\n", wc->byte_len);
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
return;
}

@@ -1980,7 +1640,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
dev_dbg(queue->ctrl->ctrl.device,
"disconnect received - connection closed\n");
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
/* device removal is handled via the ib_client API */
@@ -1988,7 +1648,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
default:
dev_err(queue->ctrl->ctrl.device,
"Unexpected RDMA CM event (%d)\n", ev->event);
- nvme_rdma_error_recovery(queue->ctrl);
+ nvmf_error_recovery(&queue->ctrl->ctrl);
break;
}

@@ -2006,7 +1666,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_rdma_queue *queue = req->queue;

- nvme_rdma_stop_queue(ctrl, nvme_rdma_queue_id(queue));
+ nvme_rdma_stop_io_queue(ctrl, nvme_rdma_queue_id(queue));
nvmf_complete_timed_out_request(rq);
}

@@ -2041,7 +1701,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
* LIVE state should trigger the normal error recovery which will
* handle completing this request.
*/
- nvme_rdma_error_recovery(ctrl);
+ nvmf_error_recovery(&ctrl->ctrl);
return BLK_EH_RESET_TIMER;
}

@@ -2242,41 +1902,9 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.timeout = nvme_rdma_timeout,
};

-static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
-{
- nvme_rdma_teardown_io_queues(ctrl, shutdown);
- nvme_quiesce_admin_queue(&ctrl->ctrl);
- nvme_disable_ctrl(&ctrl->ctrl, shutdown);
- nvme_rdma_teardown_admin_queue(ctrl, shutdown);
-}
-
static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
{
- nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
-}
-
-static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
-{
- struct nvme_rdma_ctrl *ctrl =
- container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
-
- nvme_stop_ctrl(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl, false);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure should never happen */
- WARN_ON_ONCE(1);
- return;
- }
-
- if (nvme_rdma_setup_ctrl(ctrl, false))
- goto out_fail;
-
- return;
-
-out_fail:
- ++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvmf_teardown_ctrl(ctrl, true);
}

static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -2290,7 +1918,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
- .stop_ctrl = nvme_rdma_stop_ctrl,
+ .stop_ctrl = nvmf_stop_ctrl,
};

/*
@@ -2322,6 +1950,21 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}

+static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
+ .alloc_admin_queue = nvme_rdma_alloc_admin_queue,
+ .free_admin_queue = nvme_rdma_free_admin_queue,
+ .start_admin_queue = nvme_rdma_start_admin_queue,
+ .stop_admin_queue = nvme_rdma_stop_admin_queue,
+ .alloc_io_queue = nvme_rdma_alloc_io_queue,
+ .free_io_queue = nvme_rdma_free_io_queue,
+ .start_io_queue = nvme_rdma_start_io_queue,
+ .stop_io_queue = nvme_rdma_stop_io_queue,
+ .alloc_admin_tag_set = nvme_rdma_alloc_admin_tag_set,
+ .alloc_tag_set = nvme_rdma_alloc_tag_set,
+ .nr_io_queues = nvme_rdma_nr_io_queues,
+ .set_io_queues = nvme_rdma_set_io_queues,
+};
+
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -2333,6 +1976,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl)
return ERR_PTR(-ENOMEM);
ctrl->ctrl.opts = opts;
+ ctrl->ctrl.fabrics_ops = &nvme_rdma_fabrics_ops;
INIT_LIST_HEAD(&ctrl->list);

if (!(opts->mask & NVMF_OPT_TRSVCID)) {
@@ -2369,10 +2013,9 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
}

INIT_DELAYED_WORK(&ctrl->ctrl.connect_work,
- nvme_rdma_reconnect_ctrl_work);
- INIT_WORK(&ctrl->ctrl.err_work, nvme_rdma_error_recovery_work);
- INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
-
+ nvmf_reconnect_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.err_work, nvmf_error_recovery_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvmf_reset_ctrl_work);
ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -2392,7 +2035,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed);

- ret = nvme_rdma_setup_ctrl(ctrl, true);
+ ret = nvmf_setup_ctrl(&ctrl->ctrl, true);
if (ret)
goto out_uninit_ctrl;

--
2.40.0

2023-05-04 09:16:24

by Daniel Wagner

[permalink] [raw]
Subject: [RFC v3 8/9] nvme: move queue flags to middle layer

The queue flags are used to track the state of the queue (deleted, live,
...). Move this generic feature into the fabric middle layer.

Unfortunately, rdma uses a transport-specific flag (TR_READY) which is not used
in the generic part of the state machine.
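
For illustration, a minimal sketch of the resulting split of responsibility,
using the names from the hunks below (nvme_tcp_publish_queue() is a made-up
helper; the patch does the same thing inline in __nvme_tcp_alloc_queue(), and
the middle-layer helper is simplified compared to the fabrics.c hunk):

    /* Generic per-queue state kept in struct nvme_ctrl (nvme.h hunk). */
    struct nvme_fabrics_queue {
        unsigned long   *flags;         /* transport's per-queue flag word */
        struct mutex    *queue_lock;    /* transport's per-queue lock */
    };

    /* The transport wires up its queue when allocating it. */
    static void nvme_tcp_publish_queue(struct nvme_tcp_ctrl *ctrl,
                                       struct nvme_tcp_queue *queue)
    {
        struct nvme_fabrics_queue *fqueue =
            &ctrl->ctrl.queues[nvme_tcp_queue_id(queue)];

        fqueue->flags = &queue->flags;
        fqueue->queue_lock = &queue->queue_lock;
    }

    /*
     * The middle layer owns the ALLOCATED/LIVE transitions and only calls
     * into the transport for the transport-specific work.
     */
    static int __nvmf_start_io_queue(struct nvme_ctrl *ctrl, int qid)
    {
        int ret = ctrl->fabrics_ops->start_io_queue(ctrl, qid);

        if (ret)
            return ret;
        set_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[qid].flags);
        return 0;
    }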

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/fabrics.c | 157 +++++++++++++++++++++++++++++-------
drivers/nvme/host/nvme.h | 19 ++++-
drivers/nvme/host/rdma.c | 75 ++++++-----------
drivers/nvme/host/tcp.c | 87 +++++++-------------
4 files changed, 197 insertions(+), 141 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 3d2cde17338d..5f212cb9421a 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1134,13 +1134,117 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
return ERR_PTR(ret);
}

+static int __nvmf_alloc_admin_queue(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = ctrl->fabrics_ops->alloc_admin_queue(ctrl);
+ if (ret)
+ return ret;
+
+ set_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags);
+
+ return 0;
+}
+
+static void __nvmf_free_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (!test_and_clear_bit(NVME_FABRICS_Q_ALLOCATED,
+ ctrl->queues[0].flags))
+ return;
+
+ ctrl->fabrics_ops->free_admin_queue(ctrl);
+}
+
+static int __nvmf_start_admin_queue(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags))
+ return -EINVAL;
+
+ ret = ctrl->fabrics_ops->start_admin_queue(ctrl);
+ if (ret) {
+ dev_err(ctrl->device,
+ "failed to start admin queue: ret=%d\n", ret);
+ return ret;
+ }
+
+ set_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[0].flags);
+
+ return 0;
+}
+
+static void __nvmf_stop_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags))
+ return;
+
+ mutex_lock(ctrl->queues[0].queue_lock);
+ if (test_and_clear_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[0].flags))
+ ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ mutex_unlock(ctrl->queues[0].queue_lock);
+}
+
+static int __nvmf_alloc_io_queue(struct nvme_ctrl *ctrl, int qid)
+{
+ int ret;
+
+ ret = ctrl->fabrics_ops->alloc_io_queue(ctrl, qid);
+ if (ret) {
+ dev_err(ctrl->device,
+ "failed to start I/O queue: %d ret=%d\n", qid, ret);
+ return ret;
+ }
+
+ set_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags);
+
+ return 0;
+}
+
+static void __nvmf_free_io_queue(struct nvme_ctrl *ctrl, int qid)
+{
+ if (!test_and_clear_bit(NVME_FABRICS_Q_ALLOCATED,
+ ctrl->queues[qid].flags))
+ return;
+
+ ctrl->fabrics_ops->free_io_queue(ctrl, qid);
+}
+
+static int __nvmf_start_io_queue(struct nvme_ctrl *ctrl, int qid)
+{
+ int ret;
+
+ if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags))
+ return -EINVAL;
+
+ ret = ctrl->fabrics_ops->start_io_queue(ctrl, qid);
+ if (ret)
+ return ret;
+
+ set_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[qid].flags);
+
+ return 0;
+}
+
+static void __nvmf_stop_io_queue(struct nvme_ctrl *ctrl, int qid)
+{
+ if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags))
+ return;
+
+ mutex_lock(ctrl->queues[qid].queue_lock);
+ if (test_and_clear_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[qid].flags))
+ ctrl->fabrics_ops->stop_io_queue(ctrl, qid);
+ mutex_unlock(ctrl->queues[qid].queue_lock);
+}
+
static int nvmf_start_io_queues(struct nvme_ctrl *ctrl,
int first, int last)
{
int i, ret;

for (i = first; i < last; i++) {
- ret = ctrl->fabrics_ops->start_io_queue(ctrl, i);
+ ret = __nvmf_start_io_queue(ctrl, i);
if (ret)
goto out_stop_queues;
}
@@ -1149,7 +1253,7 @@ static int nvmf_start_io_queues(struct nvme_ctrl *ctrl,

out_stop_queues:
for (i--; i >= first; i--)
- ctrl->fabrics_ops->stop_io_queue(ctrl, i);
+ __nvmf_stop_io_queue(ctrl, i);
return ret;
}

@@ -1158,7 +1262,7 @@ static void nvmf_stop_io_queues(struct nvme_ctrl *ctrl)
int i;

for (i = 1; i < ctrl->queue_count; i++)
- ctrl->fabrics_ops->stop_io_queue(ctrl, i);
+ __nvmf_stop_io_queue(ctrl, i);
}

static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
@@ -1166,7 +1270,7 @@ static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
int i, ret;

for (i = 1; i < ctrl->queue_count; i++) {
- ret = ctrl->fabrics_ops->alloc_io_queue(ctrl, i);
+ ret = __nvmf_alloc_io_queue(ctrl, i);
if (ret)
goto out_free_queues;
}
@@ -1175,7 +1279,7 @@ static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)

out_free_queues:
for (i--; i >= 1; i--)
- ctrl->fabrics_ops->free_io_queue(ctrl, i);
+ __nvmf_free_io_queue(ctrl, i);

return ret;
}
@@ -1198,7 +1302,7 @@ static int nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)

ctrl->queue_count = nr_io_queues + 1;
dev_info(ctrl->device,
- "creating %d I/O queues.\n", nr_io_queues);
+ "creating %d I/O queues.\n", nr_io_queues);

ctrl->fabrics_ops->set_io_queues(ctrl, nr_io_queues);

@@ -1210,7 +1314,7 @@ static void nvmf_free_io_queues(struct nvme_ctrl *ctrl)
int i;

for (i = 1; i < ctrl->queue_count; i++)
- ctrl->fabrics_ops->free_io_queue(ctrl, i);
+ __nvmf_free_io_queue(ctrl, i);
}

static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
@@ -1279,31 +1383,31 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)

static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
{
- int error;
+ int ret;

- error = ctrl->fabrics_ops->alloc_admin_queue(ctrl);
- if (error)
- return error;
+ ret = __nvmf_alloc_admin_queue(ctrl);
+ if (ret)
+ return ret;

if (new) {
- error = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
- if (error)
+ ret = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
+ if (ret)
goto out_free_admin_queue;

}

- error = ctrl->fabrics_ops->start_admin_queue(ctrl);
- if (error)
+ ret = __nvmf_start_admin_queue(ctrl);
+ if (ret)
goto out_remove_admin_tag_set;

- error = nvme_enable_ctrl(ctrl);
- if (error)
+ ret = nvme_enable_ctrl(ctrl);
+ if (ret)
goto out_stop_queue;

nvme_unquiesce_admin_queue(ctrl);

- error = nvme_init_ctrl_finish(ctrl, false);
- if (error)
+ ret = nvme_init_ctrl_finish(ctrl, false);
+ if (ret)
goto out_quiesce_queue;

return 0;
@@ -1312,14 +1416,14 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
out_stop_queue:
- ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ __nvmf_stop_admin_queue(ctrl);
nvme_cancel_admin_tagset(ctrl);
out_remove_admin_tag_set:
if (new)
nvme_remove_admin_tag_set(ctrl);
out_free_admin_queue:
- ctrl->fabrics_ops->free_admin_queue(ctrl);
- return error;
+ __nvmf_free_admin_queue(ctrl);
+ return ret;
}

static void nvmf_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
@@ -1332,18 +1436,17 @@ static void nvmf_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)

static void nvmf_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
{
- ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ __nvmf_stop_admin_queue(ctrl);
if (remove)
nvme_remove_admin_tag_set(ctrl);
-
- ctrl->fabrics_ops->free_admin_queue(ctrl);
+ __nvmf_free_admin_queue(ctrl);
}

static void nvmf_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
{
nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
- ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ __nvmf_stop_admin_queue(ctrl);
nvme_cancel_admin_tagset(ctrl);
if (remove)
nvme_unquiesce_admin_queue(ctrl);
@@ -1447,7 +1550,7 @@ int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
destroy_admin:
nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
- ctrl->fabrics_ops->stop_admin_queue(ctrl);
+ __nvmf_stop_admin_queue(ctrl);
nvme_cancel_admin_tagset(ctrl);
nvmf_destroy_admin_queue(ctrl, new);
return ret;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fcea2678094c..0810bc2a9e13 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -246,6 +246,18 @@ enum nvme_ctrl_flags {

struct nvme_fabrics_ops;

+enum nvme_fabrics_queue_flags {
+ NVME_FABRICS_Q_ALLOCATED = 0,
+ NVME_FABRICS_Q_TR_READY = 1,
+ NVME_FABRICS_Q_LIVE = 2,
+ NVME_FABRICS_Q_POLLING = 3,
+};
+
+struct nvme_fabrics_queue {
+ unsigned long *flags;
+ struct mutex *queue_lock;
+};
+
struct nvme_ctrl {
bool comp_seen;
enum nvme_ctrl_state state;
@@ -253,7 +265,6 @@ struct nvme_ctrl {
spinlock_t lock;
struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
- const struct nvme_fabrics_ops *fabrics_ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
struct request_queue *fabrics_q;
@@ -342,8 +353,10 @@ struct nvme_ctrl {
struct work_struct ana_work;
#endif

- struct work_struct err_work;
- struct delayed_work connect_work;
+ const struct nvme_fabrics_ops *fabrics_ops;
+ struct nvme_fabrics_queue *queues;
+ struct work_struct err_work;
+ struct delayed_work connect_work;

#ifdef CONFIG_NVME_AUTH
struct work_struct dhchap_auth_work;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1fde65e8c2b5..023316fdc2c6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -3,6 +3,7 @@
* NVMe over Fabrics RDMA host code.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*/
+#include "linux/gfp_types.h"
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -76,12 +77,6 @@ struct nvme_rdma_request {
bool use_sig_mr;
};

-enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_ALLOCATED = 0,
- NVME_RDMA_Q_LIVE = 1,
- NVME_RDMA_Q_TR_READY = 2,
-};
-
struct nvme_rdma_queue {
struct nvme_rdma_qe *rsp_ring;
int queue_size;
@@ -425,7 +420,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
struct nvme_rdma_device *dev;
struct ib_device *ibdev;

- if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+ if (!test_and_clear_bit(NVME_FABRICS_Q_TR_READY, &queue->flags))
return;

dev = queue->device;
@@ -550,7 +545,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
}
}

- set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+ set_bit(NVME_FABRICS_Q_TR_READY, &queue->flags);

return 0;

@@ -572,12 +567,17 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
struct nvme_rdma_queue *queue)
{
struct sockaddr *src_addr = NULL;
+ struct nvme_fabrics_queue *fqueue;
int ret;

mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
init_completion(&queue->cm_done);

+ fqueue = &ctrl->ctrl.queues[nvme_rdma_queue_id(queue)];
+ fqueue->flags = &queue->flags;
+ fqueue->queue_lock = &queue->queue_lock;
+
queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(queue->cm_id)) {
@@ -607,8 +607,6 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
goto out_destroy_cm_id;
}

- set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
-
return 0;

out_destroy_cm_id:
@@ -622,9 +620,6 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
static void __nvme_rdma_free_queue(struct nvme_rdma_ctrl *ctrl,
struct nvme_rdma_queue *queue)
{
- if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
- return;
-
rdma_destroy_id(queue->cm_id);
nvme_rdma_destroy_queue_ib(queue);
mutex_destroy(&queue->queue_lock);
@@ -718,49 +713,18 @@ static void nvme_rdma_free_io_queue(struct nvme_ctrl *nctrl, int qid)

static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
- mutex_lock(&queue->queue_lock);
- if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) {
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
- }
- mutex_unlock(&queue->queue_lock);
+ rdma_disconnect(queue->cm_id);
+ ib_drain_qp(queue->qp);
}

-static int nvme_rdma_start_admin_queue(struct nvme_ctrl *nctrl)
+static int nvme_rdma_start_admin_queue(struct nvme_ctrl *ctrl)
{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct nvme_rdma_queue *queue = &ctrl->queues[0];
- int ret;
-
- ret = nvmf_connect_admin_queue(nctrl);
-
- if (!ret) {
- set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
- } else {
- if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
- __nvme_rdma_stop_queue(queue);
- dev_info(ctrl->ctrl.device,
- "failed to connect queue: %d ret=%d\n", 0, ret);
- }
- return ret;
+ return nvmf_connect_admin_queue(ctrl);
}

-static int nvme_rdma_start_io_queue(struct nvme_ctrl *nctrl, int idx)
+static int nvme_rdma_start_io_queue(struct nvme_ctrl *ctrl, int idx)
{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
- struct nvme_rdma_queue *queue = &ctrl->queues[idx];
- int ret;
-
- ret = nvmf_connect_io_queue(nctrl, idx);
- if (!ret) {
- set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
- } else {
- if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
- __nvme_rdma_stop_queue(queue);
- dev_info(ctrl->ctrl.device,
- "failed to connect queue: %d ret=%d\n", idx, ret);
- }
- return ret;
+ return nvmf_connect_io_queue(ctrl, idx);
}

static void nvme_rdma_stop_admin_queue(struct nvme_ctrl *nctrl)
@@ -1715,7 +1679,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_rdma_qe *sqe = &req->sqe;
struct nvme_command *c = nvme_req(rq)->cmd;
struct ib_device *dev;
- bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
+ bool queue_ready = test_bit(NVME_FABRICS_Q_LIVE, &queue->flags);
blk_status_t ret;
int err;

@@ -2027,6 +1991,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (!ctrl->queues)
goto out_free_ctrl;

+ ctrl->ctrl.queues = kcalloc(ctrl->ctrl.queue_count,
+ sizeof(*ctrl->ctrl.queues),
+ GFP_KERNEL);
+ if (!ctrl->ctrl.queues)
+ goto out_free_ctrl_queues;
+
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
@@ -2054,7 +2024,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
out_kfree_queues:
+ kfree(ctrl->ctrl.queues);
+out_free_ctrl_queues:
kfree(ctrl->queues);
out_free_ctrl:
kfree(ctrl);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 32c4346b7322..dfdf35b32adc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -100,12 +100,6 @@ struct nvme_tcp_request {
enum nvme_tcp_send_state state;
};

-enum nvme_tcp_queue_flags {
- NVME_TCP_Q_ALLOCATED = 0,
- NVME_TCP_Q_LIVE = 1,
- NVME_TCP_Q_POLLING = 2,
-};
-
enum nvme_tcp_recv_state {
NVME_TCP_RECV_PDU = 0,
NVME_TCP_RECV_DATA,
@@ -903,7 +897,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled) &&
- !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
+ !test_bit(NVME_FABRICS_Q_POLLING, &queue->flags))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1454,6 +1448,7 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
struct nvme_tcp_queue *queue)
{
+ struct nvme_fabrics_queue *fqueue;
int ret, rcv_pdu_size;

mutex_init(&queue->queue_lock);
@@ -1463,6 +1458,10 @@ static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work);

+ fqueue = &ctrl->ctrl.queues[nvme_tcp_queue_id(queue)];
+ fqueue->flags = &queue->flags;
+ fqueue->queue_lock = &queue->queue_lock;
+
ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &queue->sock);
if (ret) {
@@ -1567,7 +1566,6 @@ static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
goto err_init_connect;

queue->rd_enabled = true;
- set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
nvme_tcp_init_recv_ctx(queue);

write_lock_bh(&queue->sock->sk->sk_callback_lock);
@@ -1607,9 +1605,6 @@ static void __nvme_tcp_free_queue(struct nvme_tcp_ctrl *ctrl,
struct page *page;
unsigned int noreclaim_flag;

- if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- return;
-
if (queue->hdr_digest || queue->data_digest)
nvme_tcp_free_crypto(queue);

@@ -1699,40 +1694,14 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
cancel_work_sync(&queue->io_work);
}

-static int nvme_tcp_start_admin_queue(struct nvme_ctrl *nctrl)
+static int nvme_tcp_start_admin_queue(struct nvme_ctrl *ctrl)
{
- struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- struct nvme_tcp_queue *queue = &ctrl->queues[0];
- int ret;
-
- ret = nvmf_connect_admin_queue(nctrl);
- if (!ret) {
- set_bit(NVME_TCP_Q_LIVE, &queue->flags);
- } else {
- if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- __nvme_tcp_stop_queue(queue);
- dev_err(nctrl->device,
- "failed to connect queue: %d ret=%d\n", 0, ret);
- }
- return ret;
+ return nvmf_connect_admin_queue(ctrl);
}

-static int nvme_tcp_start_io_queue(struct nvme_ctrl *nctrl, int qid)
+static int nvme_tcp_start_io_queue(struct nvme_ctrl *ctrl, int qid)
{
- struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- int ret;
-
- ret = nvmf_connect_io_queue(nctrl, qid);
- if (!ret) {
- set_bit(NVME_TCP_Q_LIVE, &queue->flags);
- } else {
- if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- __nvme_tcp_stop_queue(queue);
- dev_err(nctrl->device,
- "failed to connect queue: %d ret=%d\n", qid, ret);
- }
- return ret;
+ return nvmf_connect_io_queue(ctrl, qid);
}

static void nvme_tcp_stop_admin_queue(struct nvme_ctrl *nctrl)
@@ -1740,13 +1709,7 @@ static void nvme_tcp_stop_admin_queue(struct nvme_ctrl *nctrl)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[0];

- if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- return;
-
- mutex_lock(&queue->queue_lock);
- if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
- __nvme_tcp_stop_queue(queue);
- mutex_unlock(&queue->queue_lock);
+ __nvme_tcp_stop_queue(queue);
}

static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
@@ -1754,13 +1717,7 @@ static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];

- if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
- return;
-
- mutex_lock(&queue->queue_lock);
- if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
- __nvme_tcp_stop_queue(queue);
- mutex_unlock(&queue->queue_lock);
+ __nvme_tcp_stop_queue(queue);
}

static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
@@ -1843,6 +1800,7 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)

nvmf_free_options(nctrl->opts);
free_ctrl:
+ kfree(ctrl->ctrl.queues);
kfree(ctrl->queues);
kfree(ctrl);
}
@@ -2043,7 +2001,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_tcp_queue *queue = hctx->driver_data;
struct request *rq = bd->rq;
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
+ bool queue_ready = test_bit(NVME_FABRICS_Q_LIVE, &queue->flags);
blk_status_t ret;

if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
@@ -2108,14 +2066,14 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
struct nvme_tcp_queue *queue = hctx->driver_data;
struct sock *sk = queue->sock->sk;

- if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ if (!test_bit(NVME_FABRICS_Q_LIVE, &queue->flags))
return 0;

- set_bit(NVME_TCP_Q_POLLING, &queue->flags);
+ set_bit(NVME_FABRICS_Q_POLLING, &queue->flags);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue);
- clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
+ clear_bit(NVME_FABRICS_Q_POLLING, &queue->flags);
return queue->nr_cqe;
}

@@ -2129,7 +2087,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)

mutex_lock(&queue->queue_lock);

- if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ if (!test_bit(NVME_FABRICS_Q_LIVE, &queue->flags))
goto done;
ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
if (ret > 0) {
@@ -2282,6 +2240,13 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
ret = -ENOMEM;
goto out_free_ctrl;
}
+ ctrl->ctrl.queues = kcalloc(ctrl->ctrl.queue_count,
+ sizeof(*ctrl->ctrl.queues),
+ GFP_KERNEL);
+ if (!ctrl->ctrl.queues) {
+ ret = -ENOMEM;
+ goto out_free_ctrl_queue;
+ }

ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
if (ret)
@@ -2313,6 +2278,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
ret = -EIO;
return ERR_PTR(ret);
out_kfree_queues:
+ kfree(ctrl->ctrl.queues);
+out_free_ctrl_queue:
kfree(ctrl->queues);
out_free_ctrl:
kfree(ctrl);
--
2.40.0

2023-05-04 09:16:45

by Daniel Wagner

[permalink] [raw]
Subject: [RFC v3 9/9] nvme: introduce setup_transport()

Do the tag allocation in the new setup function.

Nope, this doesn't work because the tag set allocation wants to map the
queues, but we haven't allocated them yet.
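
The ordering problem is presumably this (call-flow sketch, assuming the usual
blk-mq behaviour where blk_mq_alloc_tag_set() already invokes ->map_queues()):

    nvmf_setup_ctrl(ctrl, new)
      ctrl->fabrics_ops->setup_transport(ctrl)
        nvme_alloc_io_tag_set()
          blk_mq_alloc_tag_set()
            set->ops->map_queues()          /* e.g. nvme_rdma_map_queues() */
              /* ... reads ctrl->io_queues[HCTX_TYPE_*] ... */
      nvmf_configure_admin_queue(ctrl, new)
      nvmf_configure_io_queues(ctrl, new)
        nvmf_alloc_io_queues(ctrl)
          ctrl->fabrics_ops->set_io_queues()  /* ... which only gets filled in here */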

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/fabrics.c | 26 ++++------------
drivers/nvme/host/fabrics.h | 5 ++--
drivers/nvme/host/rdma.c | 60 +++++++++++++++++++++----------------
drivers/nvme/host/tcp.c | 31 +++++++++++--------
4 files changed, 59 insertions(+), 63 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5f212cb9421a..06e9cf0c9e84 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1325,12 +1325,6 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
if (ret)
return ret;

- if (new) {
- ret = ctrl->fabrics_ops->alloc_tag_set(ctrl);
- if (ret)
- goto out_free_io_queues;
- }
-
/*
* Only start IO queues for which we have allocated the tagset
* and limitted it to the available queues. On reconnects, the
@@ -1374,9 +1368,6 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
nvmf_stop_io_queues(ctrl);
out_cleanup_connect_q:
nvme_cancel_tagset(ctrl);
- if (new)
- nvme_remove_io_tag_set(ctrl);
-out_free_io_queues:
nvmf_free_io_queues(ctrl);
return ret;
}
@@ -1389,16 +1380,9 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (ret)
return ret;

- if (new) {
- ret = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
- if (ret)
- goto out_free_admin_queue;
-
- }
-
ret = __nvmf_start_admin_queue(ctrl);
if (ret)
- goto out_remove_admin_tag_set;
+ goto out_remove_admin_queue;

ret = nvme_enable_ctrl(ctrl);
if (ret)
@@ -1418,10 +1402,7 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
out_stop_queue:
__nvmf_stop_admin_queue(ctrl);
nvme_cancel_admin_tagset(ctrl);
-out_remove_admin_tag_set:
- if (new)
- nvme_remove_admin_tag_set(ctrl);
-out_free_admin_queue:
+out_remove_admin_queue:
__nvmf_free_admin_queue(ctrl);
return ret;
}
@@ -1489,6 +1470,9 @@ int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
struct nvmf_ctrl_options *opts = ctrl->opts;
int ret;

+ if (new) {
+ ret = ctrl->fabrics_ops->setup_transport(ctrl);
+ if (ret)
+ return ret;
+ }
+
ret = nvmf_configure_admin_queue(ctrl, new);
if (ret)
return ret;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 345d6de6bc86..ad4734df7342 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -173,6 +173,7 @@ struct nvmf_transport_ops {
};

struct nvme_fabrics_ops {
+ int (*setup_transport)(struct nvme_ctrl *ctrl);
int (*alloc_admin_queue)(struct nvme_ctrl *ctrl);
int (*start_admin_queue)(struct nvme_ctrl *ctrl);
void (*stop_admin_queue)(struct nvme_ctrl *ctrl);
@@ -182,9 +183,7 @@ struct nvme_fabrics_ops {
void (*stop_io_queue)(struct nvme_ctrl *ctrl, int qid);
void (*free_io_queue)(struct nvme_ctrl *ctrl, int qid);

- /* these should be replaced with a single one setup_transport() */
- int (*alloc_admin_tag_set)(struct nvme_ctrl *ctrl);
- int (*alloc_tag_set)(struct nvme_ctrl *ctrl);
+ /* these should move to setup_transport() as well */
unsigned int (*nr_io_queues)(struct nvme_ctrl *ctrl);
void (*set_io_queues)(struct nvme_ctrl *ctrl, unsigned int nr_io_queues);
};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 023316fdc2c6..015a6bde732a 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -743,6 +743,39 @@ static void nvme_rdma_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
__nvme_rdma_stop_queue(queue);
}

+static int nvme_rdma_setup_transport(struct nvme_ctrl *ctrl)
+{
+ unsigned int cmd_size;
+ int ret;
+
+ ret = nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
+ &nvme_rdma_admin_mq_ops,
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE);
+ if (ret)
+ return ret;
+
+ cmd_size = sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE;
+
+ if (ctrl->max_integrity_segments)
+ cmd_size += sizeof(struct nvme_rdma_sgl) +
+ NVME_RDMA_METADATA_SGL_SIZE;
+
+ ret = nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
+ &nvme_rdma_mq_ops,
+ ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
+ cmd_size);
+ if (ret)
+ goto out_free_admin_tag_set;
+
+ return 0;
+
+out_free_admin_tag_set:
+ nvme_remove_admin_tag_set(ctrl);
+ return ret;
+}
+
static unsigned int nvme_rdma_nr_io_queues(struct nvme_ctrl *ctrl)
{
struct ib_device *ibdev = to_rdma_ctrl(ctrl)->device->dev;
@@ -802,21 +835,6 @@ static void nvme_rdma_set_io_queues(struct nvme_ctrl *nctrl,
}
}

-static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
-{
- unsigned int cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_RDMA_DATA_SGL_SIZE;
-
- if (ctrl->max_integrity_segments)
- cmd_size += sizeof(struct nvme_rdma_sgl) +
- NVME_RDMA_METADATA_SGL_SIZE;
-
- return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
- &nvme_rdma_mq_ops,
- ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
- cmd_size);
-}
-
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -834,15 +852,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}

-static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
-{
-
- return nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
- &nvme_rdma_admin_mq_ops,
- sizeof(struct nvme_rdma_request) +
- NVME_RDMA_DATA_SGL_SIZE);
-}
-
static void nvme_rdma_end_request(struct nvme_rdma_request *req)
{
struct request *rq = blk_mq_rq_from_pdu(req);
@@ -1915,6 +1924,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
}

static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
+ .setup_transport = nvme_rdma_setup_transport,
.alloc_admin_queue = nvme_rdma_alloc_admin_queue,
.free_admin_queue = nvme_rdma_free_admin_queue,
.start_admin_queue = nvme_rdma_start_admin_queue,
@@ -1923,8 +1933,6 @@ static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
.free_io_queue = nvme_rdma_free_io_queue,
.start_io_queue = nvme_rdma_start_io_queue,
.stop_io_queue = nvme_rdma_stop_io_queue,
- .alloc_admin_tag_set = nvme_rdma_alloc_admin_tag_set,
- .alloc_tag_set = nvme_rdma_alloc_tag_set,
.nr_io_queues = nvme_rdma_nr_io_queues,
.set_io_queues = nvme_rdma_set_io_queues,
};
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index dfdf35b32adc..f91575b944a2 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1720,20 +1720,26 @@ static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
__nvme_tcp_stop_queue(queue);
}

-static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
+static int nvme_tcp_setup_transport(struct nvme_ctrl *ctrl)
{
- return nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
- &nvme_tcp_admin_mq_ops,
- sizeof(struct nvme_tcp_request));
-}
+ int ret;

-static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *ctrl)
-{
- return nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
- &nvme_tcp_mq_ops,
- ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
- sizeof(struct nvme_tcp_request));
+ ret = nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
+ &nvme_tcp_admin_mq_ops,
+ sizeof(struct nvme_tcp_request));
+ if (ret)
+ return ret;
+
+ ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
+ &nvme_tcp_mq_ops,
+ ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
+ sizeof(struct nvme_tcp_request));
+ if (ret)
+		goto out_free_admin_tag_set;
+
+	return 0;

+out_free_admin_tag_set:
+ nvme_remove_admin_tag_set(ctrl);
+ return ret;
}

static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
@@ -2155,6 +2161,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
}

static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
+ .setup_transport = nvme_tcp_setup_transport,
.alloc_admin_queue = nvme_tcp_alloc_admin_queue,
.free_admin_queue = nvme_tcp_free_admin_queue,
.start_admin_queue = nvme_tcp_start_admin_queue,
@@ -2163,8 +2170,6 @@ static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
.free_io_queue = nvme_tcp_free_io_queue,
.start_io_queue = nvme_tcp_start_io_queue,
.stop_io_queue = nvme_tcp_stop_io_queue,
- .alloc_admin_tag_set = nvme_tcp_alloc_admin_tag_set,
- .alloc_tag_set = nvme_tcp_alloc_tag_set,
.nr_io_queues = nvme_tcp_nr_io_queues,
.set_io_queues = nvme_tcp_set_io_queues,
};
--
2.40.0

2023-05-04 09:45:16

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [RFC v3 8/9] nvme: move queue flags to middle layer


> The queue flags are used to track the state of the queue (deleted, live,
> ...). Move this generic feature into the fabric middle layer.
>
> Unfortunately, rdma uses a transport flag (TR_READY) which is not used
> in the generic part of the state machine.

This would be a transport-specific flag; it is only accessed in a
transport-specific function. So it would need to live in an
nvme_rdma_queue flags field.

But I don't think that we need a queues array in the core nvme_ctrl.
All the transports will have an admin queue and I/O queues, but
representing a queue itself in the core would be difficult IMO.
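
For the TR_READY bit, something along these lines should be enough
(untested sketch; "tr_flags" is just a placeholder name, the rest is
the existing rdma code):

	/* rdma-private queue flags, invisible to fabrics.c */
	enum nvme_rdma_queue_flags {
		NVME_RDMA_Q_TR_READY	= 0,
	};

	struct nvme_rdma_queue {
		/* ... existing members ... */
		unsigned long		tr_flags;	/* rdma-private state */
	};

	static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
	{
		/* tear down the IB resources only if they were created */
		if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->tr_flags))
			return;
		/* ... ib teardown as today ... */
	}

The generic NVME_FABRICS_Q_TR_READY bit could then be dropped from
enum nvme_fabrics_queue_flags.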

>
> Signed-off-by: Daniel Wagner <[email protected]>
> ---
> drivers/nvme/host/fabrics.c | 157 +++++++++++++++++++++++++++++-------
> drivers/nvme/host/nvme.h | 19 ++++-
> drivers/nvme/host/rdma.c | 75 ++++++-----------
> drivers/nvme/host/tcp.c | 87 +++++++-------------
> 4 files changed, 197 insertions(+), 141 deletions(-)
>
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 3d2cde17338d..5f212cb9421a 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -1134,13 +1134,117 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
> return ERR_PTR(ret);
> }
>
> +static int __nvmf_alloc_admin_queue(struct nvme_ctrl *ctrl)
> +{
> + int ret;
> +
> + ret = ctrl->fabrics_ops->alloc_admin_queue(ctrl);
> + if (ret)
> + return ret;
> +
> + set_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags);
> +
> + return 0;
> +}
> +
> +static void __nvmf_free_admin_queue(struct nvme_ctrl *ctrl)
> +{
> + if (!test_and_clear_bit(NVME_FABRICS_Q_ALLOCATED,
> + ctrl->queues[0].flags))
> + return;
> +
> + ctrl->fabrics_ops->free_admin_queue(ctrl);
> +}
> +
> +static int __nvmf_start_admin_queue(struct nvme_ctrl *ctrl)
> +{
> + int ret;
> +
> + if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags))
> + return -EINVAL;
> +
> + ret = ctrl->fabrics_ops->start_admin_queue(ctrl);
> + if (ret) {
> + dev_err(ctrl->device,
> + "failed to start admin queue: ret=%d\n", ret);
> + return ret;
> + }
> +
> + set_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[0].flags);
> +
> + return 0;
> +}
> +
> +static void __nvmf_stop_admin_queue(struct nvme_ctrl *ctrl)
> +{
> + if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[0].flags))
> + return;
> +
> + mutex_lock(ctrl->queues[0].queue_lock);
> + if (test_and_clear_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[0].flags))
> + ctrl->fabrics_ops->stop_admin_queue(ctrl);
> + mutex_unlock(ctrl->queues[0].queue_lock);
> +}
> +
> +static int __nvmf_alloc_io_queue(struct nvme_ctrl *ctrl, int qid)
> +{
> + int ret;
> +
> + ret = ctrl->fabrics_ops->alloc_io_queue(ctrl, qid);
> + if (ret) {
> + dev_err(ctrl->device,
> + "failed to start I/O queue: %d ret=%d\n", qid, ret);
> + return ret;
> + }
> +
> + set_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags);
> +
> + return 0;
> +}
> +
> +static void __nvmf_free_io_queue(struct nvme_ctrl *ctrl, int qid)
> +{
> + if (!test_and_clear_bit(NVME_FABRICS_Q_ALLOCATED,
> + ctrl->queues[qid].flags))
> + return;
> +
> + ctrl->fabrics_ops->free_io_queue(ctrl, qid);
> +}
> +
> +static int __nvmf_start_io_queue(struct nvme_ctrl *ctrl, int qid)
> +{
> + int ret;
> +
> +	if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags))
> + return -EINVAL;
> +
> + ret = ctrl->fabrics_ops->start_io_queue(ctrl, qid);
> + if (ret)
> + return ret;
> +
> + set_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[qid].flags);
> +
> + return 0;
> +}
> +
> +static void __nvmf_stop_io_queue(struct nvme_ctrl *ctrl, int qid)
> +{
> + if (!test_bit(NVME_FABRICS_Q_ALLOCATED, ctrl->queues[qid].flags))
> + return;
> +
> + mutex_lock(ctrl->queues[qid].queue_lock);
> + if (test_and_clear_bit(NVME_FABRICS_Q_LIVE, ctrl->queues[qid].flags))
> + ctrl->fabrics_ops->stop_io_queue(ctrl, qid);
> + mutex_unlock(ctrl->queues[qid].queue_lock);
> +}
> +
> static int nvmf_start_io_queues(struct nvme_ctrl *ctrl,
> int first, int last)
> {
> int i, ret;
>
> for (i = first; i < last; i++) {
> - ret = ctrl->fabrics_ops->start_io_queue(ctrl, i);
> + ret = __nvmf_start_io_queue(ctrl, i);
> if (ret)
> goto out_stop_queues;
> }
> @@ -1149,7 +1253,7 @@ static int nvmf_start_io_queues(struct nvme_ctrl *ctrl,
>
> out_stop_queues:
> for (i--; i >= first; i--)
> - ctrl->fabrics_ops->stop_io_queue(ctrl, i);
> + __nvmf_stop_io_queue(ctrl, i);
> return ret;
> }
>
> @@ -1158,7 +1262,7 @@ static void nvmf_stop_io_queues(struct nvme_ctrl *ctrl)
> int i;
>
> for (i = 1; i < ctrl->queue_count; i++)
> - ctrl->fabrics_ops->stop_io_queue(ctrl, i);
> + __nvmf_stop_io_queue(ctrl, i);
> }
>
> static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
> @@ -1166,7 +1270,7 @@ static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
> int i, ret;
>
> for (i = 1; i < ctrl->queue_count; i++) {
> - ret = ctrl->fabrics_ops->alloc_io_queue(ctrl, i);
> + ret = __nvmf_alloc_io_queue(ctrl, i);
> if (ret)
> goto out_free_queues;
> }
> @@ -1175,7 +1279,7 @@ static int __nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
>
> out_free_queues:
> for (i--; i >= 1; i--)
> - ctrl->fabrics_ops->free_io_queue(ctrl, i);
> + __nvmf_free_io_queue(ctrl, i);
>
> return ret;
> }
> @@ -1198,7 +1302,7 @@ static int nvmf_alloc_io_queues(struct nvme_ctrl *ctrl)
>
> ctrl->queue_count = nr_io_queues + 1;
> dev_info(ctrl->device,
> - "creating %d I/O queues.\n", nr_io_queues);
> + "creating %d I/O queues.\n", nr_io_queues);
>
> ctrl->fabrics_ops->set_io_queues(ctrl, nr_io_queues);
>
> @@ -1210,7 +1314,7 @@ static void nvmf_free_io_queues(struct nvme_ctrl *ctrl)
> int i;
>
> for (i = 1; i < ctrl->queue_count; i++)
> - ctrl->fabrics_ops->free_io_queue(ctrl, i);
> + __nvmf_free_io_queue(ctrl, i);
> }
>
> static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
> @@ -1279,31 +1383,31 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
>
> static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
> {
> - int error;
> + int ret;
>
> - error = ctrl->fabrics_ops->alloc_admin_queue(ctrl);
> - if (error)
> - return error;
> + ret = __nvmf_alloc_admin_queue(ctrl);
> + if (ret)
> + return ret;
>
> if (new) {
> - error = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
> - if (error)
> + ret = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
> + if (ret)
> goto out_free_admin_queue;
>
> }
>
> - error = ctrl->fabrics_ops->start_admin_queue(ctrl);
> - if (error)
> + ret = __nvmf_start_admin_queue(ctrl);
> + if (ret)
> goto out_remove_admin_tag_set;
>
> - error = nvme_enable_ctrl(ctrl);
> - if (error)
> + ret = nvme_enable_ctrl(ctrl);
> + if (ret)
> goto out_stop_queue;
>
> nvme_unquiesce_admin_queue(ctrl);
>
> - error = nvme_init_ctrl_finish(ctrl, false);
> - if (error)
> + ret = nvme_init_ctrl_finish(ctrl, false);
> + if (ret)
> goto out_quiesce_queue;
>
> return 0;
> @@ -1312,14 +1416,14 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
> nvme_quiesce_admin_queue(ctrl);
> blk_sync_queue(ctrl->admin_q);
> out_stop_queue:
> - ctrl->fabrics_ops->stop_admin_queue(ctrl);
> + __nvmf_stop_admin_queue(ctrl);
> nvme_cancel_admin_tagset(ctrl);
> out_remove_admin_tag_set:
> if (new)
> nvme_remove_admin_tag_set(ctrl);
> out_free_admin_queue:
> - ctrl->fabrics_ops->free_admin_queue(ctrl);
> - return error;
> + __nvmf_free_admin_queue(ctrl);
> + return ret;
> }
>
> static void nvmf_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
> @@ -1332,18 +1436,17 @@ static void nvmf_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
>
> static void nvmf_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
> {
> - ctrl->fabrics_ops->stop_admin_queue(ctrl);
> + __nvmf_stop_admin_queue(ctrl);
> if (remove)
> nvme_remove_admin_tag_set(ctrl);
> -
> - ctrl->fabrics_ops->free_admin_queue(ctrl);
> + __nvmf_free_admin_queue(ctrl);
> }
>
> static void nvmf_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
> {
> nvme_quiesce_admin_queue(ctrl);
> blk_sync_queue(ctrl->admin_q);
> - ctrl->fabrics_ops->stop_admin_queue(ctrl);
> + __nvmf_stop_admin_queue(ctrl);
> nvme_cancel_admin_tagset(ctrl);
> if (remove)
> nvme_unquiesce_admin_queue(ctrl);
> @@ -1447,7 +1550,7 @@ int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
> destroy_admin:
> nvme_quiesce_admin_queue(ctrl);
> blk_sync_queue(ctrl->admin_q);
> - ctrl->fabrics_ops->stop_admin_queue(ctrl);
> + __nvmf_stop_admin_queue(ctrl);
> nvme_cancel_admin_tagset(ctrl);
> nvmf_destroy_admin_queue(ctrl, new);
> return ret;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index fcea2678094c..0810bc2a9e13 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -246,6 +246,18 @@ enum nvme_ctrl_flags {
>
> struct nvme_fabrics_ops;
>
> +enum nvme_fabrics_queue_flags {
> + NVME_FABRICS_Q_ALLOCATED = 0,
> + NVME_FABRICS_Q_TR_READY = 1,
> + NVME_FABRICS_Q_LIVE = 2,
> + NVME_FABRICS_Q_POLLING = 3,
> +};
> +
> +struct nvme_fabrics_queue {
> + unsigned long *flags;
> + struct mutex *queue_lock;
> +};
> +
> struct nvme_ctrl {
> bool comp_seen;
> enum nvme_ctrl_state state;
> @@ -253,7 +265,6 @@ struct nvme_ctrl {
> spinlock_t lock;
> struct mutex scan_lock;
> const struct nvme_ctrl_ops *ops;
> - const struct nvme_fabrics_ops *fabrics_ops;
> struct request_queue *admin_q;
> struct request_queue *connect_q;
> struct request_queue *fabrics_q;
> @@ -342,8 +353,10 @@ struct nvme_ctrl {
> struct work_struct ana_work;
> #endif
>
> - struct work_struct err_work;
> - struct delayed_work connect_work;
> + const struct nvme_fabrics_ops *fabrics_ops;
> + struct nvme_fabrics_queue *queues;
> + struct work_struct err_work;
> + struct delayed_work connect_work;
>
> #ifdef CONFIG_NVME_AUTH
> struct work_struct dhchap_auth_work;
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 1fde65e8c2b5..023316fdc2c6 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -3,6 +3,7 @@
> * NVMe over Fabrics RDMA host code.
> * Copyright (c) 2015-2016 HGST, a Western Digital Company.
> */
> +#include "linux/gfp_types.h"
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> #include <linux/module.h>
> #include <linux/init.h>
> @@ -76,12 +77,6 @@ struct nvme_rdma_request {
> bool use_sig_mr;
> };
>
> -enum nvme_rdma_queue_flags {
> - NVME_RDMA_Q_ALLOCATED = 0,
> - NVME_RDMA_Q_LIVE = 1,
> - NVME_RDMA_Q_TR_READY = 2,
> -};
> -
> struct nvme_rdma_queue {
> struct nvme_rdma_qe *rsp_ring;
> int queue_size;
> @@ -425,7 +420,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
> struct nvme_rdma_device *dev;
> struct ib_device *ibdev;
>
> - if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
> + if (!test_and_clear_bit(NVME_FABRICS_Q_TR_READY, &queue->flags))
> return;
>
> dev = queue->device;
> @@ -550,7 +545,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
> }
> }
>
> - set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
> + set_bit(NVME_FABRICS_Q_TR_READY, &queue->flags);
>
> return 0;
>
> @@ -572,12 +567,17 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
> struct nvme_rdma_queue *queue)
> {
> struct sockaddr *src_addr = NULL;
> + struct nvme_fabrics_queue *fqueue;
> int ret;
>
> mutex_init(&queue->queue_lock);
> queue->ctrl = ctrl;
> init_completion(&queue->cm_done);
>
> + fqueue = &ctrl->ctrl.queues[nvme_rdma_queue_id(queue)];
> + fqueue->flags = &queue->flags;
> + fqueue->queue_lock = &queue->queue_lock;
> +
> queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
> RDMA_PS_TCP, IB_QPT_RC);
> if (IS_ERR(queue->cm_id)) {
> @@ -607,8 +607,6 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
> goto out_destroy_cm_id;
> }
>
> - set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
> -
> return 0;
>
> out_destroy_cm_id:
> @@ -622,9 +620,6 @@ static int __nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
> static void __nvme_rdma_free_queue(struct nvme_rdma_ctrl *ctrl,
> struct nvme_rdma_queue *queue)
> {
> - if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
> - return;
> -
> rdma_destroy_id(queue->cm_id);
> nvme_rdma_destroy_queue_ib(queue);
> mutex_destroy(&queue->queue_lock);
> @@ -718,49 +713,18 @@ static void nvme_rdma_free_io_queue(struct nvme_ctrl *nctrl, int qid)
>
> static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
> {
> - mutex_lock(&queue->queue_lock);
> - if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) {
> - rdma_disconnect(queue->cm_id);
> - ib_drain_qp(queue->qp);
> - }
> - mutex_unlock(&queue->queue_lock);
> + rdma_disconnect(queue->cm_id);
> + ib_drain_qp(queue->qp);
> }
>
> -static int nvme_rdma_start_admin_queue(struct nvme_ctrl *nctrl)
> +static int nvme_rdma_start_admin_queue(struct nvme_ctrl *ctrl)
> {
> - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
> - struct nvme_rdma_queue *queue = &ctrl->queues[0];
> - int ret;
> -
> - ret = nvmf_connect_admin_queue(nctrl);
> -
> - if (!ret) {
> - set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
> - } else {
> - if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
> - __nvme_rdma_stop_queue(queue);
> - dev_info(ctrl->ctrl.device,
> - "failed to connect queue: %d ret=%d\n", 0, ret);
> - }
> - return ret;
> + return nvmf_connect_admin_queue(ctrl);
> }
>
> -static int nvme_rdma_start_io_queue(struct nvme_ctrl *nctrl, int idx)
> +static int nvme_rdma_start_io_queue(struct nvme_ctrl *ctrl, int idx)
> {
> - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
> - struct nvme_rdma_queue *queue = &ctrl->queues[idx];
> - int ret;
> -
> - ret = nvmf_connect_io_queue(nctrl, idx);
> - if (!ret) {
> - set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
> - } else {
> - if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
> - __nvme_rdma_stop_queue(queue);
> - dev_info(ctrl->ctrl.device,
> - "failed to connect queue: %d ret=%d\n", idx, ret);
> - }
> - return ret;
> + return nvmf_connect_io_queue(ctrl, idx);
> }
>
> static void nvme_rdma_stop_admin_queue(struct nvme_ctrl *nctrl)
> @@ -1715,7 +1679,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
> struct nvme_rdma_qe *sqe = &req->sqe;
> struct nvme_command *c = nvme_req(rq)->cmd;
> struct ib_device *dev;
> - bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
> + bool queue_ready = test_bit(NVME_FABRICS_Q_LIVE, &queue->flags);
> blk_status_t ret;
> int err;
>
> @@ -2027,6 +1991,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
> if (!ctrl->queues)
> goto out_free_ctrl;
>
> + ctrl->ctrl.queues = kcalloc(ctrl->ctrl.queue_count,
> + sizeof(*ctrl->ctrl.queues),
> + GFP_KERNEL);
> + if (!ctrl->ctrl.queues)
> + goto out_free_ctrl_queues;
> +
> ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
> 0 /* no quirks, we're perfect! */);
> if (ret)
> @@ -2054,7 +2024,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
> if (ret > 0)
> ret = -EIO;
> return ERR_PTR(ret);
> + kfree(ctrl->queues);
> out_kfree_queues:
> + kfree(ctrl->ctrl.queues);
> +out_free_ctrl_queues:
> kfree(ctrl->queues);
> out_free_ctrl:
> kfree(ctrl);
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 32c4346b7322..dfdf35b32adc 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -100,12 +100,6 @@ struct nvme_tcp_request {
> enum nvme_tcp_send_state state;
> };
>
> -enum nvme_tcp_queue_flags {
> - NVME_TCP_Q_ALLOCATED = 0,
> - NVME_TCP_Q_LIVE = 1,
> - NVME_TCP_Q_POLLING = 2,
> -};
> -
> enum nvme_tcp_recv_state {
> NVME_TCP_RECV_PDU = 0,
> NVME_TCP_RECV_DATA,
> @@ -903,7 +897,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
> read_lock_bh(&sk->sk_callback_lock);
> queue = sk->sk_user_data;
> if (likely(queue && queue->rd_enabled) &&
> - !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
> + !test_bit(NVME_FABRICS_Q_POLLING, &queue->flags))
> queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
> read_unlock_bh(&sk->sk_callback_lock);
> }
> @@ -1454,6 +1448,7 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
> static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
> struct nvme_tcp_queue *queue)
> {
> + struct nvme_fabrics_queue *fqueue;
> int ret, rcv_pdu_size;
>
> mutex_init(&queue->queue_lock);
> @@ -1463,6 +1458,10 @@ static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
> mutex_init(&queue->send_mutex);
> INIT_WORK(&queue->io_work, nvme_tcp_io_work);
>
> + fqueue = &ctrl->ctrl.queues[nvme_tcp_queue_id(queue)];
> + fqueue->flags = &queue->flags;
> + fqueue->queue_lock = &queue->queue_lock;
> +
> ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
> IPPROTO_TCP, &queue->sock);
> if (ret) {
> @@ -1567,7 +1566,6 @@ static int __nvme_tcp_alloc_queue(struct nvme_tcp_ctrl *ctrl,
> goto err_init_connect;
>
> queue->rd_enabled = true;
> - set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
> nvme_tcp_init_recv_ctx(queue);
>
> write_lock_bh(&queue->sock->sk->sk_callback_lock);
> @@ -1607,9 +1605,6 @@ static void __nvme_tcp_free_queue(struct nvme_tcp_ctrl *ctrl,
> struct page *page;
> unsigned int noreclaim_flag;
>
> - if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
> - return;
> -
> if (queue->hdr_digest || queue->data_digest)
> nvme_tcp_free_crypto(queue);
>
> @@ -1699,40 +1694,14 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
> cancel_work_sync(&queue->io_work);
> }
>
> -static int nvme_tcp_start_admin_queue(struct nvme_ctrl *nctrl)
> +static int nvme_tcp_start_admin_queue(struct nvme_ctrl *ctrl)
> {
> - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
> - struct nvme_tcp_queue *queue = &ctrl->queues[0];
> - int ret;
> -
> - ret = nvmf_connect_admin_queue(nctrl);
> - if (!ret) {
> - set_bit(NVME_TCP_Q_LIVE, &queue->flags);
> - } else {
> - if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
> - __nvme_tcp_stop_queue(queue);
> - dev_err(nctrl->device,
> - "failed to connect queue: %d ret=%d\n", 0, ret);
> - }
> - return ret;
> + return nvmf_connect_admin_queue(ctrl);
> }
>
> -static int nvme_tcp_start_io_queue(struct nvme_ctrl *nctrl, int qid)
> +static int nvme_tcp_start_io_queue(struct nvme_ctrl *ctrl, int qid)
> {
> - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
> - struct nvme_tcp_queue *queue = &ctrl->queues[qid];
> - int ret;
> -
> - ret = nvmf_connect_io_queue(nctrl, qid);
> - if (!ret) {
> - set_bit(NVME_TCP_Q_LIVE, &queue->flags);
> - } else {
> - if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
> - __nvme_tcp_stop_queue(queue);
> - dev_err(nctrl->device,
> - "failed to connect queue: %d ret=%d\n", qid, ret);
> - }
> - return ret;
> + return nvmf_connect_io_queue(ctrl, qid);
> }
>
> static void nvme_tcp_stop_admin_queue(struct nvme_ctrl *nctrl)
> @@ -1740,13 +1709,7 @@ static void nvme_tcp_stop_admin_queue(struct nvme_ctrl *nctrl)
> struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
> struct nvme_tcp_queue *queue = &ctrl->queues[0];
>
> - if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
> - return;
> -
> - mutex_lock(&queue->queue_lock);
> - if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
> - __nvme_tcp_stop_queue(queue);
> - mutex_unlock(&queue->queue_lock);
> + __nvme_tcp_stop_queue(queue);
> }
>
> static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
> @@ -1754,13 +1717,7 @@ static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
> struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
> struct nvme_tcp_queue *queue = &ctrl->queues[qid];
>
> - if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
> - return;
> -
> - mutex_lock(&queue->queue_lock);
> - if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
> - __nvme_tcp_stop_queue(queue);
> - mutex_unlock(&queue->queue_lock);
> + __nvme_tcp_stop_queue(queue);
> }
>
> static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
> @@ -1843,6 +1800,7 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
>
> nvmf_free_options(nctrl->opts);
> free_ctrl:
> + kfree(ctrl->ctrl.queues);
> kfree(ctrl->queues);
> kfree(ctrl);
> }
> @@ -2043,7 +2001,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
> struct nvme_tcp_queue *queue = hctx->driver_data;
> struct request *rq = bd->rq;
> struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
> - bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
> + bool queue_ready = test_bit(NVME_FABRICS_Q_LIVE, &queue->flags);
> blk_status_t ret;
>
> if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
> @@ -2108,14 +2066,14 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
> struct nvme_tcp_queue *queue = hctx->driver_data;
> struct sock *sk = queue->sock->sk;
>
> - if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
> + if (!test_bit(NVME_FABRICS_Q_LIVE, &queue->flags))
> return 0;
>
> - set_bit(NVME_TCP_Q_POLLING, &queue->flags);
> + set_bit(NVME_FABRICS_Q_POLLING, &queue->flags);
> if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
> sk_busy_loop(sk, true);
> nvme_tcp_try_recv(queue);
> - clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
> + clear_bit(NVME_FABRICS_Q_POLLING, &queue->flags);
> return queue->nr_cqe;
> }
>
> @@ -2129,7 +2087,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
>
> mutex_lock(&queue->queue_lock);
>
> - if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
> +	if (!test_bit(NVME_FABRICS_Q_LIVE, &queue->flags))
> goto done;
> ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
> if (ret > 0) {
> @@ -2282,6 +2240,13 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
> ret = -ENOMEM;
> goto out_free_ctrl;
> }
> + ctrl->ctrl.queues = kcalloc(ctrl->ctrl.queue_count,
> + sizeof(*ctrl->ctrl.queues),
> + GFP_KERNEL);
> + if (!ctrl->ctrl.queues) {
> + ret = -ENOMEM;
> + goto out_free_ctrl_queue;
> + }
>
> ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
> if (ret)
> @@ -2313,6 +2278,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
> ret = -EIO;
> return ERR_PTR(ret);
> out_kfree_queues:
> + kfree(ctrl->ctrl.queues);
> +out_free_ctrl_queue:
> kfree(ctrl->queues);
> out_free_ctrl:
> kfree(ctrl);

2023-05-04 10:40:53

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [RFC v3 0/9] Unifying fabrics drivers

> I've rebased on nvme-6.4 and gave it a bit of testing. Up to the last patch it seems to
> work fine, at least blktests doesn't trigger any errors.
>
> I've tried to split the pure moving code around part from the part where we need
> to change some code. That means the last two patches highlight the problems I
> run into with this attempt to implement the setup_transport() callback

This should be perfectly fine for covering loop as well. I think that
eventually it can cover pci too, but we can promote it there in time
perhaps.

2023-05-04 10:43:36

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [RFC v3 9/9] nvme: introduce setup_transport()


> Do the tag allocation in the new setup function.
>
> Nope, this doesn't work because the tag allocation wants to map
> the queues, but we haven't allocated them yet.

I don't think that the tag allocation is something that
setup_transport should do.

This would be something that the driver would need to do at the
device/platform level perhaps.
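
Just to illustrate one possible reading of that (rough, untested
sketch; nvme_tcp_probe_ctrl is a made-up wrapper name, the helpers are
the ones from this series): the driver allocates its admin tag set
itself and only then hands over to the generic setup:

	static int nvme_tcp_probe_ctrl(struct nvme_ctrl *ctrl, bool new)
	{
		int ret;

		if (new) {
			/* driver-level resource, not a fabrics.c callback */
			ret = nvme_alloc_admin_tag_set(ctrl,
					&to_tcp_ctrl(ctrl)->admin_tag_set,
					&nvme_tcp_admin_mq_ops,
					sizeof(struct nvme_tcp_request));
			if (ret)
				return ret;
		}

		ret = nvmf_setup_ctrl(ctrl, new);
		if (ret && new)
			nvme_remove_admin_tag_set(ctrl);
		return ret;
	}

The I/O tag set would still need a home after the queue counts are
known, which is exactly the ordering problem the patch description
runs into.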

>
> Signed-off-by: Daniel Wagner <[email protected]>
> ---
> drivers/nvme/host/fabrics.c | 26 ++++------------
> drivers/nvme/host/fabrics.h | 5 ++--
> drivers/nvme/host/rdma.c | 60 +++++++++++++++++++++----------------
> drivers/nvme/host/tcp.c | 31 +++++++++++--------
> 4 files changed, 59 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 5f212cb9421a..06e9cf0c9e84 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -1325,12 +1325,6 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
> if (ret)
> return ret;
>
> - if (new) {
> - ret = ctrl->fabrics_ops->alloc_tag_set(ctrl);
> - if (ret)
> - goto out_free_io_queues;
> - }
> -
> /*
> * Only start IO queues for which we have allocated the tagset
> * and limitted it to the available queues. On reconnects, the
> @@ -1374,9 +1368,6 @@ static int nvmf_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
> nvmf_stop_io_queues(ctrl);
> out_cleanup_connect_q:
> nvme_cancel_tagset(ctrl);
> - if (new)
> - nvme_remove_io_tag_set(ctrl);
> -out_free_io_queues:
> nvmf_free_io_queues(ctrl);
> return ret;
> }
> @@ -1389,16 +1380,9 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
> if (ret)
> return ret;
>
> - if (new) {
> - ret = ctrl->fabrics_ops->alloc_admin_tag_set(ctrl);
> - if (ret)
> - goto out_free_admin_queue;
> -
> - }
> -
> ret = __nvmf_start_admin_queue(ctrl);
> if (ret)
> - goto out_remove_admin_tag_set;
> + goto out_remove_admin_queue;
>
> ret = nvme_enable_ctrl(ctrl);
> if (ret)
> @@ -1418,10 +1402,7 @@ static int nvmf_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
> out_stop_queue:
> __nvmf_stop_admin_queue(ctrl);
> nvme_cancel_admin_tagset(ctrl);
> -out_remove_admin_tag_set:
> - if (new)
> - nvme_remove_admin_tag_set(ctrl);
> -out_free_admin_queue:
> +out_remove_admin_queue:
> __nvmf_free_admin_queue(ctrl);
> return ret;
> }
> @@ -1489,6 +1470,9 @@ int nvmf_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
> struct nvmf_ctrl_options *opts = ctrl->opts;
> int ret;
>
> + if (new)
> + ctrl->fabrics_ops->setup_transport(ctrl);
> +
> ret = nvmf_configure_admin_queue(ctrl, new);
> if (ret)
> return ret;
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index 345d6de6bc86..ad4734df7342 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -173,6 +173,7 @@ struct nvmf_transport_ops {
> };
>
> struct nvme_fabrics_ops {
> + int (*setup_transport)(struct nvme_ctrl *ctrl);
> int (*alloc_admin_queue)(struct nvme_ctrl *ctrl);
> int (*start_admin_queue)(struct nvme_ctrl *ctrl);
> void (*stop_admin_queue)(struct nvme_ctrl *ctrl);
> @@ -182,9 +183,7 @@ struct nvme_fabrics_ops {
> void (*stop_io_queue)(struct nvme_ctrl *ctrl, int qid);
> void (*free_io_queue)(struct nvme_ctrl *ctrl, int qid);
>
> - /* these should be replaced with a single one setup_transport() */
> - int (*alloc_admin_tag_set)(struct nvme_ctrl *ctrl);
> - int (*alloc_tag_set)(struct nvme_ctrl *ctrl);
> +	/* these should move to setup_transport() as well */
> unsigned int (*nr_io_queues)(struct nvme_ctrl *ctrl);
> void (*set_io_queues)(struct nvme_ctrl *ctrl, unsigned int nr_io_queues);
> };
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 023316fdc2c6..015a6bde732a 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -743,6 +743,39 @@ static void nvme_rdma_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
> __nvme_rdma_stop_queue(queue);
> }
>
> +static int nvme_rdma_setup_transport(struct nvme_ctrl *ctrl)
> +{
> + unsigned int cmd_size;
> + int ret;
> +
> + ret = nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
> + &nvme_rdma_admin_mq_ops,
> + sizeof(struct nvme_rdma_request) +
> + NVME_RDMA_DATA_SGL_SIZE);
> + if (ret)
> + return ret;
> +
> + cmd_size = sizeof(struct nvme_rdma_request) +
> + NVME_RDMA_DATA_SGL_SIZE;
> +
> + if (ctrl->max_integrity_segments)
> + cmd_size += sizeof(struct nvme_rdma_sgl) +
> + NVME_RDMA_METADATA_SGL_SIZE;
> +
> + ret = nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
> + &nvme_rdma_mq_ops,
> + ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
> + cmd_size);
> + if (ret)
> + goto out_free_admin_tag_set;
> +
> + return 0;
> +
> +out_free_admin_tag_set:
> + nvme_remove_admin_tag_set(ctrl);
> + return ret;
> +}
> +
> static unsigned int nvme_rdma_nr_io_queues(struct nvme_ctrl *ctrl)
> {
> struct ib_device *ibdev = to_rdma_ctrl(ctrl)->device->dev;
> @@ -802,21 +835,6 @@ static void nvme_rdma_set_io_queues(struct nvme_ctrl *nctrl,
> }
> }
>
> -static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
> -{
> - unsigned int cmd_size = sizeof(struct nvme_rdma_request) +
> - NVME_RDMA_DATA_SGL_SIZE;
> -
> - if (ctrl->max_integrity_segments)
> - cmd_size += sizeof(struct nvme_rdma_sgl) +
> - NVME_RDMA_METADATA_SGL_SIZE;
> -
> - return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
> - &nvme_rdma_mq_ops,
> - ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
> - cmd_size);
> -}
> -
> static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
> {
> struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
> @@ -834,15 +852,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
> kfree(ctrl);
> }
>
> -static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
> -{
> -
> - return nvme_alloc_admin_tag_set(ctrl, &to_rdma_ctrl(ctrl)->admin_tag_set,
> - &nvme_rdma_admin_mq_ops,
> - sizeof(struct nvme_rdma_request) +
> - NVME_RDMA_DATA_SGL_SIZE);
> -}
> -
> static void nvme_rdma_end_request(struct nvme_rdma_request *req)
> {
> struct request *rq = blk_mq_rq_from_pdu(req);
> @@ -1915,6 +1924,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
> }
>
> static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
> + .setup_transport = nvme_rdma_setup_transport,
> .alloc_admin_queue = nvme_rdma_alloc_admin_queue,
> .free_admin_queue = nvme_rdma_free_admin_queue,
> .start_admin_queue = nvme_rdma_start_admin_queue,
> @@ -1923,8 +1933,6 @@ static struct nvme_fabrics_ops nvme_rdma_fabrics_ops = {
> .free_io_queue = nvme_rdma_free_io_queue,
> .start_io_queue = nvme_rdma_start_io_queue,
> .stop_io_queue = nvme_rdma_stop_io_queue,
> - .alloc_admin_tag_set = nvme_rdma_alloc_admin_tag_set,
> - .alloc_tag_set = nvme_rdma_alloc_tag_set,
> .nr_io_queues = nvme_rdma_nr_io_queues,
> .set_io_queues = nvme_rdma_set_io_queues,
> };
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index dfdf35b32adc..f91575b944a2 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1720,20 +1720,26 @@ static void nvme_tcp_stop_io_queue(struct nvme_ctrl *nctrl, int qid)
> __nvme_tcp_stop_queue(queue);
> }
>
> -static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *ctrl)
> +static int nvme_tcp_setup_transport(struct nvme_ctrl *ctrl)
> {
> - return nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
> - &nvme_tcp_admin_mq_ops,
> - sizeof(struct nvme_tcp_request));
> -}
> + int ret;
>
> -static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *ctrl)
> -{
> - return nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
> - &nvme_tcp_mq_ops,
> - ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
> - sizeof(struct nvme_tcp_request));
> + ret = nvme_alloc_admin_tag_set(ctrl, &to_tcp_ctrl(ctrl)->admin_tag_set,
> + &nvme_tcp_admin_mq_ops,
> + sizeof(struct nvme_tcp_request));
> + if (ret)
> + return ret;
> +
> + ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
> + &nvme_tcp_mq_ops,
> + ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
> + sizeof(struct nvme_tcp_request));
> + if (ret)
> +		goto out_free_admin_tag_set;
> +
> +	return 0;
>
> +out_free_admin_tag_set:
> + nvme_remove_admin_tag_set(ctrl);
> + return ret;
> }
>
> static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
> @@ -2155,6 +2161,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
> }
>
> static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
> + .setup_transport = nvme_tcp_setup_transport,
> .alloc_admin_queue = nvme_tcp_alloc_admin_queue,
> .free_admin_queue = nvme_tcp_free_admin_queue,
> .start_admin_queue = nvme_tcp_start_admin_queue,
> @@ -2163,8 +2170,6 @@ static struct nvme_fabrics_ops nvme_tcp_fabrics_ops = {
> .free_io_queue = nvme_tcp_free_io_queue,
> .start_io_queue = nvme_tcp_start_io_queue,
> .stop_io_queue = nvme_tcp_stop_io_queue,
> - .alloc_admin_tag_set = nvme_tcp_alloc_admin_tag_set,
> - .alloc_tag_set = nvme_tcp_alloc_tag_set,
> .nr_io_queues = nvme_tcp_nr_io_queues,
> .set_io_queues = nvme_tcp_set_io_queues,
> };