I've splitted the last patch into the hopefully non controversial part 'do not
retry when DNR is set' and the 'don't retry auth failures'. I hope we can get at
least the first few patches in and have a lively discussion on the final patch
at LSF.
changes:
v7:
- updated commit message patch #3
- split last patch so that only the DNR checks
are added first and then the auth reconnect failure test
v6:
- reorder series
- extended nvmf_should_reconnect
- added auth retry logic
- https://lore.kernel.org/linux-nvme/[email protected]/
v5:
- nvme: do not mix kernel error code with nvme status
- nvmet: separate nvme status from dhchap status
- https://lore.kernel.org/linux-nvme/[email protected]/
v4:
- rebased
- added 'nvme: fixes for authentication errors' series
- https://lore.kernel.org/all/[email protected]/
v3:
- added my SOB tag
- fixed indention
- https://lore.kernel.org/linux-nvme/[email protected]/
v2:
- refresh/rebase on current head
- extended blktests (nvme/045) to cover this case
(see separate post)
- https://lore.kernel.org/linux-nvme/[email protected]/
v1:
- initial version
- https://lore.kernel.org/linux-nvme/[email protected]/
Daniel Wagner (1):
nvme: do not retry authentication failures
Hannes Reinecke (4):
nvmet: lock config semaphore when accessing DH-HMAC-CHAP key
nvmet: return DHCHAP status codes from nvmet_setup_auth()
nvme: return kernel error codes for admin queue connect
nvme-fabrics: short-circuit reconnect retries
drivers/nvme/host/auth.c | 6 +--
drivers/nvme/host/core.c | 6 +--
drivers/nvme/host/fabrics.c | 51 ++++++++++++++++----------
drivers/nvme/host/fabrics.h | 2 +-
drivers/nvme/host/fc.c | 4 +-
drivers/nvme/host/nvme.h | 2 +-
drivers/nvme/host/rdma.c | 19 ++++++----
drivers/nvme/host/tcp.c | 22 +++++++----
drivers/nvme/target/auth.c | 22 +++++------
drivers/nvme/target/configfs.c | 22 ++++++++---
drivers/nvme/target/fabrics-cmd-auth.c | 49 +++++++++++++------------
drivers/nvme/target/fabrics-cmd.c | 11 +++---
drivers/nvme/target/nvmet.h | 8 ++--
13 files changed, 129 insertions(+), 95 deletions(-)
--
2.44.0
From: Hannes Reinecke <[email protected]>
nvmf_connect_admin_queue returns NVMe error status codes and kernel
error codes. This mixes the different domains which makes maintainability
difficult.
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Sagi Grimberg <[email protected]>
Signed-off-by: Hannes Reinecke <[email protected]>
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/core.c | 6 +++---
drivers/nvme/host/fabrics.c | 31 +++++++++++++------------------
drivers/nvme/host/nvme.h | 2 +-
3 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 504dc352c458..66387bcca8ae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -383,14 +383,14 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
if (likely(nvme_req(req)->status == 0))
return COMPLETE;
- if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
- return AUTHENTICATE;
-
if (blk_noretry_request(req) ||
(nvme_req(req)->status & NVME_SC_DNR) ||
nvme_req(req)->retries >= nvme_max_retries)
return COMPLETE;
+ if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
+ return AUTHENTICATE;
+
if (req->cmd_flags & REQ_NVME_MPATH) {
if (nvme_is_path_error(nvme_req(req)->status) ||
blk_queue_dying(req->q))
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 1f0ea1f32d22..f7eaf9580b4f 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -428,12 +428,6 @@ static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid,
* fabrics-protocol connection of the NVMe Admin queue between the
* host system device and the allocated NVMe controller on the
* target system via a NVMe Fabrics "Connect" command.
- *
- * Return:
- * 0: success
- * > 0: NVMe error status code
- * < 0: Linux errno error code
- *
*/
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
{
@@ -467,7 +461,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
dev_warn(ctrl->device,
"qid 0: secure concatenation is not supported\n");
- ret = NVME_SC_AUTH_REQUIRED;
+ ret = -EOPNOTSUPP;
goto out_free_data;
}
/* Authentication required */
@@ -475,14 +469,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
if (ret) {
dev_warn(ctrl->device,
"qid 0: authentication setup failed\n");
- ret = NVME_SC_AUTH_REQUIRED;
goto out_free_data;
}
ret = nvme_auth_wait(ctrl, 0);
- if (ret)
+ if (ret) {
dev_warn(ctrl->device,
- "qid 0: authentication failed\n");
- else
+ "qid 0: authentication failed, error %d\n",
+ ret);
+ } else
dev_info(ctrl->device,
"qid 0: authenticated\n");
}
@@ -542,7 +536,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
dev_warn(ctrl->device,
"qid 0: secure concatenation is not supported\n");
- ret = NVME_SC_AUTH_REQUIRED;
+ ret = -EOPNOTSUPP;
goto out_free_data;
}
/* Authentication required */
@@ -550,12 +544,13 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
if (ret) {
dev_warn(ctrl->device,
"qid %d: authentication setup failed\n", qid);
- ret = NVME_SC_AUTH_REQUIRED;
- } else {
- ret = nvme_auth_wait(ctrl, qid);
- if (ret)
- dev_warn(ctrl->device,
- "qid %u: authentication failed\n", qid);
+ goto out_free_data;
+ }
+ ret = nvme_auth_wait(ctrl, qid);
+ if (ret) {
+ dev_warn(ctrl->device,
+ "qid %u: authentication failed, error %d\n",
+ qid, ret);
}
}
out_free_data:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d0ed64dc7380..9b8904a476b8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1122,7 +1122,7 @@ static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
}
static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
{
- return NVME_SC_AUTH_REQUIRED;
+ return -EPROTONOSUPPORT;
}
static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
#endif
--
2.44.0
From: Hannes Reinecke <[email protected]>
Returning a nvme status from nvme_tcp_setup_ctrl() indicates that the
association was established and we have received a status from the
controller; consequently we should honour the DNR bit. If not any future
reconnect attempts will just return the same error, so we can
short-circuit the reconnect attempts and fail the connection directly.
Signed-off-by: Hannes Reinecke <[email protected]>
[dwagner: - extended nvme_should_reconnect]
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/fabrics.c | 14 +++++++++++++-
drivers/nvme/host/fabrics.h | 2 +-
drivers/nvme/host/fc.c | 4 +---
drivers/nvme/host/rdma.c | 19 ++++++++++++-------
drivers/nvme/host/tcp.c | 22 ++++++++++++++--------
5 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index f7eaf9580b4f..36d3e2ff27f3 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -559,8 +559,20 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
}
EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
-bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
+/*
+ * Evaluate the status information returned by the transport in order to decided
+ * if a reconnect attempt should be scheduled.
+ *
+ * Do not retry when:
+ *
+ * - the DNR bit is set and the specification states no further connect
+ * attempts with the same set of paramenters should be attempted.
+ */
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
{
+ if (status > 0 && (status & NVME_SC_DNR))
+ return false;
+
if (ctrl->opts->max_reconnects == -1 ||
ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 37c974c38dcb..602135910ae9 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -223,7 +223,7 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
-bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
+bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index a5b29e9ad342..f0b081332749 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3310,12 +3310,10 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, status);
- if (status > 0 && (status & NVME_SC_DNR))
- recon = false;
} else if (time_after_eq(jiffies, rport->dev_loss_end))
recon = false;
- if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (recon && nvmf_should_reconnect(&ctrl->ctrl, status)) {
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %ld "
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 366f0bb4ebfc..821ab3e0fd3b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -982,7 +982,8 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl,
+ int status)
{
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
@@ -992,7 +993,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (nvmf_should_reconnect(&ctrl->ctrl, status)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1104,10 +1105,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1120,7 +1123,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1145,7 +1148,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, 0);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2169,6 +2172,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2179,14 +2183,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index fdbcdcedcee9..3e0c33323320 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2155,7 +2155,8 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
nvme_tcp_destroy_io_queues(ctrl, remove);
}
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl,
+ int status)
{
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
@@ -2165,13 +2166,14 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(ctrl)) {
+ if (nvmf_should_reconnect(ctrl, status)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
ctrl->opts->reconnect_delay * HZ);
} else {
- dev_info(ctrl->device, "Removing controller...\n");
+ dev_info(ctrl->device, "Removing controller (%d)...\n",
+ status);
nvme_delete_ctrl(ctrl);
}
}
@@ -2252,10 +2254,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
struct nvme_tcp_ctrl, connect_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ int ret;
++ctrl->nr_reconnects;
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2268,7 +2272,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->device, "Failed reconnect attempt %d\n",
ctrl->nr_reconnects);
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2295,7 +2299,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
return;
}
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, 0);
}
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2315,6 +2319,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, reset_work);
+ int ret;
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2328,14 +2333,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->nr_reconnects;
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
--
2.44.0
When the key is invalid there is no point in retrying. Because the auth
code returns kernel error codes only, we can't test on the DNR bit.
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/host/auth.c | 6 +++---
drivers/nvme/host/fabrics.c | 6 ++++++
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index a264b3ae078b..371e14f0a203 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -730,7 +730,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE);
if (ret) {
chap->status = ret;
- chap->error = -ECONNREFUSED;
+ chap->error = -EKEYREJECTED;
return;
}
@@ -797,7 +797,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1);
if (ret) {
chap->status = ret;
- chap->error = -ECONNREFUSED;
+ chap->error = -EKEYREJECTED;
return;
}
@@ -818,7 +818,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
ret = nvme_auth_process_dhchap_success1(ctrl, chap);
if (ret) {
/* Controller authentication failed */
- chap->error = -ECONNREFUSED;
+ chap->error = -EKEYREJECTED;
goto fail2;
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 36d3e2ff27f3..c6ad2148c2e0 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -567,12 +567,18 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
*
* - the DNR bit is set and the specification states no further connect
* attempts with the same set of paramenters should be attempted.
+ *
+ * - when the authentication attempt fails, because the key was invalid.
+ * This error code is set on the host side.
*/
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
{
if (status > 0 && (status & NVME_SC_DNR))
return false;
+ if (status == -EKEYREJECTED)
+ return false;
+
if (ctrl->opts->max_reconnects == -1 ||
ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;
--
2.44.0
From: Hannes Reinecke <[email protected]>
When the DH-HMAC-CHAP key is accessed via configfs we need to take the
config semaphore as a reconnect might be running at the same time.
Reviewed-by: Sagi Grimberg <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Hannes Reinecke <[email protected]>
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/target/auth.c | 2 ++
drivers/nvme/target/configfs.c | 22 +++++++++++++++++-----
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 3ddbc3880cac..9afc28f1ffac 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -44,6 +44,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
dhchap_secret = kstrdup(secret, GFP_KERNEL);
if (!dhchap_secret)
return -ENOMEM;
+ down_write(&nvmet_config_sem);
if (set_ctrl) {
kfree(host->dhchap_ctrl_secret);
host->dhchap_ctrl_secret = strim(dhchap_secret);
@@ -53,6 +54,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
host->dhchap_secret = strim(dhchap_secret);
host->dhchap_key_hash = key_hash;
}
+ up_write(&nvmet_config_sem);
return 0;
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 77a6e817b315..7c28b9c0ee57 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1990,11 +1990,17 @@ static struct config_group nvmet_ports_group;
static ssize_t nvmet_host_dhchap_key_show(struct config_item *item,
char *page)
{
- u8 *dhchap_secret = to_host(item)->dhchap_secret;
+ u8 *dhchap_secret;
+ ssize_t ret;
+ down_read(&nvmet_config_sem);
+ dhchap_secret = to_host(item)->dhchap_secret;
if (!dhchap_secret)
- return sprintf(page, "\n");
- return sprintf(page, "%s\n", dhchap_secret);
+ ret = sprintf(page, "\n");
+ else
+ ret = sprintf(page, "%s\n", dhchap_secret);
+ up_read(&nvmet_config_sem);
+ return ret;
}
static ssize_t nvmet_host_dhchap_key_store(struct config_item *item,
@@ -2018,10 +2024,16 @@ static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item,
char *page)
{
u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret;
+ ssize_t ret;
+ down_read(&nvmet_config_sem);
+ dhchap_secret = to_host(item)->dhchap_ctrl_secret;
if (!dhchap_secret)
- return sprintf(page, "\n");
- return sprintf(page, "%s\n", dhchap_secret);
+ ret = sprintf(page, "\n");
+ else
+ ret = sprintf(page, "%s\n", dhchap_secret);
+ up_read(&nvmet_config_sem);
+ return ret;
}
static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item,
--
2.44.0
From: Hannes Reinecke <[email protected]>
A failure in nvmet_setup_auth() does not mean that the NVMe
authentication command failed, so we should rather return a protocol
error with a 'failure1' response than an NVMe status.
Also update the type used for dhchap_step and dhchap_status to u8 to
avoid confusions with nvme status. Furthermore, split dhchap_status and
nvme status so we don't accidentally mix these return values.
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Hannes Reinecke <[email protected]>
[dwagner: - use u8 as type for dhchap_{step|status}
- separate nvme status from dhcap_status]
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/target/auth.c | 20 +++++------
drivers/nvme/target/fabrics-cmd-auth.c | 49 +++++++++++++-------------
drivers/nvme/target/fabrics-cmd.c | 11 +++---
drivers/nvme/target/nvmet.h | 8 ++---
4 files changed, 43 insertions(+), 45 deletions(-)
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 9afc28f1ffac..53bf1a084469 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -126,12 +126,11 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
return ret;
}
-int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
+u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl)
{
int ret = 0;
struct nvmet_host_link *p;
struct nvmet_host *host = NULL;
- const char *hash_name;
down_read(&nvmet_config_sem);
if (nvmet_is_disc_subsys(ctrl->subsys))
@@ -149,13 +148,16 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
}
if (!host) {
pr_debug("host %s not found\n", ctrl->hostnqn);
- ret = -EPERM;
+ ret = NVME_AUTH_DHCHAP_FAILURE_FAILED;
goto out_unlock;
}
ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id);
- if (ret < 0)
+ if (ret < 0) {
pr_warn("Failed to setup DH group");
+ ret = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
+ goto out_unlock;
+ }
if (!host->dhchap_secret) {
pr_debug("No authentication provided\n");
@@ -166,12 +168,6 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
pr_debug("Re-use existing hash ID %d\n",
ctrl->shash_id);
} else {
- hash_name = nvme_auth_hmac_name(host->dhchap_hash_id);
- if (!hash_name) {
- pr_warn("Hash ID %d invalid\n", host->dhchap_hash_id);
- ret = -EINVAL;
- goto out_unlock;
- }
ctrl->shash_id = host->dhchap_hash_id;
}
@@ -180,7 +176,7 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
ctrl->host_key = nvme_auth_extract_key(host->dhchap_secret + 10,
host->dhchap_key_hash);
if (IS_ERR(ctrl->host_key)) {
- ret = PTR_ERR(ctrl->host_key);
+ ret = NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE;
ctrl->host_key = NULL;
goto out_free_hash;
}
@@ -198,7 +194,7 @@ int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
ctrl->ctrl_key = nvme_auth_extract_key(host->dhchap_ctrl_secret + 10,
host->dhchap_ctrl_key_hash);
if (IS_ERR(ctrl->ctrl_key)) {
- ret = PTR_ERR(ctrl->ctrl_key);
+ ret = NVME_AUTH_DHCHAP_FAILURE_NOT_USABLE;
ctrl->ctrl_key = NULL;
goto out_free_hash;
}
diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c
index ee76491e8b12..cb34d644ed08 100644
--- a/drivers/nvme/target/fabrics-cmd-auth.c
+++ b/drivers/nvme/target/fabrics-cmd-auth.c
@@ -31,7 +31,7 @@ void nvmet_auth_sq_init(struct nvmet_sq *sq)
sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
}
-static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
+static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmf_auth_dhchap_negotiate_data *data = d;
@@ -109,7 +109,7 @@ static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d)
return 0;
}
-static u16 nvmet_auth_reply(struct nvmet_req *req, void *d)
+static u8 nvmet_auth_reply(struct nvmet_req *req, void *d)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmf_auth_dhchap_reply_data *data = d;
@@ -172,7 +172,7 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d)
return 0;
}
-static u16 nvmet_auth_failure2(void *d)
+static u8 nvmet_auth_failure2(void *d)
{
struct nvmf_auth_dhchap_failure_data *data = d;
@@ -186,6 +186,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
void *d;
u32 tl;
u16 status = 0;
+ u8 dhchap_status;
if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
@@ -237,30 +238,32 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
if (data->auth_type == NVME_AUTH_COMMON_MESSAGES) {
if (data->auth_id == NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE) {
/* Restart negotiation */
- pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__,
- ctrl->cntlid, req->sq->qid);
+ pr_debug("%s: ctrl %d qid %d reset negotiation\n",
+ __func__, ctrl->cntlid, req->sq->qid);
if (!req->sq->qid) {
- if (nvmet_setup_auth(ctrl) < 0) {
- status = NVME_SC_INTERNAL;
- pr_err("ctrl %d qid 0 failed to setup"
- "re-authentication",
+ dhchap_status = nvmet_setup_auth(ctrl);
+ if (dhchap_status) {
+ pr_err("ctrl %d qid 0 failed to setup re-authentication\n",
ctrl->cntlid);
- goto done_failure1;
+ req->sq->dhchap_status = dhchap_status;
+ req->sq->dhchap_step =
+ NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
+ goto done_kfree;
}
}
- req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
+ req->sq->dhchap_step =
+ NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE;
} else if (data->auth_id != req->sq->dhchap_step)
goto done_failure1;
/* Validate negotiation parameters */
- status = nvmet_auth_negotiate(req, d);
- if (status == 0)
+ dhchap_status = nvmet_auth_negotiate(req, d);
+ if (dhchap_status == 0)
req->sq->dhchap_step =
NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE;
else {
req->sq->dhchap_step =
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
- req->sq->dhchap_status = status;
- status = 0;
+ req->sq->dhchap_status = dhchap_status;
}
goto done_kfree;
}
@@ -284,15 +287,14 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
switch (data->auth_id) {
case NVME_AUTH_DHCHAP_MESSAGE_REPLY:
- status = nvmet_auth_reply(req, d);
- if (status == 0)
+ dhchap_status = nvmet_auth_reply(req, d);
+ if (dhchap_status == 0)
req->sq->dhchap_step =
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1;
else {
req->sq->dhchap_step =
NVME_AUTH_DHCHAP_MESSAGE_FAILURE1;
- req->sq->dhchap_status = status;
- status = 0;
+ req->sq->dhchap_status = dhchap_status;
}
goto done_kfree;
case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2:
@@ -301,13 +303,12 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
__func__, ctrl->cntlid, req->sq->qid);
goto done_kfree;
case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2:
- status = nvmet_auth_failure2(d);
- if (status) {
+ dhchap_status = nvmet_auth_failure2(d);
+ if (dhchap_status) {
pr_warn("ctrl %d qid %d: authentication failed (%d)\n",
- ctrl->cntlid, req->sq->qid, status);
- req->sq->dhchap_status = status;
+ ctrl->cntlid, req->sq->qid, dhchap_status);
+ req->sq->dhchap_status = dhchap_status;
req->sq->authenticated = false;
- status = 0;
}
goto done_kfree;
default:
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index f6714453b8bb..69d77d34bec1 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -211,7 +211,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmf_connect_data *d;
struct nvmet_ctrl *ctrl = NULL;
u16 status;
- int ret;
+ u8 dhchap_status;
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
@@ -251,11 +251,12 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
uuid_copy(&ctrl->hostid, &d->hostid);
- ret = nvmet_setup_auth(ctrl);
- if (ret < 0) {
- pr_err("Failed to setup authentication, error %d\n", ret);
+ dhchap_status = nvmet_setup_auth(ctrl);
+ if (dhchap_status) {
+ pr_err("Failed to setup authentication, dhchap status %u\n",
+ dhchap_status);
nvmet_ctrl_put(ctrl);
- if (ret == -EPERM)
+ if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR);
else
status = NVME_SC_INTERNAL;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 257cace53924..5d6adb5c2fc0 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,8 +113,8 @@ struct nvmet_sq {
bool authenticated;
struct delayed_work auth_expired_work;
u16 dhchap_tid;
- u16 dhchap_status;
- int dhchap_step;
+ u8 dhchap_status;
+ u8 dhchap_step;
u8 *dhchap_c1;
u8 *dhchap_c2;
u32 dhchap_s1;
@@ -721,7 +721,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req);
int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
bool set_ctrl);
int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash);
-int nvmet_setup_auth(struct nvmet_ctrl *ctrl);
+u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl);
void nvmet_auth_sq_init(struct nvmet_sq *sq);
void nvmet_destroy_auth(struct nvmet_ctrl *ctrl);
void nvmet_auth_sq_free(struct nvmet_sq *sq);
@@ -740,7 +740,7 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
u8 *buf, int buf_size);
#else
-static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl)
+static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl)
{
return 0;
}
--
2.44.0
On Tue, Apr 30, 2024 at 03:19:23PM +0200, Daniel Wagner wrote:
> I've splitted the last patch into the hopefully non controversial part 'do not
> retry when DNR is set' and the 'don't retry auth failures'. I hope we can get at
> least the first few patches in and have a lively discussion on the final patch
> at LSF.
I've dropped the previous series from the nvme-6.9 branch. This series
has been added to the newly created nvme-6.10 branch.
On 30/04/2024 16:19, Daniel Wagner wrote:
> When the key is invalid there is no point in retrying. Because the auth
> code returns kernel error codes only, we can't test on the DNR bit.
I still think that this patch has issues in the sense that in certain cases
the controller can suddenly disappear.