A rebase of Hannes' two series which fix the TCP and RDMA transports to handle
the DNR bit on connect attempts.
For testing I extended the nvme/045 test case. I'll update the test case later
when the current batch of blktests changes is done. Also, this change depends on
the extension of the debugfs interface of nvmet, which is not yet merged either.
echo "Renew host key on the controller and force reconnect"
new_hostkey="$(nvme gen-dhchap-key -n ${def_subsysnqn} 2> /dev/null)"
_set_nvmet_hostkey "${def_hostnqn}" "${new_hostkey}"
# Force a reconnect
nvmedev=$(_find_nvme_dev "${def_subsysnqn}")
cntlid="$(nvme id-ctrl "/dev/${nvmedev}" | grep cntlid | awk '{print $3}')"
echo "fatal" > /sys/kernel/debug/nvmet/"${def_subsysnqn}/ctrl$((${cntlid}))"/state
nvmf_wait_for_ctrl_delete "${nvmedev}"
baseline:
run 1 loop (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 2.690s ... 2.777s
run 1 tcp (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [failed]
runtime 2.777s ... 8.030s
--- tests/nvme/045.out 2024-04-04 16:14:22.547250311 +0200
+++ /home/wagi/work/blktests/results/nodev/nvme/045.out.bad 2024-04-04 17:29:03.427799336 +0200
@@ -9,5 +9,6 @@
Change hash to hmac(sha512)
Re-authenticate with changed hash
Renew host key on the controller and force reconnect
-disconnected 0 controller(s)
+controller "nvme2" not deleted within 5 seconds
+disconnected 1 controller(s)
Test complete
run 1 rdma (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [failed]
runtime 8.030s ... 9.632s
--- tests/nvme/045.out 2024-04-04 16:14:22.547250311 +0200
+++ /home/wagi/work/blktests/results/nodev/nvme/045.out.bad 2024-04-04 17:29:15.017745115 +0200
@@ -9,5 +9,6 @@
Change hash to hmac(sha512)
Re-authenticate with changed hash
Renew host key on the controller and force reconnect
-disconnected 0 controller(s)
+controller "nvme2" not deleted within 5 seconds
+disconnected 1 controller(s)
Test complete
run 1 fc (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 9.632s ... 3.588s
patched:
run 1 loop (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 6.816s ... 2.492s
run 1 tcp (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 2.492s ... 3.663s
run 1 rdma (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 3.663s ... 3.795s
run 1 fc (nvmet_blkdev_type file)
nvme/045 (Test re-authentication) [passed]
runtime 3.795s ... 2.690s
changes:
v4:
- rebased
- added 'nvme: fixes for authentication errors' series
https://lore.kernel.org/linux-nvme/[email protected]/
v3:
- added my SOB tag
- fixed indentation
- https://lore.kernel.org/linux-nvme/[email protected]/
v2:
- refresh/rebase on current head
- extended blktests (nvme/045) to cover this case
(see separate post)
- https://lore.kernel.org/linux-nvme/[email protected]/
v1:
- initial version
- https://lore.kernel.org/linux-nvme/[email protected]/
*** BLURB HERE ***
Hannes Reinecke (5):
nvme: authentication error are always non-retryable
nvmet: lock config semaphore when accessing DH-HMAC-CHAP key
nvmet: return DHCHAP status codes from nvmet_setup_auth()
nvme-tcp: short-circuit reconnect retries
nvme-rdma: short-circuit reconnect retries
drivers/nvme/host/core.c | 6 +++---
drivers/nvme/host/fabrics.c | 29 +++++++++++++++-----------
drivers/nvme/host/nvme.h | 19 ++++++++++++++++-
drivers/nvme/host/rdma.c | 22 ++++++++++++-------
drivers/nvme/host/tcp.c | 23 +++++++++++++-------
drivers/nvme/target/auth.c | 20 ++++++++----------
drivers/nvme/target/configfs.c | 22 ++++++++++++++-----
drivers/nvme/target/fabrics-cmd-auth.c | 11 +++++-----
8 files changed, 100 insertions(+), 52 deletions(-)
--
2.44.0
On Thu, Apr 04, 2024 at 05:44:55PM +0200, Daniel Wagner wrote:
> changes:
> v4:
> - rebased
> - added 'nvme: fixes for authentication errors' series
> https://lore.kernel.org/linux-nvme/[email protected]/
Please ignore v4 for now. I forgot to update the 'nvme: fixes for
authentication errors' series.
From: Hannes Reinecke <[email protected]>
When the DH-HMAC-CHAP key is accessed via configfs we need to take the
config semaphore as a reconnect might be running at the same time.
Signed-off-by: Hannes Reinecke <[email protected]>
Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/nvme/target/auth.c | 2 ++
drivers/nvme/target/configfs.c | 22 +++++++++++++++++-----
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 3ddbc3880cac..9afc28f1ffac 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -44,6 +44,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
dhchap_secret = kstrdup(secret, GFP_KERNEL);
if (!dhchap_secret)
return -ENOMEM;
+ down_write(&nvmet_config_sem);
if (set_ctrl) {
kfree(host->dhchap_ctrl_secret);
host->dhchap_ctrl_secret = strim(dhchap_secret);
@@ -53,6 +54,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
host->dhchap_secret = strim(dhchap_secret);
host->dhchap_key_hash = key_hash;
}
+ up_write(&nvmet_config_sem);
return 0;
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 77a6e817b315..7c28b9c0ee57 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1990,11 +1990,17 @@ static struct config_group nvmet_ports_group;
static ssize_t nvmet_host_dhchap_key_show(struct config_item *item,
char *page)
{
- u8 *dhchap_secret = to_host(item)->dhchap_secret;
+ u8 *dhchap_secret;
+ ssize_t ret;
+ down_read(&nvmet_config_sem);
+ dhchap_secret = to_host(item)->dhchap_secret;
if (!dhchap_secret)
- return sprintf(page, "\n");
- return sprintf(page, "%s\n", dhchap_secret);
+ ret = sprintf(page, "\n");
+ else
+ ret = sprintf(page, "%s\n", dhchap_secret);
+ up_read(&nvmet_config_sem);
+ return ret;
}
static ssize_t nvmet_host_dhchap_key_store(struct config_item *item,
@@ -2018,10 +2024,16 @@ static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item,
char *page)
{
u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret;
+ ssize_t ret;
+ down_read(&nvmet_config_sem);
+ dhchap_secret = to_host(item)->dhchap_ctrl_secret;
if (!dhchap_secret)
- return sprintf(page, "\n");
- return sprintf(page, "%s\n", dhchap_secret);
+ ret = sprintf(page, "\n");
+ else
+ ret = sprintf(page, "%s\n", dhchap_secret);
+ up_read(&nvmet_config_sem);
+ return ret;
}
static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item,
--
2.44.0
Looks good:
Reviewed-by: Christoph Hellwig <[email protected]>