Currently the connection between host and NVMe-oF target gets
disconnected by keep-alive timeout when a user connects to a target
with a relatively large kato value and then sets the smaller kato
with a set features command (e.g. connects with 60 seconds kato value
and then sets 10 seconds kato value).
The cause is that keep alive command interval on the host, which is
defined as unsigned int kato in nvme_ctrl structure, does not follow
the kato value changes.
This patch updates the keep alive interval in the following steps when
the kato is modified by a set features command: stops the keep alive
work queue, then sets the kato as new timer value and re-start the queue.
Signed-off-by: Tatsuya Sasaki <[email protected]>
---
Changes since v1:
- Add nvme_update_keep_alive to update keep alive timer in core routine.
- Add nvme_user_cmd_post to call nvme_update_keep_alive in ioctl.c
drivers/nvme/host/core.c | 12 ++++++++++++
drivers/nvme/host/ioctl.c | 19 +++++++++++++++++++
drivers/nvme/host/nvme.h | 1 +
3 files changed, 32 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfd9dec0c1f6..76f0ee431b11 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1263,6 +1263,18 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
nvme_queue_keep_alive_work(ctrl);
}
+void nvme_update_keep_alive(struct nvme_ctrl *ctrl, unsigned int new_kato)
+{
+ dev_info(ctrl->device,
+ "keep alive commands interval on the host is updated from %u ms to %u ms\n",
+ ctrl->kato * 1000 / 2, new_kato * 1000 / 2);
+
+ nvme_stop_keep_alive(ctrl);
+ ctrl->kato = new_kato;
+ nvme_start_keep_alive(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_update_keep_alive);
+
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 305ddd415e45..79006bfb5537 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -187,6 +187,22 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
return true;
}
+static void nvme_user_cmd_post(struct nvme_passthru_cmd *cmd,
+ struct nvme_ctrl *ctrl)
+{
+ /*
+ * Keep alive commands interval on the host should be updated
+ * when KATO is modified by Set Features commands.
+ */
+ if (cmd->opcode == nvme_admin_set_features &&
+ (cmd->cdw10 & 0xFF) == NVME_FEAT_KATO) {
+ /* ms -> s */
+ unsigned int new_kato = DIV_ROUND_UP(cmd->cdw11, 1000);
+
+ nvme_update_keep_alive(ctrl, new_kato);
+ }
+}
+
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd __user *ucmd)
{
@@ -231,6 +247,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return -EFAULT;
}
+ if (!status)
+ nvme_user_cmd_post(&cmd, ctrl);
+
return status;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..0372480db508 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -666,6 +666,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen,
u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+void nvme_update_keep_alive(struct nvme_ctrl *ctrl, unsigned int new_kato);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
--
2.25.1
On 8/23/21 10:44 PM, sasaki tatsuya wrote:
> Currently the connection between host and NVMe-oF target gets
> disconnected by keep-alive timeout when a user connects to a target
> with a relatively large kato value and then sets the smaller kato
> with a set features command (e.g. connects with 60 seconds kato value
> and then sets 10 seconds kato value).
>
> The cause is that keep alive command interval on the host, which is
> defined as unsigned int kato in nvme_ctrl structure, does not follow
> the kato value changes.
>
> This patch updates the keep alive interval in the following steps when
> the kato is modified by a set features command: stops the keep alive
> work queue, then sets the kato as new timer value and re-start the queue.
>
> Signed-off-by: Tatsuya Sasaki <[email protected]>
> ---
> Changes since v1:
> - Add nvme_update_keep_alive to update keep alive timer in core routine.
> - Add nvme_user_cmd_post to call nvme_update_keep_alive in ioctl.c
>
> drivers/nvme/host/core.c | 12 ++++++++++++
> drivers/nvme/host/ioctl.c | 19 +++++++++++++++++++
> drivers/nvme/host/nvme.h | 1 +
> 3 files changed, 32 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index dfd9dec0c1f6..76f0ee431b11 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1263,6 +1263,18 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
> nvme_queue_keep_alive_work(ctrl);
> }
>
> +void nvme_update_keep_alive(struct nvme_ctrl *ctrl, unsigned int new_kato)
> +{
> + dev_info(ctrl->device,
> + "keep alive commands interval on the host is updated from %u ms to %u ms\n",
> + ctrl->kato * 1000 / 2, new_kato * 1000 / 2);
> +
> + nvme_stop_keep_alive(ctrl);
> + ctrl->kato = new_kato;
> + nvme_start_keep_alive(ctrl);
> +}
> +EXPORT_SYMBOL_GPL(nvme_update_keep_alive);
> +
> void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
> {
> if (unlikely(ctrl->kato == 0))
> diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
> index 305ddd415e45..79006bfb5537 100644
> --- a/drivers/nvme/host/ioctl.c
> +++ b/drivers/nvme/host/ioctl.c
> @@ -187,6 +187,22 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
> return true;
> }
>
> +static void nvme_user_cmd_post(struct nvme_passthru_cmd *cmd,
> + struct nvme_ctrl *ctrl)
> +{
> + /*
> + * Keep alive commands interval on the host should be updated
> + * when KATO is modified by Set Features commands.
> + */
> + if (cmd->opcode == nvme_admin_set_features &&
> + (cmd->cdw10 & 0xFF) == NVME_FEAT_KATO) {
> + /* ms -> s */
no need for this comment.
> + unsigned int new_kato = DIV_ROUND_UP(cmd->cdw11, 1000);
> +
> + nvme_update_keep_alive(ctrl, new_kato);
I think you can now inline nvme_update_keep_alive here, no need to keep
it in a function.
On 8/25/21 5:42 AM JST, Sagi Grimberg wrote:
> > + if (cmd->opcode == nvme_admin_set_features &&
> > + (cmd->cdw10 & 0xFF) == NVME_FEAT_KATO) {
> > + /* ms -> s */
>
> no need for this comment.
Thanks for your comments. I will remove this /* ms -> s*/ comment.
> > + unsigned int new_kato = DIV_ROUND_UP(cmd->cdw11, 1000);
> > +
> > + nvme_update_keep_alive(ctrl, new_kato);
>
> I think you can now inline nvme_update_keep_alive here, no need to keep
> it in a function.
Does this mean the section below needs to be moved from core routine
to nvme_user_cmd_post function?
> > + dev_info(ctrl->device,
> > + "keep alive commands interval on the host is updated from %u ms to %u ms\n",
> > + ctrl->kato * 1000 / 2, new_kato * 1000 / 2);
> > +
> > + nvme_stop_keep_alive(ctrl);
> > + ctrl->kato = new_kato;
> > + nvme_start_keep_alive(ctrl);
Thanks.
Any reason we can't just call this from nvme_passthru_end instead
of inventing a new API? Right now the nvmet passthrough code never
uses the underlying keep alive code, so it doesn't make a difference,
but I expect we'll need more handling for passthrough commands like
this, and we might also grow more users (e.g. the io_uring based
passthrough).
On Tue, Aug 24, 2021 at 01:41:44PM -0700, Sagi Grimberg wrote:
>> +{
>> + /*
>> + * Keep alive commands interval on the host should be updated
>> + * when KATO is modified by Set Features commands.
>> + */
>> + if (cmd->opcode == nvme_admin_set_features &&
>> + (cmd->cdw10 & 0xFF) == NVME_FEAT_KATO) {
>> + /* ms -> s */
>
> no need for this comment.
>
>> + unsigned int new_kato = DIV_ROUND_UP(cmd->cdw11, 1000);
>> +
>> + nvme_update_keep_alive(ctrl, new_kato);
>
> I think you can now inline nvme_update_keep_alive here, no need to keep
> it in a function.
Actually, іf thinking ahead I think one helper per fixup would be really
useful to keep the code organized. But the DIV_ROUND_UP should move into
nvme_update_keep_alive to keep it self-contained.
We can then restructure the caller a bit to make it easier to expand:
switch (cmd->opcode) {
case nvme_admin_set_features:
switch (cmd->cdw10 & 0xff) {
case NVME_FEAT_KATO:
nvme_update_keep_alive(ctrl, cmd);
break;
}
}
On 8/25/21 10:53 AM, [email protected] wrote:
> Any reason we can't just call this from nvme_passthru_end instead
> of inventing a new API? Right now the nvmet passthrough code never
> uses the underlying keep alive code, so it doesn't make a difference,
> but I expect we'll need more handling for passthrough commands like
> this, and we might also grow more users (e.g. the io_uring based
> passthrough).
>
Yeah, we'll need that anyway if and when hostid becomes settable.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
> On 8/25/21 10:53 AM, [email protected] wrote:
> > Any reason we can't just call this from nvme_passthru_end instead
> > of inventing a new API? Right now the nvmet passthrough code never
> > uses the underlying keep alive code, so it doesn't make a difference,
> > but I expect we'll need more handling for passthrough commands like
> > this, and we might also grow more users (e.g. the io_uring based
> > passthrough).
> >
> Yeah, we'll need that anyway if and when hostid becomes settable.
Thanks for your comments. To call nvme_update_keep_alive from
nvme_passthru_end, I think nvme_passthru_end needs an argument of
a pointer to nvme_command. I will try to call it from nvme_passthru_end.
Please correct me, if I am misunderstanding what you mean.
Thanks.