2023-09-12 14:15:05

by Felix Yan

[permalink] [raw]
Subject: Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec

On 9/12/23 02:00, Keith Busch wrote:
> What do you think about this change instead? We don't need to print a
> warning on every device reset, but we should probably add a comment
> explaining why this is happening.
>
> ---
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 37b6fa7466620..b4577a860e677 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2245,6 +2245,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> else
> ctrl->ctrl_config = NVME_CC_CSS_NVM;
>
> + timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> u32 crto;
>
> @@ -2257,12 +2258,15 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
>
> if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> ctrl->ctrl_config |= NVME_CC_CRIME;
> - timeout = NVME_CRTO_CRIMT(crto);
> + /*
> + * CRIMT should always be greater or equal to CAP.TO,
> + * but some devices are known to get this wrong. Use
> + * the larger of the two values.
> + */
> + timeout = max(timeout, NVME_CRTO_CRIMT(crto));
> } else {
> timeout = NVME_CRTO_CRWMT(crto);
> }
> - } else {
> - timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> }
>
> ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;

I'm fine either way.

Should we also apply the same max() on the NVME_CRTO_CRIMT branch
though? The spec actually says the same thing (Timeout should be FFh)
for that too.

--
Regards,
Felix Yan


Attachments:
OpenPGP_signature.asc (855.00 B)
OpenPGP digital signature

2023-09-12 15:58:01

by Keith Busch

[permalink] [raw]
Subject: Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec

On Tue, Sep 12, 2023 at 09:26:19AM +0300, Felix Yan wrote:
>
> Should we also apply the same max() on the NVME_CRTO_CRIMT branch though?
> The spec actually says the same thing (Timeout should be FFh) for that too.

The spec is weird here: the CAP.TO value depends on the CC setting, but
we read and cache CAP.TO before setting CC, so TO is always associated
to CRWMT. We'll need to refresh the CAP value after the initial CC
write, but before final CC.EN.

---
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa7466620..4adc0b2f12f1e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2245,25 +2245,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;

- if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
- u32 crto;
-
- ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
- if (ret) {
- dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
- ret);
- return ret;
- }
-
- if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
- ctrl->ctrl_config |= NVME_CC_CRIME;
- timeout = NVME_CRTO_CRIMT(crto);
- } else {
- timeout = NVME_CRTO_CRWMT(crto);
- }
- } else {
- timeout = NVME_CAP_TIMEOUT(ctrl->cap);
- }
+ if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
+ ctrl->ctrl_config |= NVME_CC_CRIME;

ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2277,6 +2260,33 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
if (ret)
return ret;

+ /* CAP value may change after initial CC write */
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
+ if (ret)
+ return ret;
+
+ timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+ if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+ u32 crto;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+ if (ret) {
+ dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+ ret);
+ return ret;
+ }
+
+ /*
+ * CRTO should always be greater or equal to CAP.TO, but some
+ * devices are known to get this wrong. Use the larger of the
+ * two values.
+ */
+ if (ctrl->ctrl_config & NVME_CC_CRIME)
+ timeout = max(timeout, NVME_CRTO_CRIMT(crto));
+ else
+ timeout = max(timeout, NVME_CRTO_CRWMT(crto));
+ }
+
ctrl->ctrl_config |= NVME_CC_ENABLE;
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
--

2023-09-13 02:27:06

by Felix Yan

[permalink] [raw]
Subject: Re: [PATCH] nvme-pci: ignore bogus CRTO according to NVME 2.0 spec

On 9/12/23 18:44, Keith Busch wrote:
> On Tue, Sep 12, 2023 at 09:26:19AM +0300, Felix Yan wrote:
>>
>> Should we also apply the same max() on the NVME_CRTO_CRIMT branch though?
>> The spec actually says the same thing (Timeout should be FFh) for that too.
>
> The spec is weird here: the CAP.TO value depends on the CC setting, but
> we read and cache CAP.TO before setting CC, so TO is always associated
> to CRWMT. We'll need to refresh the CAP value after the initial CC
> write, but before final CC.EN.
>
> ---
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 37b6fa7466620..4adc0b2f12f1e 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2245,25 +2245,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> else
> ctrl->ctrl_config = NVME_CC_CSS_NVM;
>
> - if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> - u32 crto;
> -
> - ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
> - if (ret) {
> - dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
> - ret);
> - return ret;
> - }
> -
> - if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> - ctrl->ctrl_config |= NVME_CC_CRIME;
> - timeout = NVME_CRTO_CRIMT(crto);
> - } else {
> - timeout = NVME_CRTO_CRWMT(crto);
> - }
> - } else {
> - timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> - }
> + if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
> + ctrl->ctrl_config |= NVME_CC_CRIME;
>
> ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
> ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
> @@ -2277,6 +2260,33 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> if (ret)
> return ret;
>
> + /* CAP value may change after initial CC write */
> + ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
> + if (ret)
> + return ret;
> +
> + timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> + u32 crto;
> +
> + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
> + if (ret) {
> + dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
> + ret);
> + return ret;
> + }
> +
> + /*
> + * CRTO should always be greater or equal to CAP.TO, but some
> + * devices are known to get this wrong. Use the larger of the
> + * two values.
> + */
> + if (ctrl->ctrl_config & NVME_CC_CRIME)
> + timeout = max(timeout, NVME_CRTO_CRIMT(crto));
> + else
> + timeout = max(timeout, NVME_CRTO_CRWMT(crto));
> + }
> +
> ctrl->ctrl_config |= NVME_CC_ENABLE;
> ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
> if (ret)

Thanks for getting into the details. This looks great to me.

I have also tried it on top of 6.5.2 kernel and verified that it works
as expected.

--
Regards,
Felix Yan


Attachments:
OpenPGP_signature.asc (855.00 B)
OpenPGP digital signature