LinuxLists.cc - [PATCH V2 0/2] nvme-pci: fix the timeout case when reset is ongoing

2018-01-08 10:03:14

Subject: [PATCH V2 0/2] nvme-pci: fix the timeout case when reset is ongoing

Hello

NVME_CTRL_RESETTING used to strictly indicate the range of nvme
initialization in fd634f41 (nvme: merge probe_work and reset_work), but
it no longer does. NVME_CTRL_RESETTING is set before the reset_work is
queued, so there could be a big gap before the reset work handles
the outstanding requests. So when NVME_CTRL_RESETTING is set,
nvme_timeout will encounter not only the admin requests from the
initializing procedure, but also the IO and admin requests from
previous work issued before nvme_dev_disable is invoked.

To fix this, based on Christoph's suggestion, split
NVME_CTRL_RESETTING into NVME_CTRL_RESET_PREPARE
and NVME_CTRL_RESETTING. Before queuing the reset work, change the state
to NVME_CTRL_RESET_PREPARE; after the disable work completes, change the
state to NVME_CTRL_RESETTING. Then we can distinguish the different
requests and handle them separately. For more details, please refer to
the comments in the 2nd patch.

V2:
- split NVME_CTRL_RESETTING into NVME_CTRL_RESET_PREPARE and
NVME_CTRL_RESETTING. Introduce new patch based on this.
- distinguish the requests based on the new state in nvme_timeout
- change comments of patch

Jianchao Wang(2)
0001-nvme-split-resetting-state-into-reset_prepate-and-re.patch
0002-nvme-pci-fix-the-timeout-case-when-reset-is-ongoing.patch

drivers/nvme/host/core.c | 17 ++--------------
drivers/nvme/host/fc.c | 2 --
drivers/nvme/host/nvme.h | 1 -
drivers/nvme/host/pci.c | 51 +++++++++++++---------------------------------
drivers/nvme/host/rdma.c | 8 --------
drivers/nvme/target/loop.c | 5 -----
6 files changed, 16 insertions(+), 68 deletions(-)

Thanks
Jianchao

2018-01-08 10:03:06

by jianchao.wang

[permalink] [raw]

Subject: [PATCH V2 1/2] nvme: split resetting state into reset_prepate and resetting

Currently, ctrl->state is changed to NVME_CTRL_RESETTING
before the reset work is queued. This is not very strict: there could be
a big gap before the reset_work callback is invoked. In addition,
there is some disable work in the reset_work callback that, strictly
speaking, is not part of the reset work and could lead to confusion.

This patch splits NVME_CTRL_RESETTING into NVME_CTRL_RESET_PREPARE
and NVME_CTRL_RESETTING. Before queuing the reset work, the state changes
to NVME_CTRL_RESET_PREPARE; after the disable work completes, the state
changes to NVME_CTRL_RESETTING.

Suggested-by: Christoph Hellwig <[email protected]>
Signed-off-by: Jianchao Wang <[email protected]>
---
drivers/nvme/host/core.c | 17 +++++++++++++++--
drivers/nvme/host/fc.c | 2 ++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 28 ++++++++++++++++++++++------
drivers/nvme/host/rdma.c | 8 ++++++++
drivers/nvme/target/loop.c | 5 +++++
6 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e46e60..106a437 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -87,7 +87,7 @@ static __le32 nvme_get_log_dw10(u8 lid, size_t size)

int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
- if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESET_PREPARE))
return -EBUSY;
if (!queue_work(nvme_wq, &ctrl->reset_work))
return -EBUSY;
@@ -243,7 +243,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
- case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
@@ -253,10 +253,21 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
+
+ case NVME_CTRL_RESETTING:
+ switch (old_state) {
+ case NVME_CTRL_RESET_PREPARE:
+ changed = true;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_RECONNECTING:
switch (old_state) {
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
changed = true;
/* FALLTHRU */
default:
@@ -267,6 +278,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
case NVME_CTRL_RECONNECTING:
changed = true;
/* FALLTHRU */
@@ -2603,6 +2615,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
[NVME_CTRL_RESETTING] = "resetting",
+ [NVME_CTRL_RESET_PREPARE] = "reset-prepare",
[NVME_CTRL_RECONNECTING]= "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
[NVME_CTRL_DEAD] = "dead",
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 794e66e..516c1ea 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -547,6 +547,7 @@ nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
break;

case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
@@ -790,6 +791,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
break;

case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ea1aa52..1f095d7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -120,6 +120,7 @@ enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
NVME_CTRL_RESETTING,
+ NVME_CTRL_RESET_PREPARE,
NVME_CTRL_RECONNECTING,
NVME_CTRL_DELETING,
NVME_CTRL_DEAD,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f5800c3..e477c35 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1141,8 +1141,13 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);

/* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
return false;
+ default:
+ break;
+ }

/* We shouldn't reset unless the controller is on fatal error state
* _or_ if we lost the communication with it.
@@ -1220,7 +1225,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
}
-
/*
* Shutdown the controller immediately and schedule a reset if the
* command was already aborted once before and still hasn't been
@@ -2180,9 +2184,16 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);

- if (dev->ctrl.state == NVME_CTRL_LIVE ||
- dev->ctrl.state == NVME_CTRL_RESETTING)
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_RESET_PREPARE:
nvme_start_freeze(&dev->ctrl);
+ break;
+ default:
+ break;
+ }
+
dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
pdev->error_state != pci_channel_io_normal);
}
@@ -2292,7 +2303,7 @@ static void nvme_reset_work(struct work_struct *work)
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;

- if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
+ if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESET_PREPARE))
goto out;

/*
@@ -2302,6 +2313,11 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);

+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
+ WARN_ON_ONCE(dev->ctrl.state != NVME_CTRL_DELETING);
+ goto out;
+ }
+
result = nvme_pci_enable(dev);
if (result)
goto out;
@@ -2498,7 +2514,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto release_pools;

- nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESET_PREPARE);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));

queue_work(nvme_wq, &dev->ctrl.reset_work);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 37af565..8ae073e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1753,6 +1753,14 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);

+ changed = nvme_change_ctrl_state(&ctrl->ctrl,
+ NVME_CTRL_RESET_PREPARE);
+ if (!changed) {
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+ return;
+ }
+
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto out_fail;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1e21b28..ae9973e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -481,6 +481,11 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);

+ changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING);
+ if (!changed) {
+ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
+ return;
+ }
ret = nvme_loop_configure_admin_queue(ctrl);
if (ret)
goto out_disable;
--
2.7.4

2018-01-08 10:03:18

by jianchao.wang

[permalink] [raw]

Subject: [PATCH V2 2/2] nvme-pci: fix the timeout case when reset is ongoing

Requests could time out while a reset is ongoing.
nvme_timeout will encounter not only the admin requests from the
initializing procedure, but also the IO and admin requests
from previous work issued before nvme_dev_disable is invoked. These
requests should be handled separately.

We can distinguish them through ctrl->state.
If the state is NVME_CTRL_RESET_PREPARE, handle the expired
requests as nvme_cancel_request does.
If the state is NVME_CTRL_RESETTING, the requests should be
from the initializing procedure. Handle them as before. Because
nvme_reset_work will see the error and disable the dev itself,
drop the nvme_dev_disable call here.

Signed-off-by: Jianchao Wang <[email protected]>
---
drivers/nvme/host/pci.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e477c35..2947757 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1212,19 +1212,26 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
}

/*
- * Shutdown immediately if controller times out while starting. The
- * reset work will see the pci device disabled when it gets the forced
- * cancellation error. All outstanding requests are completed on
- * shutdown, so we return BLK_EH_HANDLED.
+ * There could be two kinds of expired reqs when reset is ongoing.
+ * Outstanding IO or admin requests from previous work before the
+ * nvme_reset_work invokes nvme_dev_disable. Handle them as the
+ * nvme_cancel_request. Outstanding admin requests from the
+ * initializing procedure. Set NVME_REQ_CANCELLED flag on them,
+ * then nvme_reset_work will see the error, then disable the device
+ * and remove the ctrl.
*/
- if (dev->ctrl.state == NVME_CTRL_RESETTING) {
- dev_warn(dev->ctrl.device,
- "I/O %d QID %d timeout, disable controller\n",
- req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ switch (dev->ctrl.state) {
+ case NVME_CTRL_RESET_PREPARE:
+ nvme_req(req)->status = NVME_SC_ABORT_REQ;
+ return BLK_EH_HANDLED;
+ case NVME_CTRL_RESETTING:
+ WARN_ON_ONCE(nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_HANDLED;
+ default:
+ break;
}
+
/*
* Shutdown the controller immediately and schedule a reset if the
* command was already aborted once before and still hasn't been
--
2.7.4

2018-01-08 15:23:25

by Keith Busch

[permalink] [raw]

Subject: Re: [PATCH V2 0/2] nvme-pci: fix the timeout case when reset is ongoing

On Tue, Jan 09, 2018 at 10:03:11AM +0800, Jianchao Wang wrote:
> Hello

Sorry for the distraction, but could you possibly fix the date on your
machine? For some reason, lists.infradead.org sorts threads by the time
you claim to have sent your message rather than the time it was received,
and you're a full day in the future. :)

2018-01-09 01:55:19

by jianchao.wang

[permalink] [raw]

Subject: Re: [PATCH V2 0/2] nvme-pci: fix the timeout case when reset is ongoing

Hi Keith

On 01/08/2018 11:26 PM, Keith Busch wrote:
> On Tue, Jan 09, 2018 at 10:03:11AM +0800, Jianchao Wang wrote:
>> Hello
>
> Sorry for the distraction, but could you possibly fix the date on your
> machine? For some reason, lists.infradead.org sorts threads by the time
> you claim to have sent your message rather than the time it was received,
> and you're a full day in the future. :)
>
Many thanks for your kind reminder.
Really sorry about that. I have fixed the time and date. :)

Jianchao

2018-01-10 21:36:35

by kernel test robot

[permalink] [raw]

Subject: Re: [PATCH V2 1/2] nvme: split resetting state into reset_prepate and resetting

Hi Jianchao,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v4.15-rc7]
[cannot apply to block/for-next linus/master hch-configfs/for-next next-20180110]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Jianchao-Wang/nvme-split-resetting-state-into-reset_prepate-and-resetting/20180111-050049
config: x86_64-randconfig-x018-201801 (attached as .config)
compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

In file included from arch/x86/include/asm/bug.h:82:0,
from include/linux/bug.h:5,
from include/linux/jump_label.h:187,
from arch/x86/include/asm/string_64.h:6,
from arch/x86/include/asm/string.h:5,
from include/linux/string.h:19,
from include/linux/scatterlist.h:5,
from drivers/nvme/target/loop.c:15:
drivers/nvme/target/loop.c: In function 'nvme_loop_reset_ctrl_work':
>> drivers/nvme/target/loop.c:486:20: error: 'struct nvme_loop_ctrl' has no member named 'state'
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
^
include/asm-generic/bug.h:68:25: note: in definition of macro 'WARN_ON_ONCE'
int __ret_warn_on = !!(condition); \
^~~~~~~~~

vim +486 drivers/nvme/target/loop.c

473
474 static void nvme_loop_reset_ctrl_work(struct work_struct *work)
475 {
476 struct nvme_loop_ctrl *ctrl =
477 container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
478 bool changed;
479 int ret;
480
481 nvme_stop_ctrl(&ctrl->ctrl);
482 nvme_loop_shutdown_ctrl(ctrl);
483
484 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING);
485 if (!changed) {
> 486 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
487 return;
488 }
489 ret = nvme_loop_configure_admin_queue(ctrl);
490 if (ret)
491 goto out_disable;
492
493 ret = nvme_loop_init_io_queues(ctrl);
494 if (ret)
495 goto out_destroy_admin;
496
497 ret = nvme_loop_connect_io_queues(ctrl);
498 if (ret)
499 goto out_destroy_io;
500
501 blk_mq_update_nr_hw_queues(&ctrl->tag_set,
502 ctrl->ctrl.queue_count - 1);
503
504 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
505 WARN_ON_ONCE(!changed);
506
507 nvme_start_ctrl(&ctrl->ctrl);
508
509 return;
510
511 out_destroy_io:
512 nvme_loop_destroy_io_queues(ctrl);
513 out_destroy_admin:
514 nvme_loop_destroy_admin_queue(ctrl);
515 out_disable:
516 dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
517 nvme_uninit_ctrl(&ctrl->ctrl);
518 nvme_put_ctrl(&ctrl->ctrl);
519 }
520

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation

Attachments:

(No filename) (3.07 kB)
.config.gz (32.06 kB)
Download all attachments