This enables to capture snapshot of controller information via device
coredump machanism, and it helps diagnose and debug issues.
The nvme device coredump is triggered when command timeout occurs, and
creates the following coredump files.
- regs: NVMe controller registers (00h to 4Fh)
- sq<qid>: Submission queue
- cq<qid>: Completion queue
- telemetry-ctrl-log: Telemetry controller-initiated log (if available)
- data: Empty
(I don't have the NVMe device that supports telemetry log page for now, so
capturing telemetry log is untested.)
The device coredump mechanism currently allows drivers to create only a
single coredump file, so this also provides a new function that allows
drivers to create several device coredump files in one crashed device.
* v2
- Add Reviewed-by tag.
- Add patch to fix typo in comment
- Remove unneeded braces.
- Allocate device_entry followed by an array of devcd_file elements.
- Add telemetry log page definisions
- Add facility to check log page attributes
- Exclude the doorbell registers from register dump.
- Save controller registers in a binary format instead of a text format.
- Create an empty 'data' file in the device coredump.
- Save telemetry controller-initiated log if available
- Make coredump procedure into two phases (before resetting controller and
after resetting as soon as admin queue is available).
Akinobu Mita (7):
devcoredump: use memory_read_from_buffer
devcoredump: fix typo in comment
devcoredump: allow to create several coredump files in one device
nvme.h: add telemetry log page definisions
nvme: add facility to check log page attributes
nvme-pci: add device coredump support
nvme-pci: trigger device coredump on command timeout
drivers/base/devcoredump.c | 168 ++++++++++------
drivers/nvme/host/Kconfig | 1 +
drivers/nvme/host/core.c | 2 +
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 460 ++++++++++++++++++++++++++++++++++++++++++--
include/linux/devcoredump.h | 33 ++++
include/linux/nvme.h | 25 +++
7 files changed, 617 insertions(+), 73 deletions(-)
Cc: Johannes Berg <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: Minwoo Im <[email protected]>
--
2.7.4
s/dev_coredumpmsg/dev_coredumpsg/
Cc: Johannes Berg <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: Minwoo Im <[email protected]>
Signed-off-by: Akinobu Mita <[email protected]>
---
* v2
- New patch in this version.
drivers/base/devcoredump.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 3c960a6..e42d0b5 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -314,7 +314,7 @@ void dev_coredumpm(struct device *dev, struct module *owner,
EXPORT_SYMBOL_GPL(dev_coredumpm);
/**
- * dev_coredumpmsg - create device coredump that uses scatterlist as data
+ * dev_coredumpsg - create device coredump that uses scatterlist as data
* parameter
* @dev: the struct device for the crashed device
* @table: the dump data
--
2.7.4
This provides a facility to check whether the controller supports the
telemetry log pages and log page offset field for the Get Log Page
command.
Cc: Johannes Berg <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: Minwoo Im <[email protected]>
Signed-off-by: Akinobu Mita <[email protected]>
---
* v2
- New patch in this version.
drivers/nvme/host/core.c | 1 +
drivers/nvme/host/nvme.h | 1 +
include/linux/nvme.h | 2 ++
3 files changed, 4 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6265d92..42f09d6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2580,6 +2580,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
} else
ctrl->shutdown_timeout = shutdown_timeout;
+ ctrl->lpa = id->lpa;
ctrl->npss = id->npss;
ctrl->apsta = id->apsta;
prev_apst_enabled = ctrl->apst_enabled;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 527d645..8711c71 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -195,6 +195,7 @@ struct nvme_ctrl {
u32 vs;
u32 sgls;
u16 kas;
+ u8 lpa;
u8 npss;
u8 apsta;
u32 oaes;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5217fe4..c1c4ca5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -294,6 +294,8 @@ enum {
NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+ NVME_CTRL_LPA_EXTENDED_DATA = 1 << 2,
+ NVME_CTRL_LPA_TELEMETRY_LOG = 1 << 3,
};
struct nvme_lbaf {
--
2.7.4
This enables the nvme driver to trigger a device coredump when command
timeout occurs, and it helps diagnose and debug issues.
This can be tested with fail_io_timeout fault injection.
# echo 1 > /sys/kernel/debug/fail_io_timeout/probability
# echo 1 > /sys/kernel/debug/fail_io_timeout/times
# echo 1 > /sys/block/nvme0n1/io-timeout-fail
# dd if=/dev/nvme0n1 of=/dev/null
Cc: Johannes Berg <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: Minwoo Im <[email protected]>
Signed-off-by: Akinobu Mita <[email protected]>
---
- Make coredump procedure into two phases (before resetting controller and
after resetting as soon as admin queue is available).
drivers/nvme/host/pci.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4684a86..4ff918f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -87,9 +87,12 @@ MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
struct nvme_queue;
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool dump);
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
+static int nvme_coredump_prologue(struct nvme_dev *dev);
+static void nvme_coredump_epilogue(struct nvme_dev *dev);
+
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
@@ -1289,7 +1292,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_DONE;
}
@@ -1316,7 +1319,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_DONE;
default:
@@ -1332,7 +1335,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, true);
nvme_reset_ctrl(&dev->ctrl);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
@@ -2399,7 +2402,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
}
}
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown, bool dump)
{
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -2424,6 +2427,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
}
+ if (dump)
+ nvme_coredump_prologue(dev);
+
nvme_stop_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) {
@@ -2491,7 +2497,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
nvme_get_ctrl(&dev->ctrl);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
nvme_kill_queues(&dev->ctrl);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
@@ -2513,7 +2519,7 @@ static void nvme_reset_work(struct work_struct *work)
* moving on.
*/
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
mutex_lock(&dev->shutdown_lock);
result = nvme_pci_enable(dev);
@@ -2550,6 +2556,8 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
+ nvme_coredump_epilogue(dev);
+
if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
if (!dev->ctrl.opal_dev)
dev->ctrl.opal_dev =
@@ -2612,6 +2620,7 @@ static void nvme_reset_work(struct work_struct *work)
out_unlock:
mutex_unlock(&dev->shutdown_lock);
out:
+ nvme_coredump_epilogue(dev);
nvme_remove_dead_ctrl(dev, result);
}
@@ -2802,7 +2811,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
}
static void nvme_reset_done(struct pci_dev *pdev)
@@ -2814,7 +2823,7 @@ static void nvme_reset_done(struct pci_dev *pdev)
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
}
/*
@@ -2831,14 +2840,14 @@ static void nvme_remove(struct pci_dev *pdev)
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
nvme_dev_remove_admin(dev);
}
flush_work(&dev->ctrl.reset_work);
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- nvme_dev_disable(dev, true);
+ nvme_dev_disable(dev, true, false);
nvme_release_cmb(dev);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
@@ -2855,7 +2864,7 @@ static int nvme_suspend(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_dev_disable(ndev, true);
+ nvme_dev_disable(ndev, true, false);
return 0;
}
@@ -3307,7 +3316,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
- nvme_dev_disable(dev, false);
+ nvme_dev_disable(dev, false, false);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
dev_warn(dev->ctrl.device,
--
2.7.4
On 5/7/2019 10:58 AM, Akinobu Mita wrote:
> This enables to capture snapshot of controller information via device
> coredump machanism, and it helps diagnose and debug issues.
s/machanism/mechanism/
>
> The nvme device coredump is triggered when command timeout occurs, and
> creates the following coredump files.
>
> - regs: NVMe controller registers (00h to 4Fh)
> - sq<qid>: Submission queue
> - cq<qid>: Completion queue
> - telemetry-ctrl-log: Telemetry controller-initiated log (if available)
> - data: Empty