2018-07-19 13:33:02

by Johannes Thumshirn

Subject: [PATCH 0/4] Rework NVMe abort handling

PCI is currently the only transport which does more than just queue a
transport reset on a timeout; it tries to abort the command first.

This series brings the other transports we currently have up to the
same level.

On FC we can prepend a third escalation level by first aborting the FC
operation before trying to abort the NVMe command.
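
The resulting timeout escalation ladder on FC would look roughly like
this (sketch only; see patch 4/4 for the actual change):

        /* 1) try to abort the FC OP */
        if (!__nvme_fc_abort_op(ctrl, op))
                return BLK_EH_RESET_TIMER;

        /* 2) try an NVMe-level abort of the command */
        if (!nvme_abort_cmd(&ctrl->ctrl, rq, cpu_to_le16(qid)))
                return BLK_EH_RESET_TIMER;

        /* 3) last resort: reset the controller */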

Johannes Thumshirn (4):
nvme: factor out pci abort handling into core
nvme: rdma: abort commands before resetting controller
nvmet: loop: abort commands before resetting controller
nvme: fc: abort commands before resetting controller

drivers/nvme/host/core.c | 47 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/fc.c | 9 +++++++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 61 +++++++++-------------------------------------
drivers/nvme/host/rdma.c | 22 ++++++++++++++---
drivers/nvme/target/loop.c | 17 ++++++++++++-
6 files changed, 104 insertions(+), 53 deletions(-)

--
2.16.4



2018-07-19 13:31:43

by Johannes Thumshirn

Subject: [PATCH 4/4] nvme: fc: abort commands before resetting controller

Currently, if a timeout happens on FC, we just go ahead and reset the
underlying transport.

Instead, try more fine-grained error recovery by first aborting the
FC OP; if this fails, abort the NVMe command, and if this fails as
well, take the hard route and reset the controller.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/nvme/host/fc.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9cc33752539a..0ba0075c3b75 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2054,6 +2054,15 @@ nvme_fc_timeout(struct request *rq, bool reserved)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
+ u16 qid = op->queue->qnum;
+ int ret;
+
+ if (!__nvme_fc_abort_op(ctrl, op))
+ return BLK_EH_RESET_TIMER;
+
+ ret = nvme_abort_cmd(&ctrl->ctrl, rq, cpu_to_le16(qid));
+ if (!ret)
+ return BLK_EH_RESET_TIMER;

/*
* we can't individually ABTS an io without affecting the queue,
--
2.16.4


2018-07-19 13:31:45

by Johannes Thumshirn

Subject: [PATCH 1/4] nvme: factor out pci abort handling into core

Currently PCI is the only transport which does more fine-grained
error handling than just resetting the controller.

Factor out the command abort logic into nvme-core so other transports
can benefit from it as well.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/nvme/host/core.c | 47 +++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 61 ++++++++++--------------------------------------
3 files changed, 60 insertions(+), 49 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e77e6418a21c..82896be14191 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -702,6 +702,53 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
}
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);

+
+static void abort_endio(struct request *req, blk_status_t error)
+{
+ struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
+
+ dev_warn(ctrl->device,
+ "Abort status: 0x%x", nvme_req(req)->status);
+ atomic_inc(&ctrl->abort_limit);
+ blk_mq_free_request(req);
+}
+
+int nvme_abort_cmd(struct nvme_ctrl *ctrl,
+ struct request *rq, __le16 sqid)
+{
+ struct request *abort_req;
+ struct nvme_command cmd;
+
+ if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
+ return -EAGAIN;
+
+ if (atomic_dec_return(&ctrl->abort_limit) < 0) {
+ atomic_inc(&ctrl->abort_limit);
+ return -EBUSY;
+ }
+
+ nvme_req(rq)->flags |= NVME_REQ_CANCELLED;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abort.opcode = nvme_admin_abort_cmd;
+ cmd.abort.cid = rq->tag;
+ cmd.abort.sqid = sqid;
+
+ abort_req = nvme_alloc_request(ctrl->admin_q, &cmd,
+ BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+ if (IS_ERR(abort_req)) {
+ atomic_inc(&ctrl->abort_limit);
+ return PTR_ERR(abort_req);
+ }
+
+ abort_req->timeout = ADMIN_TIMEOUT;
+ abort_req->end_io_data = NULL;
+ blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_abort_cmd);
+
static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
unsigned len, u32 seed, bool write)
{
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4ad0c8ad2a27..39d6e4bc0402 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -445,6 +445,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);

int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 log_page, void *log, size_t size, u64 offset);
+int nvme_abort_cmd(struct nvme_ctrl *ctrl, struct request *rq, __le16 sqid);

extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6678e9134348..321b8d55b693 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -188,7 +188,6 @@ struct nvme_iod {
struct nvme_request req;
struct nvme_queue *nvmeq;
bool use_sgl;
- int aborted;
int npages; /* In the PRP list. 0 means small pool in use */
int nents; /* Used in scatterlist */
int length; /* Of data, in bytes */
@@ -495,7 +494,6 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->sg = iod->inline_sg;
}

- iod->aborted = 0;
iod->npages = -1;
iod->nents = 0;
iod->length = size;
@@ -1133,17 +1131,6 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

-static void abort_endio(struct request *req, blk_status_t error)
-{
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = iod->nvmeq;
-
- dev_warn(nvmeq->dev->ctrl.device,
- "Abort status: 0x%x", nvme_req(req)->status);
- atomic_inc(&nvmeq->dev->ctrl.abort_limit);
- blk_mq_free_request(req);
-}
-
static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
{

@@ -1193,9 +1180,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
struct nvme_dev *dev = nvmeq->dev;
- struct request *abort_req;
- struct nvme_command cmd;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ int ret;

/* If PCI error recovery process is happening, we cannot reset or
* the recovery mechanism will surely fail.
@@ -1243,54 +1229,31 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
break;
}

+ /*
+ * The aborted req will be completed on receiving the abort req.
+ * We enable the timer again. If hit twice, it'll cause a device reset,
+ * as the device then is in a faulty state.
+ */
+ ret = nvme_abort_cmd(&dev->ctrl, req, cpu_to_le16(nvmeq->qid));
+ if (!ret)
+ return BLK_EH_RESET_TIMER;
+
/*
* Shutdown the controller immediately and schedule a reset if the
* command was already aborted once before and still hasn't been
* returned to the driver, or if this is the admin queue.
*/
- if (!nvmeq->qid || iod->aborted) {
+ if (ret || !nvmeq->qid || nvme_req(req)->flags & NVME_REQ_CANCELLED) {
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);

- nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_DONE;
}

- if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
- atomic_inc(&dev->ctrl.abort_limit);
- return BLK_EH_RESET_TIMER;
- }
- iod->aborted = 1;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.abort.opcode = nvme_admin_abort_cmd;
- cmd.abort.cid = req->tag;
- cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
-
- dev_warn(nvmeq->dev->ctrl.device,
- "I/O %d QID %d timeout, aborting\n",
- req->tag, nvmeq->qid);
-
- abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
- BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
- if (IS_ERR(abort_req)) {
- atomic_inc(&dev->ctrl.abort_limit);
- return BLK_EH_RESET_TIMER;
- }
-
- abort_req->timeout = ADMIN_TIMEOUT;
- abort_req->end_io_data = NULL;
- blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
-
- /*
- * The aborted req will be completed on receiving the abort req.
- * We enable the timer again. If hit twice, it'll cause a device reset,
- * as the device then is in a faulty state.
- */
- return BLK_EH_RESET_TIMER;
+ return BLK_EH_DONE;
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
--
2.16.4


2018-07-19 13:31:53

by Johannes Thumshirn

Subject: [PATCH 3/4] nvmet: loop: abort commands before resetting controller

Currently, if a timeout happens on loop, we just go ahead and reset the
underlying transport.

Instead, try to abort the timed-out command and, if this fails as well,
continue the error escalation path and tear down the transport.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/nvme/target/loop.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index af7fbf4132b0..b6c355125cb0 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -141,9 +141,24 @@ static enum blk_eh_timer_return
nvme_loop_timeout(struct request *rq, bool reserved)
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &iod->queue->ctrl->ctrl;
+ int qid = nvme_loop_queue_idx(iod->queue);
+ int ret;
+
+ dev_warn(ctrl->device,
+ "I/O %d QID %d timeout, aborting\n",
+ rq->tag, qid);
+
+ ret = nvme_abort_cmd(ctrl, rq, cpu_to_le16(qid));
+ if (!ret)
+ return BLK_EH_RESET_TIMER;
+
+ dev_warn(ctrl->device,
+ "I/O %d QID %d abort failed %d, reset controller\n",
+ rq->tag, qid, ret);

/* queue error recovery */
- nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
+ nvme_reset_ctrl(ctrl);

/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
--
2.16.4


2018-07-19 13:32:30

by Johannes Thumshirn

Subject: [PATCH 2/4] nvme: rdma: abort commands before resetting controller

Currently, if a timeout happens on rdma, we just go ahead and reset the
underlying transport.

Instead, try to abort the timed-out command and, if this fails as well,
continue the error escalation path and tear down the transport.

Signed-off-by: Johannes Thumshirn <[email protected]>
---
drivers/nvme/host/rdma.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 13a6064e4794..aed4752cfac6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1671,10 +1671,26 @@ static enum blk_eh_timer_return
nvme_rdma_timeout(struct request *rq, bool reserved)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
+ u16 qid = nvme_rdma_queue_idx(req->queue);
+ int ret;
+
+ dev_warn(ctrl->device,
+ "I/O %d QID %d timeout, aborting\n",
+ rq->tag, qid);
+
+ ret = nvme_abort_cmd(ctrl, rq, cpu_to_le16(qid));
+ if (!ret)
+ return BLK_EH_RESET_TIMER;

- dev_warn(req->queue->ctrl->ctrl.device,
- "I/O %d QID %d timeout, reset controller\n",
- rq->tag, nvme_rdma_queue_idx(req->queue));
+ /*
+ * If the request was already cancelled once, there's no need
+ * to do it again; escalate to the next error recovery
+ * level.
+ */
+ dev_warn(ctrl->device,
+ "I/O %d QID %d abort failed %d, reset controller\n",
+ rq->tag, qid, ret);

/* queue error recovery */
nvme_rdma_error_recovery(req->queue->ctrl);
--
2.16.4


2018-07-19 13:42:54

by Christoph Hellwig

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 03:28:34PM +0200, Johannes Thumshirn wrote:
> PCI is currently the only transport which does more than just queue a
> transport reset on a timeout; it tries to abort the command first.
>
> This series brings the other transports we currently have up to the
> same level.

Without even looking at the code yet: why? The NVMe abort isn't
very useful, and due to the lack of ordering between different
queues it is almost harmful on fabrics. What problem are you
trying to solve?

2018-07-19 14:11:22

by Johannes Thumshirn

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 03:42:03PM +0200, Christoph Hellwig wrote:
> Without even looking at the code yet: why? The NVMe abort isn't
> very useful, and due to the lack of ordering between different
> queues it is almost harmful on fabrics. What problem are you
> trying to solve?

The problem I'm trying to solve here is really just single commands
timing out because of, e.g., a bad switch in between which causes frame
loss somewhere.

I know RDMA and FC are defined to be lossless but reality sometimes
has a different view on this (can't talk too much for RDMA but I've
had some nice bugs in SCSI due to faulty switches dropping odd
frames).

Of course we can still do the big hammer if one command times out due
to a misbehaving switch, but we can also at least try to abort it. I
know aborts are defined as best effort, but as we're in the error path
anyway it doesn't hurt to at least try.

This would give us a chance to recover from such situations, of course
given that the target actually does something when receiving an abort.

In the FC case we can even send an ABTS and try to abort the command
on the FC side first, before doing it on NVMe. I'm not sure if we can
do it on RDMA or PCIe as well.

So the issue I'm trying to solve is simple: if one command times out for
whatever reason, there's no need to go the big transport-reset route
without even trying to recover from it. Possibly we should also try
a queue reset if aborting failed, before doing the transport reset.

Byte,
Johannes
--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

2018-07-19 14:22:16

by Christoph Hellwig

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 04:10:25PM +0200, Johannes Thumshirn wrote:
> The problem I'm trying to solve here is really just single commands
> timing out because of, e.g., a bad switch in between which causes frame
> loss somewhere.

And that is exactly the case where NVMe abort does not actually work
in any sensible way.

Remember that while NVMe guarantees ordered delivery inside a given
queue, it does not guarantee anything between multiple queues.

So now you have your buggy FC setup where an I/O command times out
because your switch delayed it for two hours due to a firmware bug.

After 30 seconds we send an abort over the admin queue, which happens
to pass through just fine. The controller will tell you: no command
found as it has never seen it.

Now, with the code following what we have in PCIe, that just means
we'll eventually reset the controller after the I/O command times out
the second time, as we still won't have seen a completion for it.

If you incorrectly just continue and resend the command, we'll actually
get the command sent twice, and thus a potential bug once the original
command eventually gets sent along.

2018-07-19 14:37:41

by Johannes Thumshirn

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 04:23:55PM +0200, Christoph Hellwig wrote:
> On Thu, Jul 19, 2018 at 04:10:25PM +0200, Johannes Thumshirn wrote:
> > The problem I'm trying to solve here is really just single commands
> > timing out because of, e.g., a bad switch in between which causes frame
> > loss somewhere.
>
> And that is exactly the case where NVMe abort does not actually work
> in any sensible way.
>
> Remember that while NVMe guarantees ordered delivery inside a given
> queue, it does not guarantee anything between multiple queues.
>
> So now you have your buggy FC setup where an I/O command times out
> because your switch delayed it for two hours due to a firmware bug.
>
> After 30 seconds we send an abort over the admin queue, which happens
> to pass through just fine. The controller will tell you: no command
> found as it has never seen it.
>
> Now, with the code following what we have in PCIe, that just means
> we'll eventually reset the controller after the I/O command times out
> the second time, as we still won't have seen a completion for it.

Exactly that was my intention.

> If you incorrectly just continue and resend the command, we'll actually
> get the command sent twice, and thus a potential bug once the original
> command eventually gets sent along.

OK, let me see where I'm stuck here. We're issuing a command, it gets
lost due to $REASON and I'm aborting it. The upper layers then
eventually retry the command and it arrives at the target side. But so
does the old command as well and we have a duplicate. Correct?

So if we keep our old behavior and tear down the queues and
re-establish them, then the upper layers retry the command and it
arrives on the target. But shortly afterwards the switch happens to
find the old command in its ingress buffers and decides to forward it
to the target as well; how does that differ? The CMDID and SQID are
probably different but the payload will be the same, won't it?

So we still have our duplicate on the other side, don't we?

I feel I'm missing something here.

Byte,
Johannes
--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

2018-07-19 14:48:41

by Christoph Hellwig

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 04:35:34PM +0200, Johannes Thumshirn wrote:
> > Now, with the code following what we have in PCIe, that just means
> > we'll eventually reset the controller after the I/O command times out
> > the second time, as we still won't have seen a completion for it.
>
> Exactly that was my intention.

Which means the only thing you do for your use case is to delay
recovery even further.

> OK, let me see where I'm stuck here. We're issuing a command, it gets
> lost due to $REASON and I'm aborting it. The upper layers then
> eventually retry the command and it arrives at the target side. But so
> does the old command as well and we have a duplicate. Correct?

The upper layer is only going to retry after tearing down the transport
connection. And a teardown of the connection MUST clear all pending
commands on the way. If it doesn't, we are in deep, deep trouble.

An NVMe abort has no chance of clearing things at the transport layer.
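
(For reference, the connection teardown on the transports already does
this today; roughly, abridged from the rdma error recovery path:

        /* fail all inflight I/O as part of tearing down the connection */
        nvme_stop_queues(&ctrl->ctrl);
        blk_mq_tagset_busy_iter(&ctrl->tag_set,
                        nvme_cancel_request, &ctrl->ctrl);

nvme_cancel_request() completes every pending request with
NVME_SC_ABORT_REQ, so nothing stays outstanding across the reset.)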

2018-07-19 14:55:19

by Johannes Thumshirn

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 04:50:05PM +0200, Christoph Hellwig wrote:
> The upper layer is only going to retry after tearing down the transport
> connection. And a teardown of the connection MUST clear all pending
> commands on the way. If it doesn't, we are in deep, deep trouble.
>
> An NVMe abort has no chance of clearing things at the transport layer.

OK, so all I can do in my case (if I want soft error recovery) is send
out a transport abort in the timeout handler and then rearm the timer
and requeue the command.

Which leaves us with the FC patch only, modified of course.
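
I.e. something like this in nvme_fc_timeout() (untested sketch, reusing
__nvme_fc_abort_op() from the posted patch 4/4):

        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
        struct nvme_fc_ctrl *ctrl = op->ctrl;

        /* transport-level abort only, no NVMe-level abort */
        if (!__nvme_fc_abort_op(ctrl, op))
                return BLK_EH_RESET_TIMER;      /* rearm and wait */

        /* the FC abort failed as well, fall back to a controller reset */
        nvme_reset_ctrl(&ctrl->ctrl);
        return BLK_EH_DONE;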

--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

2018-07-19 15:01:04

by James Smart

Subject: Re: [PATCH 0/4] Rework NVMe abort handling


On 7/19/2018 7:10 AM, Johannes Thumshirn wrote:
> On Thu, Jul 19, 2018 at 03:42:03PM +0200, Christoph Hellwig wrote:
>> Without even looking at the code yet: why? The NVMe abort isn't
>> very useful, and due to the lack of ordering between different
>> queues it is almost harmful on fabrics. What problem are you
>> trying to solve?
> The problem I'm trying to solve here is really just single commands
> timing out because of, e.g., a bad switch in between which causes frame
> loss somewhere.
>
> I know RDMA and FC are defined to be lossless but reality sometimes
> has a different view on this (can't talk too much for RDMA but I've
> had some nice bugs in SCSI due to faulty switches dropping odd
> frames).
>
> Of course we can still do the big hammer if one command times out due
> to a misbehaving switch, but we can also at least try to abort it. I
> know aborts are defined as best effort, but as we're in the error path
> anyway it doesn't hurt to at least try.
>
> This would give us a chance to recover from such situations, of course
> given that the target actually does something when receiving an abort.
>
> In the FC case we can even send an ABTS and try to abort the command
> on the FC side first, before doing it on NVMe. I'm not sure if we can
> do it on RDMA or PCIe as well.
>
> So the issue I'm trying to solve is simple: if one command times out for
> whatever reason, there's no need to go the big transport-reset route
> without even trying to recover from it. Possibly we should also try
> a queue reset if aborting failed, before doing the transport reset.
>
> Byte,
> Johannes

I'm with Christoph.

It doesn't work that way... command delivery is very much tied to any
command-ordering delivery requirements as well as the SQHD increment on
the target, and response delivery is similarly tied to SQHD delivery to
the host as well as ordering requirements on responses. With aborts as
you're implementing them, you drop those things. Granted, Linux's lack
of attention to SQHD (a problem waiting to happen, in my mind) as well
as its not using fused commands (and no other commands yet requiring
order) makes it believe it can get away without them.

You're going to confuse transports, as there's no understanding in the
transport protocol of what it means to abort/cancel a single I/O. The
specs are rather clear, and for good reason, that non-delivery (the
abort or cancellation) mandates connection teardown, which in turn
mandates association teardown. You will be creating non-standard
implementations that will fail interoperability and compliance.

If you really want single-I/O abort, implement it in the NVMe standard
way with Aborts to the admin queue, subject to the ACL limit. Then push
on the targets to support deep ACL counts and to respond honestly to
ABORT, and there will still be race conditions between the ABORT and its
command that will make for an interesting retry policy. Or wait for Fred
Knight's new proposal on ABORTs.

-- james



2018-07-19 15:06:02

by James Smart

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On 7/19/2018 7:54 AM, Johannes Thumshirn wrote:
> On Thu, Jul 19, 2018 at 04:50:05PM +0200, Christoph Hellwig wrote:
>> The upper layer is only going to retry after tearing down the transport
>> connection. And a teardown of the connection MUST clear all pending
>> commands on the way. If it doesn't, we are in deep, deep trouble.
>>
>> An NVMe abort has no chance of clearing things at the transport layer.
> OK, so all I can do in my case (if I want soft error recovery) is send
> out a transport abort in the timeout handler and then rearm the timer
> and requeue the command.
>
> Which leaves us with the FC patch only, modified of course.
>

Which I'm going to say no to. I originally did the abort before the
reset and it brought out some confusion in the reset code. Thus the
existing flow, which just resets the controller; the reset has to be
done after the abort anyway.

-- james


2018-07-19 16:31:29

by kernel test robot

Subject: Re: [PATCH 1/4] nvme: factor out pci abort handling into core

Hi Johannes,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc5 next-20180719]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Johannes-Thumshirn/Rework-NVMe-abort-handling/20180719-230642
config: x86_64-randconfig-x011-201828 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

drivers/nvme/host/core.c: In function 'abort_endio':
>> drivers/nvme/host/core.c:711:40: error: 'struct nvme_request' has no member named 'ctrl'
struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
^~

vim +711 drivers/nvme/host/core.c

707
708
709 static void abort_endio(struct request *req, blk_status_t error)
710 {
> 711 struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
712
713 dev_warn(ctrl->device,
714 "Abort status: 0x%x", nvme_req(req)->status);
715 atomic_inc(&ctrl->abort_limit);
716 blk_mq_free_request(req);
717 }
718

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


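A minimal fix for the build error above (sketch; it passes the
controller through the request's end_io_data instead of the
nvme_request->ctrl member, which does not exist in this tree):

        static void abort_endio(struct request *req, blk_status_t error)
        {
                /* ctrl is stashed in end_io_data by nvme_abort_cmd() */
                struct nvme_ctrl *ctrl = req->end_io_data;

                dev_warn(ctrl->device,
                        "Abort status: 0x%x", nvme_req(req)->status);
                atomic_inc(&ctrl->abort_limit);
                blk_mq_free_request(req);
        }

with nvme_abort_cmd() setting abort_req->end_io_data = ctrl instead of
NULL before blk_execute_rq_nowait().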

2018-07-20 06:37:22

by Johannes Thumshirn

Subject: Re: [PATCH 0/4] Rework NVMe abort handling

On Thu, Jul 19, 2018 at 08:04:09AM -0700, James Smart wrote:
> Which I'm going to say no to. I originally did the abort before the
> reset and it brought out some confusion in the reset code. Thus the
> existing flow, which just resets the controller; the reset has to be
> done after the abort anyway.

OK, copied that.

--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850