2021-03-01 23:32:49

by Daniel Wagner

Subject: [PATCH v2] nvme-tcp: Check if request has started before processing it

blk_mq_tag_to_rq() always returns a request as long as the tag id is in
the valid range [0...max_tags), even if no command is in flight for
that tag. If the target replies with such a tag, the host will likely
corrupt data or simply crash.

Add an additional check whether the request has actually been started;
if it has not, reset the connection.

This additional check does not protect against an invalid tag which
maps to a request that has been started. There is nothing we can do
about that. It will at least protect the host from crashing, which is
generally thought to be the right thing to do.

Signed-off-by: Daniel Wagner <[email protected]>
---

The patch is against nvme-5.12.

I noted that nvme_tcp_process_nvme_cqe() returns EINVAL
whereas the rest uses ENOENT. Looks a bit odd to me.

I've tested this with blktests.

v2:
- moved the check into a helper to avoid code duplication
- use nvme_reset_ctrl if request has not been started
- added nvme_tcp_recv_ddgst() callsite

drivers/nvme/host/tcp.c | 56 +++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 69f59d2c5799..af6f725b842b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -479,19 +479,38 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}

-static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
- struct nvme_completion *cqe)
+static bool nvme_tcp_tag_to_rq(struct nvme_tcp_queue *queue,
+ __u16 command_id, struct request **req)
{
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
"queue %d tag 0x%x not found\n",
- nvme_tcp_queue_id(queue), cqe->command_id);
+ nvme_tcp_queue_id(queue), command_id);
nvme_tcp_error_recovery(&queue->ctrl->ctrl);
- return -EINVAL;
+ return false;
}
+ if (!blk_mq_request_started(rq)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "queue %d received invalid tag\n",
+ nvme_tcp_queue_id(queue));
+ nvme_reset_ctrl(&queue->ctrl->ctrl);
+ return false;
+ }
+
+ *req = rq;
+ return true;
+}
+
+static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
+ struct nvme_completion *cqe)
+{
+ struct request *rq;
+
+ if (!nvme_tcp_tag_to_rq(queue, cqe->command_id, &rq))
+ return -EINVAL;

if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
nvme_complete_rq(rq);
@@ -505,13 +524,8 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
{
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ if (!nvme_tcp_tag_to_rq(queue, pdu->command_id, &rq))
return -ENOENT;
- }

if (!blk_rq_payload_bytes(rq)) {
dev_err(queue->ctrl->ctrl.device,
@@ -609,13 +623,8 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct request *rq;
int ret;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ if (!nvme_tcp_tag_to_rq(queue, pdu->command_id, &rq))
return -ENOENT;
- }
req = blk_mq_rq_to_pdu(rq);

ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
@@ -695,13 +704,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
struct nvme_tcp_request *req;
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ if (!nvme_tcp_tag_to_rq(queue, pdu->command_id, &rq))
return -ENOENT;
- }
req = blk_mq_rq_to_pdu(rq);

while (true) {
@@ -794,8 +798,10 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
}

if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
- pdu->command_id);
+ struct request *rq;
+
+ if (!nvme_tcp_tag_to_rq(queue, pdu->command_id, &rq))
+ return -EINVAL;

nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
--
2.29.2


2021-03-05 20:01:16

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


> blk_mq_tag_to_rq() always returns a request as long as the tag id is in
> the valid range [0...max_tags), even if no command is in flight for
> that tag. If the target replies with such a tag, the host will likely
> corrupt data or simply crash.
>
> Add an additional check whether the request has actually been started;
> if it has not, reset the connection.
>
> This additional check does not protect against an invalid tag which
> maps to a request that has been started. There is nothing we can do
> about that. It will at least protect the host from crashing, which is
> generally thought to be the right thing to do.

Daniel, again, there is nothing specific about this to nvme-tcp,
this is a safeguard against a funky controller (or a different
bug that is hidden by this). The same can happen in any other
transport so I would suggest that if this is a safeguard we
want to put in place, we should make it a generic one.

i.e. nvme_tag_to_rq() that _all_ transports call consistently.

2021-03-11 09:45:36

by Daniel Wagner

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

Hi Sagi,

On Fri, Mar 05, 2021 at 11:57:30AM -0800, Sagi Grimberg wrote:
> Daniel, again, there is nothing specific about this to nvme-tcp,
> this is a safeguard against a funky controller (or a different
> bug that is hidden by this).

As far as I can tell, the main difference between nvme-tcp and FC/NVMe
is that nvme-tcp has no FW or big driver which filters out some of the
noise from a misbehaving controller. I haven't really checked the other
transports, but I wouldn't be surprised if they share the same
properties as FC/NVMe.

> The same can happen in any other transport so I would suggest that if
> this is a safeguard we want to put in place, we should make it a
> generic one.
>
> i.e. nvme_tag_to_rq() that _all_ transports call consistently.

Okay, I'll review all the relevant code and see what could be made more
generic and consistent.

Though I think nvme-tcp plays in a different league, as it is exposed
to normal networking traffic, which is a very hostile environment.

Thanks,
Daniel

2021-03-15 17:18:56

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


> Hi Sagi,
>
> On Fri, Mar 05, 2021 at 11:57:30AM -0800, Sagi Grimberg wrote:
>> Daniel, again, there is nothing specific about this to nvme-tcp,
>> this is a safeguard against a funky controller (or a different
>> bug that is hidden by this).
>
> As far I can tell, the main difference between nvme-tcp and FC/NVMe,
> nvme-tcp has not a FW or a big driver which filter out some noise from a
> misbehaving controller. I haven't really checked the other transports
> but I wouldn't surprised they share the same properties as FC/NVMe.
>
>> The same can happen in any other transport so I would suggest that if
>> this is a safeguard we want to put in place, we should make it a
>> generic one.
>>
>> i.e. nvme_tag_to_rq() that _all_ transports call consistently.
>
> Okay, I'll review all the relevant code and see what could made more
> generic and consistent.
>
> Though I think nvme-tcp plays in a different league as it is exposed to
> normal networking traffic and this is a very hostile environment.

It is, but in this situation, the controller is sending a second
completion that results in a use-after-free, which makes the
transport irrelevant. Unless there is some other flow (which is unclear
to me) that causes this which is a bug that needs to be fixed rather
than hidden with a safeguard.

2021-03-30 16:23:20

by Ewan Milne

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Mon, 2021-03-15 at 10:16 -0700, Sagi Grimberg wrote:
> > Hi Sagi,
> >
> > On Fri, Mar 05, 2021 at 11:57:30AM -0800, Sagi Grimberg wrote:
> > > Daniel, again, there is nothing specific about this to nvme-tcp,
> > > this is a safeguard against a funky controller (or a different
> > > bug that is hidden by this).
> >
> > As far I can tell, the main difference between nvme-tcp and
> > FC/NVMe,
> > nvme-tcp has not a FW or a big driver which filter out some noise
> > from a
> > misbehaving controller. I haven't really checked the other
> > transports
> > but I wouldn't surprised they share the same properties as FC/NVMe.
> >
> > > The same can happen in any other transport so I would suggest
> > > that if
> > > this is a safeguard we want to put in place, we should make it a
> > > generic one.
> > >
> > > i.e. nvme_tag_to_rq() that _all_ transports call consistently.
> >
> > Okay, I'll review all the relevant code and see what could made
> > more
> > generic and consistent.
> >
> > Though I think nvme-tcp plays in a different league as it is
> > exposed to
> > normal networking traffic and this is a very hostile environment.
>
> It is, but in this situation, the controller is sending a second
> completion that results in a use-after-free, which makes the
> transport irrelevant. Unless there is some other flow (which is
> unclear
> to me) that causes this which is a bug that needs to be fixed rather
> than hidden with a safeguard.
>

The kernel should not crash regardless of any network traffic that is
sent to the system. It should not be possible to either intentionally
or mistakenly construct packets that will deny service in this way.

-Ewan



2021-03-30 17:36:04

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>> It is, but in this situation, the controller is sending a second
>> completion that results in a use-after-free, which makes the
>> transport irrelevant. Unless there is some other flow (which is
>> unclear
>> to me) that causes this which is a bug that needs to be fixed rather
>> than hidden with a safeguard.
>>
>
> The kernel should not crash regardless of any network traffic that is
> sent to the system. It should not be possible to either intentionally
> of mistakenly contruct packets that will deny service in this way.

This is not specific to nvme-tcp. I can build an rdma or pci controller
that can trigger the same crash... I saw a similar patch from Hannes
implemented at the scsi level, and not in the individual scsi transports.

I would also mention, that a crash is not even the scariest issue that
we can see here, because if the request happened to be reused we are
in the silent data corruption realm...

2021-03-30 23:33:19

by Keith Busch

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Tue, Mar 30, 2021 at 10:34:25AM -0700, Sagi Grimberg wrote:
>
> > > It is, but in this situation, the controller is sending a second
> > > completion that results in a use-after-free, which makes the
> > > transport irrelevant. Unless there is some other flow (which is
> > > unclear
> > > to me) that causes this which is a bug that needs to be fixed rather
> > > than hidden with a safeguard.
> > >
> >
> > The kernel should not crash regardless of any network traffic that is
> > sent to the system. It should not be possible to either intentionally
> > of mistakenly contruct packets that will deny service in this way.
>
> This is not specific to nvme-tcp. I can build an rdma or pci controller
> that can trigger the same crash... I saw a similar patch from Hannes
> implemented in the scsi level, and not the individual scsi transports..

If scsi wants this too, this could be made generic at the blk-mq level.
We just need to make something like blk_mq_tag_to_rq(), but return NULL
if the request isn't started.
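
A rough sketch of what such a helper could look like (the name is made
up here; it just combines the existing blk_mq_tag_to_rq() and
blk_mq_request_started() helpers):

	static inline struct request *
	blk_mq_tag_to_started_rq(struct blk_mq_tags *tags, unsigned int tag)
	{
		struct request *rq = blk_mq_tag_to_rq(tags, tag);

		/* only hand back requests the driver has actually started */
		if (!rq || !blk_mq_request_started(rq))
			return NULL;
		return rq;
	}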

> I would also mention, that a crash is not even the scariest issue that
> we can see here, because if the request happened to be reused we are
> in the silent data corruption realm...

If this does happen, I think we have to come up with some way to
mitigate it. We're not utilizing the full 16 bits of the command_id, so
maybe we can append something like a generation sequence number that can
be checked for validity.

2021-03-31 07:13:39

by Hannes Reinecke

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On 3/31/21 1:28 AM, Keith Busch wrote:
> On Tue, Mar 30, 2021 at 10:34:25AM -0700, Sagi Grimberg wrote:
>>
>>>> It is, but in this situation, the controller is sending a second
>>>> completion that results in a use-after-free, which makes the
>>>> transport irrelevant. Unless there is some other flow (which is
>>>> unclear
>>>> to me) that causes this which is a bug that needs to be fixed rather
>>>> than hidden with a safeguard.
>>>>
>>>
>>> The kernel should not crash regardless of any network traffic that is
>>> sent to the system. It should not be possible to either intentionally
>>> of mistakenly contruct packets that will deny service in this way.
>>
>> This is not specific to nvme-tcp. I can build an rdma or pci controller
>> that can trigger the same crash... I saw a similar patch from Hannes
>> implemented in the scsi level, and not the individual scsi transports..
>
> If scsi wants this too, this could be made generic at the blk-mq level.
> We just need to make something like blk_mq_tag_to_rq(), but return NULL
> if the request isn't started.
>
>> I would also mention, that a crash is not even the scariest issue that
>> we can see here, because if the request happened to be reused we are
>> in the silent data corruption realm...
>
> If this does happen, I think we have to come up with some way to
> mitigate it. We're not utilizing the full 16 bits of the command_id, so
> maybe we can append something like a generation sequence number that can
> be checked for validity.
>
... which will be near impossible.
We can protect against crashing on invalid frames.
We can _not_ protect against maliciously crafted packets referencing any
random _existing_ tag; that's what TLS is for.

What we can do, though, is check the 'state' field in the tcp
request, and only allow completions for commands which are in a state
that allows completions.

Let's see if I can whip up a patch.

Cheers,

Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer

2021-03-31 21:04:26

by Ewan Milne

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Wed, 2021-03-31 at 09:11 +0200, Hannes Reinecke wrote:
> On 3/31/21 1:28 AM, Keith Busch wrote:
> > On Tue, Mar 30, 2021 at 10:34:25AM -0700, Sagi Grimberg wrote:
> > >
> > > > > It is, but in this situation, the controller is sending a
> > > > > second
> > > > > completion that results in a use-after-free, which makes the
> > > > > transport irrelevant. Unless there is some other flow (which
> > > > > is
> > > > > unclear
> > > > > to me) that causes this which is a bug that needs to be fixed
> > > > > rather
> > > > > than hidden with a safeguard.
> > > > >
> > > >
> > > > The kernel should not crash regardless of any network traffic
> > > > that is
> > > > sent to the system. It should not be possible to either
> > > > intentionally
> > > > of mistakenly contruct packets that will deny service in this
> > > > way.
> > >
> > > This is not specific to nvme-tcp. I can build an rdma or pci
> > > controller
> > > that can trigger the same crash... I saw a similar patch from
> > > Hannes
> > > implemented in the scsi level, and not the individual scsi
> > > transports..
> >
> > If scsi wants this too, this could be made generic at the blk-mq
> > level.
> > We just need to make something like blk_mq_tag_to_rq(), but return
> > NULL
> > if the request isn't started.
> >
> > > I would also mention, that a crash is not even the scariest issue
> > > that
> > > we can see here, because if the request happened to be reused we
> > > are
> > > in the silent data corruption realm...
> >
> > If this does happen, I think we have to come up with some way to
> > mitigate it. We're not utilizing the full 16 bits of the
> > command_id, so
> > maybe we can append something like a generation sequence number
> > that can
> > be checked for validity.
> >
>
> ... which will be near impossible.
> We can protect against crashing on invalid frames.
> We can _not_ protect against maliciously crafted packets referencing
> any
> random _existing_ tag; that's what TLS is for.
>
> What we can do, though, is checking the 'state' field in the tcp
> request, and only allow completions for commands which are in a state
> allowing for completions.
>
> Let's see if I can whip up a patch.

That would be great. BTW in the crash dump I am looking at now, it
looks like pdu->command_id was zero in nvme_tcp_recv_data(), and
blk_mq_tag_to_rq() returned a request struct that had not been used.
So I think we do need to check that the tag was actually allocated.

-Ewan

>
> Cheers,
>
> Hannes

2021-03-31 22:26:22

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>> What we can do, though, is checking the 'state' field in the tcp
>> request, and only allow completions for commands which are in a state
>> allowing for completions.
>>
>> Let's see if I can whip up a patch.
>
> That would be great. BTW in the crash dump I am looking at now, it
> looks like pdu->command_id was zero in nvme_tcp_recv_data(), and
> blk_mq_tag_to_rq() returned a request struct that had not been used.
> So I think we do need to check that the tag was actually allocated.

request tag can't be zero? I forget...

2021-03-31 22:39:12

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>>>> It is, but in this situation, the controller is sending a second
>>>> completion that results in a use-after-free, which makes the
>>>> transport irrelevant. Unless there is some other flow (which is
>>>> unclear
>>>> to me) that causes this which is a bug that needs to be fixed rather
>>>> than hidden with a safeguard.
>>>>
>>>
>>> The kernel should not crash regardless of any network traffic that is
>>> sent to the system. It should not be possible to either intentionally
>>> of mistakenly contruct packets that will deny service in this way.
>>
>> This is not specific to nvme-tcp. I can build an rdma or pci controller
>> that can trigger the same crash... I saw a similar patch from Hannes
>> implemented in the scsi level, and not the individual scsi transports..
>
> If scsi wants this too, this could be made generic at the blk-mq level.
> We just need to make something like blk_mq_tag_to_rq(), but return NULL
> if the request isn't started.

Makes sense...

>> I would also mention, that a crash is not even the scariest issue that
>> we can see here, because if the request happened to be reused we are
>> in the silent data corruption realm...
>
> If this does happen, I think we have to come up with some way to
> mitigate it. We're not utilizing the full 16 bits of the command_id, so
> maybe we can append something like a generation sequence number that can
> be checked for validity.

That's actually a great idea. scsi needs unique tags so it encodes the
hwq in the upper 16 bits giving the actual tag the lower 16 bits which
is more than enough for a single queue. We can do the same with
a gencnt that will increment in both submission and completion and we
can validate against it.

This will be useful for all transports, so maintaining it in
nvme_req(rq)->genctr and introducing a helper like:
rq = nvme_find_tag(tagset, cqe->command_id)
That will filter genctr, locate the request.

Also:
nvme_validate_request_gen(rq, cqe->command_id) that would
compare against it.


And then a helper to set the command_id like:
cmd->common.command_id = nvme_request_command_id(rq)
that will both increment the genctr and build a command_id
from it.

Thoughts?

2021-04-01 06:22:57

by Christoph Hellwig

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Wed, Mar 31, 2021 at 03:24:49PM -0700, Sagi Grimberg wrote:
>
>>> What we can do, though, is checking the 'state' field in the tcp
>>> request, and only allow completions for commands which are in a state
>>> allowing for completions.
>>>
>>> Let's see if I can whip up a patch.
>>
>> That would be great. BTW in the crash dump I am looking at now, it
>> looks like pdu->command_id was zero in nvme_tcp_recv_data(), and
>> blk_mq_tag_to_rq() returned a request struct that had not been used.
>> So I think we do need to check that the tag was actually allocated.
>
> request tag can't be zero? I forget...

Of course it can. But the reserved tags are before the normal tags,
so 0 would be a reserved tag for nvme.

2021-04-01 08:28:49

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>> request tag can't be zero? I forget...
>
> Of course it can. But the reserved tags are before the normal tags,
> so 0 would be a reserved tag for nvme.

Right.

2021-05-06 23:13:41

by Hannes Reinecke

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On 4/1/21 12:37 AM, Sagi Grimberg wrote:
>
>>>>> It is, but in this situation, the controller is sending a second
>>>>> completion that results in a use-after-free, which makes the
>>>>> transport irrelevant. Unless there is some other flow (which is
>>>>> unclear
>>>>> to me) that causes this which is a bug that needs to be fixed rather
>>>>> than hidden with a safeguard.
>>>>>
>>>>
>>>> The kernel should not crash regardless of any network traffic that is
>>>> sent to the system.  It should not be possible to either intentionally
>>>> of mistakenly contruct packets that will deny service in this way.
>>>
>>> This is not specific to nvme-tcp. I can build an rdma or pci controller
>>> that can trigger the same crash... I saw a similar patch from Hannes
>>> implemented in the scsi level, and not the individual scsi transports..
>>
>> If scsi wants this too, this could be made generic at the blk-mq level.
>> We just need to make something like blk_mq_tag_to_rq(), but return NULL
>> if the request isn't started.
>
> Makes sense...
>
>>> I would also mention, that a crash is not even the scariest issue that
>>> we can see here, because if the request happened to be reused we are
>>> in the silent data corruption realm...
>>
>> If this does happen, I think we have to come up with some way to
>> mitigate it. We're not utilizing the full 16 bits of the command_id, so
>> maybe we can append something like a generation sequence number that can
>> be checked for validity.
>
> That's actually a great idea. scsi needs unique tags so it encodes the
> hwq in the upper 16 bits giving the actual tag the lower 16 bits which
> is more than enough for a single queue. We can do the same with
> a gencnt that will increment in both submission and completion and we
> can validate against it.
>
> This will be useful for all transports, so maintaining it in
> nvme_req(rq)->genctr and introducing a helper like:
> rq = nvme_find_tag(tagset, cqe->command_id)
> That will filter genctr, locate the request.
>
> Also:
> nvme_validate_request_gen(rq, cqe->command_id) that would
> compare against it.
>
>
> And then a helper to set the command_id like:
> cmd->common.command_id = nvme_request_command_id(rq)
> that will both increment the genctr and build a command_id
> from it.
>
> Thoughts?
>

Well, that would require a modification to the CQE specification, no?
fmds was not amused when I proposed that :-(

Cheers,

Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

2021-05-07 20:29:12

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>>>> I would also mention, that a crash is not even the scariest issue that
>>>> we can see here, because if the request happened to be reused we are
>>>> in the silent data corruption realm...
>>>
>>> If this does happen, I think we have to come up with some way to
>>> mitigate it. We're not utilizing the full 16 bits of the command_id, so
>>> maybe we can append something like a generation sequence number that can
>>> be checked for validity.
>>
>> That's actually a great idea. scsi needs unique tags so it encodes the
>> hwq in the upper 16 bits giving the actual tag the lower 16 bits which
>> is more than enough for a single queue. We can do the same with
>> a gencnt that will increment in both submission and completion and we
>> can validate against it.
>>
>> This will be useful for all transports, so maintaining it in
>> nvme_req(rq)->genctr and introducing a helper like:
>> rq = nvme_find_tag(tagset, cqe->command_id)
>> That will filter genctr, locate the request.
>>
>> Also:
>> nvme_validate_request_gen(rq, cqe->command_id) that would
>> compare against it.
>>
>>
>> And then a helper to set the command_id like:
>> cmd->common.command_id = nvme_request_command_id(rq)
>> that will both increment the genctr and build a command_id
>> from it.
>>
>> Thoughts?
>>
>
> Well, that would require a modification to the CQE specification, no?
> fmds was not amused when I proposed that :-(

Why would that require a modification to the CQE? it's just using say
4 msbits of the command_id to a running sequence...

2021-05-07 20:42:20

by Keith Busch

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Fri, May 07, 2021 at 01:26:11PM -0700, Sagi Grimberg wrote:
>
> > > > > I would also mention, that a crash is not even the scariest issue that
> > > > > we can see here, because if the request happened to be reused we are
> > > > > in the silent data corruption realm...
> > > >
> > > > If this does happen, I think we have to come up with some way to
> > > > mitigate it. We're not utilizing the full 16 bits of the command_id, so
> > > > maybe we can append something like a generation sequence number that can
> > > > be checked for validity.
> > >
> > > That's actually a great idea. scsi needs unique tags so it encodes the
> > > hwq in the upper 16 bits giving the actual tag the lower 16 bits which
> > > is more than enough for a single queue. We can do the same with
> > > a gencnt that will increment in both submission and completion and we
> > > can validate against it.
> > >
> > > This will be useful for all transports, so maintaining it in
> > > nvme_req(rq)->genctr and introducing a helper like:
> > > rq = nvme_find_tag(tagset, cqe->command_id)
> > > That will filter genctr, locate the request.
> > >
> > > Also:
> > > nvme_validate_request_gen(rq, cqe->command_id) that would
> > > compare against it.
> > >
> > >
> > > And then a helper to set the command_id like:
> > > cmd->common.command_id = nvme_request_command_id(rq)
> > > that will both increment the genctr and build a command_id
> > > from it.
> > >
> > > Thoughts?
> > >
> >
> > Well, that would require a modification to the CQE specification, no?
> > fmds was not amused when I proposed that :-(
>
> Why would that require a modification to the CQE? it's just using say
> 4 msbits of the command_id to a running sequence...

I think Hannes was under the impression that the counter proposal wasn't
part of the "command_id". The host can encode whatever it wants in that
value, and the controller just has to return the same value.

2021-05-07 23:24:39

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it


>>> Well, that would require a modification to the CQE specification, no?
>>> fmds was not amused when I proposed that :-(
>>
>> Why would that require a modification to the CQE? it's just using say
>> 4 msbits of the command_id to a running sequence...
>
> I think Hannes was under the impression that the counter proposal wasn't
> part of the "command_id". The host can encode whatever it wants in that
> value, and the controller just has to return the same value.

Yea, maybe something like this?
--
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e6612971f4eb..7af48827ea56 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1006,7 +1006,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns,
struct request *req)
return BLK_STS_IOERR;
}

- cmd->common.command_id = req->tag;
+ cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 05f31a2c64bb..96abfb0e2ddd 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -158,6 +158,7 @@ enum nvme_quirks {
struct nvme_request {
struct nvme_command *cmd;
union nvme_result result;
+ u8 genctr;
u8 retries;
u8 flags;
u16 status;
@@ -497,6 +498,48 @@ struct nvme_ctrl_ops {
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};

+/*
+ * nvme command_id is constructed as such:
+ * | xxxx | xxxxxxxxxxxx |
+ * gen request tag
+ */
+#define nvme_cid_install_genctr(gen) ((gen & 0xf) << 12)
+#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12)
+#define nvme_tag_from_cid(cid) (cid & 0xfff)
+
+static inline u16 nvme_cid(struct request *rq)
+{
+ return nvme_cid_install_genctr(nvme_req(rq)->genctr++) | rq->tag;
+}
+
+static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ u8 genctr = nvme_genctr_from_cid(command_id);
+ u16 tag = nvme_tag_from_cid(command_id);
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(tags, tag);
+ if (unlikely(!rq)) {
+ pr_err("could not locate request for tag %#x\n",
+ tag);
+ return NULL;
+ }
+ if (unlikely(nvme_req(rq)->genctr != genctr)) {
+ dev_err(nvme_req(rq)->ctrl->device,
+ "request %#x genctr mismatch (got %#x expected
%#x)\n",
+ tag, nvme_req(rq)->genctr, genctr);
+ return NULL;
+ }
+ return rq;
+}
+
+static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
+}
+
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
@@ -594,7 +637,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)

static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
{
- return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+ return !qid &&
+ nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
}

void nvme_complete_rq(struct request *req);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a29b170701fc..92e03f15c9f6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1017,7 +1017,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
return;
}

- req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
+ req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
if (unlikely(!req)) {
dev_warn(nvmeq->dev->ctrl.device,
"invalid id %d completed on queue %d\n",
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 203b47a8ec92..ab5b7d175488 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1727,10 +1727,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
struct request *rq;
struct nvme_rdma_request *req;

- rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on QP %#x not found\n",
+ "got bad command_id %#x on QP %#x\n",
cqe->command_id, queue->qp->qp_num);
nvme_rdma_error_recovery(queue->ctrl);
return;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 919f6fe69cb3..c51b70aec6dd 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -488,11 +488,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
{
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag 0x%x not found\n",
- nvme_tcp_queue_id(queue), cqe->command_id);
+ "got bad cqe.command_id %#x on queue %d\n",
+ cqe->command_id, nvme_tcp_queue_id(queue));
nvme_tcp_error_recovery(&queue->ctrl->ctrl);
return -EINVAL;
}
@@ -509,11 +509,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
{
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad c2hdata.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}

@@ -600,7 +600,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag;
- data->command_id = rq->tag;
+ data->command_id = nvme_cid(rq);
data->data_offset = cpu_to_le32(req->data_sent);
data->data_length = cpu_to_le32(req->pdu_len);
return 0;
@@ -613,11 +613,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct request *rq;
int ret;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad r2t.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}
req = blk_mq_rq_to_pdu(rq);
@@ -699,7 +699,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
struct nvme_tcp_request *req;
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
req = blk_mq_rq_to_pdu(rq);

while (true) {
@@ -794,8 +794,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
}

if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
- pdu->command_id);
+ struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);

nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1b89a6bb819a..9f1f5d572960 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
} else {
struct request *rq;

- rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on queue %d not found\n",
+ "got bad command_id %#x on queue %d\n",
cqe->command_id,
nvme_loop_queue_idx(queue));
return;
}
--

2021-05-08 00:05:42

by Keith Busch

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On Fri, May 07, 2021 at 04:22:30PM -0700, Sagi Grimberg wrote:
>
> > > > Well, that would require a modification to the CQE specification, no?
> > > > fmds was not amused when I proposed that :-(
> > >
> > > Why would that require a modification to the CQE? it's just using say
> > > 4 msbits of the command_id to a running sequence...
> >
> > I think Hannes was under the impression that the counter proposal wasn't
> > part of the "command_id". The host can encode whatever it wants in that
> > value, and the controller just has to return the same value.
>
> Yea, maybe something like this?

Yes, this looks aligned with what I was thinking. Just one minor problem
below.

> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 05f31a2c64bb..96abfb0e2ddd 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -158,6 +158,7 @@ enum nvme_quirks {
> struct nvme_request {
> struct nvme_command *cmd;
> union nvme_result result;
> + u8 genctr;
> u8 retries;
> u8 flags;
> u16 status;
> @@ -497,6 +498,48 @@ struct nvme_ctrl_ops {
> int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
> };
>
> +/*
> + * nvme command_id is constructed as such:
> + * | xxxx | xxxxxxxxxxxx |
> + * gen request tag
> + */
> +#define nvme_cid_install_genctr(gen) ((gen & 0xf) << 12)
> +#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12)
> +#define nvme_tag_from_cid(cid) (cid & 0xfff)
> +
> +static inline u16 nvme_cid(struct request *rq)
> +{
> + return nvme_cid_install_genctr(nvme_req(rq)->genctr++) | rq->tag;
> +}
> +
> +static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
> + u16 command_id)
> +{
> + u8 genctr = nvme_genctr_from_cid(command_id);
> + u16 tag = nvme_tag_from_cid(command_id);
> + struct request *rq;
> +
> + rq = blk_mq_tag_to_rq(tags, tag);
> + if (unlikely(!rq)) {
> + pr_err("could not locate request for tag %#x\n",
> + tag);
> + return NULL;
> + }
> + if (unlikely(nvme_req(rq)->genctr != genctr)) {

The nvme_request 'genctr' is 8 bits, but the nvme_genctr_from_cid() can
only return 4 bits, so we need to use the same mask for both.
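
One way to keep them in sync (just a sketch against the patch above)
would be to mask the stored counter in the comparison:

-	if (unlikely(nvme_req(rq)->genctr != genctr)) {
+	if (unlikely((nvme_req(rq)->genctr & 0xf) != genctr)) {

or, alternatively, to mask the counter when it is incremented so the
stored value never grows past 4 bits.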

2021-05-09 11:34:11

by Hannes Reinecke

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

On 5/8/21 1:22 AM, Sagi Grimberg wrote:
>
>>>> Well, that would require a modification to the CQE specification, no?
>>>> fmds was not amused when I proposed that :-(
>>>
>>> Why would that require a modification to the CQE? it's just using say
>>> 4 msbits of the command_id to a running sequence...
>>
>> I think Hannes was under the impression that the counter proposal wasn't
>> part of the "command_id". The host can encode whatever it wants in that
>> value, and the controller just has to return the same value.
>
> Yea, maybe something like this?
> --
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e6612971f4eb..7af48827ea56 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1006,7 +1006,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns,
> struct request *req)
>                return BLK_STS_IOERR;
>        }
>
> -       cmd->common.command_id = req->tag;
> +       cmd->common.command_id = nvme_cid(req);
>        trace_nvme_setup_cmd(req, cmd);
>        return ret;
> }
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 05f31a2c64bb..96abfb0e2ddd 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -158,6 +158,7 @@ enum nvme_quirks {
> struct nvme_request {
>        struct nvme_command     *cmd;
>        union nvme_result       result;
> +       u8                      genctr;
>        u8                      retries;
>        u8                      flags;
>        u16                     status;
> @@ -497,6 +498,48 @@ struct nvme_ctrl_ops {
>        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
> };
>
> +/*
> + * nvme command_id is constructed as such:
> + * | xxxx | xxxxxxxxxxxx |
> + *   gen    request tag
> + */
> +#define nvme_cid_install_genctr(gen)           ((gen & 0xf) << 12)
> +#define nvme_genctr_from_cid(cid)              ((cid & 0xf000) >> 12)
> +#define nvme_tag_from_cid(cid)                 (cid & 0xfff)
> +

That is a good idea, but we should make sure to limit the number of
commands a controller can request, too.
As per spec each controller can support a full 32 bits' worth of requests,
and if we limit that arbitrarily from the stack we'll need to cap the
number of requests a controller or fabrics driver can request.

Cheers,

Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

2021-05-11 18:17:59

by Sagi Grimberg

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it



On 5/9/21 4:30 AM, Hannes Reinecke wrote:
> On 5/8/21 1:22 AM, Sagi Grimberg wrote:
>>
>>>>> Well, that would require a modification to the CQE specification, no?
>>>>> fmds was not amused when I proposed that :-(
>>>>
>>>> Why would that require a modification to the CQE? it's just using say
>>>> 4 msbits of the command_id to a running sequence...
>>>
>>> I think Hannes was under the impression that the counter proposal wasn't
>>> part of the "command_id". The host can encode whatever it wants in that
>>> value, and the controller just has to return the same value.
>>
>> Yea, maybe something like this?
>> --
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index e6612971f4eb..7af48827ea56 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -1006,7 +1006,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns,
>> struct request *req)
>>                 return BLK_STS_IOERR;
>>         }
>>
>> -       cmd->common.command_id = req->tag;
>> +       cmd->common.command_id = nvme_cid(req);
>>         trace_nvme_setup_cmd(req, cmd);
>>         return ret;
>> }
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index 05f31a2c64bb..96abfb0e2ddd 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -158,6 +158,7 @@ enum nvme_quirks {
>> struct nvme_request {
>>         struct nvme_command     *cmd;
>>         union nvme_result       result;
>> +       u8                      genctr;
>>         u8                      retries;
>>         u8                      flags;
>>         u16                     status;
>> @@ -497,6 +498,48 @@ struct nvme_ctrl_ops {
>>         int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
>> };
>>
>> +/*
>> + * nvme command_id is constructed as such:
>> + * | xxxx | xxxxxxxxxxxx |
>> + *   gen    request tag
>> + */
>> +#define nvme_cid_install_genctr(gen)           ((gen & 0xf) << 12)
>> +#define nvme_genctr_from_cid(cid)              ((cid & 0xf000) >> 12)
>> +#define nvme_tag_from_cid(cid)                 (cid & 0xfff)
>> +
>
> That is a good idea, but we should ensure to limit the number of
> commands a controller can request, too.

We take the minimum between what the host requests and what the
controller supports anyway.

> As per spec each controller can support a full 32 bit worth of requests,
> and if we limit that arbitrarily from the stack we'll need to cap the
> number of requests a controller or fabrics driver can request.

NVMF_MAX_QUEUE_SIZE is already 1024, you are right that we also need:
--
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 92e03f15c9f6..66a4a7f7c504 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -60,6 +60,7 @@ MODULE_PARM_DESC(sgl_threshold,
"Use SGLs when average request segment size is larger
or equal to "
"this size. Use 0 to disable SGLs.");

+#define NVME_PCI_MAX_QUEUE_SIZE 4096
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
.set = io_queue_depth_set,
@@ -68,7 +69,7 @@ static const struct kernel_param_ops io_queue_depth_ops = {

static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
-MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and <= 4096");

static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
@@ -164,6 +165,9 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
if (ret != 0 || n < 2)
return -EINVAL;

+ if (n > NVME_PCI_MAX_QUEUE_SIZE)
+ return -EINVAL;
+
return param_set_uint(val, kp);
}

--

2021-05-18 09:48:51

by Daniel Wagner

Subject: Re: [PATCH v2] nvme-tcp: Check if request has started before processing it

Hi Sagi,

On Fri, May 07, 2021 at 04:22:30PM -0700, Sagi Grimberg wrote:
> Yea, maybe something like this?

I did give this a spin with blktests (loopback device) and on real
hardware (FC). Seems to work fine. Just two things.

> +/*
> + * nvme command_id is constructed as such:
> + * | xxxx | xxxxxxxxxxxx |
> + * gen request tag
> + */
> +#define nvme_cid_install_genctr(gen) ((gen & 0xf) << 12)
> +#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12)
> +#define nvme_tag_from_cid(cid) (cid & 0xfff)
> +
> +static inline u16 nvme_cid(struct request *rq)
> +{
> + return nvme_cid_install_genctr(nvme_req(rq)->genctr++) | rq->tag;
> +}

- return nvme_cid_install_genctr(nvme_req(rq)->genctr++) | rq->tag;
+ nvme_req(rq)->genctr = ++nvme_req(rq)->genctr & 0xf;
+ return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;

The first issue: it really needs a prefix increment if you want to write
it in one line, and it should store only the lower 4 bits. Otherwise
nvme_find_rq() would complain with 0x0 != 0x10 after the first overflow.

> +static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
> + u16 command_id)
> +{
> + u8 genctr = nvme_genctr_from_cid(command_id);
> + u16 tag = nvme_tag_from_cid(command_id);
> + struct request *rq;
> +
> + rq = blk_mq_tag_to_rq(tags, tag);
> + if (unlikely(!rq)) {
> + pr_err("could not locate request for tag %#x\n",
> + tag);
> + return NULL;
> + }
> + if (unlikely(nvme_req(rq)->genctr != genctr)) {
> + dev_err(nvme_req(rq)->ctrl->device,
> + "request %#x genctr mismatch (got %#x expected
> %#x)\n",
> + tag, nvme_req(rq)->genctr, genctr);

- tag, nvme_req(rq)->genctr, genctr);
+ tag, genctr, nvme_req(rq)->genctr);

The arguments are in the wrong order. Got me a bit confused.

Are you going to send out a proper patch? I'd like to move things
forward and could offer to do some more testing if needed.

Thanks,
Daniel