The spec says
7.4.6.1 Digest Error handling
When a host detects a data digest error in a C2HData PDU, that host
shall continue processing C2HData PDUs associated with the command and
when the command processing has completed, if a successful status was
returned by the controller, the host shall fail the command with a
non-fatal transport error.
Currently the transport is reseted when a data digest error is
detected. Instead, when a digest error is detected, mark the final
status as NVME_SC_DATA_XFER_ERROR and let the upper layer handle
the error.
In order to keep track of the final result maintain a status field in
nvme_tcp_request object and use it to overwrite the completion queue
status (which might be successful even though a digest error has been
detected) when completing the request.
Reviewed-by: Hannes Reinecke <[email protected]>
Cc: kernel test robot <[email protected]>
Signed-off-by: Daniel Wagner <[email protected]>
---
v6:
- It was this exact moment when he hit the send button
he knew he forgot something: annonate all req->status
access with cpu_to_le16() calls.
v5:
- https://lore.kernel.org/linux-nvme/[email protected]/ - use __le16 instead of u16 for status type. Reported by lkp
v4:
- https://lore.kernel.org/linux-nvme/[email protected]/
- use req->status directly, avoid local variable. Suggested by Sagi.
v3:
- https://lore.kernel.org/linux-nvme/[email protected]/
- initialize req->status in nvme_tcp_setup_cmd_pdu()
- add rb tag from Hannes
v2:
- https://lore.kernel.org/linux-nvme/[email protected]/
- moved 'status' from nvme_tcp_queue to nvme_tcp_request.
v1:
- https://lore.kernel.org/linux-nvme/[email protected]/
drivers/nvme/host/tcp.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 645025620154..f3235d584121 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -45,6 +45,7 @@ struct nvme_tcp_request {
u32 pdu_len;
u32 pdu_sent;
u16 ttag;
+ __le16 status;
struct list_head entry;
struct llist_node lentry;
__le32 ddgst;
@@ -485,6 +486,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
struct nvme_completion *cqe)
{
+ struct nvme_tcp_request *req;
struct request *rq;
rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
@@ -496,7 +498,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
return -EINVAL;
}
- if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+ req = blk_mq_rq_to_pdu(rq);
+ if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
+ req->status = cqe->status;
+
+ if (!nvme_try_complete_req(rq, req->status, cqe->result))
nvme_complete_rq(rq);
queue->nr_cqe++;
@@ -758,7 +764,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ nvme_tcp_end_request(rq, req->status);
queue->nr_cqe++;
}
nvme_tcp_init_recv_ctx(queue);
@@ -788,18 +794,24 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
return 0;
if (queue->recv_ddgst != queue->exp_ddgst) {
+ struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+ req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
+
dev_err(queue->ctrl->ctrl.device,
"data digest error: recv %#x expected %#x\n",
le32_to_cpu(queue->recv_ddgst),
le32_to_cpu(queue->exp_ddgst));
- return -EIO;
}
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ nvme_tcp_end_request(rq, req->status);
queue->nr_cqe++;
}
@@ -2293,6 +2305,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
return ret;
req->state = NVME_TCP_SEND_CMD_PDU;
+ req->status = cpu_to_le16(NVME_SC_SUCCESS);
req->offset = 0;
req->data_sent = 0;
req->pdu_len = 0;
--
2.29.2
> @@ -485,6 +486,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
> static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
> struct nvme_completion *cqe)
> {
> + struct nvme_tcp_request *req;
> struct request *rq;
>
> rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
> @@ -496,7 +498,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
> return -EINVAL;
> }
>
> - if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
> + req = blk_mq_rq_to_pdu(rq);
> + if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
endian conversion a zero enum? looks weird..
But,
Reviewed-by: Sagi Grimberg <[email protected]>
On Mon, Aug 30, 2021 at 06:38:28PM +0300, Sagi Grimberg wrote:
> > + if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
>
> endian conversion a zero enum? looks weird..
I though it makes sense to stay consistent with the rest and don't make
it harder for the reader to figure out why this might not needed here.
And obviously the fear of the static code analyzers...
> But,
> Reviewed-by: Sagi Grimberg <[email protected]>
Thanks!
Thanks,
applied to nvme-5.15.