2022-08-31 01:46:21

by zhenwei pi

[permalink] [raw]
Subject: [PATCH v2 0/1] nvmet-tcp: Fix NULL pointer dereference during release

v1 -> v2:
- Seperate nvmet_tcp_uninit_data_in_cmds() into nvmet_tcp_uninit_req()
& nvmet_tcp_uninit_data_in_cmds()(free buffers only).

v1:
- Move nvmet_tcp_uninit_data_in_cmds() after nvmet_sq_destroy()

zhenwei pi (1):
nvmet-tcp: Fix NULL pointer dereference during release

drivers/nvme/target/tcp.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)

--
2.20.1


2022-08-31 01:50:22

by zhenwei pi

[permalink] [raw]
Subject: [PATCH v2 1/1] nvmet-tcp: Fix NULL pointer dereference during release

nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(),
and waits the inflight IO requests in nvmet_sq_destroy(). During wait
the inflight IO requests, the callback nvmet_tcp_queue_response()
is called from backend after IO complete, this leads a typical
Use-After-Free issue like this:

BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 14 PID: 207 Comm: kworker/14:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #12
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp]
RIP: 0010:shash_ahash_digest+0x2b/0x110
Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03
RSP: 0018:ffffc900006e3dd8 EFLAGS: 00010206
RAX: 0000000000001000 RBX: ffff888104ac1650 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff888104ac1650 RDI: ffff888104ac1600
RBP: ffff888104ac1600 R08: ffff8881073980c8 R09: ffff8881057798b8
R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000
R13: 0000000000000000 R14: ffff88810601a1cc R15: ffff888107398000
FS: 0000000000000000(0000) GS:ffff88823fd80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000008 CR3: 000000010a8e8000 CR4: 0000000000350ee0
Call Trace:
<TASK>
nvmet_tcp_io_work+0xa1c/0xb1c [nvmet_tcp]
? __switch_to+0x106/0x420
process_one_work+0x1ae/0x380
? process_one_work+0x380/0x380
worker_thread+0x30/0x360
? process_one_work+0x380/0x380
kthread+0xe6/0x110
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30

Suggested by Sagi, separate nvmet_tcp_uninit_data_in_cmds() into two
steps:
uninit req <- new step 1
nvmet_sq_destroy();
free CMD buffers <- new step 2

Signed-off-by: zhenwei pi <[email protected]>
---
drivers/nvme/target/tcp.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index dc3b4dc8fe08..3bfd139b1434 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1415,7 +1415,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
nvmet_tcp_free_cmd_buffers(cmd);
}

-static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
+static void nvmet_tcp_uninit_req(struct nvmet_tcp_queue *queue)
{
struct nvmet_tcp_cmd *cmd = queue->cmds;
int i;
@@ -1423,14 +1423,27 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
for (i = 0; i < queue->nr_cmds; i++, cmd++) {
if (nvmet_tcp_need_data_in(cmd))
nvmet_req_uninit(&cmd->req);
+ }
+
+ if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
+ /* failed in connect */
+ nvmet_req_uninit(&queue->connect.req);
+ }
+}
+
+static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmds;
+ int i;

+ for (i = 0; i < queue->nr_cmds; i++, cmd++) {
nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}

if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
- /* failed in connect */
- nvmet_tcp_finish_cmd(&queue->connect);
+ nvmet_tcp_unmap_pdu_iovec(&queue->connect);
+ nvmet_tcp_free_cmd_buffers(&queue->connect);
}
}

@@ -1449,8 +1462,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
/* stop accepting incoming data */
queue->rcv_state = NVMET_TCP_RECV_ERR;

- nvmet_tcp_uninit_data_in_cmds(queue);
+ nvmet_tcp_uninit_req(queue);
nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_tcp_uninit_data_in_cmds(queue);
cancel_work_sync(&queue->io_work);
sock_release(queue->sock);
nvmet_tcp_free_cmds(queue);
--
2.20.1

2022-08-31 09:48:00

by Sagi Grimberg

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] nvmet-tcp: Fix NULL pointer dereference during release



On 8/31/22 04:34, zhenwei pi wrote:
> nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(),
> and waits the inflight IO requests in nvmet_sq_destroy(). During wait
> the inflight IO requests, the callback nvmet_tcp_queue_response()
> is called from backend after IO complete, this leads a typical
> Use-After-Free issue like this:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000008
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 0 P4D 0
> Oops: 0000 [#1] PREEMPT SMP NOPTI
> CPU: 14 PID: 207 Comm: kworker/14:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #12
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
> Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp]
> RIP: 0010:shash_ahash_digest+0x2b/0x110
> Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03
> RSP: 0018:ffffc900006e3dd8 EFLAGS: 00010206
> RAX: 0000000000001000 RBX: ffff888104ac1650 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: ffff888104ac1650 RDI: ffff888104ac1600
> RBP: ffff888104ac1600 R08: ffff8881073980c8 R09: ffff8881057798b8
> R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000
> R13: 0000000000000000 R14: ffff88810601a1cc R15: ffff888107398000
> FS: 0000000000000000(0000) GS:ffff88823fd80000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000008 CR3: 000000010a8e8000 CR4: 0000000000350ee0
> Call Trace:
> <TASK>
> nvmet_tcp_io_work+0xa1c/0xb1c [nvmet_tcp]
> ? __switch_to+0x106/0x420
> process_one_work+0x1ae/0x380
> ? process_one_work+0x380/0x380
> worker_thread+0x30/0x360
> ? process_one_work+0x380/0x380
> kthread+0xe6/0x110
> ? kthread_complete_and_exit+0x20/0x20
> ret_from_fork+0x1f/0x30
>
> Suggested by Sagi, separate nvmet_tcp_uninit_data_in_cmds() into two
> steps:
> uninit req <- new step 1
> nvmet_sq_destroy();
> free CMD buffers <- new step 2
>
> Signed-off-by: zhenwei pi <[email protected]>
> ---
> drivers/nvme/target/tcp.c | 22 ++++++++++++++++++----
> 1 file changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index dc3b4dc8fe08..3bfd139b1434 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1415,7 +1415,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
> nvmet_tcp_free_cmd_buffers(cmd);
> }
>
> -static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
> +static void nvmet_tcp_uninit_req(struct nvmet_tcp_queue *queue)

The name should reflect what it is doing, so keep it
nvmet_tcp_uninit_data_in_cmds()

> {
> struct nvmet_tcp_cmd *cmd = queue->cmds;
> int i;
> @@ -1423,14 +1423,27 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
> for (i = 0; i < queue->nr_cmds; i++, cmd++) {
> if (nvmet_tcp_need_data_in(cmd))
> nvmet_req_uninit(&cmd->req);
> + }
> +
> + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
> + /* failed in connect */
> + nvmet_req_uninit(&queue->connect.req);
> + }
> +}
> +
> +static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)

Maybe name this nvmet_tcp_free_cmd_data_in_buffers()

> +{
> + struct nvmet_tcp_cmd *cmd = queue->cmds;
> + int i;
>
> + for (i = 0; i < queue->nr_cmds; i++, cmd++) {

Lets not sweep and call this unconditionally on all cmds,
lets just use the commands that are still waiting for
data from the wire, other commands should complete and
free normally.

> nvmet_tcp_unmap_pdu_iovec(cmd);

You should rebase against the latest nvme tree for 6.0, this
function no longer exists.

> nvmet_tcp_free_cmd_buffers(cmd);
> }
>
> if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
> - /* failed in connect */
> - nvmet_tcp_finish_cmd(&queue->connect);
> + nvmet_tcp_unmap_pdu_iovec(&queue->connect);
> + nvmet_tcp_free_cmd_buffers(&queue->connect);
> }
> }
>
> @@ -1449,8 +1462,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
> /* stop accepting incoming data */
> queue->rcv_state = NVMET_TCP_RECV_ERR;
>
> - nvmet_tcp_uninit_data_in_cmds(queue);
> + nvmet_tcp_uninit_req(queue);
> nvmet_sq_destroy(&queue->nvme_sq);
> + nvmet_tcp_uninit_data_in_cmds(queue);
> cancel_work_sync(&queue->io_work);
> sock_release(queue->sock);
> nvmet_tcp_free_cmds(queue);