nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(),
and then waits for the inflight IO requests in nvmet_sq_destroy(). While
waiting for the inflight IO requests, the callback nvmet_tcp_queue_response()
is called from the backend after the IO completes, which leads to a typical
Use-After-Free issue like this:
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #15
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp]
RIP: 0010:shash_ahash_digest+0x2b/0x110
Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03
RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206
RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420
RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8
R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000
R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800
FS: 0000000000000000(0000) GS:ffff88883fa40000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0
Call Trace:
<TASK>
nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp]
? __switch_to+0x106/0x420
process_one_work+0x1ae/0x380
? process_one_work+0x380/0x380
worker_thread+0x30/0x360
? process_one_work+0x380/0x380
kthread+0xe6/0x110
? kthread_complete_and_exit+0x20/0x20
ret_from_fork+0x1f/0x30
As suggested by Sagi, split nvmet_tcp_uninit_data_in_cmds() into two
steps:
uninit data in cmds <- new step 1
nvmet_sq_destroy();
cancel_work_sync(&queue->io_work);
free CMD buffers <- new step 2
Signed-off-by: zhenwei pi <[email protected]>
---
drivers/nvme/target/tcp.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index c07de4f4f719..70baeab6af30 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1406,14 +1406,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
for (i = 0; i < queue->nr_cmds; i++, cmd++) {
if (nvmet_tcp_need_data_in(cmd))
nvmet_req_uninit(&cmd->req);
-
- nvmet_tcp_free_cmd_buffers(cmd);
}
if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
/* failed in connect */
- nvmet_tcp_finish_cmd(&queue->connect);
+ nvmet_req_uninit(&queue->connect.req);
+ }
+}
+
+static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_cmd *cmd = queue->cmds;
+ int i;
+
+ for (i = 0; i < queue->nr_cmds; i++, cmd++) {
+ if (nvmet_tcp_need_data_in(cmd))
+ nvmet_tcp_free_cmd_buffers(cmd);
}
+
+ if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect))
+ nvmet_tcp_free_cmd_buffers(&queue->connect);
}
static void nvmet_tcp_release_queue_work(struct work_struct *w)
@@ -1434,6 +1446,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_tcp_uninit_data_in_cmds(queue);
nvmet_sq_destroy(&queue->nvme_sq);
cancel_work_sync(&queue->io_work);
+ nvmet_tcp_free_cmd_data_in_buffers(queue);
sock_release(queue->sock);
nvmet_tcp_free_cmds(queue);
if (queue->hdr_digest || queue->data_digest)
--
2.20.1
On 9/20/22 16:16, zhenwei pi wrote:
> nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(),
> and waits the inflight IO requests in nvmet_sq_destroy(). During wait
> the inflight IO requests, the callback nvmet_tcp_queue_response()
> is called from backend after IO complete, this leads a typical
> Use-After-Free issue like this:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000008
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0
> Oops: 0000 [#1] PREEMPT SMP NOPTI
> CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #15
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
> Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp]
> RIP: 0010:shash_ahash_digest+0x2b/0x110
> Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03
> RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206
> RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420
> RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8
> R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000
> R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800
> FS: 0000000000000000(0000) GS:ffff88883fa40000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0
> Call Trace:
> <TASK>
> nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp]
> ? __switch_to+0x106/0x420
> process_one_work+0x1ae/0x380
> ? process_one_work+0x380/0x380
> worker_thread+0x30/0x360
> ? process_one_work+0x380/0x380
> kthread+0xe6/0x110
> ? kthread_complete_and_exit+0x20/0x20
> ret_from_fork+0x1f/0x30
>
> Suggested by Sagi, separate nvmet_tcp_uninit_data_in_cmds() into two
> steps:
> uninit data in cmds <- new step 1
> nvmet_sq_destroy();
> cancel_work_sync(&queue->io_work);
> free CMD buffers <- new step 2
>
> Signed-off-by: zhenwei pi <[email protected]>
> ---
> drivers/nvme/target/tcp.c | 19 ++++++++++++++++---
> 1 file changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index c07de4f4f719..70baeab6af30 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1406,14 +1406,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
> for (i = 0; i < queue->nr_cmds; i++, cmd++) {
> if (nvmet_tcp_need_data_in(cmd))
> nvmet_req_uninit(&cmd->req);
> -
> - nvmet_tcp_free_cmd_buffers(cmd);
> }
>
> if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
> /* failed in connect */
> - nvmet_tcp_finish_cmd(&queue->connect);
I think that nvmet_tcp_finish_cmd is now redundant and can be removed
if we fold it into its single remaining call site.
Other than that,
Reviewed-by: Sagi Grimberg <[email protected]>
On 9/20/22 23:34, Sagi Grimberg wrote:
>
>
> On 9/20/22 16:16, zhenwei pi wrote:
>> nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(),
>> and waits the inflight IO requests in nvmet_sq_destroy(). During wait
>> the inflight IO requests, the callback nvmet_tcp_queue_response()
>> is called from backend after IO complete, this leads a typical
>> Use-After-Free issue like this:
>>
>> BUG: kernel NULL pointer dereference, address: 0000000000000008
>> #PF: supervisor read access in kernel mode
>> #PF: error_code(0x0000) - not-present page
>> PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0
>> Oops: 0000 [#1] PREEMPT SMP NOPTI
>> CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted:
>> G E 6.0.0-rc2.bm.1-amd64 #15
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>> rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
>> Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp]
>> RIP: 0010:shash_ahash_digest+0x2b/0x110
>> Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48
>> 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b
>> 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03
>> RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206
>> RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000
>> RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420
>> RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8
>> R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000
>> R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800
>> FS: 0000000000000000(0000) GS:ffff88883fa40000(0000)
>> knlGS:0000000000000000
>> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0
>> Call Trace:
>> <TASK>
>> nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp]
>> ? __switch_to+0x106/0x420
>> process_one_work+0x1ae/0x380
>> ? process_one_work+0x380/0x380
>> worker_thread+0x30/0x360
>> ? process_one_work+0x380/0x380
>> kthread+0xe6/0x110
>> ? kthread_complete_and_exit+0x20/0x20
>> ret_from_fork+0x1f/0x30
>>
>> Suggested by Sagi, separate nvmet_tcp_uninit_data_in_cmds() into two
>> steps:
>> uninit data in cmds <- new step 1
>> nvmet_sq_destroy();
>> cancel_work_sync(&queue->io_work);
>> free CMD buffers <- new step 2
>>
>> Signed-off-by: zhenwei pi <[email protected]>
>> ---
>> drivers/nvme/target/tcp.c | 19 ++++++++++++++++---
>> 1 file changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>> index c07de4f4f719..70baeab6af30 100644
>> --- a/drivers/nvme/target/tcp.c
>> +++ b/drivers/nvme/target/tcp.c
>> @@ -1406,14 +1406,26 @@ static void
>> nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
>> for (i = 0; i < queue->nr_cmds; i++, cmd++) {
>> if (nvmet_tcp_need_data_in(cmd))
>> nvmet_req_uninit(&cmd->req);
>> -
>> - nvmet_tcp_free_cmd_buffers(cmd);
>> }
>> if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
>> /* failed in connect */
>> - nvmet_tcp_finish_cmd(&queue->connect);
>
> I think that nvmet_tcp_finish_cmd is now redundant and can be removed
> if we fold it to its single remaining call-site.
>
Hi, Sagi
What about letting this patch fix the kernel panic only, and I'll send a
follow-up patch to remove nvmet_tcp_finish_cmd after this patch is applied?
> Other than that,
> Reviewed-by: Sagi Grimberg <[email protected]>
--
zhenwei pi
> Hi, Sagi
>
> What about letting this patch fix the kernel panic only, and I'll send a
> followup patch to remove nvmet_tcp_finish_cmd after this patch applies?
That is fine with me.
>
>> Other than that,
>> Reviewed-by: Sagi Grimberg <[email protected]>