2023-08-11 23:22:55

by Luiz Augusto von Dentz

[permalink] [raw]
Subject: [PATCH v3] Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync

From: Luiz Augusto von Dentz <[email protected]>

Use-after-free can occur in hci_disconnect_all_sync if a connection is
deleted by concurrent processing of a controller event.

To prevent this the code now tries to iterate over the list backwards
to ensure the links are cleanup before its parents, also it no longer
relies on a cursor, instead it always uses the last element since
hci_abort_conn_sync is guaranteed to call hci_conn_del.

UAF crash log:
==================================================================
BUG: KASAN: slab-use-after-free in hci_set_powered_sync
(net/bluetooth/hci_sync.c:5424) [bluetooth]
Read of size 8 at addr ffff888009d9c000 by task kworker/u9:0/124

CPU: 0 PID: 124 Comm: kworker/u9:0 Tainted: G W
6.5.0-rc1+ #10
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
1.16.2-1.fc38 04/01/2014
Workqueue: hci0 hci_cmd_sync_work [bluetooth]
Call Trace:
<TASK>
dump_stack_lvl+0x5b/0x90
print_report+0xcf/0x670
? __virt_addr_valid+0xdd/0x160
? hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
kasan_report+0xa6/0xe0
? hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
? __pfx_set_powered_sync+0x10/0x10 [bluetooth]
hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
? __pfx_hci_set_powered_sync+0x10/0x10 [bluetooth]
? __pfx_lock_release+0x10/0x10
? __pfx_set_powered_sync+0x10/0x10 [bluetooth]
hci_cmd_sync_work+0x137/0x220 [bluetooth]
process_one_work+0x526/0x9d0
? __pfx_process_one_work+0x10/0x10
? __pfx_do_raw_spin_lock+0x10/0x10
? mark_held_locks+0x1a/0x90
worker_thread+0x92/0x630
? __pfx_worker_thread+0x10/0x10
kthread+0x196/0x1e0
? __pfx_kthread+0x10/0x10
ret_from_fork+0x2c/0x50
</TASK>

Allocated by task 1782:
kasan_save_stack+0x33/0x60
kasan_set_track+0x25/0x30
__kasan_kmalloc+0x8f/0xa0
hci_conn_add+0xa5/0xa80 [bluetooth]
hci_bind_cis+0x881/0x9b0 [bluetooth]
iso_connect_cis+0x121/0x520 [bluetooth]
iso_sock_connect+0x3f6/0x790 [bluetooth]
__sys_connect+0x109/0x130
__x64_sys_connect+0x40/0x50
do_syscall_64+0x60/0x90
entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Freed by task 695:
kasan_save_stack+0x33/0x60
kasan_set_track+0x25/0x30
kasan_save_free_info+0x2b/0x50
__kasan_slab_free+0x10a/0x180
__kmem_cache_free+0x14d/0x2e0
device_release+0x5d/0xf0
kobject_put+0xdf/0x270
hci_disconn_complete_evt+0x274/0x3a0 [bluetooth]
hci_event_packet+0x579/0x7e0 [bluetooth]
hci_rx_work+0x287/0xaa0 [bluetooth]
process_one_work+0x526/0x9d0
worker_thread+0x92/0x630
kthread+0x196/0x1e0
ret_from_fork+0x2c/0x50
==================================================================

Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier")
Signed-off-by: Pauli Virtanen <[email protected]>
Signed-off-by: Luiz Augusto von Dentz <[email protected]>
---
net/bluetooth/hci_sync.c | 51 ++++++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 5eb30ba21370..86e18e09136e 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -5370,6 +5370,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
int err = 0;
u16 handle = conn->handle;
+ struct hci_conn *c;

switch (conn->state) {
case BT_CONNECTED:
@@ -5389,43 +5390,53 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
hci_dev_unlock(hdev);
return 0;
default:
+ hci_dev_lock(hdev);
conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ hci_dev_unlock(hdev);
return 0;
}

+ /* Check if the connection hasn't been cleanup while waiting
+ * commands to complete.
+ */
+ c = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!c || c != conn)
+ return 0;
+
/* Cleanup hci_conn object if it cannot be cancelled as it
* likelly means the controller and host stack are out of sync
* or in case of LE it was still scanning so it can be cleanup
* safely.
*/
- if (err) {
- struct hci_conn *c;
-
- /* Check if the connection hasn't been cleanup while waiting
- * commands to complete.
- */
- c = hci_conn_hash_lookup_handle(hdev, handle);
- if (!c || c != conn)
- return 0;
-
- hci_dev_lock(hdev);
- hci_conn_failed(conn, err);
- hci_dev_unlock(hdev);
- }
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, reason);
+ hci_dev_unlock(hdev);

return err;
}

static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
{
- struct hci_conn *conn, *tmp;
- int err;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *conn;

- list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
- err = hci_abort_conn_sync(hdev, conn, reason);
- if (err)
- return err;
+ rcu_read_lock();
+ while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) {
+ /* Make sure the connection is not freed while unlocking */
+ conn = hci_conn_get(conn);
+ rcu_read_unlock();
+ /* Disregard possible errors since hci_conn_del shall have been
+ * called even in case of errors had occurred since it would
+ * then cause hci_conn_failed to be called which calls
+ * hci_conn_del internally.
+ */
+ hci_abort_conn_sync(hdev, conn, reason);
+ hci_conn_put(conn);
+ rcu_read_lock();
}
+ rcu_read_unlock();

return 0;
}
--
2.41.0



2023-08-12 02:37:53

by bluez.test.bot

[permalink] [raw]
Subject: RE: [v3] Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync

This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=775536

---Test result---

Test Summary:
CheckPatch PASS 0.77 seconds
GitLint PASS 0.32 seconds
SubjectPrefix PASS 0.11 seconds
BuildKernel PASS 38.70 seconds
CheckAllWarning PASS 42.94 seconds
CheckSparse PASS 49.10 seconds
CheckSmatch PASS 132.56 seconds
BuildKernel32 PASS 37.95 seconds
TestRunnerSetup PASS 580.39 seconds
TestRunner_l2cap-tester PASS 33.28 seconds
TestRunner_iso-tester PASS 65.22 seconds
TestRunner_bnep-tester PASS 13.54 seconds
TestRunner_mgmt-tester FAIL 263.32 seconds
TestRunner_rfcomm-tester PASS 20.26 seconds
TestRunner_sco-tester PASS 22.74 seconds
TestRunner_ioctl-tester PASS 22.66 seconds
TestRunner_mesh-tester PASS 16.88 seconds
TestRunner_smp-tester PASS 18.08 seconds
TestRunner_userchan-tester PASS 14.55 seconds
IncrementalBuild PASS 35.85 seconds

Details
##############################
Test: TestRunner_mgmt-tester - FAIL
Desc: Run mgmt-tester with test-runner
Output:
Total: 497, Passed: 495 (99.6%), Failed: 2, Not Run: 0

Failed Test Cases
LL Privacy - Unpair 1 Timed out 1.888 seconds
LL Privacy - Unpair 2 (Remove from AL) Timed out 4.983 seconds


---
Regards,
Linux Bluetooth

2023-08-12 12:14:23

by Pauli Virtanen

[permalink] [raw]
Subject: Re: [PATCH v3] Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync

Hi Luiz,

pe, 2023-08-11 kello 16:16 -0700, Luiz Augusto von Dentz kirjoitti:
> From: Luiz Augusto von Dentz <[email protected]>
>
> Use-after-free can occur in hci_disconnect_all_sync if a connection is
> deleted by concurrent processing of a controller event.
>
> To prevent this the code now tries to iterate over the list backwards
> to ensure the links are cleanup before its parents, also it no longer
> relies on a cursor, instead it always uses the last element since
> hci_abort_conn_sync is guaranteed to call hci_conn_del.
>
> UAF crash log:
> ==================================================================
> BUG: KASAN: slab-use-after-free in hci_set_powered_sync
> (net/bluetooth/hci_sync.c:5424) [bluetooth]
> Read of size 8 at addr ffff888009d9c000 by task kworker/u9:0/124
>
> CPU: 0 PID: 124 Comm: kworker/u9:0 Tainted: G W
> 6.5.0-rc1+ #10
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> 1.16.2-1.fc38 04/01/2014
> Workqueue: hci0 hci_cmd_sync_work [bluetooth]
> Call Trace:
> <TASK>
> dump_stack_lvl+0x5b/0x90
> print_report+0xcf/0x670
> ? __virt_addr_valid+0xdd/0x160
> ? hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
> kasan_report+0xa6/0xe0
> ? hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
> ? __pfx_set_powered_sync+0x10/0x10 [bluetooth]
> hci_set_powered_sync+0x2c9/0x4a0 [bluetooth]
> ? __pfx_hci_set_powered_sync+0x10/0x10 [bluetooth]
> ? __pfx_lock_release+0x10/0x10
> ? __pfx_set_powered_sync+0x10/0x10 [bluetooth]
> hci_cmd_sync_work+0x137/0x220 [bluetooth]
> process_one_work+0x526/0x9d0
> ? __pfx_process_one_work+0x10/0x10
> ? __pfx_do_raw_spin_lock+0x10/0x10
> ? mark_held_locks+0x1a/0x90
> worker_thread+0x92/0x630
> ? __pfx_worker_thread+0x10/0x10
> kthread+0x196/0x1e0
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x2c/0x50
> </TASK>
>
> Allocated by task 1782:
> kasan_save_stack+0x33/0x60
> kasan_set_track+0x25/0x30
> __kasan_kmalloc+0x8f/0xa0
> hci_conn_add+0xa5/0xa80 [bluetooth]
> hci_bind_cis+0x881/0x9b0 [bluetooth]
> iso_connect_cis+0x121/0x520 [bluetooth]
> iso_sock_connect+0x3f6/0x790 [bluetooth]
> __sys_connect+0x109/0x130
> __x64_sys_connect+0x40/0x50
> do_syscall_64+0x60/0x90
> entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>
> Freed by task 695:
> kasan_save_stack+0x33/0x60
> kasan_set_track+0x25/0x30
> kasan_save_free_info+0x2b/0x50
> __kasan_slab_free+0x10a/0x180
> __kmem_cache_free+0x14d/0x2e0
> device_release+0x5d/0xf0
> kobject_put+0xdf/0x270
> hci_disconn_complete_evt+0x274/0x3a0 [bluetooth]
> hci_event_packet+0x579/0x7e0 [bluetooth]
> hci_rx_work+0x287/0xaa0 [bluetooth]
> process_one_work+0x526/0x9d0
> worker_thread+0x92/0x630
> kthread+0x196/0x1e0
> ret_from_fork+0x2c/0x50
> ==================================================================
>
> Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier")
> Signed-off-by: Pauli Virtanen <[email protected]>
> Signed-off-by: Luiz Augusto von Dentz <[email protected]>
> ---
> net/bluetooth/hci_sync.c | 51 ++++++++++++++++++++++++----------------
> 1 file changed, 31 insertions(+), 20 deletions(-)
>
> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> index 5eb30ba21370..86e18e09136e 100644
> --- a/net/bluetooth/hci_sync.c
> +++ b/net/bluetooth/hci_sync.c
> @@ -5370,6 +5370,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
> {
> int err = 0;
> u16 handle = conn->handle;
> + struct hci_conn *c;
>
> switch (conn->state) {
> case BT_CONNECTED:
> @@ -5389,43 +5390,53 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
> hci_dev_unlock(hdev);
> return 0;
> default:
> + hci_dev_lock(hdev);
> conn->state = BT_CLOSED;
> + hci_disconn_cfm(conn, reason);
> + hci_conn_del(conn);
> + hci_dev_unlock(hdev);
> return 0;
> }
>
> + /* Check if the connection hasn't been cleanup while waiting
> + * commands to complete.
> + */
> + c = hci_conn_hash_lookup_handle(hdev, handle);
> + if (!c || c != conn)
> + return 0;
> +
> /* Cleanup hci_conn object if it cannot be cancelled as it
> * likelly means the controller and host stack are out of sync
> * or in case of LE it was still scanning so it can be cleanup
> * safely.
> */
> - if (err) {
> - struct hci_conn *c;
> -
> - /* Check if the connection hasn't been cleanup while waiting
> - * commands to complete.
> - */
> - c = hci_conn_hash_lookup_handle(hdev, handle);
> - if (!c || c != conn)
> - return 0;
> -
> - hci_dev_lock(hdev);
> - hci_conn_failed(conn, err);
> - hci_dev_unlock(hdev);
> - }
> + hci_dev_lock(hdev);
> + hci_conn_failed(conn, reason);
> + hci_dev_unlock(hdev);

To me it seems there is a theoretical race here:

task 1 in hci_conn_abort_sync
task 2 in hci_disconn_complete_evt
task 1: c = hci_conn_hash_lookup_handle(conn->handle)
task 2: hci_dev_lock(hdev)
task 1: hci_dev_lock(hdev) /* waits here */
task 2: hci_conn_del(conn) /* conn deleted the first time */
task 2: hci_dev_unlock(hdev)
task 1: hci_conn_failed(conn, err) /* conn deleted again */

Moving hci_dev_lock before hci_conn_hash_lookup_handle avoids this.


Another related theoretical race seems to be in e.g.
hci_acl/sco/isodata_packet which do

hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_handle(hdev, handle);
hci_dev_unlock(hdev);
if (conn)
do_stuff_without_lock(conn);

These tasks run in hdev->workqueue, so it seems like deletion of conn
in hdev->req_workqueue like in hci_conn_abort_sync here could in theory
occur while do_stuff_without_lock is still accessing conn.


I guess in reality it depends on whether the two workqueues are really
running the tasks on different CPU at the same time, and if one can hit
narrow time windows.


>
> return err;
> }
>
> static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
> {
> - struct hci_conn *conn, *tmp;
> - int err;
> + struct list_head *head = &hdev->conn_hash.list;
> + struct hci_conn *conn;
>
> - list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
> - err = hci_abort_conn_sync(hdev, conn, reason);
> - if (err)
> - return err;
> + rcu_read_lock();
> + while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) {
> + /* Make sure the connection is not freed while unlocking */
> + conn = hci_conn_get(conn);
> + rcu_read_unlock();
> + /* Disregard possible errors since hci_conn_del shall have been
> + * called even in case of errors had occurred since it would
> + * then cause hci_conn_failed to be called which calls
> + * hci_conn_del internally.
> + */
> + hci_abort_conn_sync(hdev, conn, reason);
> + hci_conn_put(conn);
> + rcu_read_lock();
> }
> + rcu_read_unlock();
>
> return 0;
> }