2023-06-23 17:32:04

by Pauli Virtanen

[permalink] [raw]
Subject: [PATCH RFC 2/5] Bluetooth: hci_sync: iterate conn_hash safely in hci_disconnect_all_sync

Any element can be removed from conn_hash when neither the RCU read lock
nor hdev->lock is held, so list_for_each_entry_safe alone is not safe here.

Add a conn_hash iteration helper that uses RCU and takes references with
hci_conn_get to keep the cursors alive, so that the loop body can safely
run unlocked. Since any item may then be deleted from conn_hash while the
locks are released, in rare cases (both the cursor and its next item were
deleted) the iteration needs to be restarted.

To process each item only once even if the iteration is restarted, set
HCI_CONN_CANCEL in hci_abort_conn_sync, similarly to what hci_abort_conn
does.

Log trace:
==================================================================
BUG: KASAN: slab-use-after-free in hci_set_powered_sync (net/bluetooth/hci_sync.c:5345 net/bluetooth/hci_sync.c:5385 net/bluetooth/hci_sync.c:5397) bluetooth
Read of size 8 at addr ffff88800a4d9000 by task kworker/u5:2/966

Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
Workqueue: hci0 hci_cmd_sync_work [bluetooth]
Call Trace:
<TASK>
dump_stack_lvl (lib/dump_stack.c:108)
print_report (mm/kasan/report.c:352 mm/kasan/report.c:462)
? __virt_addr_valid (./include/linux/mmzone.h:1901 ./include/linux/mmzone.h:1997 arch/x86/mm/physaddr.c:65)
? hci_set_powered_sync (net/bluetooth/hci_sync.c:5345 net/bluetooth/hci_sync.c:5385 net/bluetooth/hci_sync.c:5397) bluetooth
kasan_report (mm/kasan/report.c:574)
? hci_set_powered_sync (net/bluetooth/hci_sync.c:5345 net/bluetooth/hci_sync.c:5385 net/bluetooth/hci_sync.c:5397) bluetooth
hci_set_powered_sync (net/bluetooth/hci_sync.c:5345 net/bluetooth/hci_sync.c:5385 net/bluetooth/hci_sync.c:5397) bluetooth
? __pfx_hci_set_powered_sync (net/bluetooth/hci_sync.c:5393) bluetooth
? set_powered_sync (net/bluetooth/mgmt.c:1369) bluetooth
? __pfx_set_powered_sync (net/bluetooth/mgmt.c:1367) bluetooth
hci_cmd_sync_work (net/bluetooth/hci_sync.c:306) bluetooth
process_one_work (kernel/workqueue.c:2410)
? __pfx_process_one_work (kernel/workqueue.c:2300)
? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:113)
? mark_held_locks (kernel/locking/lockdep.c:4240)
worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2553)
? __pfx_worker_thread (kernel/workqueue.c:2495)
kthread (kernel/kthread.c:379)
? __pfx_kthread (kernel/kthread.c:332)
ret_from_fork (arch/x86/entry/entry_64.S:314)
</TASK>

Allocated by task 2366:
kasan_save_stack (mm/kasan/common.c:46)
kasan_set_track (mm/kasan/common.c:52)
__kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383)
hci_conn_add (./include/linux/slab.h:559 ./include/linux/slab.h:680 net/bluetooth/hci_conn.c:1002) bluetooth
hci_bind_cis (net/bluetooth/hci_conn.c:1908) bluetooth
iso_connect_cis (net/bluetooth/iso.c:383) bluetooth
iso_sock_connect (net/bluetooth/iso.c:890) bluetooth
__sys_connect (./include/linux/file.h:44 net/socket.c:2021)
__x64_sys_connect (net/socket.c:2027)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)

Freed by task 2366:
kasan_save_stack (mm/kasan/common.c:46)
kasan_set_track (mm/kasan/common.c:52)
kasan_save_free_info (mm/kasan/generic.c:523)
__kasan_slab_free (mm/kasan/common.c:238 mm/kasan/common.c:200 mm/kasan/common.c:244)
__kmem_cache_free (mm/slub.c:1807 mm/slub.c:3786 mm/slub.c:3799)
device_release (drivers/base/core.c:2489)
kobject_put (lib/kobject.c:683 lib/kobject.c:714 ./include/linux/kref.h:65 lib/kobject.c:731)
__iso_sock_close (net/bluetooth/iso.c:665) bluetooth
iso_sock_release (net/bluetooth/iso.c:686 net/bluetooth/iso.c:1473) bluetooth
__sock_release (net/socket.c:654)
sock_close (net/socket.c:1399)
__fput (fs/file_table.c:322)
task_work_run (kernel/task_work.c:180)
exit_to_user_mode_prepare (./include/linux/resume_user_mode.h:49 kernel/entry/common.c:171 kernel/entry/common.c:204)
syscall_exit_to_user_mode (kernel/entry/common.c:130 kernel/entry/common.c:299)
do_syscall_64 (arch/x86/entry/common.c:87)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
==================================================================

Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier")
Signed-off-by: Pauli Virtanen <[email protected]>
---
net/bluetooth/hci_sync.c | 140 +++++++++++++++++++++++++++++++++++----
1 file changed, 127 insertions(+), 13 deletions(-)

diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index afb8e970e62c..46a156b44a8b 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -5276,9 +5276,6 @@ static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
if (test_bit(HCI_CONN_SCANNING, &conn->flags))
return 0;

- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
- return 0;
-
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL,
0, NULL, HCI_CMD_TIMEOUT);
}
@@ -5334,6 +5331,14 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
int err;

+ /* No hdev->lock: but only accessing dst/type (immutable) and
+ * state/flags here, in worst case we just send some unnecessary
+ * HCI commands.
+ */
+
+ if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ return 0;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
@@ -5342,10 +5347,12 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
err = hci_connect_cancel_sync(hdev, conn);
/* Cleanup hci_conn object if it cannot be cancelled as it
* likelly means the controller and host stack are out of sync.
+ * Watch out for deleted conn in calling conn_failed.
*/
if (err) {
hci_dev_lock(hdev);
- hci_conn_failed(conn, err);
+ if (hci_conn_is_alive(hdev, conn))
+ hci_conn_failed(conn, err);
hci_dev_unlock(hdev);
}
return err;
@@ -5359,20 +5366,125 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
return 0;
}

-static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
+typedef bool (*hci_conn_iter_func_t)(struct hci_dev *hdev,
+ struct hci_conn *conn,
+ void *data);
+
+/* Iterate connections with unlocked loop body, allowing concurrent mutation,
+ * holding references to the cursors. If both the cursor and the next item are
+ * deleted while unlocked, this fails with -EBUSY, or optionally retries
+ * iteration from start. Note that hci_conn_cleanup may be running concurrently
+ * or have already completed for the conn, which you need to deal with.
+ */
+static int hci_conn_hash_list_unlocked(struct hci_dev *hdev,
+ bool retry,
+ hci_conn_iter_func_t func,
+ void *data)
{
- struct hci_conn *conn, *tmp;
- int err;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *pos, *prev, *prev_next;

- list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
- err = hci_abort_conn_sync(hdev, conn, reason);
- if (err)
- return err;
+ if (!func)
+ return 0;
+
+again:
+ rcu_read_lock();
+
+ prev = NULL;
+ prev_next = NULL;
+
+ pos = list_first_or_null_rcu(head, struct hci_conn, list);
+ if (pos)
+ hci_conn_get(pos);
+
+ while (pos) {
+ struct list_head *ptr = &pos->list;
+ struct hci_conn *next;
+
+ next = list_next_or_null_rcu(head, ptr, struct hci_conn, list);
+ if (next)
+ hci_conn_get(next);
+
+ rcu_read_unlock();
+
+ /* Can't unref in RCU, so do it here */
+ if (prev) {
+ hci_conn_put(prev);
+ prev = NULL;
+ }
+
+ if (prev_next) {
+ hci_conn_put(prev_next);
+ prev_next = NULL;
+ }
+
+ if (func(hdev, pos, data)) {
+ hci_conn_put(pos);
+ if (next)
+ hci_conn_put(next);
+
+ return 0;
+ }
+
+ rcu_read_lock();
+
+ if (next && !hci_conn_is_alive(hdev, next)) {
+ if (!hci_conn_is_alive(hdev, pos)) {
+ /* Both cursors deleted */
+ rcu_read_unlock();
+ hci_conn_put(pos);
+ hci_conn_put(next);
+
+ if (retry)
+ goto again;
+
+ return -EBUSY;
+ }
+
+ /* Use the other cursor */
+ prev_next = next;
+ next = list_next_or_null_rcu(head, ptr,
+ struct hci_conn, list);
+ if (next)
+ hci_conn_get(next);
+ }
+
+ prev = pos;
+ pos = next;
}

+ rcu_read_unlock();
+
+ if (prev)
+ hci_conn_put(prev);
+ if (prev_next)
+ hci_conn_put(prev_next);
+
return 0;
}

+struct disconnect_all_info {
+ u8 reason;
+ int err;
+};
+
+static bool disconnect_all_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ void *data)
+{
+ struct disconnect_all_info *info = data;
+
+ info->err = hci_abort_conn_sync(hdev, conn, info->reason);
+ return info->err;
+}
+
+static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
+{
+ struct disconnect_all_info info = {reason, 0};
+
+ hci_conn_hash_list_unlocked(hdev, true, disconnect_all_sync, &info);
+ return info.err;
+}
+
/* This function perform power off HCI command sequence as follows:
*
* Clear Advertising
@@ -6254,8 +6366,10 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
conn->conn_timeout, NULL);

done:
- if (err == -ETIMEDOUT)
- hci_le_connect_cancel_sync(hdev, conn);
+ if (err == -ETIMEDOUT) {
+ if (!test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ hci_le_connect_cancel_sync(hdev, conn);
+ }

/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
--
2.41.0