2023-07-24 11:19:17

by Yun-hao Chung

[permalink] [raw]
Subject: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

For some controllers, it is known that when the HCI disconnect and HCI
Reset are too close to each other, the LMP disconnect command might not
been sent out yet and the command will be dropped by the controoler when
it is asked to reset itself. This could happen on powering off adapter.

One possible issue is that if a connection exists, and then powering off
and on adapter within a short time, then our host stack assumes the
conntection was disconnected but this might not be true, so if we issue
a connection to the peer, it will fail with ACL Already Connected error.

This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
powering off with a configurable timeout unless the timeout is set to 0.

Reviewed-by: Archie Pusaka <[email protected]>
Signed-off-by: Howard Chung <[email protected]>
---
Hi upstream maintainers, this is tested with an AX211 device and Logi
K580 keyboard by the following procedures:
1. pair the peer and stay connected.
2. power off and on immediately
3. observe that the btsnoop log is consistent with the configured
timeout.

include/net/bluetooth/hci_core.h | 1 +
net/bluetooth/hci_core.c | 2 +-
net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
net/bluetooth/mgmt_config.c | 6 +++++
4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 8200a6689b39..ce44f9c60059 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -432,6 +432,7 @@ struct hci_dev {
__u16 advmon_allowlist_duration;
__u16 advmon_no_filter_duration;
__u8 enable_advmon_interleave_scan;
+ __u16 discon_on_poweroff_timeout;

__u16 devid_source;
__u16 devid_vendor;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0fefa6788911..769865494f45 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
hdev->adv_instance_cnt = 0;
hdev->cur_adv_instance = 0x00;
hdev->adv_instance_timeout = 0;
-
+ hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
hdev->advmon_allowlist_duration = 300;
hdev->advmon_no_filter_duration = 500;
hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 3348a1b0e3f7..260e9f05359c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_disconnect cp;
+ unsigned long timeout;
+ int err;

if (conn->type == AMP_LINK)
return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
@@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
cp.handle = cpu_to_le16(conn->handle);
cp.reason = reason;

- /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
- * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
- * used when suspending or powering off, where we don't want to wait
- * for the peer's response.
+ /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
+ * so we don't want to waste time waiting for the reply of the peer.
+ * However, if the configuration specified, we'll wait some time to give the
+ * controller chance to actually send the disconnect command.
*/
- if (reason != HCI_ERROR_REMOTE_POWER_OFF)
- return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
- sizeof(cp), &cp,
- HCI_EV_DISCONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
+ return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ }

- return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
- HCI_CMD_TIMEOUT);
+ if (reason == HCI_ERROR_REMOTE_POWER_OFF)
+ timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
+ else
+ timeout = HCI_CMD_TIMEOUT;
+
+ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
+ sizeof(cp), &cp,
+ HCI_EV_DISCONN_COMPLETE,
+ timeout, NULL);
+
+ /* Ignore the error in suspending or powering off case to avoid the procedure being
+ * aborted.
+ */
+ if (reason == HCI_ERROR_REMOTE_POWER_OFF)
+ return 0;
+
+ return err;
}

static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
index 6ef701c27da4..f3194e3642d9 100644
--- a/net/bluetooth/mgmt_config.c
+++ b/net/bluetooth/mgmt_config.c
@@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
HDEV_PARAM_U16(advmon_allowlist_duration);
HDEV_PARAM_U16(advmon_no_filter_duration);
HDEV_PARAM_U8(enable_advmon_interleave_scan);
+ HDEV_PARAM_U16(discon_on_poweroff_timeout);
} __packed rp = {
TLV_SET_U16(0x0000, def_page_scan_type),
TLV_SET_U16(0x0001, def_page_scan_int),
@@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
TLV_SET_U16(0x001d, advmon_allowlist_duration),
TLV_SET_U16(0x001e, advmon_no_filter_duration),
TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
+ TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
};

bt_dev_dbg(hdev, "sock %p", sk);
@@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
case 0x001b:
case 0x001d:
case 0x001e:
+ case 0x0020:
exp_type_len = sizeof(u16);
break;
case 0x001f:
@@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
case 0x0001f:
hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
break;
+ case 0x00020:
+ hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
+ break;
default:
bt_dev_warn(hdev, "unsupported parameter %u", type);
break;
--
2.41.0.487.g6d72f3e995-goog



2023-07-24 12:15:39

by bluez.test.bot

[permalink] [raw]
Subject: RE: [v1] Bluetooth: Add timeout in disconnect when power off

This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=768858

---Test result---

Test Summary:
CheckPatch PASS 1.32 seconds
GitLint PASS 0.30 seconds
SubjectPrefix PASS 0.09 seconds
BuildKernel PASS 40.15 seconds
CheckAllWarning PASS 44.04 seconds
CheckSparse PASS 49.87 seconds
CheckSmatch PASS 134.79 seconds
BuildKernel32 PASS 38.62 seconds
TestRunnerSetup PASS 584.62 seconds
TestRunner_l2cap-tester PASS 28.42 seconds
TestRunner_iso-tester PASS 56.98 seconds
TestRunner_bnep-tester PASS 12.70 seconds
TestRunner_mgmt-tester PASS 247.32 seconds
TestRunner_rfcomm-tester PASS 19.68 seconds
TestRunner_sco-tester PASS 19.99 seconds
TestRunner_ioctl-tester PASS 21.88 seconds
TestRunner_mesh-tester PASS 16.37 seconds
TestRunner_smp-tester PASS 17.27 seconds
TestRunner_userchan-tester PASS 13.62 seconds
IncrementalBuild PASS 36.82 seconds



---
Regards,
Linux Bluetooth

2023-07-24 12:37:00

by Paul Menzel

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

Dear Howard,


Thank you for your patch. Some minor nits.

Am 24.07.23 um 13:12 schrieb Howard Chung:
> For some controllers, it is known that when the HCI disconnect and HCI
> Reset are too close to each other, the LMP disconnect command might not
> been sent out yet and the command will be dropped by the controoler when

1. s/been/be/ or *have been*?
2. controller

> it is asked to reset itself. This could happen on powering off adapter.
>
> One possible issue is that if a connection exists, and then powering off
> and on adapter within a short time, then our host stack assumes the

I do not understand the part of the first comma.

> conntection was disconnected but this might not be true, so if we issue

connection

> a connection to the peer, it will fail with ACL Already Connected error.
>
> This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
> powering off with a configurable timeout unless the timeout is set to 0.
>
> Reviewed-by: Archie Pusaka <[email protected]>
> Signed-off-by: Howard Chung <[email protected]>
> ---
> Hi upstream maintainers, this is tested with an AX211 device and Logi
> K580 keyboard by the following procedures:
> 1. pair the peer and stay connected.
> 2. power off and on immediately
> 3. observe that the btsnoop log is consistent with the configured
> timeout.

It’d be great to have this in the commit message.

> include/net/bluetooth/hci_core.h | 1 +
> net/bluetooth/hci_core.c | 2 +-
> net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
> net/bluetooth/mgmt_config.c | 6 +++++
> 4 files changed, 35 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index 8200a6689b39..ce44f9c60059 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -432,6 +432,7 @@ struct hci_dev {
> __u16 advmon_allowlist_duration;
> __u16 advmon_no_filter_duration;
> __u8 enable_advmon_interleave_scan;
> + __u16 discon_on_poweroff_timeout;

I’d append the unit to the variable name: `discon_on_poweroff_timeout_ms`.

>
> __u16 devid_source;
> __u16 devid_vendor;
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 0fefa6788911..769865494f45 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
> hdev->adv_instance_cnt = 0;
> hdev->cur_adv_instance = 0x00;
> hdev->adv_instance_timeout = 0;
> -
> + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
> hdev->advmon_allowlist_duration = 300;
> hdev->advmon_no_filter_duration = 500;
> hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> index 3348a1b0e3f7..260e9f05359c 100644
> --- a/net/bluetooth/hci_sync.c
> +++ b/net/bluetooth/hci_sync.c
> @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> u8 reason)
> {
> struct hci_cp_disconnect cp;
> + unsigned long timeout;

Ditto.

> + int err;
>
> if (conn->type == AMP_LINK)
> return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
> @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> cp.handle = cpu_to_le16(conn->handle);
> cp.reason = reason;
>
> - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
> - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
> - * used when suspending or powering off, where we don't want to wait
> - * for the peer's response.
> + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
> + * so we don't want to waste time waiting for the reply of the peer.
> + * However, if the configuration specified, we'll wait some time to give the

“if the configuration specified” sounds strange to me.


Kind regards,

Paul


> + * controller chance to actually send the disconnect command.
> */
> - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
> - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> - sizeof(cp), &cp,
> - HCI_EV_DISCONN_COMPLETE,
> - HCI_CMD_TIMEOUT, NULL);
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
> + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
> + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
> + }
>
> - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
> - HCI_CMD_TIMEOUT);
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
> + else
> + timeout = HCI_CMD_TIMEOUT;
> +
> + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> + sizeof(cp), &cp,
> + HCI_EV_DISCONN_COMPLETE,
> + timeout, NULL);
> +
> + /* Ignore the error in suspending or powering off case to avoid the procedure being
> + * aborted.
> + */
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> + return 0;
> +
> + return err;
> }
>
> static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
> diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> index 6ef701c27da4..f3194e3642d9 100644
> --- a/net/bluetooth/mgmt_config.c
> +++ b/net/bluetooth/mgmt_config.c
> @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> HDEV_PARAM_U16(advmon_allowlist_duration);
> HDEV_PARAM_U16(advmon_no_filter_duration);
> HDEV_PARAM_U8(enable_advmon_interleave_scan);
> + HDEV_PARAM_U16(discon_on_poweroff_timeout);
> } __packed rp = {
> TLV_SET_U16(0x0000, def_page_scan_type),
> TLV_SET_U16(0x0001, def_page_scan_int),
> @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> TLV_SET_U16(0x001d, advmon_allowlist_duration),
> TLV_SET_U16(0x001e, advmon_no_filter_duration),
> TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
> + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
> };
>
> bt_dev_dbg(hdev, "sock %p", sk);
> @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> case 0x001b:
> case 0x001d:
> case 0x001e:
> + case 0x0020:
> exp_type_len = sizeof(u16);
> break;
> case 0x001f:
> @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> case 0x0001f:
> hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
> break;
> + case 0x00020:
> + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
> + break;
> default:
> bt_dev_warn(hdev, "unsupported parameter %u", type);
> break;

2023-07-24 22:34:11

by Luiz Augusto von Dentz

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

Hi Howard,

On Mon, Jul 24, 2023 at 4:12 AM Howard Chung <[email protected]> wrote:
>
> For some controllers, it is known that when the HCI disconnect and HCI
> Reset are too close to each other, the LMP disconnect command might not
> been sent out yet and the command will be dropped by the controoler when
> it is asked to reset itself. This could happen on powering off adapter.
>
> One possible issue is that if a connection exists, and then powering off
> and on adapter within a short time, then our host stack assumes the
> conntection was disconnected but this might not be true, so if we issue
> a connection to the peer, it will fail with ACL Already Connected error.

That sounds more like a bug in the controller though, the spec is
quite clear that it must reset the link-layer state:

BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
page 1972

...HCI_Reset command shall reset the Link Manager, Baseband and Link Layer.

So it sounds like the controller shall perform and the necessary
procedures before it respond with a Command Complete.

> This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
> powering off with a configurable timeout unless the timeout is set to 0.
>
> Reviewed-by: Archie Pusaka <[email protected]>
> Signed-off-by: Howard Chung <[email protected]>
> ---
> Hi upstream maintainers, this is tested with an AX211 device and Logi
> K580 keyboard by the following procedures:
> 1. pair the peer and stay connected.
> 2. power off and on immediately
> 3. observe that the btsnoop log is consistent with the configured
> timeout.
>
> include/net/bluetooth/hci_core.h | 1 +
> net/bluetooth/hci_core.c | 2 +-
> net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
> net/bluetooth/mgmt_config.c | 6 +++++
> 4 files changed, 35 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index 8200a6689b39..ce44f9c60059 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -432,6 +432,7 @@ struct hci_dev {
> __u16 advmon_allowlist_duration;
> __u16 advmon_no_filter_duration;
> __u8 enable_advmon_interleave_scan;
> + __u16 discon_on_poweroff_timeout;
>
> __u16 devid_source;
> __u16 devid_vendor;
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 0fefa6788911..769865494f45 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
> hdev->adv_instance_cnt = 0;
> hdev->cur_adv_instance = 0x00;
> hdev->adv_instance_timeout = 0;
> -
> + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
> hdev->advmon_allowlist_duration = 300;
> hdev->advmon_no_filter_duration = 500;
> hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
> diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> index 3348a1b0e3f7..260e9f05359c 100644
> --- a/net/bluetooth/hci_sync.c
> +++ b/net/bluetooth/hci_sync.c
> @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> u8 reason)
> {
> struct hci_cp_disconnect cp;
> + unsigned long timeout;
> + int err;
>
> if (conn->type == AMP_LINK)
> return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
> @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> cp.handle = cpu_to_le16(conn->handle);
> cp.reason = reason;
>
> - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
> - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
> - * used when suspending or powering off, where we don't want to wait
> - * for the peer's response.
> + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
> + * so we don't want to waste time waiting for the reply of the peer.
> + * However, if the configuration specified, we'll wait some time to give the
> + * controller chance to actually send the disconnect command.
> */
> - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
> - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> - sizeof(cp), &cp,
> - HCI_EV_DISCONN_COMPLETE,
> - HCI_CMD_TIMEOUT, NULL);
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
> + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
> + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
> + }
>
> - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
> - HCI_CMD_TIMEOUT);
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
> + else
> + timeout = HCI_CMD_TIMEOUT;
> +
> + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> + sizeof(cp), &cp,
> + HCI_EV_DISCONN_COMPLETE,
> + timeout, NULL);
> +
> + /* Ignore the error in suspending or powering off case to avoid the procedure being
> + * aborted.
> + */
> + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> + return 0;
> +
> + return err;
> }
>
> static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
> diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> index 6ef701c27da4..f3194e3642d9 100644
> --- a/net/bluetooth/mgmt_config.c
> +++ b/net/bluetooth/mgmt_config.c
> @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> HDEV_PARAM_U16(advmon_allowlist_duration);
> HDEV_PARAM_U16(advmon_no_filter_duration);
> HDEV_PARAM_U8(enable_advmon_interleave_scan);
> + HDEV_PARAM_U16(discon_on_poweroff_timeout);
> } __packed rp = {
> TLV_SET_U16(0x0000, def_page_scan_type),
> TLV_SET_U16(0x0001, def_page_scan_int),
> @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> TLV_SET_U16(0x001d, advmon_allowlist_duration),
> TLV_SET_U16(0x001e, advmon_no_filter_duration),
> TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
> + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
> };
>
> bt_dev_dbg(hdev, "sock %p", sk);
> @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> case 0x001b:
> case 0x001d:
> case 0x001e:
> + case 0x0020:
> exp_type_len = sizeof(u16);
> break;
> case 0x001f:
> @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> case 0x0001f:
> hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
> break;
> + case 0x00020:
> + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
> + break;
> default:
> bt_dev_warn(hdev, "unsupported parameter %u", type);
> break;
> --
> 2.41.0.487.g6d72f3e995-goog
>


--
Luiz Augusto von Dentz

2023-07-25 05:31:02

by Yun-hao Chung

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

Thanks Paul for the review. I'll fix them in the next patch.

Hi Luiz,

I think the controller did reset its internal state in the link layer,
so the next time we asked the controller to create a connection to the
same remote, the controller didn't reply the error in the command
complete event but the connection was rejected by the remote. If we
really want the controller to complete the disconnect procedure on
reset, the power-off procedure could be stuck for tens of seconds if
the remote was not responding. A similar problem was found and solved
in this patch [Bluetooth: hci_sync: Don't wait peer's reply when
powering off].

The other reason why we think this is better to be solved in the
software layer is this is not only observed in Intel's AX211 but also
MTK's MT7921 and maybe more.

Since this is a tradeoff problem, I make it a configurable parameter
so that each user can decide a good number from their perspective. If
it is 0, which is the default value, the behavior should remain the
same.

Do you think this sounds good to you?

Thanks,
Howard


On Tue, Jul 25, 2023 at 6:31 AM Luiz Augusto von Dentz
<[email protected]> wrote:
>
> Hi Howard,
>
> On Mon, Jul 24, 2023 at 4:12 AM Howard Chung <[email protected]> wrote:
> >
> > For some controllers, it is known that when the HCI disconnect and HCI
> > Reset are too close to each other, the LMP disconnect command might not
> > been sent out yet and the command will be dropped by the controoler when
> > it is asked to reset itself. This could happen on powering off adapter.
> >
> > One possible issue is that if a connection exists, and then powering off
> > and on adapter within a short time, then our host stack assumes the
> > conntection was disconnected but this might not be true, so if we issue
> > a connection to the peer, it will fail with ACL Already Connected error.
>
> That sounds more like a bug in the controller though, the spec is
> quite clear that it must reset the link-layer state:
>
> BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
> page 1972
>
> ...HCI_Reset command shall reset the Link Manager, Baseband and Link Layer.
>
> So it sounds like the controller shall perform and the necessary
> procedures before it respond with a Command Complete.
>
> > This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
> > powering off with a configurable timeout unless the timeout is set to 0.
> >
> > Reviewed-by: Archie Pusaka <[email protected]>
> > Signed-off-by: Howard Chung <[email protected]>
> > ---
> > Hi upstream maintainers, this is tested with an AX211 device and Logi
> > K580 keyboard by the following procedures:
> > 1. pair the peer and stay connected.
> > 2. power off and on immediately
> > 3. observe that the btsnoop log is consistent with the configured
> > timeout.
> >
> > include/net/bluetooth/hci_core.h | 1 +
> > net/bluetooth/hci_core.c | 2 +-
> > net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
> > net/bluetooth/mgmt_config.c | 6 +++++
> > 4 files changed, 35 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> > index 8200a6689b39..ce44f9c60059 100644
> > --- a/include/net/bluetooth/hci_core.h
> > +++ b/include/net/bluetooth/hci_core.h
> > @@ -432,6 +432,7 @@ struct hci_dev {
> > __u16 advmon_allowlist_duration;
> > __u16 advmon_no_filter_duration;
> > __u8 enable_advmon_interleave_scan;
> > + __u16 discon_on_poweroff_timeout;
> >
> > __u16 devid_source;
> > __u16 devid_vendor;
> > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> > index 0fefa6788911..769865494f45 100644
> > --- a/net/bluetooth/hci_core.c
> > +++ b/net/bluetooth/hci_core.c
> > @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
> > hdev->adv_instance_cnt = 0;
> > hdev->cur_adv_instance = 0x00;
> > hdev->adv_instance_timeout = 0;
> > -
> > + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
> > hdev->advmon_allowlist_duration = 300;
> > hdev->advmon_no_filter_duration = 500;
> > hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
> > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> > index 3348a1b0e3f7..260e9f05359c 100644
> > --- a/net/bluetooth/hci_sync.c
> > +++ b/net/bluetooth/hci_sync.c
> > @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> > u8 reason)
> > {
> > struct hci_cp_disconnect cp;
> > + unsigned long timeout;
> > + int err;
> >
> > if (conn->type == AMP_LINK)
> > return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
> > @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> > cp.handle = cpu_to_le16(conn->handle);
> > cp.reason = reason;
> >
> > - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
> > - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
> > - * used when suspending or powering off, where we don't want to wait
> > - * for the peer's response.
> > + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
> > + * so we don't want to waste time waiting for the reply of the peer.
> > + * However, if the configuration specified, we'll wait some time to give the
> > + * controller chance to actually send the disconnect command.
> > */
> > - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
> > - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> > - sizeof(cp), &cp,
> > - HCI_EV_DISCONN_COMPLETE,
> > - HCI_CMD_TIMEOUT, NULL);
> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
> > + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
> > + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
> > + }
> >
> > - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
> > - HCI_CMD_TIMEOUT);
> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> > + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
> > + else
> > + timeout = HCI_CMD_TIMEOUT;
> > +
> > + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> > + sizeof(cp), &cp,
> > + HCI_EV_DISCONN_COMPLETE,
> > + timeout, NULL);
> > +
> > + /* Ignore the error in suspending or powering off case to avoid the procedure being
> > + * aborted.
> > + */
> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> > + return 0;
> > +
> > + return err;
> > }
> >
> > static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
> > diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> > index 6ef701c27da4..f3194e3642d9 100644
> > --- a/net/bluetooth/mgmt_config.c
> > +++ b/net/bluetooth/mgmt_config.c
> > @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > HDEV_PARAM_U16(advmon_allowlist_duration);
> > HDEV_PARAM_U16(advmon_no_filter_duration);
> > HDEV_PARAM_U8(enable_advmon_interleave_scan);
> > + HDEV_PARAM_U16(discon_on_poweroff_timeout);
> > } __packed rp = {
> > TLV_SET_U16(0x0000, def_page_scan_type),
> > TLV_SET_U16(0x0001, def_page_scan_int),
> > @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > TLV_SET_U16(0x001d, advmon_allowlist_duration),
> > TLV_SET_U16(0x001e, advmon_no_filter_duration),
> > TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
> > + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
> > };
> >
> > bt_dev_dbg(hdev, "sock %p", sk);
> > @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > case 0x001b:
> > case 0x001d:
> > case 0x001e:
> > + case 0x0020:
> > exp_type_len = sizeof(u16);
> > break;
> > case 0x001f:
> > @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > case 0x0001f:
> > hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
> > break;
> > + case 0x00020:
> > + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
> > + break;
> > default:
> > bt_dev_warn(hdev, "unsupported parameter %u", type);
> > break;
> > --
> > 2.41.0.487.g6d72f3e995-goog
> >
>
>
> --
> Luiz Augusto von Dentz

2023-07-25 06:23:48

by Luiz Augusto von Dentz

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

Hi Yun-hao,

On Mon, Jul 24, 2023 at 9:56 PM Yun-hao Chung <[email protected]> wrote:
>
> Thanks Paul for the review. I'll fix them in the next patch.
>
> Hi Luiz,
>
> I think the controller did reset its internal state in the link layer, so the next time we asked the controller to create a connection to the same remote, the controller didn't reply the error in the command complete event but the connection was rejected by the remote. If we really want the controller to complete the disconnect procedure on reset, the power-off procedure could be stuck for tens of seconds if the remote was not responding. A similar problem was found and solved in this patch [Bluetooth: hci_sync: Don't wait peer's reply when powering off].
>
> The other reason why we think this is better to be solved in the software layer is this is not only observed in Intel's AX211 but also MTK's MT7921 and maybe more.

Interesting, but in this case shouldn't the link-layer attempt to
cleanup _before_ it reset the state? Because as far I recall doing a
HCI_Reset shall cause a Disconnection even if not initiated by the
host stack with HCI_Disconnect, also there is something really odd
with the fact that the link-layer is generating such an error, because
in case that happens isn't it clear the states are out of sync and it
shall disconnect or something, I'm also curious what would happen if
one quickly toggle power then the same problem should manifest, at
least I don't see a difference between HCI_Reset and power off/on
within the connection supervision timeout.

> Since this is a tradeoff problem, I make it a configurable parameter so that each user can decide a good number from their perspective. If it is 0, which is the default value, the behavior should remain the same.
>
> Do you think this sounds good to you?
>
> Thanks,
> Howard
>
>
>
>
>
>
> On Tue, Jul 25, 2023 at 6:31 AM Luiz Augusto von Dentz <[email protected]> wrote:
>>
>> Hi Howard,
>>
>> On Mon, Jul 24, 2023 at 4:12 AM Howard Chung <[email protected]> wrote:
>> >
>> > For some controllers, it is known that when the HCI disconnect and HCI
>> > Reset are too close to each other, the LMP disconnect command might not
>> > been sent out yet and the command will be dropped by the controoler when
>> > it is asked to reset itself. This could happen on powering off adapter.
>> >
>> > One possible issue is that if a connection exists, and then powering off
>> > and on adapter within a short time, then our host stack assumes the
>> > conntection was disconnected but this might not be true, so if we issue
>> > a connection to the peer, it will fail with ACL Already Connected error.
>>
>> That sounds more like a bug in the controller though, the spec is
>> quite clear that it must reset the link-layer state:
>>
>> BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
>> page 1972
>>
>> ...HCI_Reset command shall reset the Link Manager, Baseband and Link Layer.
>>
>> So it sounds like the controller shall perform and the necessary
>> procedures before it respond with a Command Complete.
>>
>> > This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
>> > powering off with a configurable timeout unless the timeout is set to 0.
>> >
>> > Reviewed-by: Archie Pusaka <[email protected]>
>> > Signed-off-by: Howard Chung <[email protected]>
>> > ---
>> > Hi upstream maintainers, this is tested with an AX211 device and Logi
>> > K580 keyboard by the following procedures:
>> > 1. pair the peer and stay connected.
>> > 2. power off and on immediately
>> > 3. observe that the btsnoop log is consistent with the configured
>> > timeout.
>> >
>> > include/net/bluetooth/hci_core.h | 1 +
>> > net/bluetooth/hci_core.c | 2 +-
>> > net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
>> > net/bluetooth/mgmt_config.c | 6 +++++
>> > 4 files changed, 35 insertions(+), 12 deletions(-)
>> >
>> > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
>> > index 8200a6689b39..ce44f9c60059 100644
>> > --- a/include/net/bluetooth/hci_core.h
>> > +++ b/include/net/bluetooth/hci_core.h
>> > @@ -432,6 +432,7 @@ struct hci_dev {
>> > __u16 advmon_allowlist_duration;
>> > __u16 advmon_no_filter_duration;
>> > __u8 enable_advmon_interleave_scan;
>> > + __u16 discon_on_poweroff_timeout;
>> >
>> > __u16 devid_source;
>> > __u16 devid_vendor;
>> > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
>> > index 0fefa6788911..769865494f45 100644
>> > --- a/net/bluetooth/hci_core.c
>> > +++ b/net/bluetooth/hci_core.c
>> > @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
>> > hdev->adv_instance_cnt = 0;
>> > hdev->cur_adv_instance = 0x00;
>> > hdev->adv_instance_timeout = 0;
>> > -
>> > + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
>> > hdev->advmon_allowlist_duration = 300;
>> > hdev->advmon_no_filter_duration = 500;
>> > hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
>> > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
>> > index 3348a1b0e3f7..260e9f05359c 100644
>> > --- a/net/bluetooth/hci_sync.c
>> > +++ b/net/bluetooth/hci_sync.c
>> > @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
>> > u8 reason)
>> > {
>> > struct hci_cp_disconnect cp;
>> > + unsigned long timeout;
>> > + int err;
>> >
>> > if (conn->type == AMP_LINK)
>> > return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
>> > @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
>> > cp.handle = cpu_to_le16(conn->handle);
>> > cp.reason = reason;
>> >
>> > - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
>> > - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
>> > - * used when suspending or powering off, where we don't want to wait
>> > - * for the peer's response.
>> > + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
>> > + * so we don't want to waste time waiting for the reply of the peer.
>> > + * However, if the configuration specified, we'll wait some time to give the
>> > + * controller chance to actually send the disconnect command.
>> > */
>> > - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
>> > - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
>> > - sizeof(cp), &cp,
>> > - HCI_EV_DISCONN_COMPLETE,
>> > - HCI_CMD_TIMEOUT, NULL);
>> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
>> > + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
>> > + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
>> > + }
>> >
>> > - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
>> > - HCI_CMD_TIMEOUT);
>> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
>> > + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
>> > + else
>> > + timeout = HCI_CMD_TIMEOUT;
>> > +
>> > + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
>> > + sizeof(cp), &cp,
>> > + HCI_EV_DISCONN_COMPLETE,
>> > + timeout, NULL);
>> > +
>> > + /* Ignore the error in suspending or powering off case to avoid the procedure being
>> > + * aborted.
>> > + */
>> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
>> > + return 0;
>> > +
>> > + return err;
>> > }
>> >
>> > static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
>> > diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
>> > index 6ef701c27da4..f3194e3642d9 100644
>> > --- a/net/bluetooth/mgmt_config.c
>> > +++ b/net/bluetooth/mgmt_config.c
>> > @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
>> > HDEV_PARAM_U16(advmon_allowlist_duration);
>> > HDEV_PARAM_U16(advmon_no_filter_duration);
>> > HDEV_PARAM_U8(enable_advmon_interleave_scan);
>> > + HDEV_PARAM_U16(discon_on_poweroff_timeout);
>> > } __packed rp = {
>> > TLV_SET_U16(0x0000, def_page_scan_type),
>> > TLV_SET_U16(0x0001, def_page_scan_int),
>> > @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
>> > TLV_SET_U16(0x001d, advmon_allowlist_duration),
>> > TLV_SET_U16(0x001e, advmon_no_filter_duration),
>> > TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
>> > + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
>> > };
>> >
>> > bt_dev_dbg(hdev, "sock %p", sk);
>> > @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
>> > case 0x001b:
>> > case 0x001d:
>> > case 0x001e:
>> > + case 0x0020:
>> > exp_type_len = sizeof(u16);
>> > break;
>> > case 0x001f:
>> > @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
>> > case 0x0001f:
>> > hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
>> > break;
>> > + case 0x00020:
>> > + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
>> > + break;
>> > default:
>> > bt_dev_warn(hdev, "unsupported parameter %u", type);
>> > break;
>> > --
>> > 2.41.0.487.g6d72f3e995-goog
>> >
>>
>>
>> --
>> Luiz Augusto von Dentz



--
Luiz Augusto von Dentz

2023-07-25 07:42:17

by Yun-hao Chung

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

On Tue, Jul 25, 2023 at 2:05 PM Luiz Augusto von Dentz
<[email protected]> wrote:
>
> Hi Yun-hao,
>
> On Mon, Jul 24, 2023 at 9:56 PM Yun-hao Chung <[email protected]> wrote:
> >
> > Thanks Paul for the review. I'll fix them in the next patch.
> >
> > Hi Luiz,
> >
> > I think the controller did reset its internal state in the link layer, so the next time we asked the controller to create a connection to the same remote, the controller didn't reply the error in the command complete event but the connection was rejected by the remote. If we really want the controller to complete the disconnect procedure on reset, the power-off procedure could be stuck for tens of seconds if the remote was not responding. A similar problem was found and solved in this patch [Bluetooth: hci_sync: Don't wait peer's reply when powering off].
> >
> > The other reason why we think this is better to be solved in the software layer is this is not only observed in Intel's AX211 but also MTK's MT7921 and maybe more.
>
> Interesting, but in this case shouldn't the link-layer attempt to
> cleanup _before_ it reset the state? Because as far I recall doing a
> HCI_Reset shall cause a Disconnection even if not initiated by the
> host stack with HCI_Disconnect, also there is something really odd
> with the fact that the link-layer is generating such an error, because
> in case that happens isn't it clear the states are out of sync and it
> shall disconnect or something, I'm also curious what would happen if
> one quickly toggle power then the same problem should manifest, at
> least I don't see a difference between HCI_Reset and power off/on
> within the connection supervision timeout.

Do you think it's better to do this instead: Set HCI Supervision
timeout with a configurable or just a small value and then it's more
safe to wait for disconnect complete because the result should be
returned within the supervision timeout.
In this way, we don't need the controller to do a graceful
disconnection on HCI_Reset and the controller state won't be
out-of-sync.

>
> > Since this is a tradeoff problem, I make it a configurable parameter so that each user can decide a good number from their perspective. If it is 0, which is the default value, the behavior should remain the same.
> >
> > Do you think this sounds good to you?
> >
> > Thanks,
> > Howard
> >
> >
> >
> >
> >
> >
> > On Tue, Jul 25, 2023 at 6:31 AM Luiz Augusto von Dentz <[email protected]> wrote:
> >>
> >> Hi Howard,
> >>
> >> On Mon, Jul 24, 2023 at 4:12 AM Howard Chung <[email protected]> wrote:
> >> >
> >> > For some controllers, it is known that when the HCI disconnect and HCI
> >> > Reset are too close to each other, the LMP disconnect command might not
> >> > been sent out yet and the command will be dropped by the controoler when
> >> > it is asked to reset itself. This could happen on powering off adapter.
> >> >
> >> > One possible issue is that if a connection exists, and then powering off
> >> > and on adapter within a short time, then our host stack assumes the
> >> > conntection was disconnected but this might not be true, so if we issue
> >> > a connection to the peer, it will fail with ACL Already Connected error.
> >>
> >> That sounds more like a bug in the controller though, the spec is
> >> quite clear that it must reset the link-layer state:
> >>
> >> BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
> >> page 1972
> >>
> >> ...HCI_Reset command shall reset the Link Manager, Baseband and Link Layer.
> >>
> >> So it sounds like the controller shall perform and the necessary
> >> procedures before it respond with a Command Complete.
> >>
> >> > This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
> >> > powering off with a configurable timeout unless the timeout is set to 0.
> >> >
> >> > Reviewed-by: Archie Pusaka <[email protected]>
> >> > Signed-off-by: Howard Chung <[email protected]>
> >> > ---
> >> > Hi upstream maintainers, this is tested with an AX211 device and Logi
> >> > K580 keyboard by the following procedures:
> >> > 1. pair the peer and stay connected.
> >> > 2. power off and on immediately
> >> > 3. observe that the btsnoop log is consistent with the configured
> >> > timeout.
> >> >
> >> > include/net/bluetooth/hci_core.h | 1 +
> >> > net/bluetooth/hci_core.c | 2 +-
> >> > net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
> >> > net/bluetooth/mgmt_config.c | 6 +++++
> >> > 4 files changed, 35 insertions(+), 12 deletions(-)
> >> >
> >> > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> >> > index 8200a6689b39..ce44f9c60059 100644
> >> > --- a/include/net/bluetooth/hci_core.h
> >> > +++ b/include/net/bluetooth/hci_core.h
> >> > @@ -432,6 +432,7 @@ struct hci_dev {
> >> > __u16 advmon_allowlist_duration;
> >> > __u16 advmon_no_filter_duration;
> >> > __u8 enable_advmon_interleave_scan;
> >> > + __u16 discon_on_poweroff_timeout;
> >> >
> >> > __u16 devid_source;
> >> > __u16 devid_vendor;
> >> > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> >> > index 0fefa6788911..769865494f45 100644
> >> > --- a/net/bluetooth/hci_core.c
> >> > +++ b/net/bluetooth/hci_core.c
> >> > @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
> >> > hdev->adv_instance_cnt = 0;
> >> > hdev->cur_adv_instance = 0x00;
> >> > hdev->adv_instance_timeout = 0;
> >> > -
> >> > + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
> >> > hdev->advmon_allowlist_duration = 300;
> >> > hdev->advmon_no_filter_duration = 500;
> >> > hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
> >> > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> >> > index 3348a1b0e3f7..260e9f05359c 100644
> >> > --- a/net/bluetooth/hci_sync.c
> >> > +++ b/net/bluetooth/hci_sync.c
> >> > @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> >> > u8 reason)
> >> > {
> >> > struct hci_cp_disconnect cp;
> >> > + unsigned long timeout;
> >> > + int err;
> >> >
> >> > if (conn->type == AMP_LINK)
> >> > return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
> >> > @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> >> > cp.handle = cpu_to_le16(conn->handle);
> >> > cp.reason = reason;
> >> >
> >> > - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
> >> > - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
> >> > - * used when suspending or powering off, where we don't want to wait
> >> > - * for the peer's response.
> >> > + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
> >> > + * so we don't want to waste time waiting for the reply of the peer.
> >> > + * However, if the configuration specified, we'll wait some time to give the
> >> > + * controller chance to actually send the disconnect command.
> >> > */
> >> > - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
> >> > - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> >> > - sizeof(cp), &cp,
> >> > - HCI_EV_DISCONN_COMPLETE,
> >> > - HCI_CMD_TIMEOUT, NULL);
> >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
> >> > + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
> >> > + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
> >> > + }
> >> >
> >> > - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
> >> > - HCI_CMD_TIMEOUT);
> >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> >> > + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
> >> > + else
> >> > + timeout = HCI_CMD_TIMEOUT;
> >> > +
> >> > + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> >> > + sizeof(cp), &cp,
> >> > + HCI_EV_DISCONN_COMPLETE,
> >> > + timeout, NULL);
> >> > +
> >> > + /* Ignore the error in suspending or powering off case to avoid the procedure being
> >> > + * aborted.
> >> > + */
> >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> >> > + return 0;
> >> > +
> >> > + return err;
> >> > }
> >> >
> >> > static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
> >> > diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> >> > index 6ef701c27da4..f3194e3642d9 100644
> >> > --- a/net/bluetooth/mgmt_config.c
> >> > +++ b/net/bluetooth/mgmt_config.c
> >> > @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> >> > HDEV_PARAM_U16(advmon_allowlist_duration);
> >> > HDEV_PARAM_U16(advmon_no_filter_duration);
> >> > HDEV_PARAM_U8(enable_advmon_interleave_scan);
> >> > + HDEV_PARAM_U16(discon_on_poweroff_timeout);
> >> > } __packed rp = {
> >> > TLV_SET_U16(0x0000, def_page_scan_type),
> >> > TLV_SET_U16(0x0001, def_page_scan_int),
> >> > @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> >> > TLV_SET_U16(0x001d, advmon_allowlist_duration),
> >> > TLV_SET_U16(0x001e, advmon_no_filter_duration),
> >> > TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
> >> > + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
> >> > };
> >> >
> >> > bt_dev_dbg(hdev, "sock %p", sk);
> >> > @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> >> > case 0x001b:
> >> > case 0x001d:
> >> > case 0x001e:
> >> > + case 0x0020:
> >> > exp_type_len = sizeof(u16);
> >> > break;
> >> > case 0x001f:
> >> > @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> >> > case 0x0001f:
> >> > hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
> >> > break;
> >> > + case 0x00020:
> >> > + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
> >> > + break;
> >> > default:
> >> > bt_dev_warn(hdev, "unsupported parameter %u", type);
> >> > break;
> >> > --
> >> > 2.41.0.487.g6d72f3e995-goog
> >> >
> >>
> >>
> >> --
> >> Luiz Augusto von Dentz
>
>
>
> --
> Luiz Augusto von Dentz

2023-07-25 19:06:09

by Luiz Augusto von Dentz

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

Hi Yun-hao,

On Tue, Jul 25, 2023 at 12:33 AM Yun-hao Chung <[email protected]> wrote:
>
> On Tue, Jul 25, 2023 at 2:05 PM Luiz Augusto von Dentz
> <[email protected]> wrote:
> >
> > Hi Yun-hao,
> >
> > On Mon, Jul 24, 2023 at 9:56 PM Yun-hao Chung <[email protected]> wrote:
> > >
> > > Thanks Paul for the review. I'll fix them in the next patch.
> > >
> > > Hi Luiz,
> > >
> > > I think the controller did reset its internal state in the link layer, so the next time we asked the controller to create a connection to the same remote, the controller didn't reply the error in the command complete event but the connection was rejected by the remote. If we really want the controller to complete the disconnect procedure on reset, the power-off procedure could be stuck for tens of seconds if the remote was not responding. A similar problem was found and solved in this patch [Bluetooth: hci_sync: Don't wait peer's reply when powering off].
> > >
> > > The other reason why we think this is better to be solved in the software layer is this is not only observed in Intel's AX211 but also MTK's MT7921 and maybe more.
> >
> > Interesting, but in this case shouldn't the link-layer attempt to
> > cleanup _before_ it reset the state? Because as far I recall doing a
> > HCI_Reset shall cause a Disconnection even if not initiated by the
> > host stack with HCI_Disconnect, also there is something really odd
> > with the fact that the link-layer is generating such an error, because
> > in case that happens isn't it clear the states are out of sync and it
> > shall disconnect or something, I'm also curious what would happen if
> > one quickly toggle power then the same problem should manifest, at
> > least I don't see a difference between HCI_Reset and power off/on
> > within the connection supervision timeout.
>
> Do you think it's better to do this instead: Set HCI Supervision
> timeout with a configurable or just a small value and then it's more
> safe to wait for disconnect complete because the result should be
> returned within the supervision timeout.
> In this way, we don't need the controller to do a graceful
> disconnection on HCI_Reset and the controller state won't be
> out-of-sync.

I think it is worth trying at least, not sure if we won't get into the
same situation as to wait for the command to complete though, anyway
the default of supervision timeout is something like 20 seconds which
is way too long compared to our command timeout.

> >
> > > Since this is a tradeoff problem, I make it a configurable parameter so that each user can decide a good number from their perspective. If it is 0, which is the default value, the behavior should remain the same.
> > >
> > > Do you think this sounds good to you?
> > >
> > > Thanks,
> > > Howard
> > >
> > >
> > >
> > >
> > >
> > >
> > > On Tue, Jul 25, 2023 at 6:31 AM Luiz Augusto von Dentz <[email protected]> wrote:
> > >>
> > >> Hi Howard,
> > >>
> > >> On Mon, Jul 24, 2023 at 4:12 AM Howard Chung <[email protected]> wrote:
> > >> >
> > >> > For some controllers, it is known that when the HCI disconnect and HCI
> > >> > Reset are too close to each other, the LMP disconnect command might not
> > >> > been sent out yet and the command will be dropped by the controoler when
> > >> > it is asked to reset itself. This could happen on powering off adapter.
> > >> >
> > >> > One possible issue is that if a connection exists, and then powering off
> > >> > and on adapter within a short time, then our host stack assumes the
> > >> > conntection was disconnected but this might not be true, so if we issue
> > >> > a connection to the peer, it will fail with ACL Already Connected error.
> > >>
> > >> That sounds more like a bug in the controller though, the spec is
> > >> quite clear that it must reset the link-layer state:
> > >>
> > >> BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
> > >> page 1972
> > >>
> > >> ...HCI_Reset command shall reset the Link Manager, Baseband and Link Layer.
> > >>
> > >> So it sounds like the controller shall perform and the necessary
> > >> procedures before it respond with a Command Complete.
> > >>
> > >> > This CL makes the host stack to wait for |HCI_EV_DISCONN_COMPLETE| when
> > >> > powering off with a configurable timeout unless the timeout is set to 0.
> > >> >
> > >> > Reviewed-by: Archie Pusaka <[email protected]>
> > >> > Signed-off-by: Howard Chung <[email protected]>
> > >> > ---
> > >> > Hi upstream maintainers, this is tested with an AX211 device and Logi
> > >> > K580 keyboard by the following procedures:
> > >> > 1. pair the peer and stay connected.
> > >> > 2. power off and on immediately
> > >> > 3. observe that the btsnoop log is consistent with the configured
> > >> > timeout.
> > >> >
> > >> > include/net/bluetooth/hci_core.h | 1 +
> > >> > net/bluetooth/hci_core.c | 2 +-
> > >> > net/bluetooth/hci_sync.c | 38 +++++++++++++++++++++++---------
> > >> > net/bluetooth/mgmt_config.c | 6 +++++
> > >> > 4 files changed, 35 insertions(+), 12 deletions(-)
> > >> >
> > >> > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> > >> > index 8200a6689b39..ce44f9c60059 100644
> > >> > --- a/include/net/bluetooth/hci_core.h
> > >> > +++ b/include/net/bluetooth/hci_core.h
> > >> > @@ -432,6 +432,7 @@ struct hci_dev {
> > >> > __u16 advmon_allowlist_duration;
> > >> > __u16 advmon_no_filter_duration;
> > >> > __u8 enable_advmon_interleave_scan;
> > >> > + __u16 discon_on_poweroff_timeout;
> > >> >
> > >> > __u16 devid_source;
> > >> > __u16 devid_vendor;
> > >> > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> > >> > index 0fefa6788911..769865494f45 100644
> > >> > --- a/net/bluetooth/hci_core.c
> > >> > +++ b/net/bluetooth/hci_core.c
> > >> > @@ -2479,7 +2479,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
> > >> > hdev->adv_instance_cnt = 0;
> > >> > hdev->cur_adv_instance = 0x00;
> > >> > hdev->adv_instance_timeout = 0;
> > >> > -
> > >> > + hdev->discon_on_poweroff_timeout = 0; /* Default to no timeout */
> > >> > hdev->advmon_allowlist_duration = 300;
> > >> > hdev->advmon_no_filter_duration = 500;
> > >> > hdev->enable_advmon_interleave_scan = 0x00; /* Default to disable */
> > >> > diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
> > >> > index 3348a1b0e3f7..260e9f05359c 100644
> > >> > --- a/net/bluetooth/hci_sync.c
> > >> > +++ b/net/bluetooth/hci_sync.c
> > >> > @@ -5250,6 +5250,8 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> > >> > u8 reason)
> > >> > {
> > >> > struct hci_cp_disconnect cp;
> > >> > + unsigned long timeout;
> > >> > + int err;
> > >> >
> > >> > if (conn->type == AMP_LINK)
> > >> > return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
> > >> > @@ -5258,19 +5260,33 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
> > >> > cp.handle = cpu_to_le16(conn->handle);
> > >> > cp.reason = reason;
> > >> >
> > >> > - /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
> > >> > - * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
> > >> > - * used when suspending or powering off, where we don't want to wait
> > >> > - * for the peer's response.
> > >> > + /* The HCI_ERROR_REMOTE_POWER_OFF is used when suspending or powering off,
> > >> > + * so we don't want to waste time waiting for the reply of the peer.
> > >> > + * However, if the configuration specified, we'll wait some time to give the
> > >> > + * controller chance to actually send the disconnect command.
> > >> > */
> > >> > - if (reason != HCI_ERROR_REMOTE_POWER_OFF)
> > >> > - return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> > >> > - sizeof(cp), &cp,
> > >> > - HCI_EV_DISCONN_COMPLETE,
> > >> > - HCI_CMD_TIMEOUT, NULL);
> > >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF && !hdev->discon_on_poweroff_timeout) {
> > >> > + return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT,
> > >> > + sizeof(cp), &cp, HCI_CMD_TIMEOUT);
> > >> > + }
> > >> >
> > >> > - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp,
> > >> > - HCI_CMD_TIMEOUT);
> > >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> > >> > + timeout = msecs_to_jiffies(hdev->discon_on_poweroff_timeout);
> > >> > + else
> > >> > + timeout = HCI_CMD_TIMEOUT;
> > >> > +
> > >> > + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT,
> > >> > + sizeof(cp), &cp,
> > >> > + HCI_EV_DISCONN_COMPLETE,
> > >> > + timeout, NULL);
> > >> > +
> > >> > + /* Ignore the error in suspending or powering off case to avoid the procedure being
> > >> > + * aborted.
> > >> > + */
> > >> > + if (reason == HCI_ERROR_REMOTE_POWER_OFF)
> > >> > + return 0;
> > >> > +
> > >> > + return err;
> > >> > }
> > >> >
> > >> > static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
> > >> > diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> > >> > index 6ef701c27da4..f3194e3642d9 100644
> > >> > --- a/net/bluetooth/mgmt_config.c
> > >> > +++ b/net/bluetooth/mgmt_config.c
> > >> > @@ -78,6 +78,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > >> > HDEV_PARAM_U16(advmon_allowlist_duration);
> > >> > HDEV_PARAM_U16(advmon_no_filter_duration);
> > >> > HDEV_PARAM_U8(enable_advmon_interleave_scan);
> > >> > + HDEV_PARAM_U16(discon_on_poweroff_timeout);
> > >> > } __packed rp = {
> > >> > TLV_SET_U16(0x0000, def_page_scan_type),
> > >> > TLV_SET_U16(0x0001, def_page_scan_int),
> > >> > @@ -111,6 +112,7 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > >> > TLV_SET_U16(0x001d, advmon_allowlist_duration),
> > >> > TLV_SET_U16(0x001e, advmon_no_filter_duration),
> > >> > TLV_SET_U8(0x001f, enable_advmon_interleave_scan),
> > >> > + TLV_SET_U16(0x0020, discon_on_poweroff_timeout),
> > >> > };
> > >> >
> > >> > bt_dev_dbg(hdev, "sock %p", sk);
> > >> > @@ -186,6 +188,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > >> > case 0x001b:
> > >> > case 0x001d:
> > >> > case 0x001e:
> > >> > + case 0x0020:
> > >> > exp_type_len = sizeof(u16);
> > >> > break;
> > >> > case 0x001f:
> > >> > @@ -314,6 +317,9 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
> > >> > case 0x0001f:
> > >> > hdev->enable_advmon_interleave_scan = TLV_GET_U8(buffer);
> > >> > break;
> > >> > + case 0x00020:
> > >> > + hdev->discon_on_poweroff_timeout = TLV_GET_LE16(buffer);
> > >> > + break;
> > >> > default:
> > >> > bt_dev_warn(hdev, "unsupported parameter %u", type);
> > >> > break;
> > >> > --
> > >> > 2.41.0.487.g6d72f3e995-goog
> > >> >
> > >>
> > >>
> > >> --
> > >> Luiz Augusto von Dentz
> >
> >
> >
> > --
> > Luiz Augusto von Dentz



--
Luiz Augusto von Dentz

2023-07-26 10:39:05

by Simon Horman

[permalink] [raw]
Subject: Re: [PATCH v1] Bluetooth: Add timeout in disconnect when power off

On Mon, Jul 24, 2023 at 07:12:04PM +0800, Howard Chung wrote:
> For some controllers, it is known that when the HCI disconnect and HCI
> Reset are too close to each other, the LMP disconnect command might not
> been sent out yet and the command will be dropped by the controoler when

nit: controoler -> controller

...