2014-06-01 01:38:22

by Marcel Holtmann

[permalink] [raw]
Subject: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

This is an experimental patch that converts hdev->cmd_timer from
struct timer_list to struct delayed_work.

Signed-off-by: Marcel Holtmann <[email protected]>
---
include/net/bluetooth/hci_core.h | 2 +-
net/bluetooth/hci_core.c | 17 +++++++++--------
net/bluetooth/hci_event.c | 4 ++--
3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b386bf17e6c2..de3bb22e83f9 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -273,7 +273,7 @@ struct hci_dev {

struct delayed_work service_cache;

- struct timer_list cmd_timer;
+ struct delayed_work cmd_timer;

struct work_struct rx_work;
struct work_struct cmd_work;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0a43cce9a914..9e0368b02a11 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2432,7 +2432,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_req_lock(hdev);

if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
- del_timer_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_unlock(hdev);
return 0;
}
@@ -2488,7 +2488,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)

/* Drop last sent command */
if (hdev->sent_cmd) {
- del_timer_sync(&hdev->cmd_timer);
+ cancel_delayed_work_sync(&hdev->cmd_timer);
kfree_skb(hdev->sent_cmd);
hdev->sent_cmd = NULL;
}
@@ -3205,9 +3205,10 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
}

/* HCI command timer function */
-static void hci_cmd_timeout(unsigned long arg)
+static void hci_cmd_timeout(struct work_struct *work)
{
- struct hci_dev *hdev = (void *) arg;
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ cmd_timer.work);

if (hdev->sent_cmd) {
struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
@@ -3884,7 +3885,7 @@ struct hci_dev *hci_alloc_dev(void)

init_waitqueue_head(&hdev->req_wait_q);

- setup_timer(&hdev->cmd_timer, hci_cmd_timeout, (unsigned long) hdev);
+ INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);

hci_init_sysfs(hdev);
discovery_init(hdev);
@@ -5287,10 +5288,10 @@ static void hci_cmd_work(struct work_struct *work)
atomic_dec(&hdev->cmd_cnt);
hci_send_frame(hdev, skb);
if (test_bit(HCI_RESET, &hdev->flags))
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);
else
- mod_timer(&hdev->cmd_timer,
- jiffies + HCI_CMD_TIMEOUT);
+ schedule_delayed_work(&hdev->cmd_timer,
+ HCI_CMD_TIMEOUT);
} else {
skb_queue_head(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6cf9596ff69b..4f942ea43cd8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2705,7 +2705,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
}

if (opcode != HCI_OP_NOP)
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);

hci_req_cmd_complete(hdev, opcode, status);

@@ -2796,7 +2796,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
}

if (opcode != HCI_OP_NOP)
- del_timer(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->cmd_timer);

if (ev->status ||
(hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
--
1.9.3



2014-06-12 09:39:05

by Lukasz Rymanowski

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Hi Marcel,

On Thu, Jun 12, 2014 at 11:25 AM, Lukasz Rymanowski
<[email protected]> wrote:
> Hi Marcel,
>
> On Sun, Jun 1, 2014 at 3:38 AM, Marcel Holtmann <[email protected]> wrote:
>> This is an experimental patch that converts hdev->cmd_timer from
>> struct timer_list to struct delayed_work.
>>
>> Signed-off-by: Marcel Holtmann <[email protected]>
>> ---
>> include/net/bluetooth/hci_core.h | 2 +-
>> net/bluetooth/hci_core.c | 17 +++++++++--------
>> net/bluetooth/hci_event.c | 4 ++--
>> 3 files changed, 12 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
>> index b386bf17e6c2..de3bb22e83f9 100644
>> --- a/include/net/bluetooth/hci_core.h
>> +++ b/include/net/bluetooth/hci_core.h
>> @@ -273,7 +273,7 @@ struct hci_dev {
>>
>> struct delayed_work service_cache;
>>
>> - struct timer_list cmd_timer;
>> + struct delayed_work cmd_timer;
>>
>> struct work_struct rx_work;
>> struct work_struct cmd_work;
>> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
>> index 0a43cce9a914..9e0368b02a11 100644
>> --- a/net/bluetooth/hci_core.c
>> +++ b/net/bluetooth/hci_core.c
>> @@ -2432,7 +2432,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
>> hci_req_lock(hdev);
>>
>> if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
>> - del_timer_sync(&hdev->cmd_timer);
>> + cancel_delayed_work_sync(&hdev->cmd_timer);
>> hci_req_unlock(hdev);
>> return 0;
>> }
>> @@ -2488,7 +2488,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
>>
>> /* Drop last sent command */
>> if (hdev->sent_cmd) {
>> - del_timer_sync(&hdev->cmd_timer);
>> + cancel_delayed_work_sync(&hdev->cmd_timer);
>> kfree_skb(hdev->sent_cmd);
>> hdev->sent_cmd = NULL;
>> }
>> @@ -3205,9 +3205,10 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
>> }
>>
>> /* HCI command timer function */
>> -static void hci_cmd_timeout(unsigned long arg)
>> +static void hci_cmd_timeout(struct work_struct *work)
>> {
>> - struct hci_dev *hdev = (void *) arg;
>> + struct hci_dev *hdev = container_of(work, struct hci_dev,
>> + cmd_timer.work);
>>
>> if (hdev->sent_cmd) {
>> struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
>> @@ -3884,7 +3885,7 @@ struct hci_dev *hci_alloc_dev(void)
>>
>> init_waitqueue_head(&hdev->req_wait_q);
>>
>> - setup_timer(&hdev->cmd_timer, hci_cmd_timeout, (unsigned long) hdev);
>> + INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
>>
>> hci_init_sysfs(hdev);
>> discovery_init(hdev);
>> @@ -5287,10 +5288,10 @@ static void hci_cmd_work(struct work_struct *work)
>> atomic_dec(&hdev->cmd_cnt);
>> hci_send_frame(hdev, skb);
>> if (test_bit(HCI_RESET, &hdev->flags))
>> - del_timer(&hdev->cmd_timer);
>> + cancel_delayed_work(&hdev->cmd_timer);
>> else
>> - mod_timer(&hdev->cmd_timer,
>> - jiffies + HCI_CMD_TIMEOUT);
>> + schedule_delayed_work(&hdev->cmd_timer,
>> + HCI_CMD_TIMEOUT);
>> } else {
>> skb_queue_head(&hdev->cmd_q, skb);
>> queue_work(hdev->workqueue, &hdev->cmd_work);
>> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
>> index 6cf9596ff69b..4f942ea43cd8 100644
>> --- a/net/bluetooth/hci_event.c
>> +++ b/net/bluetooth/hci_event.c
>> @@ -2705,7 +2705,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
>> }
>>
>> if (opcode != HCI_OP_NOP)
>> - del_timer(&hdev->cmd_timer);
>> + cancel_delayed_work(&hdev->cmd_timer);
>>
>> hci_req_cmd_complete(hdev, opcode, status);
>>
>> @@ -2796,7 +2796,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
>> }
>>
>> if (opcode != HCI_OP_NOP)
>> - del_timer(&hdev->cmd_timer);
>> + cancel_delayed_work(&hdev->cmd_timer);
>>
>> if (ev->status ||
>> (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
>> --
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-bluetooth" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> When I start to use "[PATCH v3] Bluetooth: Fix for ACL disconnect when
> pairing fails" I was facing
> hci tx timeout on disconnect command with some chips even I saw in
> btmon that Command Status on disconnect command came on time.
>
> After I apply this patch, that issue has gone.
>

Looks like issue is still there. Ignore previous email.

[ 2826.718006] [613] l2cap_disconn_ind: hcon ffff8800c41ed000
[ 2826.718007] [613] hci_disconnect: hcon ffff8800c41ed000
[ 2826.718009] [613] hci_send_cmd: hci0 opcode 0x0406 plen 3
[ 2826.718013] [613] hci_prepare_cmd: skb len 6
[ 2826.718018] [613] hci_cmd_work: hci0 cmd_cnt 1 cmd queued 1
[ 2826.718024] [613] hci_send_frame: hci0 type 1 len 6
[ 2826.718026] [613] hci_send_to_monitor: hdev ffff8801961da000 len 6
[ 2826.718091] [6157] hci_sock_recvmsg: sock ffff8801d19ead00, sk
ffff88016929dc00
[ 2826.718136] [6157] hci_sock_recvmsg: sock ffff8801d19ead00, sk
ffff88016929dc00
[ 2826.719486] [613] hci_rx_work: hci0
[ 2826.719496] [613] hci_send_to_monitor: hdev ffff8801961da000 len 6
[ 2826.719523] [613] hci_rx_work: hci0 Event packet
[ 2826.719554] [6157] hci_sock_recvmsg: sock ffff8801d19ead00, sk
ffff88016929dc00
[ 2826.719589] [6157] hci_sock_recvmsg: sock ffff8801d19ead00, sk
ffff88016929dc00
[ 2828.724246] Bluetooth: hci0 command 0x0406 tx timeout

> \Lukasz

\Lukasz

2014-06-12 09:25:45

by Lukasz Rymanowski

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Hi Marcel,

On Sun, Jun 1, 2014 at 3:38 AM, Marcel Holtmann <[email protected]> wrote:
> This is an experimental patch that converts hdev->cmd_timer from
> struct timer_list to struct delayed_work.
>
> Signed-off-by: Marcel Holtmann <[email protected]>
> ---
> include/net/bluetooth/hci_core.h | 2 +-
> net/bluetooth/hci_core.c | 17 +++++++++--------
> net/bluetooth/hci_event.c | 4 ++--
> 3 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index b386bf17e6c2..de3bb22e83f9 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -273,7 +273,7 @@ struct hci_dev {
>
> struct delayed_work service_cache;
>
> - struct timer_list cmd_timer;
> + struct delayed_work cmd_timer;
>
> struct work_struct rx_work;
> struct work_struct cmd_work;
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 0a43cce9a914..9e0368b02a11 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -2432,7 +2432,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
> hci_req_lock(hdev);
>
> if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
> - del_timer_sync(&hdev->cmd_timer);
> + cancel_delayed_work_sync(&hdev->cmd_timer);
> hci_req_unlock(hdev);
> return 0;
> }
> @@ -2488,7 +2488,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
>
> /* Drop last sent command */
> if (hdev->sent_cmd) {
> - del_timer_sync(&hdev->cmd_timer);
> + cancel_delayed_work_sync(&hdev->cmd_timer);
> kfree_skb(hdev->sent_cmd);
> hdev->sent_cmd = NULL;
> }
> @@ -3205,9 +3205,10 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
> }
>
> /* HCI command timer function */
> -static void hci_cmd_timeout(unsigned long arg)
> +static void hci_cmd_timeout(struct work_struct *work)
> {
> - struct hci_dev *hdev = (void *) arg;
> + struct hci_dev *hdev = container_of(work, struct hci_dev,
> + cmd_timer.work);
>
> if (hdev->sent_cmd) {
> struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
> @@ -3884,7 +3885,7 @@ struct hci_dev *hci_alloc_dev(void)
>
> init_waitqueue_head(&hdev->req_wait_q);
>
> - setup_timer(&hdev->cmd_timer, hci_cmd_timeout, (unsigned long) hdev);
> + INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
>
> hci_init_sysfs(hdev);
> discovery_init(hdev);
> @@ -5287,10 +5288,10 @@ static void hci_cmd_work(struct work_struct *work)
> atomic_dec(&hdev->cmd_cnt);
> hci_send_frame(hdev, skb);
> if (test_bit(HCI_RESET, &hdev->flags))
> - del_timer(&hdev->cmd_timer);
> + cancel_delayed_work(&hdev->cmd_timer);
> else
> - mod_timer(&hdev->cmd_timer,
> - jiffies + HCI_CMD_TIMEOUT);
> + schedule_delayed_work(&hdev->cmd_timer,
> + HCI_CMD_TIMEOUT);
> } else {
> skb_queue_head(&hdev->cmd_q, skb);
> queue_work(hdev->workqueue, &hdev->cmd_work);
> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
> index 6cf9596ff69b..4f942ea43cd8 100644
> --- a/net/bluetooth/hci_event.c
> +++ b/net/bluetooth/hci_event.c
> @@ -2705,7 +2705,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
> }
>
> if (opcode != HCI_OP_NOP)
> - del_timer(&hdev->cmd_timer);
> + cancel_delayed_work(&hdev->cmd_timer);
>
> hci_req_cmd_complete(hdev, opcode, status);
>
> @@ -2796,7 +2796,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
> }
>
> if (opcode != HCI_OP_NOP)
> - del_timer(&hdev->cmd_timer);
> + cancel_delayed_work(&hdev->cmd_timer);
>
> if (ev->status ||
> (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
> --
> 1.9.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bluetooth" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

When I start to use "[PATCH v3] Bluetooth: Fix for ACL disconnect when
pairing fails" I was facing
hci tx timeout on disconnect command with some chips even I saw in
btmon that Command Status on disconnect command came on time.

After I apply this patch, that issue has gone.

\Lukasz

2014-06-09 10:42:42

by Alexander Holler

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Am 05.06.2014 12:53, schrieb Marcel Holtmann:
> Hi Alexander,
>
>>>> This is an experimental patch that converts hdev->cmd_timer from
>>>> struct timer_list to struct delayed_work.
>>>
>>> I don't know what this patch should change.
>>>
>>> If I understand it correctly, a workqueue is used instead of a timer.
>>> But besides that nothing else was changed. So instead of a timer, a work
>>> now kills the hci-cmd-task and posts an error. And that is exactly what
>>> happened here. So after one (failed) try, I've gone back to use my 2
>>> small patches.
>>
>> I assume the reasoning for the patch was to get the load of system queue somehow into the timeout. But that can't work. E.g. if the queue is already very busy when the hci command is (scheduled to) send and the (delayed) timeout is put on the queue (too), then it might happen that the command is send and immediately afterwards the timeout happens. Or the response from the dongle is already in the queue but happens after the timeout happens.
>
> the main reason is that we have to move away from timer_list usage since we want to fully run outside of the interrupt context. It seems that the cmd_timeout was forgotten when we did the switch to workqueue processing. I did not expect that this change will fix anything right away, but it is the ground work to actually move into simpler locking which could potentially an issue for certain delays and unexpected interactions with the USB subsystem.
>
> Keep in mind that we currently are using 3 workqueues. The system wide one, and two per HCI controller. I am pretty certain we need to investigate our general HCI packet processing vs the actual HCI command/event processing. For the HCI command/event processing we do rely on this being single threaded and ordered, but for the general HCI packet processing, we might can go multi-threaded and allow work to move from one CPU/core to another.
>
> Increasing the timeout like you did is just fixing a symptom. We need to figure out the root cause why a system takes so long to report results of the HCI command and/or what it wastes the time. USB might be a bottle neck here for sure, but then this might needs fixing as well.
>
> Johan, did you have a look at my patch and see if it is generally sound. Should we include it and see how good (or bad) it is behaving in general. From there on we can optimize or workqueue usage.

Increasing the timeout isn't fixing a symptom, the timeout itself is the
problem. It is arbitrary short and getting a reasonable value for such
is almost impossible on non-realtime-systems like Linux. Even on my
fastest machine with plenty RAM, I'm able to pause almost everything for
some seconds by switching kernel versions in my git kernel repo.

But I'm much more concerned about the fact, that the timeout masks
almost all in-kernel errors too. So one never can be sure what the
reason for the timeout is. That's why I've ended up with the first
patch. Using that patch I can at least be sure that I don't have to
search for a deadlock.

Regards,

Alexander Holler

2014-06-05 10:53:59

by Marcel Holtmann

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Hi Alexander,

>>> This is an experimental patch that converts hdev->cmd_timer from
>>> struct timer_list to struct delayed_work.
>>
>> I don't know what this patch should change.
>>
>> If I understand it correctly, a workqueue is used instead of a timer.
>> But besides that nothing else was changed. So instead of a timer, a work
>> now kills the hci-cmd-task and posts an error. And that is exactly what
>> happened here. So after one (failed) try, I've gone back to use my 2
>> small patches.
>
> I assume the reasoning for the patch was to get the load of system queue somehow into the timeout. But that can't work. E.g. if the queue is already very busy when the hci command is (scheduled to) send and the (delayed) timeout is put on the queue (too), then it might happen that the command is send and immediately afterwards the timeout happens. Or the response from the dongle is already in the queue but happens after the timeout happens.

the main reason is that we have to move away from timer_list usage since we want to fully run outside of the interrupt context. It seems that the cmd_timeout was forgotten when we did the switch to workqueue processing. I did not expect that this change will fix anything right away, but it is the ground work to actually move into simpler locking which could potentially an issue for certain delays and unexpected interactions with the USB subsystem.

Keep in mind that we currently are using 3 workqueues. The system wide one, and two per HCI controller. I am pretty certain we need to investigate our general HCI packet processing vs the actual HCI command/event processing. For the HCI command/event processing we do rely on this being single threaded and ordered, but for the general HCI packet processing, we might can go multi-threaded and allow work to move from one CPU/core to another.

Increasing the timeout like you did is just fixing a symptom. We need to figure out the root cause why a system takes so long to report results of the HCI command and/or what it wastes the time. USB might be a bottle neck here for sure, but then this might needs fixing as well.

Johan, did you have a look at my patch and see if it is generally sound. Should we include it and see how good (or bad) it is behaving in general. From there on we can optimize or workqueue usage.

Regards

Marcel


2014-06-04 21:27:37

by Alexander Holler

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Am 04.06.2014 21:38, schrieb Alexander Holler:
> Am 01.06.2014 03:38, schrieb Marcel Holtmann:
>> This is an experimental patch that converts hdev->cmd_timer from
>> struct timer_list to struct delayed_work.
>
> I don't know what this patch should change.
>
> If I understand it correctly, a workqueue is used instead of a timer.
> But besides that nothing else was changed. So instead of a timer, a work
> now kills the hci-cmd-task and posts an error. And that is exactly what
> happened here. So after one (failed) try, I've gone back to use my 2
> small patches.

I assume the reasoning for the patch was to get the load of system queue
somehow into the timeout. But that can't work. E.g. if the queue is
already very busy when the hci command is (scheduled to) send and the
(delayed) timeout is put on the queue (too), then it might happen that
the command is send and immediately afterwards the timeout happens. Or
the response from the dongle is already in the queue but happens after
the timeout happens.

Regards,

Alexander Holler

2014-06-04 19:38:08

by Alexander Holler

[permalink] [raw]
Subject: Re: [RFC] Bluetooth: Use struct delayed_work for HCI command timeout

Am 01.06.2014 03:38, schrieb Marcel Holtmann:
> This is an experimental patch that converts hdev->cmd_timer from
> struct timer_list to struct delayed_work.

I don't know what this patch should change.

If I understand it correctly, a workqueue is used instead of a timer.
But besides that nothing else was changed. So instead of a timer, a work
now kills the hci-cmd-task and posts an error. And that is exactly what
happened here. So after one (failed) try, I've gone back to use my 2
small patches.

Regards,

Alexander Holler