LinuxLists.cc - [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message

2022-05-17 22:02:46

Subject: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

There are sleep in atomic context bugs when the request to secure
element of st21nfca is timeout. The root cause is that kzalloc and
alloc_skb with GFP_KERNEL parameter and mutex_lock are called in
st21nfca_se_wt_timeout which is a timer handler. The call tree shows
the execution paths that could lead to bugs:

(Interrupt context)
st21nfca_se_wt_timeout
nfc_hci_send_event
nfc_hci_hcp_message_tx
kzalloc(..., GFP_KERNEL) //may sleep
alloc_skb(..., GFP_KERNEL) //may sleep
mutex_lock() //may sleep

This patch changes allocation mode of kzalloc and alloc_skb from
GFP_KERNEL to GFP_ATOMIC and changes mutex_lock to spin_lock in
order to prevent atomic context from sleeping.

Fixes: 2130fb97fecf ("NFC: st21nfca: Adding support for secure element")
Signed-off-by: Duoming Zhou <[email protected]>
---
Changes in v2:
- Change mutex_lock to spin_lock.

include/net/nfc/hci.h | 3 ++-
net/nfc/hci/core.c | 18 +++++++++---------
net/nfc/hci/hcp.c | 10 +++++-----
3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 756c11084f6..8f66e6e6b91 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -103,7 +103,8 @@ struct nfc_hci_dev {

bool shutting_down;

- struct mutex msg_tx_mutex;
+ /* The spinlock is used to protect resources related with hci message TX */
+ spinlock_t msg_tx_spin;

struct list_head msg_tx_queue;

diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index ceb87db57cd..fa22f9fe5fc 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -68,7 +68,7 @@ static void nfc_hci_msg_tx_work(struct work_struct *work)
struct sk_buff *skb;
int r = 0;

- mutex_lock(&hdev->msg_tx_mutex);
+ spin_lock(&hdev->msg_tx_spin);
if (hdev->shutting_down)
goto exit;

@@ -120,7 +120,7 @@ static void nfc_hci_msg_tx_work(struct work_struct *work)
msecs_to_jiffies(hdev->cmd_pending_msg->completion_delay));

exit:
- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);
}

static void nfc_hci_msg_rx_work(struct work_struct *work)
@@ -165,7 +165,7 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err,
void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result,
struct sk_buff *skb)
{
- mutex_lock(&hdev->msg_tx_mutex);
+ spin_lock(&hdev->msg_tx_spin);

if (hdev->cmd_pending_msg == NULL) {
kfree_skb(skb);
@@ -175,7 +175,7 @@ void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result,
__nfc_hci_cmd_completion(hdev, nfc_hci_result_to_errno(result), skb);

exit:
- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);
}

void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
@@ -833,7 +833,7 @@ static int hci_se_io(struct nfc_dev *nfc_dev, u32 se_idx,

static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
{
- mutex_lock(&hdev->msg_tx_mutex);
+ spin_lock(&hdev->msg_tx_spin);

if (hdev->cmd_pending_msg == NULL) {
nfc_driver_failure(hdev->ndev, err);
@@ -843,7 +843,7 @@ static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
__nfc_hci_cmd_completion(hdev, err, NULL);

exit:
- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);
}

static void nfc_hci_llc_failure(struct nfc_hci_dev *hdev, int err)
@@ -1009,7 +1009,7 @@ EXPORT_SYMBOL(nfc_hci_free_device);

int nfc_hci_register_device(struct nfc_hci_dev *hdev)
{
- mutex_init(&hdev->msg_tx_mutex);
+ spin_lock_init(&hdev->msg_tx_spin);

INIT_LIST_HEAD(&hdev->msg_tx_queue);

@@ -1031,7 +1031,7 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev)
{
struct hci_msg *msg, *n;

- mutex_lock(&hdev->msg_tx_mutex);
+ spin_lock(&hdev->msg_tx_spin);

if (hdev->cmd_pending_msg) {
if (hdev->cmd_pending_msg->cb)
@@ -1044,7 +1044,7 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev)

hdev->shutting_down = true;

- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);

del_timer_sync(&hdev->cmd_timer);
cancel_work_sync(&hdev->msg_tx_work);
diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c
index 05c60988f59..f7eccb4ce35 100644
--- a/net/nfc/hci/hcp.c
+++ b/net/nfc/hci/hcp.c
@@ -30,7 +30,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
int hci_len, err;
bool firstfrag = true;

- cmd = kzalloc(sizeof(struct hci_msg), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
if (cmd == NULL)
return -ENOMEM;

@@ -58,7 +58,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
data_link_len + ndev->tx_tailroom;
hci_len -= data_link_len;

- skb = alloc_skb(skb_len, GFP_KERNEL);
+ skb = alloc_skb(skb_len, GFP_ATOMIC);
if (skb == NULL) {
err = -ENOMEM;
goto out_skb_err;
@@ -90,16 +90,16 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
skb_queue_tail(&cmd->msg_frags, skb);
}

- mutex_lock(&hdev->msg_tx_mutex);
+ spin_lock(&hdev->msg_tx_spin);

if (hdev->shutting_down) {
err = -ESHUTDOWN;
- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);
goto out_skb_err;
}

list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue);
- mutex_unlock(&hdev->msg_tx_mutex);
+ spin_unlock(&hdev->msg_tx_spin);

schedule_work(&hdev->msg_tx_work);

--
2.17.1

2022-05-18 03:48:55

by Krzysztof Kozlowski

[permalink] [raw]

Subject: Re: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

On 17/05/2022 12:55, Duoming Zhou wrote:
> There are sleep in atomic context bugs when the request to secure
> element of st21nfca is timeout. The root cause is that kzalloc and
> alloc_skb with GFP_KERNEL parameter and mutex_lock are called in
> st21nfca_se_wt_timeout which is a timer handler. The call tree shows
> the execution paths that could lead to bugs:
>
> (Interrupt context)
> st21nfca_se_wt_timeout
> nfc_hci_send_event
> nfc_hci_hcp_message_tx
> kzalloc(..., GFP_KERNEL) //may sleep
> alloc_skb(..., GFP_KERNEL) //may sleep
> mutex_lock() //may sleep
>
> This patch changes allocation mode of kzalloc and alloc_skb from
> GFP_KERNEL to GFP_ATOMIC and changes mutex_lock to spin_lock in
> order to prevent atomic context from sleeping.
>
> Fixes: 2130fb97fecf ("NFC: st21nfca: Adding support for secure element")
> Signed-off-by: Duoming Zhou <[email protected]>
> ---
> Changes in v2:
> - Change mutex_lock to spin_lock.
>
> include/net/nfc/hci.h | 3 ++-
> net/nfc/hci/core.c | 18 +++++++++---------
> net/nfc/hci/hcp.c | 10 +++++-----
> 3 files changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
> index 756c11084f6..8f66e6e6b91 100644
> --- a/include/net/nfc/hci.h
> +++ b/include/net/nfc/hci.h
> @@ -103,7 +103,8 @@ struct nfc_hci_dev {
>
> bool shutting_down;
>
> - struct mutex msg_tx_mutex;
> + /* The spinlock is used to protect resources related with hci message TX */
> + spinlock_t msg_tx_spin;
>
> struct list_head msg_tx_queue;
>
> diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
> index ceb87db57cd..fa22f9fe5fc 100644
> --- a/net/nfc/hci/core.c
> +++ b/net/nfc/hci/core.c
> @@ -68,7 +68,7 @@ static void nfc_hci_msg_tx_work(struct work_struct *work)
> struct sk_buff *skb;
> int r = 0;
>
> - mutex_lock(&hdev->msg_tx_mutex);
> + spin_lock(&hdev->msg_tx_spin);
> if (hdev->shutting_down)
> goto exit;

How did you test your patch?

Did you check, really check, that this can be an atomic (non-sleeping)
section?

I have doubts because I found at least one path leading to device_lock
(which is a mutex) called within your new code.

Before sending a new version, please wait for discussion to reach some
consensus. The quality of these fixes is really poor. :(

Best regards,
Krzysztof

2022-05-18 03:53:11

by Duoming Zhou

[permalink] [raw]

Subject: Re: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

Hello,

On Tue, 17 May 2022 13:42:41 +0200 Krzysztof wrote:

> On 17/05/2022 12:55, Duoming Zhou wrote:
> > There are sleep in atomic context bugs when the request to secure
> > element of st21nfca is timeout. The root cause is that kzalloc and
> > alloc_skb with GFP_KERNEL parameter and mutex_lock are called in
> > st21nfca_se_wt_timeout which is a timer handler. The call tree shows
> > the execution paths that could lead to bugs:
> >
> > (Interrupt context)
> > st21nfca_se_wt_timeout
> > nfc_hci_send_event
> > nfc_hci_hcp_message_tx
> > kzalloc(..., GFP_KERNEL) //may sleep
> > alloc_skb(..., GFP_KERNEL) //may sleep
> > mutex_lock() //may sleep
> >
> > This patch changes allocation mode of kzalloc and alloc_skb from
> > GFP_KERNEL to GFP_ATOMIC and changes mutex_lock to spin_lock in
> > order to prevent atomic context from sleeping.
> >
> > Fixes: 2130fb97fecf ("NFC: st21nfca: Adding support for secure element")
> > Signed-off-by: Duoming Zhou <[email protected]>
> > ---
> > Changes in v2:
> > - Change mutex_lock to spin_lock.
> >
> > include/net/nfc/hci.h | 3 ++-
> > net/nfc/hci/core.c | 18 +++++++++---------
> > net/nfc/hci/hcp.c | 10 +++++-----
> > 3 files changed, 16 insertions(+), 15 deletions(-)
> >
> > diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
> > index 756c11084f6..8f66e6e6b91 100644
> > --- a/include/net/nfc/hci.h
> > +++ b/include/net/nfc/hci.h
> > @@ -103,7 +103,8 @@ struct nfc_hci_dev {
> >
> > bool shutting_down;
> >
> > - struct mutex msg_tx_mutex;
> > + /* The spinlock is used to protect resources related with hci message TX */
> > + spinlock_t msg_tx_spin;
> >
> > struct list_head msg_tx_queue;
> >
> > diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
> > index ceb87db57cd..fa22f9fe5fc 100644
> > --- a/net/nfc/hci/core.c
> > +++ b/net/nfc/hci/core.c
> > @@ -68,7 +68,7 @@ static void nfc_hci_msg_tx_work(struct work_struct *work)
> > struct sk_buff *skb;
> > int r = 0;
> >
> > - mutex_lock(&hdev->msg_tx_mutex);
> > + spin_lock(&hdev->msg_tx_spin);
> > if (hdev->shutting_down)
> > goto exit;
>
> How did you test your patch?
>
> Did you check, really check, that this can be an atomic (non-sleeping)
> section?
>
> I have doubts because I found at least one path leading to device_lock
> (which is a mutex) called within your new code.

The nfc_hci_hcp_message_tx() is called by both process context(hci_dev_up and so on)
and interrupt context(st21nfca_se_wt_timeout()). The process context(hci_dev_up and so on)
calls device_lock, but I think calling spin_lock() within device_lock() is ok. There is
no device_lock() called within spin_lock().

The spinlock could also improve the performance of the program, because processing the
hci messages should be finished in a short time.

> Before sending a new version, please wait for discussion to reach some
> consensus. The quality of these fixes is really poor. :(

Ok, I will wait for discussion to reach consensus.

Best regards,
Duoming Zhou

2022-05-18 09:54:00

by Krzysztof Kozlowski

[permalink] [raw]

Subject: Re: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

On 18/05/2022 06:39, [email protected] wrote:
>> There is.
>>
>> nfc_hci_failure -> spin lock -> nfc_driver_failure -> nfc_targets_found
>> -> device_lock
>>
>> I found it just by a very quick look, so I suspect there are several
>> other places, not really checked.
>
> I agree with you, the spin_lock is not a good solution to this problem. There is another solution:
>
> We could put the nfc_hci_send_event() of st21nfca_se_wt_timeout() in a work item, then, using
> schedule_work() in st21nfca_se_wt_timeout() to execute the work item. The schedule_work() will
> wake up another kernel thread which is in process context to execute the bottom half of the interrupt,
> so it allows sleep.
>
> The following is the details.

Yes, this seems good solution. You might also need to add
cancel_work_sync to all places removing the timer.

Best regards,
Krzysztof

2022-05-18 11:09:48

by Duoming Zhou

[permalink] [raw]

Subject: Re: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

Hello,

Date: Wed, 18 May 2022 11:39:27 +0200 Krzysztof wrote:

> >> There is.
> >>
> >> nfc_hci_failure -> spin lock -> nfc_driver_failure -> nfc_targets_found
> >> -> device_lock
> >>
> >> I found it just by a very quick look, so I suspect there are several
> >> other places, not really checked.
> >
> > I agree with you, the spin_lock is not a good solution to this problem. There is another solution:
> >
> > We could put the nfc_hci_send_event() of st21nfca_se_wt_timeout() in a work item, then, using
> > schedule_work() in st21nfca_se_wt_timeout() to execute the work item. The schedule_work() will
> > wake up another kernel thread which is in process context to execute the bottom half of the interrupt,
> > so it allows sleep.
> >
> > The following is the details.
>
> Yes, this seems good solution. You might also need to add
> cancel_work_sync to all places removing the timer.

Thanks for your approval.

There are two del_timer_sync() functions related with bwi_timer timer.
I neglect one site in st21nfca_apdu_reader_event_received(), I add
cancel_work_sync() in it this time.

diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c
index c922f10d0d7..7e213f8ddc9 100644
--- a/drivers/nfc/st21nfca/se.c
+++ b/drivers/nfc/st21nfca/se.c
@@ -241,7 +241,7 @@ int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx,
}
EXPORT_SYMBOL(st21nfca_hci_se_io);

-static void st21nfca_se_wt_timeout(struct timer_list *t)
+static void st21nfca_se_wt_work(struct work_struct *work)
{
/*
* No answer from the secure element
@@ -254,8 +254,9 @@ static void st21nfca_se_wt_timeout(struct timer_list *t)
*/
/* hardware reset managed through VCC_UICC_OUT power supply */
u8 param = 0x01;
- struct st21nfca_hci_info *info = from_timer(info, t,
- se_info.bwi_timer);
+ struct st21nfca_hci_info *info = container_of(work,
+ struct st21nfca_hci_info,
+ se_info.timeout_work);

info->se_info.bwi_active = false;

@@ -271,6 +272,13 @@ static void st21nfca_se_wt_timeout(struct timer_list *t)
info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME);
}

+static void st21nfca_se_wt_timeout(struct timer_list *t)
+{
+ struct st21nfca_hci_info *info = from_timer(info, t, se_info.bwi_timer);
+
+ schedule_work(&info->se_info.timeout_work);
+}
+
static void st21nfca_se_activation_timeout(struct timer_list *t)
{
struct st21nfca_hci_info *info = from_timer(info, t,
@@ -360,6 +368,7 @@ int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev,
switch (event) {
case ST21NFCA_EVT_TRANSMIT_DATA:
del_timer_sync(&info->se_info.bwi_timer);
+ cancel_work_sync(&info->se_info.timeout_work);
info->se_info.bwi_active = false;
r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE,
ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0);
@@ -389,6 +398,7 @@ void st21nfca_se_init(struct nfc_hci_dev *hdev)
struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);

init_completion(&info->se_info.req_completion);
+ INIT_WORK(&info->se_info.timeout_work, st21nfca_se_wt_work);
/* initialize timers */
timer_setup(&info->se_info.bwi_timer, st21nfca_se_wt_timeout, 0);
info->se_info.bwi_active = false;
@@ -416,6 +426,7 @@ void st21nfca_se_deinit(struct nfc_hci_dev *hdev)
if (info->se_info.se_active)
del_timer_sync(&info->se_info.se_active_timer);

+ cancel_work_sync(&info->se_info.timeout_work);
info->se_info.bwi_active = false;
info->se_info.se_active = false;
}
diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
index cb6ad916be9..ae6771cc989 100644
--- a/drivers/nfc/st21nfca/st21nfca.h
+++ b/drivers/nfc/st21nfca/st21nfca.h
@@ -141,6 +141,7 @@ struct st21nfca_se_info {

se_io_cb_t cb;
void *cb_context;
+ struct work_struct timeout_work;
};

struct st21nfca_hci_info {

If you think this solution is ok, I will send "PATCH v3".

Best regards,
Duoming Zhou

2022-05-18 11:46:00

by Krzysztof Kozlowski

[permalink] [raw]

Subject: Re: [PATCH net v2] NFC: hci: fix sleep in atomic context bugs in nfc_hci_hcp_message_tx

On 18/05/2022 13:05, [email protected] wrote:
> Hello,
>
>
> struct st21nfca_hci_info {
>
> If you think this solution is ok, I will send "PATCH v3".

More or less, send entire patch, so we'll see.

>
> Best regards,
> Duoming Zhou

Best regards,
Krzysztof