2020-04-07 10:12:21

by Sumit Garg

[permalink] [raw]
Subject: [PATCH v2] mac80211: fix race in ieee80211_register_hw()

A race condition leading to a kernel crash is observed during invocation
of ieee80211_register_hw() on a dragonboard410c device having wcn36xx
driver built as a loadable module along with a wifi manager in user-space
waiting for a wifi device (wlanX) to be active.

Sequence diagram for a particular kernel crash scenario:

user-space ieee80211_register_hw() ieee80211_tasklet_handler()
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
| | |
|<---phy0----wiphy_register() |
|-----iwd if_add---->| |
| |<---IRQ----(RX packet)
| Kernel crash |
| due to unallocated |
| workqueue. |
| | |
| alloc_ordered_workqueue() |
| | |
| Misc wiphy init. |
| | |
| ieee80211_if_add() |
| | |

As evident from above sequence diagram, this race condition isn't specific
to a particular wifi driver but rather the initialization sequence in
ieee80211_register_hw() needs to be fixed. So re-order the initialization
sequence and the updated sequence diagram would look like:

user-space ieee80211_register_hw() ieee80211_tasklet_handler()
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
| | |
| alloc_ordered_workqueue() |
| | |
| Misc wiphy init. |
| | |
|<---phy0----wiphy_register() |
|-----iwd if_add---->| |
| |<---IRQ----(RX packet)
| | |
| ieee80211_if_add() |
| | |

Cc: <[email protected]>
Signed-off-by: Sumit Garg <[email protected]>
---

Changes in v2:
- Move rtnl_unlock() just after ieee80211_init_rate_ctrl_alg().
- Update sequence diagrams in commit message for more clarification.

net/mac80211/main.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4c2b5ba..d497129 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1051,7 +1051,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
if (hw->max_signal <= 0) {
result = -EINVAL;
- goto fail_wiphy_register;
+ goto fail_workqueue;
}
}

@@ -1113,7 +1113,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)

result = ieee80211_init_cipher_suites(local);
if (result < 0)
- goto fail_wiphy_register;
+ goto fail_workqueue;

if (!local->ops->remain_on_channel)
local->hw.wiphy->max_remain_on_channel_duration = 5000;
@@ -1139,10 +1139,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)

local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;

- result = wiphy_register(local->hw.wiphy);
- if (result < 0)
- goto fail_wiphy_register;
-
/*
* We use the number of queues for feature tests (QoS, HT) internally
* so restrict them appropriately.
@@ -1207,6 +1203,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
goto fail_rate;
}

+ rtnl_unlock();
+
if (local->rate_ctrl) {
clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags);
if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW)
@@ -1254,6 +1252,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->sband_allocated |= BIT(band);
}

+ result = wiphy_register(local->hw.wiphy);
+ if (result < 0)
+ goto fail_wiphy_register;
+
+ rtnl_lock();
+
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
!ieee80211_hw_check(hw, NO_AUTO_VIF)) {
@@ -1293,6 +1297,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
#if defined(CONFIG_INET) || defined(CONFIG_IPV6)
fail_ifa:
#endif
+ wiphy_unregister(local->hw.wiphy);
+ fail_wiphy_register:
rtnl_lock();
rate_control_deinitialize(local);
ieee80211_remove_interfaces(local);
@@ -1302,8 +1308,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
- wiphy_unregister(local->hw.wiphy);
- fail_wiphy_register:
if (local->wiphy_ciphers_allocated)
kfree(local->hw.wiphy->cipher_suites);
kfree(local->int_scan_req);
@@ -1353,8 +1357,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
skb_queue_purge(&local->skb_queue_unreliable);
skb_queue_purge(&local->skb_queue_tdls_chsw);

- destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
+ destroy_workqueue(local->workqueue);
ieee80211_led_exit(local);
kfree(local->int_scan_req);
}
--
2.7.4


2020-04-07 19:26:08

by Krishna Chaitanya

[permalink] [raw]
Subject: Re: [PATCH v2] mac80211: fix race in ieee80211_register_hw()

On Tue, Apr 7, 2020 at 3:41 PM Sumit Garg <[email protected]> wrote:
>
> A race condition leading to a kernel crash is observed during invocation
> of ieee80211_register_hw() on a dragonboard410c device having wcn36xx
> driver built as a loadable module along with a wifi manager in user-space
> waiting for a wifi device (wlanX) to be active.
>
> Sequence diagram for a particular kernel crash scenario:
>
> user-space ieee80211_register_hw() ieee80211_tasklet_handler()
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> | | |
> |<---phy0----wiphy_register() |
> |-----iwd if_add---->| |
just a nitpick, a better one would be (iwd: if_add + ap_start) since
we need to have 'iwctl ap start'
to trigger the interrupts.
> | |<---IRQ----(RX packet)
> | Kernel crash |
> | due to unallocated |
> | workqueue. |
> | | |
> | alloc_ordered_workqueue() |
> | | |
> | Misc wiphy init. |
> | | |
> | ieee80211_if_add() |
> | | |
>
> As evident from above sequence diagram, this race condition isn't specific
> to a particular wifi driver but rather the initialization sequence in
> ieee80211_register_hw() needs to be fixed. So re-order the initialization
> sequence and the updated sequence diagram would look like:
>
> user-space ieee80211_register_hw() ieee80211_tasklet_handler()
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> | | |
> | alloc_ordered_workqueue() |
> | | |
> | Misc wiphy init. |
> | | |
> |<---phy0----wiphy_register() |
> |-----iwd if_add---->| |
same as above.
> | |<---IRQ----(RX packet)
> | | |
> | ieee80211_if_add() |
> | | |
>
> Cc: <[email protected]>
> Signed-off-by: Sumit Garg <[email protected]>
> ---
>
> Changes in v2:
> - Move rtnl_unlock() just after ieee80211_init_rate_ctrl_alg().
> - Update sequence diagrams in commit message for more clarification.
>
> net/mac80211/main.c | 22 +++++++++++++---------
> 1 file changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/net/mac80211/main.c b/net/mac80211/main.c
> index 4c2b5ba..d497129 100644
> --- a/net/mac80211/main.c
> +++ b/net/mac80211/main.c
> @@ -1051,7 +1051,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
> if (hw->max_signal <= 0) {
> result = -EINVAL;
> - goto fail_wiphy_register;
> + goto fail_workqueue;
> }
> }
>
> @@ -1113,7 +1113,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
>
> result = ieee80211_init_cipher_suites(local);
> if (result < 0)
> - goto fail_wiphy_register;
> + goto fail_workqueue;
>
> if (!local->ops->remain_on_channel)
> local->hw.wiphy->max_remain_on_channel_duration = 5000;
> @@ -1139,10 +1139,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
>
> local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
>
> - result = wiphy_register(local->hw.wiphy);
> - if (result < 0)
> - goto fail_wiphy_register;
> -
> /*
> * We use the number of queues for feature tests (QoS, HT) internally
> * so restrict them appropriately.
> @@ -1207,6 +1203,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> goto fail_rate;
> }
>
> + rtnl_unlock();
> +
> if (local->rate_ctrl) {
> clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags);
> if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW)
> @@ -1254,6 +1252,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> local->sband_allocated |= BIT(band);
> }
>
> + result = wiphy_register(local->hw.wiphy);
> + if (result < 0)
> + goto fail_wiphy_register;
> +
> + rtnl_lock();
> +
> /* add one default STA interface if supported */
> if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
> !ieee80211_hw_check(hw, NO_AUTO_VIF)) {
> @@ -1293,6 +1297,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> #if defined(CONFIG_INET) || defined(CONFIG_IPV6)
> fail_ifa:
> #endif
> + wiphy_unregister(local->hw.wiphy);
> + fail_wiphy_register:
> rtnl_lock();
> rate_control_deinitialize(local);
> ieee80211_remove_interfaces(local);
> @@ -1302,8 +1308,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> ieee80211_led_exit(local);
> destroy_workqueue(local->workqueue);
> fail_workqueue:
> - wiphy_unregister(local->hw.wiphy);
> - fail_wiphy_register:
> if (local->wiphy_ciphers_allocated)
> kfree(local->hw.wiphy->cipher_suites);
> kfree(local->int_scan_req);
> @@ -1353,8 +1357,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
> skb_queue_purge(&local->skb_queue_unreliable);
> skb_queue_purge(&local->skb_queue_tdls_chsw);
>
> - destroy_workqueue(local->workqueue);
> wiphy_unregister(local->hw.wiphy);
> + destroy_workqueue(local->workqueue);
> ieee80211_led_exit(local);
> kfree(local->int_scan_req);
> }
> --
> 2.7.4
>


--
Thanks,
Regards,
Chaitanya T K.

2020-04-15 23:03:07

by Sumit Garg

[permalink] [raw]
Subject: Re: [PATCH v2] mac80211: fix race in ieee80211_register_hw()

On Thu, 9 Apr 2020 at 10:12, Sumit Garg <[email protected]> wrote:
>
> Hi Johannes,
>
> On Wed, 8 Apr 2020 at 00:55, Krishna Chaitanya <[email protected]> wrote:
> >
> > On Tue, Apr 7, 2020 at 3:41 PM Sumit Garg <[email protected]> wrote:
> > >
> > > A race condition leading to a kernel crash is observed during invocation
> > > of ieee80211_register_hw() on a dragonboard410c device having wcn36xx
> > > driver built as a loadable module along with a wifi manager in user-space
> > > waiting for a wifi device (wlanX) to be active.
> > >
> > > Sequence diagram for a particular kernel crash scenario:
> > >
> > > user-space ieee80211_register_hw() ieee80211_tasklet_handler()
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > | | |
> > > |<---phy0----wiphy_register() |
> > > |-----iwd if_add---->| |
> > just a nitpick, a better one would be (iwd: if_add + ap_start) since
> > we need to have 'iwctl ap start'
> > to trigger the interrupts.
> > > | |<---IRQ----(RX packet)
> > > | Kernel crash |
> > > | due to unallocated |
> > > | workqueue. |
> > > | | |
> > > | alloc_ordered_workqueue() |
> > > | | |
> > > | Misc wiphy init. |
> > > | | |
> > > | ieee80211_if_add() |
> > > | | |
> > >
> > > As evident from above sequence diagram, this race condition isn't specific
> > > to a particular wifi driver but rather the initialization sequence in
> > > ieee80211_register_hw() needs to be fixed. So re-order the initialization
> > > sequence and the updated sequence diagram would look like:
> > >
> > > user-space ieee80211_register_hw() ieee80211_tasklet_handler()
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > | | |
> > > | alloc_ordered_workqueue() |
> > > | | |
> > > | Misc wiphy init. |
> > > | | |
> > > |<---phy0----wiphy_register() |
> > > |-----iwd if_add---->| |
> > same as above.
> > > | |<---IRQ----(RX packet)
> > > | | |
> > > | ieee80211_if_add() |
> > > | | |
> > >
> > > Cc: <[email protected]>
> > > Signed-off-by: Sumit Garg <[email protected]>
> > > ---
> > >
>
> In case we don't have any further comments, could you fix this nitpick
> from Chaitanya while applying or would you like me to respin and send
> v3?

A gentle ping. Is this patch a good candidate for 5.7-rc2?

-Sumit

>
> -Sumit
>
> > > Changes in v2:
> > > - Move rtnl_unlock() just after ieee80211_init_rate_ctrl_alg().
> > > - Update sequence diagrams in commit message for more clarification.
> > >
> > > net/mac80211/main.c | 22 +++++++++++++---------
> > > 1 file changed, 13 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/net/mac80211/main.c b/net/mac80211/main.c
> > > index 4c2b5ba..d497129 100644
> > > --- a/net/mac80211/main.c
> > > +++ b/net/mac80211/main.c
> > > @@ -1051,7 +1051,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > > local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
> > > if (hw->max_signal <= 0) {
> > > result = -EINVAL;
> > > - goto fail_wiphy_register;
> > > + goto fail_workqueue;
> > > }
> > > }
> > >
> > > @@ -1113,7 +1113,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > >
> > > result = ieee80211_init_cipher_suites(local);
> > > if (result < 0)
> > > - goto fail_wiphy_register;
> > > + goto fail_workqueue;
> > >
> > > if (!local->ops->remain_on_channel)
> > > local->hw.wiphy->max_remain_on_channel_duration = 5000;
> > > @@ -1139,10 +1139,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > >
> > > local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
> > >
> > > - result = wiphy_register(local->hw.wiphy);
> > > - if (result < 0)
> > > - goto fail_wiphy_register;
> > > -
> > > /*
> > > * We use the number of queues for feature tests (QoS, HT) internally
> > > * so restrict them appropriately.
> > > @@ -1207,6 +1203,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > > goto fail_rate;
> > > }
> > >
> > > + rtnl_unlock();
> > > +
> > > if (local->rate_ctrl) {
> > > clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags);
> > > if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW)
> > > @@ -1254,6 +1252,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > > local->sband_allocated |= BIT(band);
> > > }
> > >
> > > + result = wiphy_register(local->hw.wiphy);
> > > + if (result < 0)
> > > + goto fail_wiphy_register;
> > > +
> > > + rtnl_lock();
> > > +
> > > /* add one default STA interface if supported */
> > > if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
> > > !ieee80211_hw_check(hw, NO_AUTO_VIF)) {
> > > @@ -1293,6 +1297,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > > #if defined(CONFIG_INET) || defined(CONFIG_IPV6)
> > > fail_ifa:
> > > #endif
> > > + wiphy_unregister(local->hw.wiphy);
> > > + fail_wiphy_register:
> > > rtnl_lock();
> > > rate_control_deinitialize(local);
> > > ieee80211_remove_interfaces(local);
> > > @@ -1302,8 +1308,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
> > > ieee80211_led_exit(local);
> > > destroy_workqueue(local->workqueue);
> > > fail_workqueue:
> > > - wiphy_unregister(local->hw.wiphy);
> > > - fail_wiphy_register:
> > > if (local->wiphy_ciphers_allocated)
> > > kfree(local->hw.wiphy->cipher_suites);
> > > kfree(local->int_scan_req);
> > > @@ -1353,8 +1357,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
> > > skb_queue_purge(&local->skb_queue_unreliable);
> > > skb_queue_purge(&local->skb_queue_tdls_chsw);
> > >
> > > - destroy_workqueue(local->workqueue);
> > > wiphy_unregister(local->hw.wiphy);
> > > + destroy_workqueue(local->workqueue);
> > > ieee80211_led_exit(local);
> > > kfree(local->int_scan_req);
> > > }
> > > --
> > > 2.7.4
> > >
> >
> >
> > --
> > Thanks,
> > Regards,
> > Chaitanya T K.