Subject: [PATCH V2] cfg80211: Fix race in bss timeout

It is quite possible to run into a race in bss timeout where
the drivers see the bss entry just before notifying cfg80211
of a roaming event but it got timed out by the time rdev->event_work
got scheduled from cfg80211_wq. This would result in the following
WARN-ON() along with the failure to notify the user space of
the roaming. The other situation, seen with ath6kl, is when the
driver reports a roam to the same AP after that AP's bss entry
has already expired. To fix this,
move cfg80211_get_bss() from __cfg80211_roamed() to cfg80211_roamed().

[158645.538384] WARNING: at net/wireless/sme.c:586
__cfg80211_roamed+0xc2/0x1b1()
[158645.538810] Call Trace:
[158645.538838] [<c1033527>] warn_slowpath_common+0x65/0x7a
[158645.538917] [<c14cfacf>] ? __cfg80211_roamed+0xc2/0x1b1
[158645.538946] [<c103354b>] warn_slowpath_null+0xf/0x13
[158645.539055] [<c14cfacf>] __cfg80211_roamed+0xc2/0x1b1
[158645.539086] [<c14beb5b>] cfg80211_process_rdev_events+0x153/0x1cc
[158645.539166] [<c14bd57b>] cfg80211_event_work+0x26/0x36
[158645.539195] [<c10482ae>] process_one_work+0x219/0x38b
[158645.539273] [<c14bd555>] ? wiphy_new+0x419/0x419
[158645.539301] [<c10486cb>] worker_thread+0xf6/0x1bf
[158645.539379] [<c10485d5>] ? rescuer_thread+0x1b5/0x1b5
[158645.539407] [<c104b3e2>] kthread+0x62/0x67
[158645.539484] [<c104b380>] ? __init_kthread_worker+0x42/0x42
[158645.539514] [<c151309a>] kernel_thread_helper+0x6/0xd

Reported-by: Kalle Valo <[email protected]>
Signed-off-by: Vasanthakumar Thiagarajan <[email protected]>
---

V2 -- Remove unnecessary spinlock before accessing bss->bssid.
Fix bss reference leak in __cfg80211_roamed().
Export cfg80211_roamed_bss().

include/net/cfg80211.h | 25 ++++++++++++++++++++
net/wireless/core.h | 6 +---
net/wireless/sme.c | 60 +++++++++++++++++++++++++++++++----------------
net/wireless/util.c | 6 ++--
4 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8d7ba09..24450b7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3085,6 +3085,31 @@ void cfg80211_roamed(struct net_device *dev,
const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp);

/**
+ * cfg80211_roamed_bss - notify cfg80211 of roaming
+ *
+ * @dev: network device
+ * @bss: entry of bss to which STA got roamed
+ * @req_ie: association request IEs (may be %NULL)
+ * @req_ie_len: association request IEs length
+ * @resp_ie: association response IEs (may be %NULL)
+ * @resp_ie_len: assoc response IEs length
+ * @gfp: allocation flags
+ *
+ * This is just a wrapper to notify cfg80211 of roaming event with driver
+ * passing bss to avoid a race in timeout of the bss entry. It should be
+ * called by the underlying driver whenever it roamed from one AP to another
+ * while connected. Drivers which have roaming implemented in firmware
+ * may use this function to avoid a race in bss entry timeout where the bss
+ * entry of the new AP is seen in the driver, but gets timed out by the time
+ * it is accessed in __cfg80211_roamed() due to delay in scheduling
+ * rdev->event_work. __cfg80211_roamed() is responsible for releasing the bss
+ * reference.
+ */
+void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss,
+ const u8 *req_ie, size_t req_ie_len,
+ const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp);
+
+/**
* cfg80211_disconnected - notify cfg80211 that connection was dropped
*
* @dev: network device
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 1c7d4df..11ff6bb 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -249,12 +249,11 @@ struct cfg80211_event {
u16 status;
} cr;
struct {
- struct ieee80211_channel *channel;
- u8 bssid[ETH_ALEN];
const u8 *req_ie;
const u8 *resp_ie;
size_t req_ie_len;
size_t resp_ie_len;
+ struct cfg80211_bss *bss;
} rm;
struct {
const u8 *ie;
@@ -397,8 +396,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
struct net_device *dev, u16 reason,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
- struct ieee80211_channel *channel,
- const u8 *bssid,
+ struct cfg80211_bss *bss,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0acfdc9..006ae8f 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -551,45 +551,35 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
EXPORT_SYMBOL(cfg80211_connect_result);

void __cfg80211_roamed(struct wireless_dev *wdev,
- struct ieee80211_channel *channel,
- const u8 *bssid,
+ struct cfg80211_bss *bss,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len)
{
- struct cfg80211_bss *bss;
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
#endif
-
ASSERT_WDEV_LOCK(wdev);

if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
- return;
+ goto out;

if (wdev->sme_state != CFG80211_SME_CONNECTED)
- return;
+ goto out;

/* internal error -- how did we get to CONNECTED w/o BSS? */
if (WARN_ON(!wdev->current_bss)) {
- return;
+ goto out;
}

cfg80211_unhold_bss(wdev->current_bss);
cfg80211_put_bss(&wdev->current_bss->pub);
wdev->current_bss = NULL;

- bss = cfg80211_get_bss(wdev->wiphy, channel, bssid,
- wdev->ssid, wdev->ssid_len,
- WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
-
- if (WARN_ON(!bss))
- return;
-
cfg80211_hold_bss(bss_from_pub(bss));
wdev->current_bss = bss_from_pub(bss);

- nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bssid,
+ nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid,
req_ie, req_ie_len, resp_ie, resp_ie_len,
GFP_KERNEL);

@@ -610,11 +600,14 @@ void __cfg80211_roamed(struct wireless_dev *wdev,

memset(&wrqu, 0, sizeof(wrqu));
wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN);
+ memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, bss->bssid, ETH_ALEN);
wdev->wext.prev_bssid_valid = true;
wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL);
#endif
+
+out:
+ cfg80211_put_bss(bss);
}

void cfg80211_roamed(struct net_device *dev,
@@ -624,32 +617,57 @@ void cfg80211_roamed(struct net_device *dev,
const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_bss *bss;
+
+ CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);
+
+ bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
+ wdev->ssid_len, WLAN_CAPABILITY_ESS,
+ WLAN_CAPABILITY_ESS);
+ if (WARN_ON(!bss))
+ return;
+
+ cfg80211_roamed_bss(dev, bss, req_ie, req_ie_len, resp_ie,
+ resp_ie_len, gfp);
+}
+EXPORT_SYMBOL(cfg80211_roamed);
+
+void cfg80211_roamed_bss(struct net_device *dev,
+ struct cfg80211_bss *bss, const u8 *req_ie,
+ size_t req_ie_len, const u8 *resp_ie,
+ size_t resp_ie_len, gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
struct cfg80211_event *ev;
unsigned long flags;

CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);

+ if (WARN_ON(!bss))
+ return;
+
ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
- if (!ev)
+ if (!ev) {
+ cfg80211_put_bss(bss);
return;
+ }

ev->type = EVENT_ROAMED;
- ev->rm.channel = channel;
- memcpy(ev->rm.bssid, bssid, ETH_ALEN);
ev->rm.req_ie = ((u8 *)ev) + sizeof(*ev);
ev->rm.req_ie_len = req_ie_len;
memcpy((void *)ev->rm.req_ie, req_ie, req_ie_len);
ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
ev->rm.resp_ie_len = resp_ie_len;
memcpy((void *)ev->rm.resp_ie, resp_ie, resp_ie_len);
+ ev->rm.bss = bss;

spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
queue_work(cfg80211_wq, &rdev->event_work);
}
-EXPORT_SYMBOL(cfg80211_roamed);
+EXPORT_SYMBOL(cfg80211_roamed_bss);

void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4dde429..afda051 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -752,9 +752,9 @@ static void cfg80211_process_wdev_events(struct wireless_dev *wdev)
NULL);
break;
case EVENT_ROAMED:
- __cfg80211_roamed(wdev, ev->rm.channel, ev->rm.bssid,
- ev->rm.req_ie, ev->rm.req_ie_len,
- ev->rm.resp_ie, ev->rm.resp_ie_len);
+ __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie,
+ ev->rm.req_ie_len, ev->rm.resp_ie,
+ ev->rm.resp_ie_len);
break;
case EVENT_DISCONNECTED:
__cfg80211_disconnected(wdev->netdev,
--
1.7.0.4



2011-12-07 16:41:25

by Johannes Berg

[permalink] [raw]
Subject: Re: [PATCH V2] cfg80211: Fix race in bss timeout

On Wed, 2011-12-07 at 21:58 +0530, Vasanthakumar Thiagarajan wrote:
> It is quite possible to run into a race in bss timeout where
> the drivers see the bss entry just before notifying cfg80211
> of a roaming event but it got timed out by the time rdev->event_work
> got scheduled from cfg80211_wq. This would result in the following
> WARN-ON() along with the failure to notify the user space of
> the roaming. The other situation which is happening with ath6kl
> that runs into issue is when the driver reports roam to same AP
> event where the AP bss entry already got expired. To fix this,
> move cfg80211_get_bss() from __cfg80211_roamed() to cfg80211_roamed().
>
> [158645.538384] WARNING: at net/wireless/sme.c:586
> __cfg80211_roamed+0xc2/0x1b1()
> [158645.538810] Call Trace:
> [158645.538838] [<c1033527>] warn_slowpath_common+0x65/0x7a
> [158645.538917] [<c14cfacf>] ? __cfg80211_roamed+0xc2/0x1b1
> [158645.538946] [<c103354b>] warn_slowpath_null+0xf/0x13
> [158645.539055] [<c14cfacf>] __cfg80211_roamed+0xc2/0x1b1
> [158645.539086] [<c14beb5b>] cfg80211_process_rdev_events+0x153/0x1cc
> [158645.539166] [<c14bd57b>] cfg80211_event_work+0x26/0x36
> [158645.539195] [<c10482ae>] process_one_work+0x219/0x38b
> [158645.539273] [<c14bd555>] ? wiphy_new+0x419/0x419
> [158645.539301] [<c10486cb>] worker_thread+0xf6/0x1bf
> [158645.539379] [<c10485d5>] ? rescuer_thread+0x1b5/0x1b5
> [158645.539407] [<c104b3e2>] kthread+0x62/0x67
> [158645.539484] [<c104b380>] ? __init_kthread_worker+0x42/0x42
> [158645.539514] [<c151309a>] kernel_thread_helper+0x6/0xd
>
> Reported-by: Kalle Valo <[email protected]>
> Signed-off-by: Vasanthakumar Thiagarajan <[email protected]>

Reviewed-by: Johannes Berg <[email protected]>

> @@ -624,32 +617,57 @@ void cfg80211_roamed(struct net_device *dev,
> const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp)
> {
> struct wireless_dev *wdev = dev->ieee80211_ptr;
> + struct cfg80211_bss *bss;
> +
> + CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED);
> +
> + bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
> + wdev->ssid_len, WLAN_CAPABILITY_ESS,
> + WLAN_CAPABILITY_ESS);
> + if (WARN_ON(!bss))
> + return;
> +
> + cfg80211_roamed_bss(dev, bss, req_ie, req_ie_len, resp_ie,
> + resp_ie_len, gfp);

Technically, you don't need either of these warnings since you call
cfg80211_roamed_bss() which checks, but I don't really care or mind.

johannes