2010-02-06 13:20:30

by Dave Kilroy

[permalink] [raw]
Subject: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

This patchset attempts to address the issue where a driver that uses
the cfg80211 connect operation connects to an AP before the scan result
for the AP is available. This triggers a WARN_ON in sme.c, and the
connection fails.

This is only an issue for drivers that do not pass beacons/probes to
cfg80211, except in response to an explicit scan. With typical
userspace the issue will only manifest if using iwconfig to connect, or
if using wpa_supplicant with ap_scan=2 (which you might do with a
hidden SSID).

The patches were originally written for the orinoco cfg80211 conversion
(currently stalled due to lack of time). It sounds like the libertas
cfg80211 drivers have the same problem, so I'm posting them in case
they're useful. There's a bit of cleanup to do, and there may be issues
with the setting of the channels to scan (orinoco can't scan by
channel) so if someone wants to pick these up, please do.

Testing
- a version of this has been running with my orinoco changes for the
last 3 or 4 months (in more or less daily use).
- Untested against drivers that don't use ->connect

Signed-off-by: David Kilroy <[email protected]>

---
David Kilroy (2):
cfg80211: generic trigger scan and callback on completion
cfg80211: scan for missing BSS on connect completion

include/net/cfg80211.h | 3 +
net/wireless/core.h | 6 ++
net/wireless/scan.c | 41 ++++++++++++-
net/wireless/sme.c | 163 ++++++++++++++++++++++++++++++++++++++----------
4 files changed, 178 insertions(+), 35 deletions(-)



2010-02-06 13:35:55

by Johannes Berg

[permalink] [raw]
Subject: Re: [RFC 1/2] cfg80211: generic trigger scan and callback on completion

On Sat, 2010-02-06 at 13:20 +0000, David Kilroy wrote:

> We could maintain a list of functions to call, but this isn't necessary
> for the moment. Just maintain the single function pointer to be called
> on scan completion.
>
> Note that we allow the caller to set the callback even when a scan is
> already in progress. This allows cfg80211 SME to scan again
> when the existing scan completes.

I think the "callback even when busy" thing is somewhat confusing. I'd
prefer if you used a standard notifier struct (probably raw since we
have rdev locking; see include/linux/notifier.h) and the callees sorted
out their state there. As it is, it's really complicated to make sure
that one request doesn't clobber another -- the current use guarantees
that but I'm not sure I want to rely on it going forward.

johannes


Attachments:
signature.asc (801.00 B)
This is a digitally signed message part

2010-02-06 13:20:46

by Dave Kilroy

[permalink] [raw]
Subject: [RFC 2/2] cfg80211: scan for missing BSS on connect completion

Some drivers don't notify cfg80211 of every beacon or probe response
they get. In this case, cfg80211 won't have the BSS structure for the AP
that we just connected with.

We address that issue by doing a scan after the driver indicates it has
connected. Scan for the SSID that we are in. If we have the channel,
specify that as well to reduce scanning time.

Signed-off-by: David Kilroy <[email protected]>
---
include/net/cfg80211.h | 3 +
net/wireless/scan.c | 5 ++
net/wireless/sme.c | 143 +++++++++++++++++++++++++++++++++++++++++------
3 files changed, 132 insertions(+), 19 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a3f0a7e..67bb8f5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1420,6 +1420,7 @@ extern void wiphy_free(struct wiphy *wiphy);
struct cfg80211_conn;
struct cfg80211_internal_bss;
struct cfg80211_cached_keys;
+struct cfg80211_conn2;

#define MAX_AUTH_BSSES 4

@@ -1471,6 +1472,8 @@ struct wireless_dev {
struct cfg80211_conn *conn;
struct cfg80211_cached_keys *connect_keys;

+ struct cfg80211_conn2 *conn2; /* TODO: Rename */
+
struct list_head event_list;
spinlock_t event_lock;

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e3d0957..140da4c 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -39,6 +39,11 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak)
* This must be before sending the other events!
* Otherwise, wpa_supplicant gets completely confused with
* wext events.
+ *
+ * At the moment this will either call cfg80211_sme_scan_done
+ * or cfg80211_complete_connect. The former should only get
+ * called if the device supports ops->auth & ops->assoc, the
+ * latter if the device supports ops->connect.
*/
if (rdev->async_scan_cb) {
rdev->async_scan_cb(dev);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 2ef83b7..6134332 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -34,6 +34,15 @@ struct cfg80211_conn {
bool auto_auth, prev_bssid_valid;
};

+/* TODO: This struct needs renaming */
+struct cfg80211_conn2 {
+ struct ieee80211_channel *channel;
+ u8 bssid[ETH_ALEN];
+};
+
+static void __cfg80211_complete_connect(struct net_device *dev,
+ struct cfg80211_bss *bss);
+
bool cfg80211_is_all_idle(void)
{
struct cfg80211_registered_device *rdev;
@@ -385,6 +394,62 @@ void cfg80211_sme_failed_assoc(struct wireless_dev *wdev)
schedule_work(&rdev->conn_work);
}

+static void __cfg80211_complete_connect(struct net_device *dev,
+ struct cfg80211_bss *bss)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ u8 *country_ie;
+
+ cfg80211_hold_bss(bss_from_pub(bss));
+ wdev->current_bss = bss_from_pub(bss);
+
+ wdev->sme_state = CFG80211_SME_CONNECTED;
+ cfg80211_upload_connect_keys(wdev);
+
+ country_ie = (u8 *) ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
+
+ if (!country_ie)
+ return;
+
+ /*
+ * ieee80211_bss_get_ie() ensures we can access:
+ * - country_ie + 2, the start of the country ie data, and
+ * - and country_ie[1] which is the IE length
+ */
+ regulatory_hint_11d(wdev->wiphy,
+ bss->channel->band,
+ country_ie + 2,
+ country_ie[1]);
+}
+
+void cfg80211_complete_connect(struct net_device *dev)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+ mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx);
+ wdev_lock(wdev);
+
+ if (wdev->conn2) {
+ struct cfg80211_bss *bss;
+
+ bss = cfg80211_get_bss(wdev->wiphy, NULL, wdev->conn2->bssid,
+ wdev->ssid, wdev->ssid_len,
+ WLAN_CAPABILITY_ESS,
+ WLAN_CAPABILITY_ESS);
+
+ if (WARN_ON(!bss))
+ wdev->sme_state = CFG80211_SME_IDLE;
+ else
+ __cfg80211_complete_connect(dev, bss);
+
+ kfree(wdev->conn2);
+ wdev->conn2 = NULL;
+ }
+
+ wdev_unlock(wdev);
+ mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx);
+}
+
void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
const u8 *req_ie, size_t req_ie_len,
const u8 *resp_ie, size_t resp_ie_len,
@@ -392,7 +457,6 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
struct cfg80211_bss *bss)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- u8 *country_ie;
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
#endif
@@ -462,29 +526,58 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
WLAN_CAPABILITY_ESS,
WLAN_CAPABILITY_ESS);

- if (WARN_ON(!bss))
- return;
+ if (!bss) {
+ struct cfg80211_scan_request *request;
+ int err;
+
+ /* The driver hasn't reported any beacons or probe
+ * responses for the BSS that we've connected
+ * with. Trigger a scan to get it.
+ *
+ * Record the BSSID we connected with, so we know what
+ * to look for later.
+ *
+ * Then do a scan on the channel (if specified),
+ * specifying the SSID.
+ */
+ memcpy(wdev->conn2->bssid, bssid, ETH_ALEN);

- cfg80211_hold_bss(bss_from_pub(bss));
- wdev->current_bss = bss_from_pub(bss);
+ request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) +
+ sizeof(request->channels[0]),
+ GFP_KERNEL);
+ if (!request)
+ return;

- wdev->sme_state = CFG80211_SME_CONNECTED;
- cfg80211_upload_connect_keys(wdev);
+ /* Would be better if we knew the channel of the AP we
+ * connected to. Is it available in one of the IEs? */
+ if (wdev->conn2->channel) {
+ request->channels[0] = wdev->conn2->channel;
+ request->n_channels = 1;
+ request->ssids = (void *) &request->channels[1];
+ } else
+ request->ssids = (void *) &request->channels[0];

- country_ie = (u8 *) ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
+ request->n_ssids = 1;

- if (!country_ie)
- return;
+ memcpy(request->ssids[0].ssid, wdev->ssid, wdev->ssid_len);
+ request->ssids[0].ssid_len = wdev->ssid_len;
+
+ request->dev = dev;
+ request->wiphy = wdev->wiphy;
+
+ err = cfg80211_async_scan(wdev, request, false,
+ cfg80211_complete_connect);
+ if (err) {
+ kfree(request);
+ wdev->sme_state = CFG80211_SME_IDLE;
+ }
+ } else {
+ kfree(wdev->conn2);
+ wdev->conn2 = NULL;
+
+ __cfg80211_complete_connect(dev, bss);
+ };

- /*
- * ieee80211_bss_get_ie() ensures we can access:
- * - country_ie + 2, the start of the country ie data, and
- * - and country_ie[1] which is the IE length
- */
- regulatory_hint_11d(wdev->wiphy,
- bss->channel->band,
- country_ie + 2,
- country_ie[1]);
}

void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
@@ -854,10 +947,22 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,

return err;
} else {
+
+ if (WARN_ON_ONCE(wdev->conn2))
+ return -EINPROGRESS;
+
+ wdev->conn2 = kzalloc(sizeof(*wdev->conn2), GFP_KERNEL);
+ if (!wdev->conn2)
+ return -ENOMEM;
+
+ wdev->conn2->channel = connect->channel;
+
wdev->sme_state = CFG80211_SME_CONNECTING;
wdev->connect_keys = connkeys;
err = rdev->ops->connect(&rdev->wiphy, dev, connect);
if (err) {
+ kfree(wdev->conn2);
+ wdev->conn2 = NULL;
wdev->connect_keys = NULL;
wdev->sme_state = CFG80211_SME_IDLE;
return err;
--
1.6.4.4


2010-02-06 18:59:13

by Johannes Berg

[permalink] [raw]
Subject: Re: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

On Sat, 2010-02-06 at 18:26 +0000, Dave wrote:

> Thanks for the feedback. Like I said, the main point of posting was to
> see if this logic helped libertas in any way.

Right.

> Sounds like it doesn't, so
> I won't push this for now. When I get round to looking at orinoco again
> I'll see if I can handle this within the driver. If not I'll address the
> issues you've raised.
>
> I don't think orinoco can provide the BSS info without explicitly doing
> the scan (whether internally or via cfg80211). Even if I could get it to
> work for the firmware version I have, there's no guarantee that it'll
> work for the other supported firmware types (that I don't have).

Ok. Don't get me wrong -- I don't really mind doing it in cfg80211, just
wanted to see why, but if it really _requires_ a scan then that's fine.
Maybe in cfg80211 makes more sense, might simplify other drivers too, or
allow us to convert some of the other drivers that might not have such
functionality.

Since your patch creates a directed scan, it should be pretty quick just
scanning one channel, and maybe the driver knows the channel?

johannes


Attachments:
signature.asc (801.00 B)
This is a digitally signed message part

2010-02-06 17:52:41

by Dave Kilroy

[permalink] [raw]
Subject: Re: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

Holger Schurig wrote:
>> It sounds like the libertas cfg80211 drivers have the same problem, so I'm
>> posting them in case they're useful.
>
> I'm not sure about that.
>
> One thing that I'm sure of is that for the Libertas case I need to scan
> *before* the connection takes place. The Libertas firmware doesn't roam by
> itself, so I don't tell her an SSID like you'll tell the orinoco firmware.
>
> Instead the libertas firmware needs a BSSID where it should connect to. If
> you let do wpa_supplicant do the job (even for WEP), that's no problem,
> because wpa_supplicant always scans and then connects to a specific BSSID,
> which suits Libertas very well.

Fair enough. If I find the time to finish orinoco cfg80211, then I'll
try to keep this within the driver.


Dave.

2010-02-06 13:38:16

by Johannes Berg

[permalink] [raw]
Subject: Re: [RFC 2/2] cfg80211: scan for missing BSS on connect completion

On Sat, 2010-02-06 at 13:20 +0000, David Kilroy wrote:

> + if (wdev->conn2) {
> + struct cfg80211_bss *bss;
> +
> + bss = cfg80211_get_bss(wdev->wiphy, NULL, wdev->conn2->bssid,
> + wdev->ssid, wdev->ssid_len,
> + WLAN_CAPABILITY_ESS,
> + WLAN_CAPABILITY_ESS);
> +
> + if (WARN_ON(!bss))
> + wdev->sme_state = CFG80211_SME_IDLE;

Can that case happen "legitimately", say the AP crashes right after you
connect, possibly due to the connection?

Even if not, this will leave the driver in a different state than the
state machine here, which we should clean up regardless I think.

johannes


Attachments:
signature.asc (801.00 B)
This is a digitally signed message part

2010-02-06 13:59:09

by Johannes Berg

[permalink] [raw]
Subject: Re: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

On Sat, 2010-02-06 at 13:20 +0000, David Kilroy wrote:
> This patchset attempts to address the issue where a driver that uses
> the cfg80211 connect operation connects to an AP before the scan result
> for the AP is available. This triggers a WARN_ON in sme.c, and the
> connection fails.

With technical comments out of the way (see other emails) I guess I want
to know whether this really makes sense.

Yes, we definitely need the right BSS struct when the driver signals
that it connected, but we also need the same information when it roamed
for example. Does it really make sense to scan after the fact, instead
of asking the driver to provide the information? This is a genuine
question -- I can see value in this if more than one driver needs it,
but if the devices can just provide BSS information right before the
connected call that would be a lot simpler here.

johannes


Attachments:
signature.asc (801.00 B)
This is a digitally signed message part

2010-02-06 17:28:00

by Holger Schurig

[permalink] [raw]
Subject: Re: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

> It sounds like the libertas cfg80211 drivers have the same problem, so I'm
> posting them in case they're useful.

I'm not sure about that.

One thing that I'm sure of is that for the Libertas case I need to scan
*before* the connection takes place. The Libertas firmware doesn't roam by
itself, so I don't tell her an SSID like you'll tell the orinoco firmware.

Instead the libertas firmware needs a BSSID where it should connect to. If
you let do wpa_supplicant do the job (even for WEP), that's no problem,
because wpa_supplicant always scans and then connects to a specific BSSID,
which suits Libertas very well.

As for sending back then cfg80211_connect_result() to cfg80211, I'm using
the IEs from the firmware CMD_802_11_ASSOCIATE command, which seems to work
quite nicely.



The only thing where Libertas would need a scan during connect if someone
tries to connect to an AP without a previous scan using the "iw wlan0
connect" command. Not sure what Simon did here, but my previous v4 patch
simply failed.

--
http://www.holgerschurig.de

2010-02-06 18:26:31

by Dave Kilroy

[permalink] [raw]
Subject: Re: [RFC 0/2] cfg80211: Retrieve missing BSS info after connect

Johannes Berg wrote:
> On Sat, 2010-02-06 at 13:20 +0000, David Kilroy wrote:
>> This patchset attempts to address the issue where a driver that uses
>> the cfg80211 connect operation connects to an AP before the scan result
>> for the AP is available. This triggers a WARN_ON in sme.c, and the
>> connection fails.
>
> With technical comments out of the way (see other emails) I guess I want
> to know whether this really makes sense.
>
> Yes, we definitely need the right BSS struct when the driver signals
> that it connected, but we also need the same information when it roamed
> for example. Does it really make sense to scan after the fact, instead
> of asking the driver to provide the information? This is a genuine
> question -- I can see value in this if more than one driver needs it,
> but if the devices can just provide BSS information right before the
> connected call that would be a lot simpler here.

Thanks for the feedback. Like I said, the main point of posting was to
see if this logic helped libertas in any way. Sounds like it doesn't, so
I won't push this for now. When I get round to looking at orinoco again
I'll see if I can handle this within the driver. If not I'll address the
issues you've raised.

I don't think orinoco can provide the BSS info without explicitly doing
the scan (whether internally or via cfg80211). Even if I could get it to
work for the firmware version I have, there's no guarantee that it'll
work for the other supported firmware types (that I don't have).



Dave.

2010-02-06 13:20:38

by Dave Kilroy

[permalink] [raw]
Subject: [RFC 1/2] cfg80211: generic trigger scan and callback on completion

cfg80211 SME already has a hook for it's own callback, so we hook this
into the new mechanism.

We could maintain a list of functions to call, but this isn't necessary
for the moment. Just maintain the single function pointer to be called
on scan completion.

Note that we allow the caller to set the callback even when a scan is
already in progress. This allows cfg80211 SME to scan again
when the existing scan completes.

Signed-off-by: David Kilroy <[email protected]>
---
net/wireless/core.h | 6 ++++++
net/wireless/scan.c | 36 +++++++++++++++++++++++++++++++++++-
net/wireless/sme.c | 20 +++++---------------
3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/net/wireless/core.h b/net/wireless/core.h
index c326a66..7e0112f 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -63,6 +63,8 @@ struct cfg80211_registered_device {
unsigned long suspend_at;
struct work_struct scan_done_wk;

+ void (*async_scan_cb)(struct net_device *);
+
#ifdef CONFIG_NL80211_TESTMODE
struct genl_info *testmode_info;
#endif
@@ -372,6 +374,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
u32 *flags, struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+int cfg80211_async_scan(struct wireless_dev *wdev,
+ struct cfg80211_scan_request *request,
+ bool set_cb_on_busy,
+ void (*cb)(struct net_device *));

struct ieee80211_channel *
rdev_fixed_channel(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 978cac3..e3d0957 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -40,7 +40,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak)
* Otherwise, wpa_supplicant gets completely confused with
* wext events.
*/
- cfg80211_sme_scan_done(dev);
+ if (rdev->async_scan_cb) {
+ rdev->async_scan_cb(dev);
+ rdev->async_scan_cb = NULL;
+ }

if (request->aborted)
nl80211_send_scan_aborted(rdev, dev);
@@ -655,6 +658,37 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
}
EXPORT_SYMBOL(cfg80211_unlink_bss);

+/* Do a scan, and schedule a callback to be run on completion. */
+int cfg80211_async_scan(struct wireless_dev *wdev,
+ struct cfg80211_scan_request *request,
+ bool set_cb_on_busy,
+ void (*cb)(struct net_device *))
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+ int err;
+
+ ASSERT_RDEV_LOCK(rdev);
+
+ if (rdev->scan_req) {
+ if (set_cb_on_busy)
+ rdev->async_scan_cb = cb;
+ return -EBUSY;
+ }
+
+ rdev->scan_req = request;
+ rdev->async_scan_cb = cb;
+
+ err = rdev->ops->scan(wdev->wiphy, wdev->netdev, request);
+ if (!err) {
+ nl80211_send_scan_start(rdev, wdev->netdev);
+ dev_hold(wdev->netdev);
+ } else {
+ rdev->scan_req = NULL;
+ }
+
+ return err;
+}
+
#ifdef CONFIG_CFG80211_WEXT
int cfg80211_wext_siwscan(struct net_device *dev,
struct iw_request_info *info,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 17fde0d..2ef83b7 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -75,17 +75,12 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);

static int cfg80211_conn_scan(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
struct cfg80211_scan_request *request;
int n_channels, err;

ASSERT_RTNL();
- ASSERT_RDEV_LOCK(rdev);
ASSERT_WDEV_LOCK(wdev);

- if (rdev->scan_req)
- return -EBUSY;
-
if (wdev->conn->params.channel) {
n_channels = 1;
} else {
@@ -128,19 +123,14 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
request->ssids[0].ssid_len = wdev->conn->params.ssid_len;

request->dev = wdev->netdev;
- request->wiphy = &rdev->wiphy;
-
- rdev->scan_req = request;
+ request->wiphy = wdev->wiphy;

- err = rdev->ops->scan(wdev->wiphy, wdev->netdev, request);
- if (!err) {
+ err = cfg80211_async_scan(wdev, request, true, cfg80211_sme_scan_done);
+ if (!err)
wdev->conn->state = CFG80211_CONN_SCANNING;
- nl80211_send_scan_start(rdev, wdev->netdev);
- dev_hold(wdev->netdev);
- } else {
- rdev->scan_req = NULL;
+ else
kfree(request);
- }
+
return err;
}

--
1.6.4.4