2011-09-14 14:47:59

by Stanislaw Gruszka

[permalink] [raw]
Subject: [PATCH] ipw2x00: fix rtnl mutex deadlock

This fix regression introduced by:

commit: ecb4433550f0620f3d1471ae7099037ede30a91e
Author: Stanislaw Gruszka <[email protected]>
Date: Fri Aug 12 14:00:59 2011 +0200

mac80211: fix suspend/resume races with unregister hw

Above commit add rtnl_lock() into wiphy_register(), what cause deadlock
when initializing ipw2x00 driver, which itself call wiphy_register()
from register_netdev() internal callback with rtnl mutex taken.

To fix move wiphy_register() outside register_netdev(). This solution
have side effect of not creating /sys/class/net/wlanX/phy80211 link,
but that's a minor issue we can live with.

Bisected-by: Witold Baryluk <[email protected]>
Bisected-by: Michael Witten <[email protected]>
Tested-by: Witold Baryluk <[email protected]>
Tested-by: Michael Witten <[email protected]>
Signed-off-by: Stanislaw Gruszka <[email protected]>
---
ecb4433550f0620f3d1471ae7099037ede30a91e was CCed to stable but we
drop it there, when bug was reported. So this patch is only intended
to 3.1

drivers/net/wireless/ipw2x00/ipw2100.c | 21 +++++++++++-----
drivers/net/wireless/ipw2x00/ipw2200.c | 39 +++++++++++++++++++++----------
2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 3774dd0..ef9ad79 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -1903,15 +1903,17 @@ static void ipw2100_down(struct ipw2100_priv *priv)
static int ipw2100_net_init(struct net_device *dev)
{
struct ipw2100_priv *priv = libipw_priv(dev);
+
+ return ipw2100_up(priv, 1);
+}
+
+static int ipw2100_wdev_init(struct net_device *dev)
+{
+ struct ipw2100_priv *priv = libipw_priv(dev);
const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
struct wireless_dev *wdev = &priv->ieee->wdev;
- int ret;
int i;

- ret = ipw2100_up(priv, 1);
- if (ret)
- return ret;
-
memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);

/* fill-out priv->ieee->bg_band */
@@ -6350,9 +6352,13 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
"Error calling register_netdev.\n");
goto fail;
}
+ registered = 1;
+
+ err = ipw2100_wdev_init(dev);
+ if (err)
+ goto fail;

mutex_lock(&priv->action_mutex);
- registered = 1;

IPW_DEBUG_INFO("%s: Bound to %s\n", dev->name, pci_name(pci_dev));

@@ -6389,7 +6395,8 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,

fail_unlock:
mutex_unlock(&priv->action_mutex);
-
+ wiphy_unregister(priv->ieee->wdev.wiphy);
+ kfree(priv->ieee->bg_band.channels);
fail:
if (dev) {
if (registered)
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index 87813c3..4ffebed 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -11425,16 +11425,23 @@ static void ipw_bg_down(struct work_struct *work)
/* Called by register_netdev() */
static int ipw_net_init(struct net_device *dev)
{
+ int rc = 0;
+ struct ipw_priv *priv = libipw_priv(dev);
+
+ mutex_lock(&priv->mutex);
+ if (ipw_up(priv))
+ rc = -EIO;
+ mutex_unlock(&priv->mutex);
+
+ return rc;
+}
+
+static int ipw_wdev_init(struct net_device *dev)
+{
int i, rc = 0;
struct ipw_priv *priv = libipw_priv(dev);
const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
struct wireless_dev *wdev = &priv->ieee->wdev;
- mutex_lock(&priv->mutex);
-
- if (ipw_up(priv)) {
- rc = -EIO;
- goto out;
- }

memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);

@@ -11519,13 +11526,9 @@ static int ipw_net_init(struct net_device *dev)
set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);

/* With that information in place, we can now register the wiphy... */
- if (wiphy_register(wdev->wiphy)) {
+ if (wiphy_register(wdev->wiphy))
rc = -EIO;
- goto out;
- }
-
out:
- mutex_unlock(&priv->mutex);
return rc;
}

@@ -11832,14 +11835,22 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
goto out_remove_sysfs;
}

+ err = ipw_wdev_init(net_dev);
+ if (err) {
+ IPW_ERROR("failed to register wireless device\n");
+ goto out_unregister_netdev;
+ }
+
#ifdef CONFIG_IPW2200_PROMISCUOUS
if (rtap_iface) {
err = ipw_prom_alloc(priv);
if (err) {
IPW_ERROR("Failed to register promiscuous network "
"device (error %d).\n", err);
- unregister_netdev(priv->net_dev);
- goto out_remove_sysfs;
+ wiphy_unregister(priv->ieee->wdev.wiphy);
+ kfree(priv->ieee->a_band.channels);
+ kfree(priv->ieee->bg_band.channels);
+ goto out_unregister_netdev;
}
}
#endif
@@ -11851,6 +11862,8 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,

return 0;

+ out_unregister_netdev:
+ unregister_netdev(priv->net_dev);
out_remove_sysfs:
sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group);
out_release_irq:
--
1.7.1



2011-09-15 21:22:55

by Witold Baryluk

[permalink] [raw]
Subject: Re: [PATCH] ipw2x00: fix rtnl mutex deadlock

On 09-15 09:42, Stanislaw Gruszka wrote:
> On Wed, Sep 14, 2011 at 12:22:40PM -0500, Dan Williams wrote:
> > On Wed, 2011-09-14 at 16:47 +0200, Stanislaw Gruszka wrote:
> > > This fix regression introduced by:
> > >
> > > commit: ecb4433550f0620f3d1471ae7099037ede30a91e
> > > Author: Stanislaw Gruszka <[email protected]>
> > > Date: Fri Aug 12 14:00:59 2011 +0200
> > >
> > > mac80211: fix suspend/resume races with unregister hw
> > >
> > > Above commit add rtnl_lock() into wiphy_register(), what cause deadlock
> > > when initializing ipw2x00 driver, which itself call wiphy_register()
> > > from register_netdev() internal callback with rtnl mutex taken.
> > >
> > > To fix move wiphy_register() outside register_netdev(). This solution
> > > have side effect of not creating /sys/class/net/wlanX/phy80211 link,
> > > but that's a minor issue we can live with.
> >
> > Unfortunately that path is one of the ways that programs distinguish
> > wifi devices from plain ethernet devices. The other way is to poke it
> > with WEXT. Should poking it with nl80211 be added to the mix instead?
>
> Not sure, maybe device/ieee80211/phy0 could be used?
>
> [stasiu@dhcp-27-172 ~]$ readlink -f /sys/class/net/wlan0/phy80211
> /sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/ieee80211/phy0
> [stasiu@dhcp-27-172 ~]$ readlink -f /sys/class/net/wlan0/device/ieee80211/phy0
> /sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/ieee80211/phy0

I conifrm that (with patch applied)
/sys/class/net/eth1/device/ieee80211/phy0 does exist
(both ieee80211 and phy0 are real directories not symlinks,
only eth1 -> ../../devices/pci0000:00/0000:00:1e.0/0000:0b:02.0/net/eth1,
and device -> ../../../0000:0b:02.0 are symlinks),
only .../eth1/phy80211 symlinks is missing.

I am using NetworkManafer 0.9.0-1 and do not have any visible
problems now.

Regards,
Witek

--
Witold Baryluk


Attachments:
(No filename) (1.84 kB)
signature.asc (198.00 B)
Digital signature
Download all attachments

2011-09-15 08:35:13

by Stanislaw Gruszka

[permalink] [raw]
Subject: Re: [PATCH] ipw2x00: fix rtnl mutex deadlock

On Wed, Sep 14, 2011 at 12:22:40PM -0500, Dan Williams wrote:
> On Wed, 2011-09-14 at 16:47 +0200, Stanislaw Gruszka wrote:
> > This fix regression introduced by:
> >
> > commit: ecb4433550f0620f3d1471ae7099037ede30a91e
> > Author: Stanislaw Gruszka <[email protected]>
> > Date: Fri Aug 12 14:00:59 2011 +0200
> >
> > mac80211: fix suspend/resume races with unregister hw
> >
> > Above commit add rtnl_lock() into wiphy_register(), what cause deadlock
> > when initializing ipw2x00 driver, which itself call wiphy_register()
> > from register_netdev() internal callback with rtnl mutex taken.
> >
> > To fix move wiphy_register() outside register_netdev(). This solution
> > have side effect of not creating /sys/class/net/wlanX/phy80211 link,
> > but that's a minor issue we can live with.
>
> Unfortunately that path is one of the ways that programs distinguish
> wifi devices from plain ethernet devices. The other way is to poke it
> with WEXT. Should poking it with nl80211 be added to the mix instead?

Not sure, maybe device/ieee80211/phy0 could be used?

[stasiu@dhcp-27-172 ~]$ readlink -f /sys/class/net/wlan0/phy80211
/sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/ieee80211/phy0
[stasiu@dhcp-27-172 ~]$ readlink -f /sys/class/net/wlan0/device/ieee80211/phy0
/sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/ieee80211/phy0

> I guess for ipw2x00 it's not an issue since the driver depends on WEXT
> and thus the WEXT method will always work.

Hopefully yes.

Generally this link creation problem is fixable, but requires some more
rumble in ipw2x00 driver, which is old and orphaned, hence I'm reluctant
to modify it in some more way.

Stanislaw

2011-09-14 17:19:33

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCH] ipw2x00: fix rtnl mutex deadlock

On Wed, 2011-09-14 at 16:47 +0200, Stanislaw Gruszka wrote:
> This fix regression introduced by:
>
> commit: ecb4433550f0620f3d1471ae7099037ede30a91e
> Author: Stanislaw Gruszka <[email protected]>
> Date: Fri Aug 12 14:00:59 2011 +0200
>
> mac80211: fix suspend/resume races with unregister hw
>
> Above commit add rtnl_lock() into wiphy_register(), what cause deadlock
> when initializing ipw2x00 driver, which itself call wiphy_register()
> from register_netdev() internal callback with rtnl mutex taken.
>
> To fix move wiphy_register() outside register_netdev(). This solution
> have side effect of not creating /sys/class/net/wlanX/phy80211 link,
> but that's a minor issue we can live with.

Unfortunately that path is one of the ways that programs distinguish
wifi devices from plain ethernet devices. The other way is to poke it
with WEXT. Should poking it with nl80211 be added to the mix instead?
I guess for ipw2x00 it's not an issue since the driver depends on WEXT
and thus the WEXT method will always work. But this special usage of
the phy80211 link is something to remember.

Dan

> Bisected-by: Witold Baryluk <[email protected]>
> Bisected-by: Michael Witten <[email protected]>
> Tested-by: Witold Baryluk <[email protected]>
> Tested-by: Michael Witten <[email protected]>
> Signed-off-by: Stanislaw Gruszka <[email protected]>
> ---
> ecb4433550f0620f3d1471ae7099037ede30a91e was CCed to stable but we
> drop it there, when bug was reported. So this patch is only intended
> to 3.1
>
> drivers/net/wireless/ipw2x00/ipw2100.c | 21 +++++++++++-----
> drivers/net/wireless/ipw2x00/ipw2200.c | 39 +++++++++++++++++++++----------
> 2 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
> index 3774dd0..ef9ad79 100644
> --- a/drivers/net/wireless/ipw2x00/ipw2100.c
> +++ b/drivers/net/wireless/ipw2x00/ipw2100.c
> @@ -1903,15 +1903,17 @@ static void ipw2100_down(struct ipw2100_priv *priv)
> static int ipw2100_net_init(struct net_device *dev)
> {
> struct ipw2100_priv *priv = libipw_priv(dev);
> +
> + return ipw2100_up(priv, 1);
> +}
> +
> +static int ipw2100_wdev_init(struct net_device *dev)
> +{
> + struct ipw2100_priv *priv = libipw_priv(dev);
> const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
> struct wireless_dev *wdev = &priv->ieee->wdev;
> - int ret;
> int i;
>
> - ret = ipw2100_up(priv, 1);
> - if (ret)
> - return ret;
> -
> memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);
>
> /* fill-out priv->ieee->bg_band */
> @@ -6350,9 +6352,13 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
> "Error calling register_netdev.\n");
> goto fail;
> }
> + registered = 1;
> +
> + err = ipw2100_wdev_init(dev);
> + if (err)
> + goto fail;
>
> mutex_lock(&priv->action_mutex);
> - registered = 1;
>
> IPW_DEBUG_INFO("%s: Bound to %s\n", dev->name, pci_name(pci_dev));
>
> @@ -6389,7 +6395,8 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
>
> fail_unlock:
> mutex_unlock(&priv->action_mutex);
> -
> + wiphy_unregister(priv->ieee->wdev.wiphy);
> + kfree(priv->ieee->bg_band.channels);
> fail:
> if (dev) {
> if (registered)
> diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
> index 87813c3..4ffebed 100644
> --- a/drivers/net/wireless/ipw2x00/ipw2200.c
> +++ b/drivers/net/wireless/ipw2x00/ipw2200.c
> @@ -11425,16 +11425,23 @@ static void ipw_bg_down(struct work_struct *work)
> /* Called by register_netdev() */
> static int ipw_net_init(struct net_device *dev)
> {
> + int rc = 0;
> + struct ipw_priv *priv = libipw_priv(dev);
> +
> + mutex_lock(&priv->mutex);
> + if (ipw_up(priv))
> + rc = -EIO;
> + mutex_unlock(&priv->mutex);
> +
> + return rc;
> +}
> +
> +static int ipw_wdev_init(struct net_device *dev)
> +{
> int i, rc = 0;
> struct ipw_priv *priv = libipw_priv(dev);
> const struct libipw_geo *geo = libipw_get_geo(priv->ieee);
> struct wireless_dev *wdev = &priv->ieee->wdev;
> - mutex_lock(&priv->mutex);
> -
> - if (ipw_up(priv)) {
> - rc = -EIO;
> - goto out;
> - }
>
> memcpy(wdev->wiphy->perm_addr, priv->mac_addr, ETH_ALEN);
>
> @@ -11519,13 +11526,9 @@ static int ipw_net_init(struct net_device *dev)
> set_wiphy_dev(wdev->wiphy, &priv->pci_dev->dev);
>
> /* With that information in place, we can now register the wiphy... */
> - if (wiphy_register(wdev->wiphy)) {
> + if (wiphy_register(wdev->wiphy))
> rc = -EIO;
> - goto out;
> - }
> -
> out:
> - mutex_unlock(&priv->mutex);
> return rc;
> }
>
> @@ -11832,14 +11835,22 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
> goto out_remove_sysfs;
> }
>
> + err = ipw_wdev_init(net_dev);
> + if (err) {
> + IPW_ERROR("failed to register wireless device\n");
> + goto out_unregister_netdev;
> + }
> +
> #ifdef CONFIG_IPW2200_PROMISCUOUS
> if (rtap_iface) {
> err = ipw_prom_alloc(priv);
> if (err) {
> IPW_ERROR("Failed to register promiscuous network "
> "device (error %d).\n", err);
> - unregister_netdev(priv->net_dev);
> - goto out_remove_sysfs;
> + wiphy_unregister(priv->ieee->wdev.wiphy);
> + kfree(priv->ieee->a_band.channels);
> + kfree(priv->ieee->bg_band.channels);
> + goto out_unregister_netdev;
> }
> }
> #endif
> @@ -11851,6 +11862,8 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
>
> return 0;
>
> + out_unregister_netdev:
> + unregister_netdev(priv->net_dev);
> out_remove_sysfs:
> sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group);
> out_release_irq: