2010-04-05 09:12:58

by Cong Wang

[permalink] [raw]
Subject: [v2 Patch 1/3] netpoll: add generic support for bridge and bonding devices

V2:
Fix some bugs of previous version.
Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
Don't poll all underlying devices, poll ->real_dev in struct netpoll.
Thanks to David for suggesting above.

--------->

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.

Please comment.


To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
IFF_IN_NETPOLL which identifies we are processing a netpoll;
IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
at run-time;

2) introduce one new method for netdev_ops:
->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) hide a pointer to struct netpoll in struct netpoll_info, ditto.

5) introduce ->real_dev for struct netpoll.

Cc: David Miller <[email protected]>
Cc: Neil Horman <[email protected]>
Signed-off-by: WANG Cong <[email protected]>

---

Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@
* release skb->dst
*/
#define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */

#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -667,6 +667,7 @@ struct net_device_ops {
unsigned short vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*ndo_poll_controller)(struct net_device *dev);
+ void (*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
int (*ndo_set_vf_mac)(struct net_device *dev,
int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -14,6 +14,7 @@

struct netpoll {
struct net_device *dev;
+ struct net_device *real_dev;
char dev_name[IFNAMSIZ];
const char *name;
void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
struct sk_buff_head txq;

struct delayed_work tx_work;
+
+ struct netpoll *netpoll;
};

+void netpoll_poll_dev(struct net_device *dev);
void netpoll_poll(struct netpoll *np);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
void netpoll_set_trap(int trap);
void netpoll_cleanup(struct netpoll *np);
int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);


#ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -178,9 +178,8 @@ static void service_arp_queue(struct net
}
}

-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
{
- struct net_device *dev = np->dev;
const struct net_device_ops *ops;

if (!dev || !netif_running(dev))
@@ -200,6 +199,11 @@ void netpoll_poll(struct netpoll *np)
zap_completion_queue();
}

+void netpoll_poll(struct netpoll *np)
+{
+ netpoll_poll_dev(np->dev);
+}
+
static void refill_skbs(void)
{
struct sk_buff *skb;
@@ -281,7 +285,7 @@ static int netpoll_owner_active(struct n
return 0;
}

-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
@@ -307,7 +311,9 @@ static void netpoll_send_skb(struct netp
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_tx_queue_stopped(txq)) {
+ dev->priv_flags |= IFF_IN_NETPOLL;
status = ops->ndo_start_xmit(skb, dev);
+ dev->priv_flags &= ~IFF_IN_NETPOLL;
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
@@ -755,7 +761,10 @@ int netpoll_setup(struct netpoll *np)
atomic_inc(&npinfo->refcnt);
}

- if (!ndev->netdev_ops->ndo_poll_controller) {
+ npinfo->netpoll = np;
+
+ if (ndev->priv_flags & IFF_DISABLE_NETPOLL
+ || !ndev->netdev_ops->ndo_poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
err = -ENOTSUPP;
@@ -877,6 +886,7 @@ void netpoll_cleanup(struct netpoll *np)
}

if (atomic_dec_and_test(&npinfo->refcnt)) {
+ const struct net_device_ops *ops;
skb_queue_purge(&npinfo->arp_tx);
skb_queue_purge(&npinfo->txq);
cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -884,7 +894,11 @@ void netpoll_cleanup(struct netpoll *np)
/* clean after last, unfinished work */
__skb_queue_purge(&npinfo->txq);
kfree(npinfo);
- np->dev->npinfo = NULL;
+ ops = np->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(np->dev);
+ else
+ np->dev->npinfo = NULL;
}
}

@@ -907,6 +921,7 @@ void netpoll_set_trap(int trap)
atomic_dec(&trapped);
}

+EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_print_options);
@@ -914,4 +929,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
EXPORT_SYMBOL(netpoll_poll);


2010-04-05 09:13:10

by Cong Wang

[permalink] [raw]
Subject: [v2 Patch 2/3] bridge: make bridge support netpoll


Based on the previous patch, make bridge support netpoll by:

1) implement the 2 methods to support netpoll for bridge;

2) modify netpoll during forwarding packets via bridge;

3) disable netpoll support of bridge when a netpoll-unabled device
is added to bridge;

4) enable netpoll support when all underlying devices support netpoll.

Cc: David Miller <[email protected]>
Cc: Neil Horman <[email protected]>
Cc: Stephen Hemminger <[email protected]>
Cc: Matt Mackall <[email protected]>
Signed-off-by: WANG Cong <[email protected]>

---

Index: linux-2.6/net/bridge/br_device.c
===================================================================
--- linux-2.6.orig/net/bridge/br_device.c
+++ linux-2.6/net/bridge/br_device.c
@@ -13,8 +13,10 @@

#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/list.h>

#include <asm/uaccess.h>
#include "br_private.h"
@@ -162,6 +164,59 @@ static int br_set_tx_csum(struct net_dev
return 0;
}

+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool ret = true;
+ int count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&br->lock, flags);
+ list_for_each_entry(p, &br->port_list, list) {
+ count++;
+ if (p->dev->priv_flags & IFF_DISABLE_NETPOLL
+ || !p->dev->netdev_ops->ndo_poll_controller)
+ ret = false;
+ }
+ spin_unlock_irqrestore(&br->lock, flags);
+ return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+ struct netpoll *np = br_dev->npinfo->netpoll;
+
+ if (np->real_dev != br_dev)
+ netpoll_poll_dev(np->real_dev);
+}
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_port *p, *n;
+ const struct net_device_ops *ops;
+
+ br->dev->npinfo = NULL;
+ list_for_each_entry_safe(p, n, &br->port_list, list) {
+ if (p->dev) {
+ ops = p->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(p->dev);
+ else
+ p->dev->npinfo = NULL;
+ }
+ }
+}
+
+#else
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+}
+
+#endif
+
static const struct ethtool_ops br_ethtool_ops = {
.get_drvinfo = br_getinfo,
.get_link = ethtool_op_get_link,
@@ -184,6 +239,10 @@ static const struct net_device_ops br_ne
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_netpoll_cleanup = br_netpoll_cleanup,
+ .ndo_poll_controller = br_poll_controller,
+#endif
};

void br_dev_setup(struct net_device *dev)
Index: linux-2.6/net/bridge/br_forward.c
===================================================================
--- linux-2.6.orig/net/bridge/br_forward.c
+++ linux-2.6/net/bridge/br_forward.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/netfilter_bridge.h>
@@ -49,7 +50,13 @@ int br_dev_queue_push_xmit(struct sk_buf
else {
skb_push(skb, ETH_HLEN);

- dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (skb->dev->priv_flags & IFF_IN_NETPOLL) {
+ netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+ skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+ } else
+#endif
+ dev_queue_xmit(skb);
}
}

@@ -65,9 +72,23 @@ int br_forward_finish(struct sk_buff *sk

static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct net_bridge *br = to->br;
+ if (br->dev->priv_flags & IFF_IN_NETPOLL) {
+ struct netpoll *np;
+ to->dev->npinfo = skb->dev->npinfo;
+ np = skb->dev->npinfo->netpoll;
+ np->real_dev = np->dev = to->dev;
+ to->dev->priv_flags |= IFF_IN_NETPOLL;
+ }
+#endif
skb->dev = to->dev;
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
br_forward_finish);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (skb->dev->npinfo)
+ skb->dev->npinfo->netpoll->dev = br->dev;
+#endif
}

static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
Index: linux-2.6/net/bridge/br_if.c
===================================================================
--- linux-2.6.orig/net/bridge/br_if.c
+++ linux-2.6/net/bridge/br_if.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
+#include <linux/netpoll.h>
#include <net/sock.h>

#include "br_private.h"
@@ -152,6 +153,14 @@ static void del_nbp(struct net_bridge_po
kobject_uevent(&p->kobj, KOBJ_REMOVE);
kobject_del(&p->kobj);

+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (br_devices_support_netpoll(br))
+ br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (dev->netdev_ops->ndo_netpoll_cleanup)
+ dev->netdev_ops->ndo_netpoll_cleanup(dev);
+ else
+ dev->npinfo = NULL;
+#endif
call_rcu(&p->rcu, destroy_nbp_rcu);
}

@@ -164,6 +173,8 @@ static void del_br(struct net_bridge *br
del_nbp(p);
}

+ br_netpoll_cleanup(br->dev);
+
del_timer_sync(&br->gc_timer);

br_sysfs_delbr(br->dev);
@@ -437,6 +448,20 @@ int br_add_if(struct net_bridge *br, str

kobject_uevent(&p->kobj, KOBJ_ADD);

+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (br_devices_support_netpoll(br)) {
+ br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (br->dev->npinfo)
+ dev->npinfo = br->dev->npinfo;
+ } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+ br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+ printk(KERN_INFO "New device %s does not support netpoll\n",
+ dev->name);
+ printk(KERN_INFO "Disabling netpoll for %s\n",
+ br->dev->name);
+ }
+#endif
+
return 0;
err2:
br_fdb_delete_by_port(br, p, 1);
Index: linux-2.6/net/bridge/br_private.h
===================================================================
--- linux-2.6.orig/net/bridge/br_private.h
+++ linux-2.6/net/bridge/br_private.h
@@ -233,6 +233,8 @@ static inline int br_is_root_bridge(cons
extern void br_dev_setup(struct net_device *dev);
extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
+extern void br_netpoll_cleanup(struct net_device *br_dev);

/* br_fdb.c */
extern int br_fdb_init(void);

2010-04-05 09:13:20

by Cong Wang

[permalink] [raw]
Subject: [v2 Patch 3/3] bonding: make bonding support netpoll


Based on Andy's work, but I modified a lot.

Similar to the patch for bridge, this patch does:

1) implement the 2 methods to support netpoll for bonding;

2) modify netpoll during forwarding packets via bonding;

3) disable netpoll support of bonding when a netpoll-unabled device
is added to bonding;

4) enable netpoll support when all underlying devices support netpoll.

Cc: Andy Gospodarek <[email protected]>
Cc: Jeff Moyer <[email protected]>
Cc: Matt Mackall <[email protected]>
Cc: Neil Horman <[email protected]>
Cc: Jay Vosburgh <[email protected]>
Cc: David Miller <[email protected]>
Signed-off-by: WANG Cong <[email protected]>

---

Index: linux-2.6/drivers/net/bonding/bond_main.c
===================================================================
--- linux-2.6.orig/drivers/net/bonding/bond_main.c
+++ linux-2.6/drivers/net/bonding/bond_main.c
@@ -59,6 +59,7 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
@@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding *
}

skb->priority = 1;
- dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (bond->dev->priv_flags & IFF_IN_NETPOLL) {
+ struct netpoll *np = bond->dev->npinfo->netpoll;
+ slave_dev->npinfo = bond->dev->npinfo;
+ np->real_dev = np->dev = skb->dev;
+ slave_dev->priv_flags |= IFF_IN_NETPOLL;
+ netpoll_send_skb(np, skb);
+ slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
+ np->dev = bond->dev;
+ } else
+#endif
+ dev_queue_xmit(skb);

return 0;
}
@@ -1329,6 +1341,60 @@ static void bond_detach_slave(struct bon
bond->slave_cnt--;
}

+#ifdef CONFIG_NET_POLL_CONTROLLER
+static bool slaves_support_netpoll(struct net_device *bond_dev)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+ int i = 0;
+ bool ret = true;
+
+ read_lock(&bond->lock);
+ bond_for_each_slave(bond, slave, i) {
+ if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL)
+ || !slave->dev->netdev_ops->ndo_poll_controller)
+ ret = false;
+ }
+ read_unlock(&bond->lock);
+ return i != 0 && ret;
+}
+
+static void bond_poll_controller(struct net_device *bond_dev)
+{
+ struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
+ if (dev != bond_dev)
+ netpoll_poll_dev(dev);
+}
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+ const struct net_device_ops *ops;
+ int i;
+
+ read_lock(&bond->lock);
+ bond_dev->npinfo = NULL;
+ bond_for_each_slave(bond, slave, i) {
+ if (slave->dev) {
+ ops = slave->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(slave->dev);
+ else
+ slave->dev->npinfo = NULL;
+ }
+ }
+ read_unlock(&bond->lock);
+}
+
+#else
+
+static void bond_netpoll_cleanup(struct net_device *bond_dev)
+{
+}
+
+#endif
+
/*---------------------------------- IOCTL ----------------------------------*/

static int bond_sethwaddr(struct net_device *bond_dev,
@@ -1746,6 +1812,18 @@ int bond_enslave(struct net_device *bond
new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
new_slave->link != BOND_LINK_DOWN ? "n up" : " down");

+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (slaves_support_netpoll(bond_dev)) {
+ bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (bond_dev->npinfo)
+ slave_dev->npinfo = bond_dev->npinfo;
+ } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+ bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
+ pr_info("New slave device %s does not support netpoll\n",
+ slave_dev->name);
+ pr_info("Disabling netpoll support for %s\n", bond_dev->name);
+ }
+#endif
/* enslave is successful */
return 0;

@@ -1929,6 +2007,15 @@ int bond_release(struct net_device *bond

netdev_set_master(slave_dev, NULL);

+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (slaves_support_netpoll(bond_dev))
+ bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
+ slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
+ else
+ slave_dev->npinfo = NULL;
+#endif
+
/* close slave before restoring its mac address */
dev_close(slave_dev);

@@ -4448,6 +4535,10 @@ static const struct net_device_ops bond_
.ndo_vlan_rx_register = bond_vlan_rx_register,
.ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_netpoll_cleanup = bond_netpoll_cleanup,
+ .ndo_poll_controller = bond_poll_controller,
+#endif
};

static void bond_setup(struct net_device *bond_dev)
@@ -4533,6 +4624,8 @@ static void bond_uninit(struct net_devic
{
struct bonding *bond = netdev_priv(bond_dev);

+ bond_netpoll_cleanup(bond_dev);
+
/* Release the bonded slaves */
bond_release_all(bond_dev);

2010-04-05 19:45:19

by Andy Gospodarek

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

On Mon, Apr 05, 2010 at 05:12:40AM -0400, Amerigo Wang wrote:
>
> Based on Andy's work, but I modified a lot.
>
> Similar to the patch for bridge, this patch does:
>
> 1) implement the 2 methods to support netpoll for bonding;
>
> 2) modify netpoll during forwarding packets via bonding;
>
> 3) disable netpoll support of bonding when a netpoll-unabled device
> is added to bonding;
>
> 4) enable netpoll support when all underlying devices support netpoll.
>
> Cc: Andy Gospodarek <[email protected]>
> Cc: Jeff Moyer <[email protected]>
> Cc: Matt Mackall <[email protected]>
> Cc: Neil Horman <[email protected]>
> Cc: Jay Vosburgh <[email protected]>
> Cc: David Miller <[email protected]>
> Signed-off-by: WANG Cong <[email protected]>
>

I tried these patches on top of Linus' latest tree and still get
deadlocks. Your line numbers might differ a bit, but you should be
seeing them too.

# echo 7 4 1 7 > /proc/sys/kernel/printk
# ifup bond0
bonding: bond0: setting mode to balance-rr (0).
bonding: bond0: Setting MII monitoring interval to 1000.
ADDRCONF(NETDEV_UP): bond0: link is not ready
bonding: bond0: Adding slave eth4.
bnx2 0000:10:00.0: eth4: using MSIX
bonding: bond0: enslaving eth4 as an active interface with a down link.
bonding: bond0: Adding slave eth5.
bnx2 0000:10:00.1: eth5: using MSIX
bonding: bond0: enslaving eth5 as an active interface with a down link.
bnx2 0000:10:00.0: eth4: NIC Copper Link is Up, 100 Mbps full duplex,
receive & transmit flow control ON
bonding: bond0: link status definitely up for interface eth4.
ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
bnx2 0000:10:00.1: eth5: NIC Copper Link is Up, 100 Mbps full duplex,
receive & transmit flow control ON
bond0: IPv6 duplicate address fe80::210:18ff:fe36:ad4 detected!
bonding: bond0: link status definitely up for interface eth5.
# cat /proc/net/bonding/bond0
Ethernet Channel Bonding Driver: v3.6.0 (September 26, 2009)

Bonding Mode: load balancing (round-robin)
MII Status: up
MII Polling Interval (ms): 1000
Up Delay (ms): 0
Down Delay (ms): 0

Slave Interface: eth4
MII Status: up
Link Failure Count: 0
Permanent HW addr: 00:10:18:36:0a:d4

Slave Interface: eth5
MII Status: up
Link Failure Count: 0
Permanent HW addr: 00:10:18:36:0a:d6
# modprobe netconsole
netconsole: local port 1234
netconsole: local IP 10.0.100.2
netconsole: interface 'bond0'
netconsole: remote port 6666
netconsole: remote IP 10.0.100.1
netconsole: remote ethernet address 00:e0:81:71:ee:aa
console [netcon0] enabled
netconsole: network logging started
# echo -eth4 > /sys/class/net/bond0/bonding/slaves
bonding: bond0: Removing slave eth4

[ now the system is hung ]

My suspicion from dealing with this problem in the past is that there is
contention over bond->lock.

Since there statements that will result in netconsole messages inside
the write_lock_bh in bond_release:

1882 write_lock_bh(&bond->lock);
1883
1884 slave = bond_get_slave_by_dev(bond, slave_dev);
1885 if (!slave) {
1886 /* not a slave of this bond */
1887 pr_info("%s: %s not enslaved\n",
1888 bond_dev->name, slave_dev->name);
1889 write_unlock_bh(&bond->lock);
1890 return -EINVAL;
1891 }
1892
1893 if (!bond->params.fail_over_mac) {
1894 if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) &&
1895 bond->slave_cnt > 1)
1896 pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s.

we are getting stuck at 1986 since bond_xmit_roundrobin (in my case)
will try and acquire bond->lock for reading.

One valuable aspect netpoll_start_xmit routine was that is could be used
to check to be sure that bond->lock could be taken for writing. This
made us sure that we were not in a call stack that has already taken the
lock and queuing the skb to be sent later would prevent the imminent
deadlock.

A way to prevent this is needed and a first-pass might be to do
something similar to what I below above for all the xmit routines. I
confirmed the following patch prevents that deadlock:

# git diff drivers/net/bonding/
diff --git a/drivers/net/bonding/bond_main.c
b/drivers/net/bonding/bond_main.c
index 4a41886..53b39cc 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4232,7 +4232,8 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struc
int i, slave_no, res = 1;
struct iphdr *iph = ip_hdr(skb);

- read_lock(&bond->lock);
+ if (!read_trylock(&bond->lock))
+ return NETDEV_TX_BUSY;

if (!BOND_IS_OK(bond))
goto out;

The kernel no longer hangs, but a new warning message shows up (over
netconsole even!):

------------[ cut here ]------------
WARNING: at kernel/softirq.c:143 local_bh_enable+0x43/0xba()
Hardware name: HP xw4400 Workstation
Modules linked in: tg3 netconsole bonding ipt_REJECT bridge stp autofs4
i2c_dev i2c_core hidp rfcomm l2cap crc16 bluetooth rfkill sunrpc 8021q
iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filter
ip6_tables x_tables ipv6 cpufreq_ondemand acpi_cpufreq dm_multipath
video output sbs sbshc battery acpi_memhotplug ac lp sg ide_cd_mod
tpm_tis rtc_cmos rtc_core serio_raw cdrom libphy e1000e floppy
parport_pc parport button tpm tpm_bios bnx2 rtc_lib tulip pcspkr shpchp
dm_snapshot dm_zero dm_mirror dm_region_hash dm_log dm_mod ata_piix ahci
libata sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last
unloaded: tg3]
Pid: 9, comm: events/0 Not tainted 2.6.34-rc3 #6
Call Trace:
[<ffffffff81058754>] ? cpu_clock+0x2d/0x41
[<ffffffff810404d9>] ? local_bh_enable+0x43/0xba
[<ffffffff8103a350>] warn_slowpath_common+0x77/0x8f
[<ffffffff812a4659>] ? dev_queue_xmit+0x408/0x467
[<ffffffff8103a377>] warn_slowpath_null+0xf/0x11
[<ffffffff810404d9>] local_bh_enable+0x43/0xba
[<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
[<ffffffff812a435e>] ? dev_queue_xmit+0x10d/0x467
[<ffffffffa04a3868>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
[<ffffffffa04a4217>] bond_start_xmit+0x139/0x3e9 [bonding]
[<ffffffff812b0e9a>] queue_process+0xa8/0x160
[<ffffffff812b0df2>] ? queue_process+0x0/0x160
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
[<ffffffff813362bc>] ? restore_args+0x0/0x30
[<ffffffff81053884>] ? kthread+0x0/0x85

to point out possible locking issues (probably in netpoll_send_skb) that
I would suggest you investigate further. It may point to why we cannot
perform an:

# rmmod bonding

without the system deadlocking (even with my patch above).

> ---
>
> Index: linux-2.6/drivers/net/bonding/bond_main.c
> ===================================================================
> --- linux-2.6.orig/drivers/net/bonding/bond_main.c
> +++ linux-2.6/drivers/net/bonding/bond_main.c
> @@ -59,6 +59,7 @@
> #include <linux/uaccess.h>
> #include <linux/errno.h>
> #include <linux/netdevice.h>
> +#include <linux/netpoll.h>
> #include <linux/inetdevice.h>
> #include <linux/igmp.h>
> #include <linux/etherdevice.h>
> @@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding *
> }
>
> skb->priority = 1;
> - dev_queue_xmit(skb);
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> + if (bond->dev->priv_flags & IFF_IN_NETPOLL) {
> + struct netpoll *np = bond->dev->npinfo->netpoll;
> + slave_dev->npinfo = bond->dev->npinfo;
> + np->real_dev = np->dev = skb->dev;
> + slave_dev->priv_flags |= IFF_IN_NETPOLL;
> + netpoll_send_skb(np, skb);
> + slave_dev->priv_flags &= ~IFF_IN_NETPOLL;
> + np->dev = bond->dev;
> + } else
> +#endif
> + dev_queue_xmit(skb);
>
> return 0;
> }
> @@ -1329,6 +1341,60 @@ static void bond_detach_slave(struct bon
> bond->slave_cnt--;
> }
>
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +static bool slaves_support_netpoll(struct net_device *bond_dev)
> +{
> + struct bonding *bond = netdev_priv(bond_dev);
> + struct slave *slave;
> + int i = 0;
> + bool ret = true;
> +
> + read_lock(&bond->lock);
> + bond_for_each_slave(bond, slave, i) {
> + if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL)
> + || !slave->dev->netdev_ops->ndo_poll_controller)
> + ret = false;
> + }
> + read_unlock(&bond->lock);
> + return i != 0 && ret;
> +}
> +
> +static void bond_poll_controller(struct net_device *bond_dev)
> +{
> + struct net_device *dev = bond_dev->npinfo->netpoll->real_dev;
> + if (dev != bond_dev)
> + netpoll_poll_dev(dev);
> +}
> +
> +static void bond_netpoll_cleanup(struct net_device *bond_dev)
> +{
> + struct bonding *bond = netdev_priv(bond_dev);
> + struct slave *slave;
> + const struct net_device_ops *ops;
> + int i;
> +
> + read_lock(&bond->lock);
> + bond_dev->npinfo = NULL;
> + bond_for_each_slave(bond, slave, i) {
> + if (slave->dev) {
> + ops = slave->dev->netdev_ops;
> + if (ops->ndo_netpoll_cleanup)
> + ops->ndo_netpoll_cleanup(slave->dev);
> + else
> + slave->dev->npinfo = NULL;
> + }
> + }
> + read_unlock(&bond->lock);
> +}
> +
> +#else
> +
> +static void bond_netpoll_cleanup(struct net_device *bond_dev)
> +{
> +}
> +
> +#endif
> +
> /*---------------------------------- IOCTL ----------------------------------*/
>
> static int bond_sethwaddr(struct net_device *bond_dev,
> @@ -1746,6 +1812,18 @@ int bond_enslave(struct net_device *bond
> new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
> new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
>
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> + if (slaves_support_netpoll(bond_dev)) {
> + bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
> + if (bond_dev->npinfo)
> + slave_dev->npinfo = bond_dev->npinfo;
> + } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) {
> + bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
> + pr_info("New slave device %s does not support netpoll\n",
> + slave_dev->name);
> + pr_info("Disabling netpoll support for %s\n", bond_dev->name);
> + }
> +#endif
> /* enslave is successful */
> return 0;
>
> @@ -1929,6 +2007,15 @@ int bond_release(struct net_device *bond
>
> netdev_set_master(slave_dev, NULL);
>
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> + if (slaves_support_netpoll(bond_dev))
> + bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
> + if (slave_dev->netdev_ops->ndo_netpoll_cleanup)
> + slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev);
> + else
> + slave_dev->npinfo = NULL;
> +#endif
> +
> /* close slave before restoring its mac address */
> dev_close(slave_dev);
>
> @@ -4448,6 +4535,10 @@ static const struct net_device_ops bond_
> .ndo_vlan_rx_register = bond_vlan_rx_register,
> .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
> .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> + .ndo_netpoll_cleanup = bond_netpoll_cleanup,
> + .ndo_poll_controller = bond_poll_controller,
> +#endif
> };
>
> static void bond_setup(struct net_device *bond_dev)
> @@ -4533,6 +4624,8 @@ static void bond_uninit(struct net_devic
> {
> struct bonding *bond = netdev_priv(bond_dev);
>
> + bond_netpoll_cleanup(bond_dev);
> +
> /* Release the bonded slaves */
> bond_release_all(bond_dev);
>

2010-04-06 02:40:29

by Cong Wang

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

Andy Gospodarek wrote:
>
> I tried these patches on top of Linus' latest tree and still get
> deadlocks. Your line numbers might differ a bit, but you should be
> seeing them too.
>


Yeah, my local clone is some days behind Linus' latest tree. :)


> # echo 7 4 1 7 > /proc/sys/kernel/printk
> # ifup bond0
> bonding: bond0: setting mode to balance-rr (0).
> bonding: bond0: Setting MII monitoring interval to 1000.
> ADDRCONF(NETDEV_UP): bond0: link is not ready
> bonding: bond0: Adding slave eth4.
> bnx2 0000:10:00.0: eth4: using MSIX
> bonding: bond0: enslaving eth4 as an active interface with a down link.
> bonding: bond0: Adding slave eth5.
> bnx2 0000:10:00.1: eth5: using MSIX
> bonding: bond0: enslaving eth5 as an active interface with a down link.
> bnx2 0000:10:00.0: eth4: NIC Copper Link is Up, 100 Mbps full duplex,
> receive & transmit flow control ON
> bonding: bond0: link status definitely up for interface eth4.
> ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready
> bnx2 0000:10:00.1: eth5: NIC Copper Link is Up, 100 Mbps full duplex,
> receive & transmit flow control ON
> bond0: IPv6 duplicate address fe80::210:18ff:fe36:ad4 detected!
> bonding: bond0: link status definitely up for interface eth5.
> # cat /proc/net/bonding/bond0
> Ethernet Channel Bonding Driver: v3.6.0 (September 26, 2009)
>
> Bonding Mode: load balancing (round-robin)
> MII Status: up
> MII Polling Interval (ms): 1000
> Up Delay (ms): 0
> Down Delay (ms): 0
>
> Slave Interface: eth4
> MII Status: up
> Link Failure Count: 0
> Permanent HW addr: 00:10:18:36:0a:d4
>
> Slave Interface: eth5
> MII Status: up
> Link Failure Count: 0
> Permanent HW addr: 00:10:18:36:0a:d6
> # modprobe netconsole
> netconsole: local port 1234
> netconsole: local IP 10.0.100.2
> netconsole: interface 'bond0'
> netconsole: remote port 6666
> netconsole: remote IP 10.0.100.1
> netconsole: remote ethernet address 00:e0:81:71:ee:aa
> console [netcon0] enabled
> netconsole: network logging started
> # echo -eth4 > /sys/class/net/bond0/bonding/slaves
> bonding: bond0: Removing slave eth4
>
> [ now the system is hung ]
>
> My suspicion from dealing with this problem in the past is that there is
> contention over bond->lock.
>
> Since there statements that will result in netconsole messages inside
> the write_lock_bh in bond_release:
>
> 1882 write_lock_bh(&bond->lock);
> 1883
> 1884 slave = bond_get_slave_by_dev(bond, slave_dev);
> 1885 if (!slave) {
> 1886 /* not a slave of this bond */
> 1887 pr_info("%s: %s not enslaved\n",
> 1888 bond_dev->name, slave_dev->name);
> 1889 write_unlock_bh(&bond->lock);
> 1890 return -EINVAL;
> 1891 }
> 1892
> 1893 if (!bond->params.fail_over_mac) {
> 1894 if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) &&
> 1895 bond->slave_cnt > 1)
> 1896 pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s.
>
> we are getting stuck at 1986 since bond_xmit_roundrobin (in my case)
> will try and acquire bond->lock for reading.
>
> One valuable aspect netpoll_start_xmit routine was that is could be used
> to check to be sure that bond->lock could be taken for writing. This
> made us sure that we were not in a call stack that has already taken the
> lock and queuing the skb to be sent later would prevent the imminent
> deadlock.
>
> A way to prevent this is needed and a first-pass might be to do
> something similar to what I below above for all the xmit routines. I
> confirmed the following patch prevents that deadlock:
>
> # git diff drivers/net/bonding/
> diff --git a/drivers/net/bonding/bond_main.c
> b/drivers/net/bonding/bond_main.c
> index 4a41886..53b39cc 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4232,7 +4232,8 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struc
> int i, slave_no, res = 1;
> struct iphdr *iph = ip_hdr(skb);
>
> - read_lock(&bond->lock);
> + if (!read_trylock(&bond->lock))
> + return NETDEV_TX_BUSY;
>
> if (!BOND_IS_OK(bond))
> goto out;
>
> The kernel no longer hangs, but a new warning message shows up (over
> netconsole even!):
>
> ------------[ cut here ]------------
> WARNING: at kernel/softirq.c:143 local_bh_enable+0x43/0xba()
> Hardware name: HP xw4400 Workstation
> Modules linked in: tg3 netconsole bonding ipt_REJECT bridge stp autofs4
> i2c_dev i2c_core hidp rfcomm l2cap crc16 bluetooth rfkill sunrpc 8021q
> iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filter
> ip6_tables x_tables ipv6 cpufreq_ondemand acpi_cpufreq dm_multipath
> video output sbs sbshc battery acpi_memhotplug ac lp sg ide_cd_mod
> tpm_tis rtc_cmos rtc_core serio_raw cdrom libphy e1000e floppy
> parport_pc parport button tpm tpm_bios bnx2 rtc_lib tulip pcspkr shpchp
> dm_snapshot dm_zero dm_mirror dm_region_hash dm_log dm_mod ata_piix ahci
> libata sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last
> unloaded: tg3]
> Pid: 9, comm: events/0 Not tainted 2.6.34-rc3 #6
> Call Trace:
> [<ffffffff81058754>] ? cpu_clock+0x2d/0x41
> [<ffffffff810404d9>] ? local_bh_enable+0x43/0xba
> [<ffffffff8103a350>] warn_slowpath_common+0x77/0x8f
> [<ffffffff812a4659>] ? dev_queue_xmit+0x408/0x467
> [<ffffffff8103a377>] warn_slowpath_null+0xf/0x11
> [<ffffffff810404d9>] local_bh_enable+0x43/0xba
> [<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
> [<ffffffff812a435e>] ? dev_queue_xmit+0x10d/0x467
> [<ffffffffa04a3868>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
> [<ffffffffa04a4217>] bond_start_xmit+0x139/0x3e9 [bonding]
> [<ffffffff812b0e9a>] queue_process+0xa8/0x160
> [<ffffffff812b0df2>] ? queue_process+0x0/0x160
> [<ffffffff81003794>] kernel_thread_helper+0x4/0x10
> [<ffffffff813362bc>] ? restore_args+0x0/0x30
> [<ffffffff81053884>] ? kthread+0x0/0x85
>
> to point out possible locking issues (probably in netpoll_send_skb) that
> I would suggest you investigate further. It may point to why we cannot
> perform an:
>
> # rmmod bonding
>
> without the system deadlocking (even with my patch above).
>

Thanks a lot for testing!

Before I try to reproduce it, could you please try to replace the 'read_lock()'
in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock() too) Try if this helps.

After I reproduce this, I will try it too.

2010-04-06 04:36:10

by Cong Wang

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

Cong Wang wrote:
> Before I try to reproduce it, could you please try to replace the
> 'read_lock()'
> in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock() too)
> Try if this helps.
>

Confirmed. Please use the attached patch instead, for your testing.

Thanks!


Attachments:
bonding-support-netpoll.diff (4.13 kB)

2010-04-06 14:49:52

by Andy Gospodarek

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

On Tue, Apr 06, 2010 at 12:38:16PM +0800, Cong Wang wrote:
> Cong Wang wrote:
>> Before I try to reproduce it, could you please try to replace the
>> 'read_lock()'
>> in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock() too)
>> Try if this helps.
>>
>
> Confirmed. Please use the attached patch instead, for your testing.
>
> Thanks!
>

Moving those locks to bh-locks will not resolve this. I tried that
yesterday and tried your new patch today without success. That warning
is a WARN_ON_ONCE so you need to reboot to see that it is still a
problem. Simply unloading and loading the new module is not an accurate
test.

Also, my system still hangs when removing the bonding module. I do not
think you intended to fix this with the patch, but wanted it to be clear
to everyone on the list.

You should also configure your kernel with a some of the lock debugging
enabled. I've been using the following:

CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y

Here is the output when I remove a slave from the bond. My
xmit_roundrobin patch from earlier (replacing read_lock with
read_trylock) was applied. It might be helpful for you when debugging
these issues.

------------[ cut here ]------------
WARNING: at kernel/softirq.c:143 local_bh_enable+0x43/0xba()
Hardware name: HP xw4400 Workstation
Modules linked in: netconsole bonding ipt_REJECT bridge stp autofs4 i2c_dev i2c_core hidp rfcomm
l2cap crc16 bluetooth rfki]
Pid: 10, comm: events/1 Not tainted 2.6.34-rc3 #6
Call Trace:
[<ffffffff81058754>] ? cpu_clock+0x2d/0x41
[<ffffffff810404d9>] ? local_bh_enable+0x43/0xba
[<ffffffff8103a350>] warn_slowpath_common+0x77/0x8f
[<ffffffff812a4659>] ? dev_queue_xmit+0x408/0x467
[<ffffffff8103a377>] warn_slowpath_null+0xf/0x11
[<ffffffff810404d9>] local_bh_enable+0x43/0xba
[<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
[<ffffffff812a435e>] ? dev_queue_xmit+0x10d/0x467
[<ffffffffa04a383f>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
[<ffffffffa04a41ee>] bond_start_xmit+0x139/0x3e9 [bonding]
[<ffffffff812b0e9a>] queue_process+0xa8/0x160
[<ffffffff812b0df2>] ? queue_process+0x0/0x160
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81050ba2>] ? worker_thread+0x156/0x2ae
[<ffffffff81053c34>] ? autoremove_wake_function+0x0/0x38
[<ffffffff81050a4c>] ? worker_thread+0x0/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
[<ffffffff813362bc>] ? restore_args+0x0/0x30
[<ffffffff81053884>] ? kthread+0x0/0x85
[<ffffffff81003790>] ? kernel_thread_helper+0x0/0x10
---[ end trace 241f49bf65e0f4f0 ]---

=========================================================
[ INFO: possible irq lock inversion dependency detected ]
2.6.34-rc3 #6
---------------------------------------------------------
events/1/10 just changed the state of lock:
(&bonding_netdev_xmit_lock_key){+.+...}, at: [<ffffffff812b0e75>] queue_process+0x83/0x160
but this lock was taken by another, SOFTIRQ-safe lock in the past:
(&(&dev->tx_global_lock)->rlock){+.-...}

and interrupts could create inverse lock ordering between them.


other info that might help us debug this:
4 locks held by events/1/10:
#0: (events){+.+.+.}, at: [<ffffffff81050ba2>] worker_thread+0x156/0x2ae
#1: ((&(&npinfo->tx_work)->work)){+.+...}, at: [<ffffffff81050ba2>] worker_thread+0x156/0x2ae
#2: (&bonding_netdev_xmit_lock_key){+.+...}, at: [<ffffffff812b0e75>] queue_process+0x83/0x160
#3: (&bond->lock){++.+..}, at: [<ffffffffa04a4107>] bond_start_xmit+0x52/0x3e9 [bonding]

the shortest dependencies between 2nd lock and 1st lock:
-> (&(&dev->tx_global_lock)->rlock){+.-...} ops: 129 {
HARDIRQ-ON-W at:
[<ffffffff810651ef>] __lock_acquire+0x643/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b64bd>] dev_deactivate+0x6f/0x195
[<ffffffff812ad7c4>] linkwatch_do_dev+0x9a/0xae
[<ffffffff812ada6a>] __linkwatch_run_queue+0x106/0x14a
[<ffffffff812adad8>] linkwatch_event+0x2a/0x31
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
IN-SOFTIRQ-W at:
[<ffffffff810651a3>] __lock_acquire+0x5f7/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b6606>] dev_watchdog+0x23/0x1f2
[<ffffffff8104701b>] run_timer_softirq+0x1d1/0x285
[<ffffffff81040021>] __do_softirq+0xdb/0x1ab
[<ffffffff8100388c>] call_softirq+0x1c/0x34
[<ffffffff81004f9d>] do_softirq+0x38/0x83
[<ffffffff8103ff44>] irq_exit+0x45/0x47
[<ffffffff810193bc>] smp_apic_timer_interrupt+0x88/0x98
[<ffffffff81003353>] apic_timer_interrupt+0x13/0x20
[<ffffffff81001a21>] cpu_idle+0x4d/0x6b
[<ffffffff8131da3a>] rest_init+0xbe/0xc2
[<ffffffff81a00d4e>] start_kernel+0x38c/0x399
[<ffffffff81a002a5>] x86_64_start_reservations+0xb5/0xb9
[<ffffffff81a0038f>] x86_64_start_kernel+0xe6/0xed
INITIAL USE at:
[<ffffffff8106525c>] __lock_acquire+0x6b0/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b64bd>] dev_deactivate+0x6f/0x195
[<ffffffff812ad7c4>] linkwatch_do_dev+0x9a/0xae
[<ffffffff812ada6a>] __linkwatch_run_queue+0x106/0x14a
[<ffffffff812adad8>] linkwatch_event+0x2a/0x31
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
}
... key at: [<ffffffff8282ceb0>] __key.51521+0x0/0x8
... acquired at:
[<ffffffff810649f9>] validate_chain+0xb87/0xd3a
[<ffffffff81065359>] __lock_acquire+0x7ad/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b64e4>] dev_deactivate+0x96/0x195
[<ffffffff812a17fc>] __dev_close+0x69/0x86
[<ffffffff8129f8ed>] __dev_change_flags+0xa8/0x12b
[<ffffffff812a148c>] dev_change_flags+0x1c/0x51
[<ffffffff812eee8a>] devinet_ioctl+0x26e/0x5d0
[<ffffffff812ef978>] inet_ioctl+0x8a/0xa2
[<ffffffff8128fc28>] sock_do_ioctl+0x26/0x45
[<ffffffff8128fe5a>] sock_ioctl+0x213/0x226
[<ffffffff810e5988>] vfs_ioctl+0x2a/0x9d
[<ffffffff810e5f13>] do_vfs_ioctl+0x491/0x4e2
[<ffffffff810e5fbb>] sys_ioctl+0x57/0x7a
[<ffffffff8100296b>] system_call_fastpath+0x16/0x1b

-> (&bonding_netdev_xmit_lock_key){+.+...} ops: 2 {
HARDIRQ-ON-W at:
[<ffffffff810651ef>] __lock_acquire+0x643/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b64e4>] dev_deactivate+0x96/0x195
[<ffffffff812a17fc>] __dev_close+0x69/0x86
[<ffffffff8129f8ed>] __dev_change_flags+0xa8/0x12b
[<ffffffff812a148c>] dev_change_flags+0x1c/0x51
[<ffffffff812eee8a>] devinet_ioctl+0x26e/0x5d0
[<ffffffff812ef978>] inet_ioctl+0x8a/0xa2
[<ffffffff8128fc28>] sock_do_ioctl+0x26/0x45
[<ffffffff8128fe5a>] sock_ioctl+0x213/0x226
[<ffffffff810e5988>] vfs_ioctl+0x2a/0x9d
[<ffffffff810e5f13>] do_vfs_ioctl+0x491/0x4e2
[<ffffffff810e5fbb>] sys_ioctl+0x57/0x7a
[<ffffffff8100296b>] system_call_fastpath+0x16/0x1b
SOFTIRQ-ON-W at:
[<ffffffff81062006>] mark_held_locks+0x49/0x69
[<ffffffff81062139>] trace_hardirqs_on_caller+0x113/0x13e
[<ffffffff81062171>] trace_hardirqs_on+0xd/0xf
[<ffffffff81040548>] local_bh_enable+0xb2/0xba
[<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
[<ffffffffa04a383f>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
[<ffffffffa04a41ee>] bond_start_xmit+0x139/0x3e9 [bonding]
[<ffffffff812b0e9a>] queue_process+0xa8/0x160
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
INITIAL USE at:
[<ffffffff8106525c>] __lock_acquire+0x6b0/0x813
[<ffffffff81065487>] lock_acquire+0xc8/0xed
[<ffffffff81335742>] _raw_spin_lock+0x31/0x66
[<ffffffff812b64e4>] dev_deactivate+0x96/0x195
[<ffffffff812a17fc>] __dev_close+0x69/0x86
[<ffffffff8129f8ed>] __dev_change_flags+0xa8/0x12b
[<ffffffff812a148c>] dev_change_flags+0x1c/0x51
[<ffffffff812eee8a>] devinet_ioctl+0x26e/0x5d0
[<ffffffff812ef978>] inet_ioctl+0x8a/0xa2
[<ffffffff8128fc28>] sock_do_ioctl+0x26/0x45
[<ffffffff8128fe5a>] sock_ioctl+0x213/0x226
[<ffffffff810e5988>] vfs_ioctl+0x2a/0x9d
[<ffffffff810e5f13>] do_vfs_ioctl+0x491/0x4e2
[<ffffffff810e5fbb>] sys_ioctl+0x57/0x7a
[<ffffffff8100296b>] system_call_fastpath+0x16/0x1b
}
... key at: [<ffffffffa04b1968>] bonding_netdev_xmit_lock_key+0x0/0xffffffffffffa78c [bonding]
... acquired at:
[<ffffffff8106386d>] check_usage_backwards+0xb8/0xc7
[<ffffffff81061d81>] mark_lock+0x311/0x54d
[<ffffffff81062006>] mark_held_locks+0x49/0x69
[<ffffffff81062139>] trace_hardirqs_on_caller+0x113/0x13e
[<ffffffff81062171>] trace_hardirqs_on+0xd/0xf
[<ffffffff81040548>] local_bh_enable+0xb2/0xba
[<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
[<ffffffffa04a383f>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
[<ffffffffa04a41ee>] bond_start_xmit+0x139/0x3e9 [bonding]
[<ffffffff812b0e9a>] queue_process+0xa8/0x160
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10


stack backtrace:
Pid: 10, comm: events/1 Tainted: G W 2.6.34-rc3 #6
Call Trace:
[<ffffffff8106189e>] print_irq_inversion_bug+0x121/0x130
[<ffffffff8106386d>] check_usage_backwards+0xb8/0xc7
[<ffffffff810637b5>] ? check_usage_backwards+0x0/0xc7
[<ffffffff81061d81>] mark_lock+0x311/0x54d
[<ffffffff81062006>] mark_held_locks+0x49/0x69
[<ffffffff81040548>] ? local_bh_enable+0xb2/0xba
[<ffffffff81062139>] trace_hardirqs_on_caller+0x113/0x13e
[<ffffffff812a4659>] ? dev_queue_xmit+0x408/0x467
[<ffffffff81062171>] trace_hardirqs_on+0xd/0xf
[<ffffffff81040548>] local_bh_enable+0xb2/0xba
[<ffffffff812a4659>] dev_queue_xmit+0x408/0x467
[<ffffffff812a435e>] ? dev_queue_xmit+0x10d/0x467
[<ffffffffa04a383f>] bond_dev_queue_xmit+0x1cd/0x1f9 [bonding]
[<ffffffffa04a41ee>] bond_start_xmit+0x139/0x3e9 [bonding]
[<ffffffff812b0e9a>] queue_process+0xa8/0x160
[<ffffffff812b0df2>] ? queue_process+0x0/0x160
[<ffffffff81050bfb>] worker_thread+0x1af/0x2ae
[<ffffffff81050ba2>] ? worker_thread+0x156/0x2ae
[<ffffffff81053c34>] ? autoremove_wake_function+0x0/0x38
[<ffffffff81050a4c>] ? worker_thread+0x0/0x2ae
[<ffffffff81053901>] kthread+0x7d/0x85
[<ffffffff81003794>] kernel_thread_helper+0x4/0x10
[<ffffffff813362bc>] ? restore_args+0x0/0x30
[<ffffffff81053884>] ? kthread+0x0/0x85
[<ffffffff81003790>] ? kernel_thread_helper+0x0/0x10
Dead loop on virtual device bond0, fix it urgently!

2010-04-07 02:28:54

by Cong Wang

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

Andy Gospodarek wrote:
> On Tue, Apr 06, 2010 at 12:38:16PM +0800, Cong Wang wrote:
>> Cong Wang wrote:
>>> Before I try to reproduce it, could you please try to replace the
>>> 'read_lock()'
>>> in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock() too)
>>> Try if this helps.
>>>
>> Confirmed. Please use the attached patch instead, for your testing.
>>
>> Thanks!
>>
>
> Moving those locks to bh-locks will not resolve this. I tried that
> yesterday and tried your new patch today without success. That warning
> is a WARN_ON_ONCE so you need to reboot to see that it is still a
> problem. Simply unloading and loading the new module is not an accurate
> test.
>
> Also, my system still hangs when removing the bonding module. I do not
> think you intended to fix this with the patch, but wanted it to be clear
> to everyone on the list.


Actually I did reboot and then tested the module. I didn't get any warning.
I just tried again today, and no warnings at all.

For removing bonding module, you may need another fix of mine,
which is to fix a potential deadlock of workqueue. Try:

http://lkml.org/lkml/2010/4/1/58

>
> You should also configure your kernel with a some of the lock debugging
> enabled. I've been using the following:
>
> CONFIG_DETECT_HUNG_TASK=y
> CONFIG_DEBUG_SPINLOCK=y
> CONFIG_DEBUG_MUTEXES=y
> CONFIG_DEBUG_LOCK_ALLOC=y
> CONFIG_PROVE_LOCKING=y
> CONFIG_LOCKDEP=y
> CONFIG_LOCK_STAT=y
> CONFIG_DEBUG_LOCKDEP=y


Sure, I always keep these.

>
> Here is the output when I remove a slave from the bond. My
> xmit_roundrobin patch from earlier (replacing read_lock with
> read_trylock) was applied. It might be helpful for you when debugging
> these issues.


I don't apply your patch, just tested my patch.

>
> Dead loop on virtual device bond0, fix it urgently!
>

Please provide your bonding configuration and steps to reproduce it.

What I did is:

1. Load bonding module with "mode=0 miimon=100"
2. Enslave eth0 and active bond0
3. Load netconsole and send messages via bond0
4. Remove eth0 from bond0
5. Remove bonding module
6. Remove netconsole module

And no deadlocks, no warnings.

Thanks.

2010-04-07 04:08:36

by Andy Gospodarek

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

On Apr 6, 2010, at 10:32 PM, Cong Wang <[email protected]> wrote:

> Andy Gospodarek wrote:
>> On Tue, Apr 06, 2010 at 12:38:16PM +0800, Cong Wang wrote:
>>> Cong Wang wrote:
>>>> Before I try to reproduce it, could you please try to replace
>>>> the 'read_lock()'
>>>> in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock()
>>>> too) Try if this helps.
>>>>
>>> Confirmed. Please use the attached patch instead, for your testing.
>>>
>>> Thanks!
>>>
>> Moving those locks to bh-locks will not resolve this. I tried that
>> yesterday and tried your new patch today without success. That
>> warning
>> is a WARN_ON_ONCE so you need to reboot to see that it is still a
>> problem. Simply unloading and loading the new module is not an
>> accurate
>> test.
>> Also, my system still hangs when removing the bonding module. I do
>> not
>> think you intended to fix this with the patch, but wanted it to be
>> clear
>> to everyone on the list.
>
>
> Actually I did reboot and then tested the module. I didn't get any
> warning.
> I just tried again today, and no warnings at all.
>
> For removing bonding module, you may need another fix of mine,
> which is to fix a potential deadlock of workqueue. Try:
>
> http://lkml.org/lkml/2010/4/1/58
>
>> You should also configure your kernel with a some of the lock
>> debugging
>> enabled. I've been using the following:
>> CONFIG_DETECT_HUNG_TASK=y
>> CONFIG_DEBUG_SPINLOCK=y
>> CONFIG_DEBUG_MUTEXES=y
>> CONFIG_DEBUG_LOCK_ALLOC=y
>> CONFIG_PROVE_LOCKING=y
>> CONFIG_LOCKDEP=y
>> CONFIG_LOCK_STAT=y
>> CONFIG_DEBUG_LOCKDEP=y
>
>
> Sure, I always keep these.
>
>> Here is the output when I remove a slave from the bond. My
>> xmit_roundrobin patch from earlier (replacing read_lock with
>> read_trylock) was applied. It might be helpful for you when
>> debugging
>> these issues.
>
>
> I don't apply your patch, just tested my patch.
>
>> Dead loop on virtual device bond0, fix it urgently!
>
> Please provide your bonding configuration and steps to reproduce it.
>

My first response in this thread provides the commands and
configuration needed to reproduce this.

> What I did is:
>
> 1. Load bonding module with "mode=0 miimon=100"
> 2. Enslave eth0 and active bond0
> 3. Load netconsole and send messages via bond0
> 4. Remove eth0 from bond0
> 5. Remove bonding module
> 6. Remove netconsole module

Thanks for sending your configuration.

What values are in /proc/sys/kernel/printk?

> And no deadlocks, no warnings.
>
> Thanks.

2010-04-07 04:18:23

by Cong Wang

[permalink] [raw]
Subject: Re: [v2 Patch 3/3] bonding: make bonding support netpoll

Andy Gospodarek wrote:
> On Apr 6, 2010, at 10:32 PM, Cong Wang <[email protected]> wrote:
>
>> Andy Gospodarek wrote:
>>> On Tue, Apr 06, 2010 at 12:38:16PM +0800, Cong Wang wrote:
>>>> Cong Wang wrote:
>>>>> Before I try to reproduce it, could you please try to replace
>>>>> the 'read_lock()'
>>>>> in slaves_support_netpoll() with 'read_lock_bh()'? (read_unlock()
>>>>> too) Try if this helps.
>>>>>
>>>> Confirmed. Please use the attached patch instead, for your testing.
>>>>
>>>> Thanks!
>>>>
>>> Moving those locks to bh-locks will not resolve this. I tried that
>>> yesterday and tried your new patch today without success. That
>>> warning
>>> is a WARN_ON_ONCE so you need to reboot to see that it is still a
>>> problem. Simply unloading and loading the new module is not an
>>> accurate
>>> test.
>>> Also, my system still hangs when removing the bonding module. I do
>>> not
>>> think you intended to fix this with the patch, but wanted it to be
>>> clear
>>> to everyone on the list.
>>
>> Actually I did reboot and then tested the module. I didn't get any
>> warning.
>> I just tried again today, and no warnings at all.
>>
>> For removing bonding module, you may need another fix of mine,
>> which is to fix a potential deadlock of workqueue. Try:
>>
>> http://lkml.org/lkml/2010/4/1/58
>>
>>> You should also configure your kernel with a some of the lock
>>> debugging
>>> enabled. I've been using the following:
>>> CONFIG_DETECT_HUNG_TASK=y
>>> CONFIG_DEBUG_SPINLOCK=y
>>> CONFIG_DEBUG_MUTEXES=y
>>> CONFIG_DEBUG_LOCK_ALLOC=y
>>> CONFIG_PROVE_LOCKING=y
>>> CONFIG_LOCKDEP=y
>>> CONFIG_LOCK_STAT=y
>>> CONFIG_DEBUG_LOCKDEP=y
>>
>> Sure, I always keep these.
>>
>>> Here is the output when I remove a slave from the bond. My
>>> xmit_roundrobin patch from earlier (replacing read_lock with
>>> read_trylock) was applied. It might be helpful for you when
>>> debugging
>>> these issues.
>>
>> I don't apply your patch, just tested my patch.
>>
>>> Dead loop on virtual device bond0, fix it urgently!
>> Please provide your bonding configuration and steps to reproduce it.
>>
>
> My first response in this thread provides the commands and
> configuration needed to reproduce this.


Then I should do the right thing.

>
>> What I did is:
>>
>> 1. Load bonding module with "mode=0 miimon=100"
>> 2. Enslave eth0 and active bond0
>> 3. Load netconsole and send messages via bond0
>> 4. Remove eth0 from bond0
>> 5. Remove bonding module
>> 6. Remove netconsole module
>
> Thanks for sending your configuration.
>
> What values are in /proc/sys/kernel/printk?
>

I use default values on RHEL5:

6 4 1 7

I don't think this is related with loglevel, what I checked
is dmesg, not just the console screen.

Thanks.