Message-ID: <3B4C98DA.BC9ECE5@colorfullife.com>
Date: Wed, 11 Jul 2001 20:20:10 +0200
From: Manfred Spraul <manfred@colorfullife.com>
MIME-Version: 1.0
To: linux-kernel@vger.kernel.org, linux-net@vger.kernel.org
Subject: [PATCH] natsemi compiler workaround & cleanup
Content-Type: multipart/mixed;
 boundary="------------FAB27239A9C021B21203DBD6"
Sender: linux-kernel-owner@vger.kernel.org

This is a multi-part message in MIME format.
--------------FAB27239A9C021B21203DBD6
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

The rx setup in init_ring() in drivers/net/natsemi.c in 2.4.6 is
miscompiled by several gcc-2.95 versions.

I could reproduce it with 2.95.1, and I received bug reports with 
gcc version 2.95.2 20000220 (Debian GNU/Linux)
gcc 2.95.3 19991030 from Mandrake 7.2

egcs-1.12 and rh gcc 2.96-85 are not affected.

Symptom: receive process hangs after 2 packets.

The patch also cleans up the suspend/resume synchronization and removes
2 superflous (& wrong) spin_unlock calls.

--
	Manfred
--------------FAB27239A9C021B21203DBD6
Content-Type: text/plain; charset=us-ascii;
 name="patch-natsemi"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch-natsemi"

--- 2.4/drivers/net/natsemi.c	Sat Jul  7 13:06:04 2001
+++ build-2.4/drivers/net/natsemi.c	Wed Jul 11 20:06:34 2001
@@ -326,6 +326,8 @@
 	IntrNormalSummary=0x0251, IntrAbnormalSummary=0xED20,
 };
 
+#define DEFAULT_INTR 0x00f1cd65
+
 /* Bits in the RxMode register. */
 enum rx_mode_bits {
 	AcceptErr=0x20, AcceptRunt=0x10,
@@ -388,6 +390,7 @@
 static int  eeprom_read(long ioaddr, int location);
 static int  mdio_read(struct net_device *dev, int phy_id, int location);
 static void natsemi_reset(struct net_device *dev);
+static void natsemi_stop_rxtx(struct net_device *dev);
 static int  netdev_open(struct net_device *dev);
 static void check_link(struct net_device *dev);
 static void netdev_timer(unsigned long data);
@@ -640,7 +643,26 @@
 	}
 }
 
-
+static void natsemi_stop_rxtx(struct net_device *dev)
+{
+	long ioaddr = dev->base_addr;
+	int i;
+
+	writel(RxOff | TxOff, ioaddr + ChipCmd);
+	for(i=0;i< NATSEMI_HW_TIMEOUT;i++) {
+		if ((readl(ioaddr + ChipCmd) & (TxOn|RxOn)) == 0)
+			break;
+		udelay(5);
+	}
+	if (i==NATSEMI_HW_TIMEOUT && debug) {
+		printk(KERN_INFO "%s: Tx/Rx process did not stop in %d usec.\n",
+				dev->name, i*5);
+	} else if (debug > 2) {
+		printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n",
+				dev->name, i*5);
+	}
+}
+
 static int netdev_open(struct net_device *dev)
 {
 	struct netdev_private *np = dev->priv;
@@ -662,7 +684,9 @@
 		return i;
 	}
 	init_ring(dev);
+	spin_lock_irq(&np->lock);
 	init_registers(dev);
+	spin_unlock_irq(&np->lock);
 
 	netif_start_queue(dev);
 
@@ -798,7 +822,7 @@
 	__set_rx_mode(dev);
 
 	/* Enable interrupts by setting the interrupt mask. */
-	writel(IntrNormalSummary | IntrAbnormalSummary | 0x1f, ioaddr + IntrMask);
+	writel(DEFAULT_INTR, ioaddr + IntrMask);
 	writel(1, ioaddr + IntrEnable);
 
 	writel(RxOn | TxOn, ioaddr + ChipCmd);
@@ -825,30 +849,51 @@
 	add_timer(&np->timer);
 }
 
-static void tx_timeout(struct net_device *dev)
+static void dump_ring(struct net_device *dev)
 {
 	struct netdev_private *np = dev->priv;
-	long ioaddr = dev->base_addr;
-
-	printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
-		   " resetting...\n", dev->name, (int)readl(ioaddr + TxRingPtr));
 
-	{
+	if (debug > 2) {
 		int i;
-		printk(KERN_DEBUG "  Rx ring %p: ", np->rx_ring);
-		for (i = 0; i < RX_RING_SIZE; i++)
-			printk(" %8.8x", (unsigned int)np->rx_ring[i].cmd_status);
-		printk("\n"KERN_DEBUG"  Tx ring %p: ", np->tx_ring);
+		printk(KERN_DEBUG "  Tx ring at %8.8x:\n",
+			   (int)np->tx_ring);
 		for (i = 0; i < TX_RING_SIZE; i++)
-			printk(" %4.4x", np->tx_ring[i].cmd_status);
-		printk("\n");
+			printk(KERN_DEBUG " #%d desc. %8.8x %8.8x %8.8x.\n",
+				   i, np->tx_ring[i].next_desc,
+				   np->tx_ring[i].cmd_status, np->tx_ring[i].addr);
+		printk(KERN_DEBUG "  Rx ring %8.8x:\n",
+			   (int)np->rx_ring);
+		for (i = 0; i < RX_RING_SIZE; i++) {
+			printk(KERN_DEBUG " #%d desc. %8.8x %8.8x %8.8x.\n",
+				   i, np->rx_ring[i].next_desc,
+				   np->rx_ring[i].cmd_status, np->rx_ring[i].addr);
+		}
 	}
+}
+
+static void tx_timeout(struct net_device *dev)
+{
+	struct netdev_private *np = dev->priv;
+	long ioaddr = dev->base_addr;
+
+
+	disable_irq(dev->irq);
 	spin_lock_irq(&np->lock);
-	natsemi_reset(dev);
-	drain_ring(dev);
-	init_ring(dev);
-	init_registers(dev);
+	if (netif_device_present(dev)) {
+		printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
+			   " resetting...\n", dev->name, readl(ioaddr + IntrStatus));
+		dump_ring(dev);
+
+		natsemi_reset(dev);
+		drain_ring(dev);
+		init_ring(dev);
+		init_registers(dev);
+	} else {
+		printk(KERN_WARNING "%s: tx_timeout while in suspended state?\n",
+		   		dev->name);
+	}
 	spin_unlock_irq(&np->lock);
+	enable_irq(dev->irq);
 
 	dev->trans_start = jiffies;
 	np->stats.tx_errors++;
@@ -879,14 +924,17 @@
 	np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
 	np->rx_head_desc = &np->rx_ring[0];
 
+	/* Please be carefull before changing this loop - at least gcc-2.95.1
+	 * miscompiles it otherwise.
+	 */
 	/* Initialize all Rx descriptors. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
-		np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma+sizeof(struct netdev_desc)*(i+1));
+		np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma
+				+sizeof(struct netdev_desc)
+				 *((i+1)%RX_RING_SIZE));
 		np->rx_ring[i].cmd_status = cpu_to_le32(DescOwn);
 		np->rx_skbuff[i] = NULL;
 	}
-	/* Mark the last entry as wrapping the ring. */
-	np->rx_ring[i-1].next_desc = cpu_to_le32(np->ring_dma);
 
 	/* Fill in the Rx buffers.  Handle allocation failure gracefully. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
@@ -898,18 +946,18 @@
 		np->rx_dma[i] = pci_map_single(np->pci_dev,
 						skb->data, skb->len, PCI_DMA_FROMDEVICE);
 		np->rx_ring[i].addr = cpu_to_le32(np->rx_dma[i]);
-		np->rx_ring[i].cmd_status = cpu_to_le32(DescIntr | np->rx_buf_sz);
+		np->rx_ring[i].cmd_status = cpu_to_le32(np->rx_buf_sz);
 	}
 	np->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
 
 	for (i = 0; i < TX_RING_SIZE; i++) {
 		np->tx_skbuff[i] = NULL;
 		np->tx_ring[i].next_desc = cpu_to_le32(np->ring_dma
-					+sizeof(struct netdev_desc)*(i+1+RX_RING_SIZE));
+					+sizeof(struct netdev_desc)
+					 *((i+1)%TX_RING_SIZE+RX_RING_SIZE));
 		np->tx_ring[i].cmd_status = 0;
 	}
-	np->tx_ring[i-1].next_desc = cpu_to_le32(np->ring_dma
-					+sizeof(struct netdev_desc)*(RX_RING_SIZE));
+	dump_ring(dev);
 }
 
 static void drain_ring(struct net_device *dev)
@@ -968,25 +1016,25 @@
 	np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]);
 
 	spin_lock_irq(&np->lock);
-
-#if 0
-	np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | DescIntr | skb->len);
-#else
-	np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len);
-#endif
-	/* StrongARM: Explicitly cache flush np->tx_ring and skb->data,skb->len. */
-	wmb();
-	np->cur_tx++;
-	if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) {
-		netdev_tx_done(dev);
-		if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1)
-			netif_stop_queue(dev);
+	
+	if (netif_device_present(dev)) {
+		np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len);
+		/* StrongARM: Explicitly cache flush np->tx_ring and skb->data,skb->len. */
+		wmb();
+		np->cur_tx++;
+		if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) {
+			netdev_tx_done(dev);
+			if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1)
+				netif_stop_queue(dev);
+		}
+		/* Wake the potentially-idle transmit channel. */
+		writel(TxOn, dev->base_addr + ChipCmd);
+	} else {
+		dev_kfree_skb_irq(skb);
+		np->stats.tx_dropped++;
 	}
 	spin_unlock_irq(&np->lock);
 
-	/* Wake the potentially-idle transmit channel. */
-	writel(TxOn, dev->base_addr + ChipCmd);
-
 	dev->trans_start = jiffies;
 
 	if (debug > 4) {
@@ -1035,9 +1083,8 @@
 		/* The ring is no longer full, wake queue. */
 		netif_wake_queue(dev);
 	}
-	spin_unlock(&np->lock);
-
 }
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static void intr_handler(int irq, void *dev_instance, struct pt_regs *rgs)
@@ -1049,7 +1096,9 @@
 
 	ioaddr = dev->base_addr;
 	np = dev->priv;
-
+	
+	if (!netif_device_present(dev))
+		return;
 	do {
 		/* Reading automatically acknowledges all int sources. */
 		u32 intr_status = readl(ioaddr + IntrStatus);
@@ -1173,7 +1222,7 @@
 			np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]);
 		}
 		np->rx_ring[entry].cmd_status =
-			cpu_to_le32(DescIntr | np->rx_buf_sz);
+			cpu_to_le32(np->rx_buf_sz);
 	}
 
 	/* Restart Rx engine if stopped. */
@@ -1229,20 +1278,20 @@
 
 	/* The chip only need report frame silently dropped. */
 	np->stats.rx_crc_errors	+= readl(ioaddr + RxCRCErrs);
-	np->stats.rx_missed_errors	+= readl(ioaddr + RxMissed);
+	np->stats.rx_missed_errors += readl(ioaddr + RxMissed);
 }
 
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
 	struct netdev_private *np = dev->priv;
 
-	/* The chip only need report frame silently dropped. */
 	spin_lock_irq(&np->lock);
-	__get_stats(dev);
+	if (netif_running(dev) && netif_device_present(dev))
+		__get_stats(dev);
 	spin_unlock_irq(&np->lock);
-
 	return &np->stats;
 }
+
 /* The little-endian AUTODIN II ethernet CRC calculations.
    A big-endian version is also available.
    This is slow but compact code.  Do not use this routine for bulk data,
@@ -1335,13 +1384,13 @@
 	}
 	writel(rx_mode, ioaddr + RxFilterAddr);
 	np->cur_rx_mode = rx_mode;
-	spin_unlock_irq(&np->lock);
 }
 static void set_rx_mode(struct net_device *dev)
 {
 	struct netdev_private *np = dev->priv;
 	spin_lock_irq(&np->lock);
-	__set_rx_mode(dev);
+	if (netif_device_present(dev))
+		__set_rx_mode(dev);
 	spin_unlock_irq(&np->lock);
 }
 
@@ -1408,48 +1457,49 @@
 	netif_stop_queue(dev);
 
 	if (debug > 1) {
-		printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.",
+		printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n",
 			   dev->name, (int)readl(ioaddr + ChipCmd));
 		printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d,  Rx %d / %d.\n",
 			   dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx);
 	}
 
-	/* Disable interrupts using the mask. */
-	writel(0, ioaddr + IntrMask);
-	writel(0, ioaddr + IntrEnable);
-	writel(2, ioaddr + StatsCtrl); 					/* Freeze Stats */
-
-	/* Stop the chip's Tx and Rx processes. */
-	writel(RxOff | TxOff, ioaddr + ChipCmd);
-
 	del_timer_sync(&np->timer);
 
-#ifdef __i386__
-	if (debug > 2) {
-		int i;
-		printk("\n"KERN_DEBUG"  Tx ring at %8.8x:\n",
-			   (int)np->tx_ring);
-		for (i = 0; i < TX_RING_SIZE; i++)
-			printk(" #%d desc. %8.8x %8.8x.\n",
-				   i, np->tx_ring[i].cmd_status, np->tx_ring[i].addr);
-		printk("\n"KERN_DEBUG "  Rx ring %8.8x:\n",
-			   (int)np->rx_ring);
-		for (i = 0; i < RX_RING_SIZE; i++) {
-			printk(KERN_DEBUG " #%d desc. %8.8x %8.8x\n",
-				   i, np->rx_ring[i].cmd_status, np->rx_ring[i].addr);
-		}
+	disable_irq(dev->irq);
+	spin_lock_irq(&np->lock);
+	if (netif_device_present(dev)) {
+		writel(0, ioaddr + IntrMask);
+		writel(0, ioaddr + IntrEnable);
+		writel(2, ioaddr + StatsCtrl);		/* Freeze Stats */
+
+		natsemi_stop_rxtx(dev);
+		__get_stats(dev);
 	}
-#endif /* __i386__ debugging only */
+	spin_unlock_irq(&np->lock);
 
+	/* race: shared irq and as most nics the DP83815
+	 * reports _all_ interrupt conditions in IntrStatus, even
+	 * disabled ones.
+	 * packet received after disable_irq, but before stop_rxtx
+	 * --> race. intr_handler would restart the rx process.
+	 * netif_device_{de,a}tach around {enable,free}_irq.
+	 */
+	netif_device_detach(dev);
+	enable_irq(dev->irq);
 	free_irq(dev->irq, dev);
+	netif_device_attach(dev);
+
+	dump_ring(dev);
 	drain_ring(dev);
 	free_ring(dev);
 
-	/* Restore PME enable bit */
-	writel(np->SavedClkRun, ioaddr + ClkRun);
+	if (netif_device_present(dev))  {
+		/* Restore PME enable bit */
+		writel(np->SavedClkRun, ioaddr + ClkRun);
 #if 0
-	writel(0x0200, ioaddr + ChipConfig); /* Power down Xcvr. */
+		writel(0x0200, ioaddr + ChipConfig); /* Power down Xcvr. */
 #endif
+	}
 
 	return 0;
 }
@@ -1468,49 +1518,54 @@
 
 #ifdef CONFIG_PM
 
+/*
+ * suspend/resume synchronization:
+ * entry points:
+ *   netdev_open, netdev_close, netdev_ioctl, set_rx_mode, intr_handler,
+ *   start_tx, tx_timeout
+ * - no function accesses the hardware without checking netif_device_present().
+ * 	the check occurs under spin_lock_irq(&np->lock);
+ * 	exceptions:
+ * 	* netdev_ioctl, netdev_open.
+ * 		net/core checks netif_device_present() before calling them.
+ *	* netdev_timer: timer stopped by natsemi_suspend.
+ *	* intr_handler: doesn't acquire the spinlock. suspend calls
+ *		disable_irq() to enforce synchronization.
+ *
+ * netif_device_detach must occur under spin_unlock_irq(), interrupts from a detached
+ * device would cause an irq storm.
+ */
+
 static int natsemi_suspend (struct pci_dev *pdev, u32 state)
 {
 	struct net_device *dev = pci_get_drvdata (pdev);
 	struct netdev_private *np = dev->priv;
 	long ioaddr = dev->base_addr;
 
-	netif_device_detach(dev);
-	/* no more calls to tx_timeout, hard_start_xmit, set_rx_mode */
 	rtnl_lock();
-	rtnl_unlock();
-	/* noone within ->open */
 	if (netif_running (dev)) {
-		int i;
 		del_timer_sync(&np->timer);
-		/* no more link beat timer calls */
+
+		disable_irq(dev->irq);
 		spin_lock_irq(&np->lock);
-		writel(RxOff | TxOff, ioaddr + ChipCmd);
-		for(i=0;i< NATSEMI_HW_TIMEOUT;i++) {
-			if ((readl(ioaddr + ChipCmd) & (TxOn|RxOn)) == 0)
-				break;
-			udelay(5);
-		}
-		if (i==NATSEMI_HW_TIMEOUT && debug) {
-			printk(KERN_INFO "%s: Tx/Rx process did not stop in %d usec.\n",
-					dev->name, i*5);
-		} else if (debug > 2) {
-			printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n",
-					dev->name, i*5);
-		}
-		/* Tx and Rx processes stopped */
 
 		writel(0, ioaddr + IntrEnable);
-		/* all irq events disabled. */
-		spin_unlock_irq(&np->lock);
-
-		synchronize_irq();
+		natsemi_stop_rxtx(dev);
+		netif_stop_queue(dev);
+		netif_device_detach(dev);
 
+		spin_unlock_irq(&np->lock);
+		enable_irq(dev->irq);
+		
 		/* Update the error counts. */
 		__get_stats(dev);
 
 		/* pci_power_off(pdev, -1); */
 		drain_ring(dev);
+	} else {
+		netif_device_detach(dev);
 	}
+	rtnl_unlock();
 	return 0;
 }
 
@@ -1520,18 +1575,27 @@
 	struct net_device *dev = pci_get_drvdata (pdev);
 	struct netdev_private *np = dev->priv;
 
-	if (netif_running (dev)) {
+	rtnl_lock();
+	if (netif_device_present(dev))
+		goto out;
+	if (netif_running(dev)) {
 		pci_enable_device(pdev);
 	/*	pci_power_on(pdev); */
 		
 		natsemi_reset(dev);
 		init_ring(dev);
+		spin_lock_irq(&np->lock);
 		init_registers(dev);
+		netif_device_attach(dev);
+		spin_unlock_irq(&np->lock);
 
 		np->timer.expires = jiffies + 1*HZ;
 		add_timer(&np->timer);
+	} else {
+		netif_device_attach(dev);
 	}
-	netif_device_attach(dev);
+out:
+	rtnl_unlock();
 	return 0;
 }
 

--------------FAB27239A9C021B21203DBD6--


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/