--- 2.4/drivers/net/natsemi.c Sat Jul 7 13:06:04 2001
+++ build-2.4/drivers/net/natsemi.c Wed Jul 11 20:06:34 2001
@@ -326,6 +326,8 @@
IntrNormalSummary=0x0251, IntrAbnormalSummary=0xED20,
};
+#define DEFAULT_INTR 0x00f1cd65
+
/* Bits in the RxMode register. */
enum rx_mode_bits {
AcceptErr=0x20, AcceptRunt=0x10,
@@ -388,6 +390,7 @@
static int eeprom_read(long ioaddr, int location);
static int mdio_read(struct net_device *dev, int phy_id, int location);
static void natsemi_reset(struct net_device *dev);
+static void natsemi_stop_rxtx(struct net_device *dev);
static int netdev_open(struct net_device *dev);
static void check_link(struct net_device *dev);
static void netdev_timer(unsigned long data);
@@ -640,7 +643,26 @@
}
}
-
+static void natsemi_stop_rxtx(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ int i;
+
+ writel(RxOff | TxOff, ioaddr + ChipCmd);
+ for(i=0;i< NATSEMI_HW_TIMEOUT;i++) {
+ if ((readl(ioaddr + ChipCmd) & (TxOn|RxOn)) == 0)
+ break;
+ udelay(5);
+ }
+ if (i==NATSEMI_HW_TIMEOUT && debug) {
+ printk(KERN_INFO "%s: Tx/Rx process did not stop in %d usec.\n",
+ dev->name, i*5);
+ } else if (debug > 2) {
+ printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n",
+ dev->name, i*5);
+ }
+}
+
static int netdev_open(struct net_device *dev)
{
struct netdev_private *np = dev->priv;
@@ -662,7 +684,9 @@
return i;
}
init_ring(dev);
+ spin_lock_irq(&np->lock);
init_registers(dev);
+ spin_unlock_irq(&np->lock);
netif_start_queue(dev);
@@ -798,7 +822,7 @@
__set_rx_mode(dev);
/* Enable interrupts by setting the interrupt mask. */
- writel(IntrNormalSummary | IntrAbnormalSummary | 0x1f, ioaddr + IntrMask);
+ writel(DEFAULT_INTR, ioaddr + IntrMask);
writel(1, ioaddr + IntrEnable);
writel(RxOn | TxOn, ioaddr + ChipCmd);
@@ -825,30 +849,51 @@
add_timer(&np->timer);
}
-static void tx_timeout(struct net_device *dev)
+static void dump_ring(struct net_device *dev)
{
struct netdev_private *np = dev->priv;
- long ioaddr = dev->base_addr;
-
- printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
- " resetting...\n", dev->name, (int)readl(ioaddr + TxRingPtr));
- {
+ if (debug > 2) {
int i;
- printk(KERN_DEBUG " Rx ring %p: ", np->rx_ring);
- for (i = 0; i < RX_RING_SIZE; i++)
- printk(" %8.8x", (unsigned int)np->rx_ring[i].cmd_status);
- printk("\n"KERN_DEBUG" Tx ring %p: ", np->tx_ring);
+ printk(KERN_DEBUG " Tx ring at %8.8x:\n",
+ (int)np->tx_ring);
for (i = 0; i < TX_RING_SIZE; i++)
- printk(" %4.4x", np->tx_ring[i].cmd_status);
- printk("\n");
+ printk(KERN_DEBUG " #%d desc. %8.8x %8.8x %8.8x.\n",
+ i, np->tx_ring[i].next_desc,
+ np->tx_ring[i].cmd_status, np->tx_ring[i].addr);
+ printk(KERN_DEBUG " Rx ring %8.8x:\n",
+ (int)np->rx_ring);
+ for (i = 0; i < RX_RING_SIZE; i++) {
+ printk(KERN_DEBUG " #%d desc. %8.8x %8.8x %8.8x.\n",
+ i, np->rx_ring[i].next_desc,
+ np->rx_ring[i].cmd_status, np->rx_ring[i].addr);
+ }
}
+}
+
+static void tx_timeout(struct net_device *dev)
+{
+ struct netdev_private *np = dev->priv;
+ long ioaddr = dev->base_addr;
+
+
+ disable_irq(dev->irq);
spin_lock_irq(&np->lock);
- natsemi_reset(dev);
- drain_ring(dev);
- init_ring(dev);
- init_registers(dev);
+ if (netif_device_present(dev)) {
+ printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
+ " resetting...\n", dev->name, readl(ioaddr + IntrStatus));
+ dump_ring(dev);
+
+ natsemi_reset(dev);
+ drain_ring(dev);
+ init_ring(dev);
+ init_registers(dev);
+ } else {
+ printk(KERN_WARNING "%s: tx_timeout while in suspended state?\n",
+ dev->name);
+ }
spin_unlock_irq(&np->lock);
+ enable_irq(dev->irq);
dev->trans_start = jiffies;
np->stats.tx_errors++;
@@ -879,14 +924,17 @@
np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
np->rx_head_desc = &np->rx_ring[0];
+ /* Please be carefull before changing this loop - at least gcc-2.95.1
+ * miscompiles it otherwise.
+ */
/* Initialize all Rx descriptors. */
for (i = 0; i < RX_RING_SIZE; i++) {
- np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma+sizeof(struct netdev_desc)*(i+1));
+ np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma
+ +sizeof(struct netdev_desc)
+ *((i+1)%RX_RING_SIZE));
np->rx_ring[i].cmd_status = cpu_to_le32(DescOwn);
np->rx_skbuff[i] = NULL;
}
- /* Mark the last entry as wrapping the ring. */
- np->rx_ring[i-1].next_desc = cpu_to_le32(np->ring_dma);
/* Fill in the Rx buffers. Handle allocation failure gracefully. */
for (i = 0; i < RX_RING_SIZE; i++) {
@@ -898,18 +946,18 @@
np->rx_dma[i] = pci_map_single(np->pci_dev,
skb->data, skb->len, PCI_DMA_FROMDEVICE);
np->rx_ring[i].addr = cpu_to_le32(np->rx_dma[i]);
- np->rx_ring[i].cmd_status = cpu_to_le32(DescIntr | np->rx_buf_sz);
+ np->rx_ring[i].cmd_status = cpu_to_le32(np->rx_buf_sz);
}
np->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
for (i = 0; i < TX_RING_SIZE; i++) {
np->tx_skbuff[i] = NULL;
np->tx_ring[i].next_desc = cpu_to_le32(np->ring_dma
- +sizeof(struct netdev_desc)*(i+1+RX_RING_SIZE));
+ +sizeof(struct netdev_desc)
+ *((i+1)%TX_RING_SIZE+RX_RING_SIZE));
np->tx_ring[i].cmd_status = 0;
}
- np->tx_ring[i-1].next_desc = cpu_to_le32(np->ring_dma
- +sizeof(struct netdev_desc)*(RX_RING_SIZE));
+ dump_ring(dev);
}
static void drain_ring(struct net_device *dev)
@@ -968,25 +1016,25 @@
np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]);
spin_lock_irq(&np->lock);
-
-#if 0
- np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | DescIntr | skb->len);
-#else
- np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len);
-#endif
- /* StrongARM: Explicitly cache flush np->tx_ring and skb->data,skb->len. */
- wmb();
- np->cur_tx++;
- if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) {
- netdev_tx_done(dev);
- if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1)
- netif_stop_queue(dev);
+
+ if (netif_device_present(dev)) {
+ np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len);
+ /* StrongARM: Explicitly cache flush np->tx_ring and skb->data,skb->len. */
+ wmb();
+ np->cur_tx++;
+ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) {
+ netdev_tx_done(dev);
+ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1)
+ netif_stop_queue(dev);
+ }
+ /* Wake the potentially-idle transmit channel. */
+ writel(TxOn, dev->base_addr + ChipCmd);
+ } else {
+ dev_kfree_skb_irq(skb);
+ np->stats.tx_dropped++;
}
spin_unlock_irq(&np->lock);
- /* Wake the potentially-idle transmit channel. */
- writel(TxOn, dev->base_addr + ChipCmd);
-
dev->trans_start = jiffies;
if (debug > 4) {
@@ -1035,9 +1083,8 @@
/* The ring is no longer full, wake queue. */
netif_wake_queue(dev);
}
- spin_unlock(&np->lock);
-
}
+
/* The interrupt handler does all of the Rx thread work and cleans up
after the Tx thread. */
static void intr_handler(int irq, void *dev_instance, struct pt_regs *rgs)
@@ -1049,7 +1096,9 @@
ioaddr = dev->base_addr;
np = dev->priv;
-
+
+ if (!netif_device_present(dev))
+ return;
do {
/* Reading automatically acknowledges all int sources. */
u32 intr_status = readl(ioaddr + IntrStatus);
@@ -1173,7 +1222,7 @@
np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]);
}
np->rx_ring[entry].cmd_status =
- cpu_to_le32(DescIntr | np->rx_buf_sz);
+ cpu_to_le32(np->rx_buf_sz);
}
/* Restart Rx engine if stopped. */
@@ -1229,20 +1278,20 @@
/* The chip only need report frame silently dropped. */
np->stats.rx_crc_errors += readl(ioaddr + RxCRCErrs);
- np->stats.rx_missed_errors += readl(ioaddr + RxMissed);
+ np->stats.rx_missed_errors += readl(ioaddr + RxMissed);
}
static struct net_device_stats *get_stats(struct net_device *dev)
{
struct netdev_private *np = dev->priv;
- /* The chip only need report frame silently dropped. */
spin_lock_irq(&np->lock);
- __get_stats(dev);
+ if (netif_running(dev) && netif_device_present(dev))
+ __get_stats(dev);
spin_unlock_irq(&np->lock);
-
return &np->stats;
}
+
/* The little-endian AUTODIN II ethernet CRC calculations.
A big-endian version is also available.
This is slow but compact code. Do not use this routine for bulk data,
@@ -1335,13 +1384,13 @@
}
writel(rx_mode, ioaddr + RxFilterAddr);
np->cur_rx_mode = rx_mode;
- spin_unlock_irq(&np->lock);
}
static void set_rx_mode(struct net_device *dev)
{
struct netdev_private *np = dev->priv;
spin_lock_irq(&np->lock);
- __set_rx_mode(dev);
+ if (netif_device_present(dev))
+ __set_rx_mode(dev);
spin_unlock_irq(&np->lock);
}
@@ -1408,48 +1457,49 @@
netif_stop_queue(dev);
if (debug > 1) {
- printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.",
+ printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n",
dev->name, (int)readl(ioaddr + ChipCmd));
printk(KERN_DEBUG "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n",
dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx);
}
- /* Disable interrupts using the mask. */
- writel(0, ioaddr + IntrMask);
- writel(0, ioaddr + IntrEnable);
- writel(2, ioaddr + StatsCtrl); /* Freeze Stats */
-
- /* Stop the chip's Tx and Rx processes. */
- writel(RxOff | TxOff, ioaddr + ChipCmd);
-
del_timer_sync(&np->timer);
-#ifdef __i386__
- if (debug > 2) {
- int i;
- printk("\n"KERN_DEBUG" Tx ring at %8.8x:\n",
- (int)np->tx_ring);
- for (i = 0; i < TX_RING_SIZE; i++)
- printk(" #%d desc. %8.8x %8.8x.\n",
- i, np->tx_ring[i].cmd_status, np->tx_ring[i].addr);
- printk("\n"KERN_DEBUG " Rx ring %8.8x:\n",
- (int)np->rx_ring);
- for (i = 0; i < RX_RING_SIZE; i++) {
- printk(KERN_DEBUG " #%d desc. %8.8x %8.8x\n",
- i, np->rx_ring[i].cmd_status, np->rx_ring[i].addr);
- }
+ disable_irq(dev->irq);
+ spin_lock_irq(&np->lock);
+ if (netif_device_present(dev)) {
+ writel(0, ioaddr + IntrMask);
+ writel(0, ioaddr + IntrEnable);
+ writel(2, ioaddr + StatsCtrl); /* Freeze Stats */
+
+ natsemi_stop_rxtx(dev);
+ __get_stats(dev);
}
-#endif /* __i386__ debugging only */
+ spin_unlock_irq(&np->lock);
+ /* race: shared irq and as most nics the DP83815
+ * reports _all_ interrupt conditions in IntrStatus, even
+ * disabled ones.
+ * packet received after disable_irq, but before stop_rxtx
+ * --> race. intr_handler would restart the rx process.
+ * netif_device_{de,a}tach around {enable,free}_irq.
+ */
+ netif_device_detach(dev);
+ enable_irq(dev->irq);
free_irq(dev->irq, dev);
+ netif_device_attach(dev);
+
+ dump_ring(dev);
drain_ring(dev);
free_ring(dev);
- /* Restore PME enable bit */
- writel(np->SavedClkRun, ioaddr + ClkRun);
+ if (netif_device_present(dev)) {
+ /* Restore PME enable bit */
+ writel(np->SavedClkRun, ioaddr + ClkRun);
#if 0
- writel(0x0200, ioaddr + ChipConfig); /* Power down Xcvr. */
+ writel(0x0200, ioaddr + ChipConfig); /* Power down Xcvr. */
#endif
+ }
return 0;
}
@@ -1468,49 +1518,54 @@
#ifdef CONFIG_PM
+/*
+ * suspend/resume synchronization:
+ * entry points:
+ * netdev_open, netdev_close, netdev_ioctl, set_rx_mode, intr_handler,
+ * start_tx, tx_timeout
+ * - no function accesses the hardware without checking netif_device_present().
+ * the check occurs under spin_lock_irq(&np->lock);
+ * exceptions:
+ * * netdev_ioctl, netdev_open.
+ * net/core checks netif_device_present() before calling them.
+ * * netdev_timer: timer stopped by natsemi_suspend.
+ * * intr_handler: doesn't acquire the spinlock. suspend calls
+ * disable_irq() to enforce synchronization.
+ *
+ * netif_device_detach must occur under spin_unlock_irq(), interrupts from a detached
+ * device would cause an irq storm.
+ */
+
static int natsemi_suspend (struct pci_dev *pdev, u32 state)
{
struct net_device *dev = pci_get_drvdata (pdev);
struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
- netif_device_detach(dev);
- /* no more calls to tx_timeout, hard_start_xmit, set_rx_mode */
rtnl_lock();
- rtnl_unlock();
- /* noone within ->open */
if (netif_running (dev)) {
- int i;
del_timer_sync(&np->timer);
- /* no more link beat timer calls */
+
+ disable_irq(dev->irq);
spin_lock_irq(&np->lock);
- writel(RxOff | TxOff, ioaddr + ChipCmd);
- for(i=0;i< NATSEMI_HW_TIMEOUT;i++) {
- if ((readl(ioaddr + ChipCmd) & (TxOn|RxOn)) == 0)
- break;
- udelay(5);
- }
- if (i==NATSEMI_HW_TIMEOUT && debug) {
- printk(KERN_INFO "%s: Tx/Rx process did not stop in %d usec.\n",
- dev->name, i*5);
- } else if (debug > 2) {
- printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n",
- dev->name, i*5);
- }
- /* Tx and Rx processes stopped */
writel(0, ioaddr + IntrEnable);
- /* all irq events disabled. */
- spin_unlock_irq(&np->lock);
-
- synchronize_irq();
+ natsemi_stop_rxtx(dev);
+ netif_stop_queue(dev);
+ netif_device_detach(dev);
+ spin_unlock_irq(&np->lock);
+ enable_irq(dev->irq);
+
/* Update the error counts. */
__get_stats(dev);
/* pci_power_off(pdev, -1); */
drain_ring(dev);
+ } else {
+ netif_device_detach(dev);
}
+ rtnl_unlock();
return 0;
}
@@ -1520,18 +1575,27 @@
struct net_device *dev = pci_get_drvdata (pdev);
struct netdev_private *np = dev->priv;
- if (netif_running (dev)) {
+ rtnl_lock();
+ if (netif_device_present(dev))
+ goto out;
+ if (netif_running(dev)) {
pci_enable_device(pdev);
/* pci_power_on(pdev); */
natsemi_reset(dev);
init_ring(dev);
+ spin_lock_irq(&np->lock);
init_registers(dev);
+ netif_device_attach(dev);
+ spin_unlock_irq(&np->lock);
np->timer.expires = jiffies + 1*HZ;
add_timer(&np->timer);
+ } else {
+ netif_device_attach(dev);
}
- netif_device_attach(dev);
+out:
+ rtnl_unlock();
return 0;
}
On Wed, 11 Jul 2001, Manfred Spraul wrote:
> The rx setup in init_ring() in drivers/net/natsemi.c in 2.4.6 is
> miscompiled by several gcc-2.95 versions.
>
> I could reproduce it with 2.95.1, and I received bug reports with
> gcc version 2.95.2 20000220 (Debian GNU/Linux)
> gcc 2.95.3 19991030 from Mandrake 7.2
I don't agree with all of the patch, but I can address this specific point.
> egcs-1.12 and rh gcc 2.96-85 are not affected.
My version had the code structured as
/* Initialize all Rx descriptors. */
for (i = 0; i < RX_RING_SIZE; i++) {
np->rx_ring[i].next_desc = virt_to_le32desc(&np->rx_ring[i+1]);
np->rx_ring[i].cmd_status = DescOwn;
np->rx_skbuff[i] = 0;
}
This code does not trigger the difference in compiler behavior. (I'm not
certain that the less-than-transparent behavior could be accurately
called a bug.)
I realize the 2.4 code was changed to have the descriptor ring base
be pre-translated from a virtual address to PCI bus-accessable physical
memory address, and used offsets from that base. I'm pointing out that
this problem doesn't exist in the 2.2. Note that the code above
explicitly translates each descriptor ring entry to a physical address
individually.
> The patch also cleans up the suspend/resume synchronization and removes
> 2 superflous (& wrong) spin_unlock calls.
The (large) patch seems to add some unnecessary locking.
Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993
Donald Becker wrote:
>
> On Wed, 11 Jul 2001, Manfred Spraul wrote:
>
> > The rx setup in init_ring() in drivers/net/natsemi.c in 2.4.6 is
> > miscompiled by several gcc-2.95 versions.
> >
> > I could reproduce it with 2.95.1, and I received bug reports with
> > gcc version 2.95.2 20000220 (Debian GNU/Linux)
> > gcc 2.95.3 19991030 from Mandrake 7.2
>
> I don't agree with all of the patch, but I can address this specific point.
>
> > egcs-1.12 and rh gcc 2.96-85 are not affected.
>
> My version had the code structured as
>
> /* Initialize all Rx descriptors. */
> for (i = 0; i < RX_RING_SIZE; i++) {
> np->rx_ring[i].next_desc = virt_to_le32desc(&np->rx_ring[i+1]);
> np->rx_ring[i].cmd_status = DescOwn;
> np->rx_skbuff[i] = 0;
> }
>
> This code does not trigger the difference in compiler behavior. (I'm not
> certain that the less-than-transparent behavior could be accurately
> called a bug.)
>
It is a bug. np->rx_ring[i].next_desc is initialized by 2.4.6 with
gcc-2.95.1 to
[0]: 0x...210
[1]: 0x00000000
[2]: 0x...220
[3]: 0x...230
[4]: 0x...240.
etc.
> I realize the 2.4 code was changed to have the descriptor ring base
> be pre-translated from a virtual address to PCI bus-accessable physical
> memory address, and used offsets from that base. I'm pointing out that
> this problem doesn't exist in the 2.2. Note that the code above
> explicitly translates each descriptor ring entry to a physical address
> individually.
>
Correct. The problem was introduce by the virt_to_desc to pci_dma
conversion.
> > The patch also cleans up the suspend/resume synchronization and removes
> > 2 superflous (& wrong) spin_unlock calls.
>
> The (large) patch seems to add some unnecessary locking.
>
It's possible that some locking outside of the tx and rx codepath is not
required, I'm concentrating on a race free suspend & resume
implementation.
It shouldn't affect the critical functions:
rx interrupts run without a spinlock, and start_tx only acquires the
lock around "status = DescOwn;np->cur_rx++". netdev_tx_done() during
start_tx is an idea for tx interrupt mitigation, it's not yet finished.
--
Manfred