Hi Jeff,
This update adds sparc64 support (tested!) and better error and stats
handling. It also removes the experimental tag from the driver, since I
don't consider it experimental anymore.
Please apply.
Thanks,
Ion
--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.
--------------------------------------
--- linux/drivers/net/Config.in Wed Mar 6 14:14:37 2002
+++ linux-2.4.18-rc4/drivers/net/Config.in Wed Mar 6 14:55:36 2002
@@ -153,7 +153,7 @@
fi
if [ "$CONFIG_NET_PCI" = "y" ]; then
dep_tristate ' AMD PCnet32 PCI support' CONFIG_PCNET32 $CONFIG_PCI
- dep_tristate ' Adaptec Starfire support (EXPERIMENTAL)' CONFIG_ADAPTEC_STARFIRE $CONFIG_PCI $CONFIG_EXPERIMENTAL
+ dep_tristate ' Adaptec Starfire/DuraLAN support' CONFIG_ADAPTEC_STARFIRE $CONFIG_PCI
if [ "$CONFIG_ISA" = "y" -o "$CONFIG_EISA" = "y" ]; then
dep_tristate ' Ansel Communications EISA 3200 support (EXPERIMENTAL)' CONFIG_AC3200 $CONFIG_EXPERIMENTAL
fi
--- linux/drivers/net/starfire.c Wed Mar 6 14:14:38 2002
+++ linux-2.4.18-rc4/drivers/net/starfire.c Wed Mar 6 16:32:58 2002
@@ -96,13 +96,18 @@
LK1.3.5 (jgarzik)
- ethtool NWAY_RST, GLINK, [GS]MSGLVL support
+ LK1.3.6 (Ion Badulescu)
+ - Sparc64 support and fixes
+ - Better stats and error handling
+
TODO:
- implement tx_timeout() properly
+ - VLAN support
*/
#define DRV_NAME "starfire"
-#define DRV_VERSION "1.03+LK1.3.5"
-#define DRV_RELDATE "November 17, 2001"
+#define DRV_VERSION "1.03+LK1.3.6"
+#define DRV_RELDATE "March 6, 2002"
#include <linux/version.h>
#include <linux/module.h>
@@ -127,8 +132,11 @@
* for this driver to really use the firmware. Note that Rx/Tx
* hardware TCP checksumming is not possible without the firmware.
*
- * I'm currently [Feb 2001] talking to Adaptec about this redistribution
- * issue. Stay tuned...
+ * If Adaptec could allow redistribution of the firmware (even in binary
+ * format), life would become a lot easier. Unfortunately, I've lost my
+ * Adaptec contacts, so progress on this front is rather unlikely to
+ * occur. If anybody from Adaptec reads this and can help with this matter,
+ * please let me know...
*/
#undef HAS_FIRMWARE
/*
@@ -608,6 +616,7 @@
long ioaddr;
int drv_flags, io_size;
int boguscnt;
+ u16 cmd;
u8 cache;
/* when built into the kernel, we only print version if device is found */
@@ -643,14 +652,22 @@
goto err_out_free_netdev;
}
- ioaddr = (long) ioremap (ioaddr, io_size);
+ /* ioremap is borken in Linux-2.2.x/sparc64 */
+#if !defined(CONFIG_SPARC64) || LINUX_VERSION_CODE > 0x20300
+ ioaddr = (long) ioremap(ioaddr, io_size);
if (!ioaddr) {
printk (KERN_ERR DRV_NAME " %d: cannot remap 0x%x @ 0x%lx, aborting\n",
card_idx, io_size, ioaddr);
goto err_out_free_res;
}
+#endif /* !CONFIG_SPARC64 || Linux 2.3.0+ */
+
+ pci_set_master(pdev);
- pci_set_master (pdev);
+ /* enable MWI -- it vastly improves Rx performance on sparc64 */
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ cmd |= PCI_COMMAND_INVALIDATE;
+ pci_write_config_word(pdev, PCI_COMMAND, cmd);
/* set PCI cache size */
pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache);
@@ -669,7 +686,7 @@
/* Serial EEPROM reads are hidden by the hardware. */
for (i = 0; i < 6; i++)
- dev->dev_addr[i] = readb(ioaddr + EEPROMCtrl + 20-i);
+ dev->dev_addr[i] = readb(ioaddr + EEPROMCtrl + 20 - i);
#if ! defined(final_version) /* Dump the EEPROM contents during development. */
if (debug > 4)
@@ -931,7 +948,7 @@
/* Fill both the unused Tx SA register and the Rx perfect filter. */
for (i = 0; i < 6; i++)
- writeb(dev->dev_addr[i], ioaddr + StationAddr + 5-i);
+ writeb(dev->dev_addr[i], ioaddr + StationAddr + 5 - i);
for (i = 0; i < 16; i++) {
u16 *eaddrs = (u16 *)dev->dev_addr;
long setup_frm = ioaddr + PerfFilterTable + i * 16;
@@ -978,9 +995,9 @@
#ifdef HAS_FIRMWARE
/* Load Rx/Tx firmware into the frame processors */
for (i = 0; i < FIRMWARE_RX_SIZE * 2; i++)
- writel(cpu_to_le32(firmware_rx[i]), ioaddr + RxGfpMem + i * 4);
+ writel(firmware_rx[i], ioaddr + RxGfpMem + i * 4);
for (i = 0; i < FIRMWARE_TX_SIZE * 2; i++)
- writel(cpu_to_le32(firmware_tx[i]), ioaddr + TxGfpMem + i * 4);
+ writel(firmware_tx[i], ioaddr + TxGfpMem + i * 4);
/* Enable the Rx and Tx units, and the Rx/Tx frame processors. */
writel(0x003F, ioaddr + GenCtrl);
#else /* not HAS_FIRMWARE */
@@ -1155,8 +1172,8 @@
np->tx_ring[entry].first_addr = cpu_to_le32(np->tx_info[entry].first_mapping);
#ifdef ZEROCOPY
- np->tx_ring[entry].first_len = cpu_to_le32(skb_first_frag_len(skb));
- np->tx_ring[entry].total_len = cpu_to_le32(skb->len);
+ np->tx_ring[entry].first_len = cpu_to_le16(skb_first_frag_len(skb));
+ np->tx_ring[entry].total_len = cpu_to_le16(skb->len);
/* Add "| TxDescIntr" to generate Tx-done interrupts. */
np->tx_ring[entry].status = cpu_to_le32(TxDescID | TxCRCEn);
np->tx_ring[entry].nbufs = cpu_to_le32(skb_shinfo(skb)->nr_frags + 1);
@@ -1169,8 +1186,10 @@
np->tx_ring[entry].status |= cpu_to_le32(TxRingWrap | TxDescIntr);
#ifdef ZEROCOPY
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_HW) {
np->tx_ring[entry].status |= cpu_to_le32(TxCalTCP);
+ np->stats.tx_compressed++;
+ }
#endif /* ZEROCOPY */
if (debug > 5) {
@@ -1448,6 +1467,7 @@
#if defined(full_rx_status) || defined(csum_rx_status)
if (le32_to_cpu(np->rx_done_q[np->rx_done].status2) & 0x01000000) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ np->stats.rx_compressed++;
}
/*
* This feature doesn't seem to be working, at least
@@ -1579,12 +1599,17 @@
printk(KERN_NOTICE "%s: Increasing Tx FIFO threshold to %d bytes\n",
dev->name, np->tx_threshold * 16);
}
- if ((intr_status & ~(IntrNormalMask | IntrAbnormalSummary | IntrLinkChange | IntrStatsMax | IntrTxDataLow | IntrPCIPad)) && debug)
+ if (intr_status & IntrRxGFPDead) {
+ np->stats.rx_fifo_errors++;
+ np->stats.rx_errors++;
+ }
+ if (intr_status & (IntrNoTxCsum | IntrDMAErr)) {
+ np->stats.tx_fifo_errors++;
+ np->stats.tx_errors++;
+ }
+ if ((intr_status & ~(IntrNormalMask | IntrAbnormalSummary | IntrLinkChange | IntrStatsMax | IntrTxDataLow | IntrRxGFPDead | IntrNoTxCsum | IntrPCIPad)) && debug)
printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n",
dev->name, intr_status);
- /* Hmmmmm, it's not clear how to recover from DMA faults. */
- if (intr_status & IntrDMAErr)
- np->stats.tx_fifo_errors++;
}
Thanks, applied.
There is a bugfix, which I will make locally before submitting:
PCI_COMMAND_INVALIDATE should be enabled -after- messing with
PCI_CACHE_LINE_SIZE.
--
Jeff Garzik | Usenet Rule #2 (John Gilmore): "The Net interprets
Building 1024 | censorship as damage and routes around it."
MandrakeSoft |
On Wed, 6 Mar 2002, Jeff Garzik wrote:
> There is a bugfix, which I will make locally before submitting:
> PCI_COMMAND_INVALIDATE should be enabled -after- messing with
> PCI_CACHE_LINE_SIZE.
I didn't find anything in the starfire chipset's documentation about this,
so is there a deeper reason for this ordering? As far as I know, most if
not all x86 PCI chipsets silently map MWI to MW, so it should only matter
for non-x86 platforms, right?
And, in general, are there any other tricks one can do to speed up the PCI
transactions on non-x86 platforms? I'm still getting occasional overruns
on sparc64 (card receiving packets faster than it can push them over PCI),
which is somewhat disturbing..
Thanks,
Ion
--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.
Ion Badulescu wrote:
>
> On Wed, 6 Mar 2002, Jeff Garzik wrote:
>
> > There is a bugfix, which I will make locally before submitting:
> > PCI_COMMAND_INVALIDATE should be enabled -after- messing with
> > PCI_CACHE_LINE_SIZE.
>
> I didn't find anything in the starfire chipset's documentation about this,
> so is there a deeper reason for this ordering? As far as I know, most if
> not all x86 PCI chipsets silently map MWI to MW, so it should only matter
> for non-x86 platforms, right?
It's more of a PCI requirement than a Starfire requirement.
And there are plenty of ia32 platforms that benefit from MWI, too.
Often it's server mobos that support MWI, but some cheaper ones do too.
> And, in general, are there any other tricks one can do to speed up the PCI
> transactions on non-x86 platforms? I'm still getting occasional overruns
> on sparc64 (card receiving packets faster than it can push them over PCI),
> which is somewhat disturbing..
Dynamically tune your RX and TX DMA burst settings when you notice these
conditions... It is indeed possible to saturate PCI bus bandwidth.
Jeff
--
Jeff Garzik | Usenet Rule #2 (John Gilmore): "The Net interprets
Building 1024 | censorship as damage and routes around it."
MandrakeSoft |
From: Jeff Garzik <[email protected]>
Date: Wed, 06 Mar 2002 17:01:45 -0500
> And, in general, are there any other tricks one can do to speed up the PCI
> transactions on non-x86 platforms? I'm still getting occasional overruns
> on sparc64 (card receiving packets faster than it can push them over PCI),
> which is somewhat disturbing..
Dynamically tune your RX and TX DMA burst settings when you notice these
conditions... It is indeed possible to saturate PCI bus bandwidth.
On sparc64 you should set the burst settings to 64-byte read/write
bursts because the PCI chipset is going to disconnect you on 64-byte
boundaries anyways. If the chip is bursting more than this, you
are wasting lots of PCI cycles with the retries done after the
disconnect.
Also make sure to use PCI READ MULTIPLE commands for DMA if the chip
provides such an option, this helps performance on many PCI
controllers to no end.
On Wed, 6 Mar 2002, David S. Miller wrote:
> On sparc64 you should set the burst settings to 64-byte read/write
> bursts because the PCI chipset is going to disconnect you on 64-byte
> boundaries anyways. If the chip is bursting more than this, you
> are wasting lots of PCI cycles with the retries done after the
> disconnect.
Ahh.. indeed, changing the burst size to 64 bytes (from the default 128)
makes a big difference on my ultra5, thanks for the hint. Does it make any
sense to differentiate between platforms, or is 64 a good all-around
value?
> Also make sure to use PCI READ MULTIPLE commands for DMA if the chip
> provides such an option, this helps performance on many PCI
> controllers to no end.
MRM (and MRL) seem to be enabled by default, although the chip docs are a
bit unclear about it.
Thanks,
Ion
--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.
From: Ion Badulescu <[email protected]>
Date: Thu, 7 Mar 2002 14:13:06 -0500 (EST)
Ahh.. indeed, changing the burst size to 64 bytes (from the default 128)
makes a big difference on my ultra5, thanks for the hint. Does it make any
sense to differentiate between platforms, or is 64 a good all-around
value?
Jeff and I want to add some pci_optimal_burst_size() or whatever
interface so that drivers don't get stuffed with ifdefs, but for now
use CONFIG_SPARC64 for this :-) I think on Alpha a similar situation
exists and you should use 128 instead of 64 there.
Franks a lot,
David S. Miller
[email protected]