2001-02-07 18:42:53

by Dave Jones

[permalink] [raw]
Subject: [PATCH] starfire reads irq before pci_enable_device.


Hi Alan, Donald,

This driver should call pci_enable_device before reading
pdev->irq.

regards,

Dave.

--
| Dave Jones. http://www.suse.de/~davej
| SuSE Labs

diff -urN --exclude-from=/home/davej/.exclude linux/drivers/net/starfire.c linux-dj/drivers/net/starfire.c
--- linux/drivers/net/starfire.c Wed Feb 7 12:42:42 2001
+++ linux-dj/drivers/net/starfire.c Wed Feb 7 18:34:22 2001
@@ -409,6 +409,9 @@
}
SET_MODULE_OWNER(dev);

+ if (pci_enable_device (pdev))
+ goto err_out_free_netdev;
+
irq = pdev->irq;

if (request_mem_region (ioaddr, io_size, dev->name) == NULL) {
@@ -416,10 +419,7 @@
card_idx, io_size, ioaddr);
goto err_out_free_netdev;
}
-
- if (pci_enable_device (pdev))
- goto err_out_free_res;
-
+
ioaddr = (long) ioremap (ioaddr, io_size);
if (!ioaddr) {
printk (KERN_ERR "starfire %d: cannot remap 0x%x @ 0x%lx, aborting\n",


2001-02-07 19:53:13

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.


> rejected -- ioaddr assigned a value before pci_enable_device is called

Better ?

Dave.

--
| Dave Jones. http://www.suse.de/~davej
| SuSE Labs

diff -urN --exclude-from=/home/davej/.exclude linux/drivers/net/starfire.c linux-dj/drivers/net/starfire.c
--- linux/drivers/net/starfire.c Wed Feb 7 12:42:42 2001
+++ linux-dj/drivers/net/starfire.c Wed Feb 7 19:47:54 2001
@@ -396,12 +396,6 @@
printk(KERN_INFO "%s" KERN_INFO "%s" KERN_INFO "%s",
version1, version2, version3);

- ioaddr = pci_resource_start (pdev, 0);
- if (!ioaddr || ((pci_resource_flags (pdev, 0) & IORESOURCE_MEM) == 0)) {
- printk (KERN_ERR "starfire %d: no PCI MEM resources, aborting\n", card_idx);
- return -ENODEV;
- }
-
dev = init_etherdev(NULL, sizeof(*np));
if (!dev) {
printk (KERN_ERR "starfire %d: cannot alloc etherdev, aborting\n", card_idx);
@@ -409,6 +403,14 @@
}
SET_MODULE_OWNER(dev);

+ if (pci_enable_device (pdev))
+ goto err_out_free_netdev;
+
+ ioaddr = pci_resource_start (pdev, 0);
+ if (!ioaddr || ((pci_resource_flags (pdev, 0) & IORESOURCE_MEM) == 0)) {
+ printk (KERN_ERR "starfire %d: no PCI MEM resources, aborting\n", card_idx);
+ return -ENODEV;
+ }
irq = pdev->irq;

if (request_mem_region (ioaddr, io_size, dev->name) == NULL) {
@@ -416,10 +418,7 @@
card_idx, io_size, ioaddr);
goto err_out_free_netdev;
}
-
- if (pci_enable_device (pdev))
- goto err_out_free_res;
-
+
ioaddr = (long) ioremap (ioaddr, io_size);
if (!ioaddr) {
printk (KERN_ERR "starfire %d: cannot remap 0x%x @ 0x%lx, aborting\n",


2001-02-07 19:57:43

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Index: linux_2_4/drivers/net/starfire.c
diff -u linux_2_4/drivers/net/starfire.c:1.1.1.5 linux_2_4/drivers/net/starfire.c:1.1.1.5.2.5
--- linux_2_4/drivers/net/starfire.c:1.1.1.5 Sun Feb 4 09:38:52 2001
+++ linux_2_4/drivers/net/starfire.c Wed Feb 7 11:51:25 2001
@@ -88,25 +88,31 @@

#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/

+/*
+ * The ia64 doesn't allow for unaligned loads even of integers being
+ * misaligned on a 2 byte boundary. Thus always force copying of
+ * packets as the starfire doesn't allow for misaligned DMAs ;-(
+ * 23/10/2000 - Jes
+ */
+#ifdef __ia64__
+#define PKT_SHOULD_COPY(pkt_len) 1
+#else
+#define PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak)
+#endif
+
#if !defined(__OPTIMIZE__)
#warning You must compile this file with the correct options!
#warning See the last lines of the source file.
#error You must compile this driver with "-O".
#endif

-/* Include files, designed to support most kernel versions 2.0.0 and later. */
-#include <linux/version.h>
#include <linux/module.h>
-#if LINUX_VERSION_CODE < 0x20300 && defined(MODVERSIONS)
-#include <linux/modversions.h>
-#endif
-
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/errno.h>
#include <linux/ioport.h>
-#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
@@ -394,11 +400,14 @@

card_idx++;
option = card_idx < MAX_UNITS ? options[card_idx] : 0;
-
+
if (!printed_version++)
printk(KERN_INFO "%s" KERN_INFO "%s" KERN_INFO "%s",
version1, version2, version3);

+ if (pci_enable_device (pdev))
+ return -EIO;
+
ioaddr = pci_resource_start (pdev, 0);
if (!ioaddr || ((pci_resource_flags (pdev, 0) & IORESOURCE_MEM) == 0)) {
printk (KERN_ERR "starfire %d: no PCI MEM resources, aborting\n", card_idx);
@@ -410,6 +419,7 @@
printk (KERN_ERR "starfire %d: cannot alloc etherdev, aborting\n", card_idx);
return -ENOMEM;
}
+ SET_MODULE_OWNER(dev);

irq = pdev->irq;

@@ -419,9 +429,6 @@
goto err_out_free_netdev;
}

- if (pci_enable_device (pdev))
- goto err_out_free_res;
-
ioaddr = (long) ioremap (ioaddr, io_size);
if (!ioaddr) {
printk (KERN_ERR "starfire %d: cannot remap 0x%x @ 0x%lx, aborting\n",
@@ -540,19 +547,15 @@

static int netdev_open(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int i, retval;

/* Do we ever need to reset the chip??? */

- MOD_INC_USE_COUNT;
-
retval = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);
- if (retval) {
- MOD_DEC_USE_COUNT;
+ if (retval)
return retval;
- }

/* Disable the Rx and Tx, and reset the chip. */
writel(0, ioaddr + GenCtrl);
@@ -583,7 +586,6 @@
if (np->rx_ring)
pci_free_consistent(np->pci_dev, PAGE_SIZE,
np->rx_ring, np->rx_ring_dma);
- MOD_DEC_USE_COUNT;
return -ENOMEM;
}

@@ -669,7 +671,7 @@

static void check_duplex(struct net_device *dev, int startup)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int new_tx_mode ;

@@ -702,7 +704,7 @@
static void netdev_timer(unsigned long data)
{
struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int next_tick = 60*HZ; /* Check before driver release. */

@@ -730,7 +732,7 @@

static void tx_timeout(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;

printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
@@ -757,14 +759,14 @@

dev->trans_start = jiffies;
np->stats.tx_errors++;
- return;
+ netif_wake_queue(dev);
}


/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
static void init_ring(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int i;

np->tx_full = 0;
@@ -812,8 +814,8 @@

static int start_tx(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
- unsigned entry;
+ struct netdev_private *np = dev->priv;
+ unsigned int entry;

/* Caution: the write order is important here, set the field
with the "ownership" bits last. */
@@ -843,6 +845,9 @@
#endif

/* Non-x86: explicitly flush descriptor cache lines here. */
+ /* Ensure everything is written back above before the transmit is
+ initiated. - Jes */
+ wmb();

/* Update the producer index. */
writel(++entry, dev->base_addr + TxProducerIdx);
@@ -977,7 +982,7 @@
for clarity and better register allocation. */
static int netdev_rx(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx;
u32 desc_status;

@@ -1015,7 +1020,7 @@
#endif
/* Check if the packet is long enough to accept without copying
to a minimally-sized skbuff. */
- if (pkt_len < rx_copybreak
+ if (PKT_SHOULD_COPY(pkt_len)
&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
skb->dev = dev;
skb_reserve(skb, 2); /* 16 byte align the IP header */
@@ -1037,14 +1042,6 @@
temp = skb_put(skb, pkt_len);
np->rx_info[entry].skb = NULL;
np->rx_info[entry].mapping = 0;
-#ifndef final_version /* Remove after testing. */
- if (le32_to_cpu(np->rx_ring[entry].rxaddr & ~3) != ((unsigned long) temp))
- printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
- "do not match in netdev_rx: %d vs. %p / %p.\n",
- dev->name,
- le32_to_cpu(np->rx_ring[entry].rxaddr),
- skb->head, temp);
-#endif
}
#ifndef final_version /* Remove after testing. */
/* You will want this info for the initial debug. */
@@ -1107,7 +1104,7 @@

static void netdev_error(struct net_device *dev, int intr_status)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;

if (intr_status & LinkChange) {
printk(KERN_NOTICE "%s: Link changed: Autonegotiation advertising"
@@ -1136,7 +1133,7 @@
static struct net_device_stats *get_stats(struct net_device *dev)
{
long ioaddr = dev->base_addr;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;

/* This adapter architecture needs no SMP locks. */
np->stats.tx_bytes = readl(ioaddr + 0x57010);
@@ -1241,7 +1238,7 @@

static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
u16 *data = (u16 *)&rq->ifr_data;

switch(cmd) {
@@ -1279,7 +1276,7 @@
static int netdev_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int i;

netif_stop_queue(dev);
@@ -1340,8 +1337,6 @@
np->tx_info[i].skb = NULL;
np->tx_info[i].mapping = 0;
}
-
- MOD_DEC_USE_COUNT;

return 0;
}


Attachments:
starfire.patch (7.25 kB)

2001-02-07 20:34:54

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Jeff Garzik wrote:
>
> + SET_MODULE_OWNER(dev);
>
> irq = pdev->irq;
>

One question:
The code copies 'pdev->irq' into 'dev->irq'.

Is that required, who need 'dev->irq'?

> retval = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);

Can't the driver use?
retval = request_irq(np->pci_dev->irq)

--
Manfred

2001-02-08 01:52:38

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Hi Jeff,

On Wed, 07 Feb 2001 14:57:16 -0500, Jeff Garzik <[email protected]> wrote:

> Here's the patch I have, against vanilla 2.4.2-pre1, with the
> pci_enable_device preferred changes included...

Well, I decided to bite the bullet and port my zerocopy starfire
changes to the official tree, properly ifdef'ed. So here it goes,
the patch was made against 2.4.1 vanilla and includes all the
fixes from Jeff and myself that were sent to the list so far.

I've also added myself as the starfire maintainer -- I hope
nobody objects.

Alan, if you want, I can rediff this against 2.4.1-ac. I'll
also try and send a 2.2.19 patch shortly.

Thanks,
Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.
--------------------------
--- linux-2.4.vanilla/MAINTAINERS Wed Feb 7 15:45:15 2001
+++ linux-2.4-boxter/MAINTAINERS Wed Feb 7 17:38:50 2001
@@ -1166,6 +1166,11 @@
W: http://www.stallion.com
S: Supported

+STARFIRE/DURALAN NETWORK DRIVER
+P: Ion Badulescu
+M: [email protected]
+S: Maintained
+
STARMODE RADIO IP (STRIP) PROTOCOL DRIVER
W: http://mosquitonet.Stanford.EDU/strip.html
S: Unsupported ?
--- linux-2.4.vanilla/drivers/net/starfire.c Fri Aug 11 15:57:58 2000
+++ linux-2.4-boxter/drivers/net/starfire.c Wed Feb 7 17:38:19 2001
@@ -34,6 +34,10 @@

LK1.1.4 (jgarzik):
- Merge Becker version 1.03
+
+ LK1.2.1 (Ion Badulescu <[email protected]>)
+ - Support hardware Rx/Tx checksumming
+ - Use the GFP firmware taken from Adaptec's Netware driver
*/

/* These identify the driver base version and may not be removed. */
@@ -43,11 +47,36 @@
" Updates and info at http://www.scyld.com/network/starfire.html\n";

static const char version3[] =
-" (unofficial 2.4.x kernel port, version 1.1.4, August 10, 2000)\n";
+" (unofficial 2.4.x kernel port, version 1.2.1, February 07, 2001)\n";

/* The user-configurable values.
These may be modified when a driver module is loaded.*/

+/*
+ * Adaptec's license for their Novell drivers (which is where I got the
+ * firmware files) does not allow to redistribute them. Thus, we can't
+ * include them with this driver.
+ *
+ * However, an end-user is allowed to download and use them, after
+ * converting them to C header files using starfire_firmware.pl.
+ * Once that's done, the #undef must be changed into a #define
+ * for this driver to really use the firmware. Note that Rx/Tx
+ * hardware TCP checksumming is not possible without the firmware.
+ *
+ * I'm currently talking to Adaptec about this redistribution issue.
+ * Stay tuned...
+ */
+#undef HAS_FIRMWARE
+/*
+ * The current frame processor firmware fails to checksum a fragment
+ * of length 1. If and when this is fixed, the #define below can be removed.
+ */
+#define HAS_BROKEN_FIRMWARE
+/*
+ * Define this if using the driver with the zero-copy patch
+ */
+#undef ZEROCOPY
+
/* Used for tuning interrupt latency vs. overhead. */
static int interrupt_mitigation = 0x0;

@@ -88,25 +117,39 @@

#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/

+/*
+ * The ia64 doesn't allow for unaligned loads even of integers being
+ * misaligned on a 2 byte boundary. Thus always force copying of
+ * packets as the starfire doesn't allow for misaligned DMAs ;-(
+ * 23/10/2000 - Jes
+ *
+ * Neither does the Alpha. -Ion
+ */
+#if defined(__ia64__) || defined(__alpha__)
+#define PKT_SHOULD_COPY(pkt_len) 1
+#else
+#define PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak)
+#endif
+
+#ifdef ZEROCOPY
+#define skb_first_frag_len(skb) skb_headlen(skb)
+#else /* not ZEROCOPY */
+#define skb_first_frag_len(skb) (skb->len)
+#endif /* not ZEROCOPY */
+
#if !defined(__OPTIMIZE__)
#warning You must compile this file with the correct options!
#warning See the last lines of the source file.
#error You must compile this driver with "-O".
#endif

-/* Include files, designed to support most kernel versions 2.0.0 and later. */
-#include <linux/version.h>
#include <linux/module.h>
-#if LINUX_VERSION_CODE < 0x20300 && defined(MODVERSIONS)
-#include <linux/modversions.h>
-#endif
-
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/errno.h>
#include <linux/ioport.h>
-#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
@@ -117,12 +160,17 @@
#include <asm/bitops.h>
#include <asm/io.h>

+#ifdef HAS_FIRMWARE
+#include "starfire_firmware.h"
+#endif /* HAS_FIRMWARE */
+
MODULE_AUTHOR("Donald Becker <[email protected]>");
MODULE_DESCRIPTION("Adaptec Starfire Ethernet driver");
MODULE_PARM(max_interrupt_work, "i");
MODULE_PARM(mtu, "i");
MODULE_PARM(debug, "i");
MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(interrupt_mitigation, "i");
MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i");
MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");

@@ -155,8 +203,9 @@
See the Adaptec manual for the many possible structures, and options for
each structure. There are far too many to document here.

-For transmit this driver uses type 1 transmit descriptors, and relies on
-automatic minimum-length padding. It does not use the completion queue
+For transmit this driver uses type 0/1 transmit descriptors (depending
+on the presence of the zerocopy patches), and relies on automatic
+minimum-length padding. It does not use the completion queue
consumer index, but instead checks for non-zero status entries.

For receive this driver uses type 0 receive descriptors. The driver
@@ -167,14 +216,15 @@
When an incoming frame is less than RX_COPYBREAK bytes long, a fresh skbuff
is allocated and the frame is copied to the new skbuff. When the incoming
frame is larger, the skbuff is passed directly up the protocol stack.
-Buffers consumed this way are replaced by newly allocated skbuffs in a later
-phase of receive.
+Buffers consumed this way are replaced by newly allocated skbuffs in a
+later phase of receive.

A notable aspect of operation is that unaligned buffers are not permitted by
the Starfire hardware. The IP header at offset 14 in an ethernet frame thus
isn't longword aligned, which may cause problems on some machine
-e.g. Alphas. Copied frames are put into the skbuff at an offset of "+2",
-16-byte aligning the IP header.
+e.g. Alphas and IA64. For these architectures, the driver is forced to copy
+the frame into a new skbuff unconditionally. Copied frames are put into the
+skbuff at an offset of "+2", thus 16-byte aligning the IP header.

IIId. Synchronization

@@ -256,19 +306,31 @@
TxThreshold=0x500B0,
CompletionHiAddr=0x500B4, TxCompletionAddr=0x500B8,
RxCompletionAddr=0x500BC, RxCompletionQ2Addr=0x500C0,
- CompletionQConsumerIdx=0x500C4,
+ CompletionQConsumerIdx=0x500C4, RxDMACtrl=0x500D0,
RxDescQCtrl=0x500D4, RxDescQHiAddr=0x500DC, RxDescQAddr=0x500E0,
RxDescQIdx=0x500E8, RxDMAStatus=0x500F0, RxFilterMode=0x500F4,
- TxMode=0x55000,
+ TxMode=0x55000, TxGfpMem=0x58000, RxGfpMem=0x5a000,
};

/* Bits in the interrupt status/mask registers. */
enum intr_status_bits {
- IntrNormalSummary=0x8000, IntrAbnormalSummary=0x02000000,
- IntrRxDone=0x0300, IntrRxEmpty=0x10040, IntrRxPCIErr=0x80000,
- IntrTxDone=0x4000, IntrTxEmpty=0x1000, IntrTxPCIErr=0x80000,
- StatsMax=0x08000000, LinkChange=0xf0000000,
- IntrTxDataLow=0x00040000,
+ IntrLinkChange=0xf0000000, IntrStatsMax=0x08000000,
+ IntrAbnormalSummary=0x02000000, IntrGeneralTimer=0x01000000,
+ IntrSoftware=0x800000, IntrRxComplQ1Low=0x400000,
+ IntrTxComplQLow=0x200000, IntrPCI=0x100000,
+ IntrDMAErr=0x080000, IntrTxDataLow=0x040000,
+ IntrRxComplQ2Low=0x020000, IntrRxDescQ1Low=0x010000,
+ IntrNormalSummary=0x8000, IntrTxDone=0x4000,
+ IntrTxDMADone=0x2000, IntrTxEmpty=0x1000,
+ IntrEarlyRxQ2=0x0800, IntrEarlyRxQ1=0x0400,
+ IntrRxQ2Done=0x0200, IntrRxQ1Done=0x0100,
+ IntrRxGFPDead=0x80, IntrRxDescQ2Low=0x40,
+ IntrNoTxCsum=0x20, IntrTxBadID=0x10,
+ IntrHiPriTxBadID=0x08, IntrRxGfp=0x04,
+ IntrTxGfp=0x02, IntrPCIPad=0x01,
+ /* not quite bits */
+ IntrRxDone=IntrRxQ2Done | IntrRxQ1Done,
+ IntrRxEmpty=IntrRxDescQ1Low | IntrRxDescQ2Low,
};

/* Bits in the RxFilterMode register. */
@@ -277,6 +339,37 @@
AcceptMulticast=0x10, AcceptMyPhys=0xE040,
};

+/* Bits in the TxDescCtrl register. */
+enum tx_ctrl_bits {
+ TxDescSpaceUnlim=0x00, TxDescSpace32=0x10, TxDescSpace64=0x20,
+ TxDescSpace128=0x30, TxDescSpace256=0x40,
+ TxDescType0=0x00, TxDescType1=0x01, TxDescType2=0x02,
+ TxDescType3=0x03, TxDescType4=0x04,
+ TxNoDMACompletion=0x08, TxDescQ64bit=0x80,
+ TxHiPriFIFOThreshShift=24, TxPadLenShift=16,
+ TxDMABurstSizeShift=8,
+};
+
+/* Bits in the RxDescQCtrl register. */
+enum rx_ctrl_bits {
+ RxBufferLenShift=16, RxMinDescrThreshShift=0,
+ RxPrefetchMode=0x8000, Rx2048QEntries=0x4000,
+ RxVariableQ=0x2000, RxDesc64bit=0x1000,
+ RxDescQAddr64bit=0x0100,
+ RxDescSpace4=0x000, RxDescSpace8=0x100,
+ RxDescSpace16=0x200, RxDescSpace32=0x300,
+ RxDescSpace64=0x400, RxDescSpace128=0x500,
+ RxConsumerWrEn=0x80,
+};
+
+/* Bits in the RxCompletionAddr register */
+enum rx_compl_bits {
+ RxComplQAddr64bit=0x80, TxComplProducerWrEn=0x40,
+ RxComplType0=0x00, RxComplType1=0x10,
+ RxComplType2=0x20, RxComplType3=0x30,
+ RxComplThreshShift=0,
+};
+
/* The Rx and Tx buffer descriptors. */
struct starfire_rx_desc {
u32 rxaddr; /* Optionally 64 bits. */
@@ -288,27 +381,51 @@
/* Completion queue entry.
You must update the page allocation, init_ring and the shift count in rx()
if using a larger format. */
+#ifdef HAS_FIRMWARE
+#define csum_rx_status
+#endif /* HAS_FIRMWARE */
struct rx_done_desc {
u32 status; /* Low 16 bits is length. */
+#ifdef csum_rx_status
+ u32 status2; /* Low 16 bits is csum */
+#endif /* csum_rx_status */
#ifdef full_rx_status
u32 status2;
u16 vlanid;
u16 csum; /* partial checksum */
u32 timestamp;
-#endif
+#endif /* full_rx_status */
};
enum rx_done_bits {
RxOK=0x20000000, RxFIFOErr=0x10000000, RxBufQ2=0x08000000,
};

+#ifdef ZEROCOPY
+/* Type 0 Tx descriptor. */
+/* If more fragments are needed, don't forget to change the
+ descriptor spacing as well! */
+struct starfire_tx_desc {
+ u32 status;
+ u32 nbufs;
+ u32 first_addr;
+ u16 first_len;
+ u16 total_len;
+ struct {
+ u32 addr;
+ u32 len;
+ } frag[6];
+};
+#else /* not ZEROCOPY */
/* Type 1 Tx descriptor. */
struct starfire_tx_desc {
u32 status; /* Upper bits are status, lower 16 length. */
- u32 addr;
+ u32 first_addr;
};
+#endif /* not ZEROCOPY */
enum tx_desc_bits {
- TxDescID=0xB1010000, /* Also marks single fragment, add CRC. */
- TxDescIntr=0x08000000, TxRingWrap=0x04000000,
+ TxDescID=0xB0000000,
+ TxCRCEn=0x01000000, TxDescIntr=0x08000000,
+ TxRingWrap=0x04000000, TxCalTCP=0x02000000,
};
struct tx_done_report {
u32 status; /* timestamp, index. */
@@ -318,10 +435,17 @@
};

#define PRIV_ALIGN 15 /* Required alignment mask */
-struct ring_info {
+struct rx_ring_info {
struct sk_buff *skb;
dma_addr_t mapping;
};
+struct tx_ring_info {
+ struct sk_buff *skb;
+ dma_addr_t first_mapping;
+#ifdef ZEROCOPY
+ dma_addr_t frag_mapping[6];
+#endif /* ZEROCOPY */
+};

struct netdev_private {
/* Descriptor rings first for alignment. */
@@ -330,8 +454,8 @@
dma_addr_t rx_ring_dma;
dma_addr_t tx_ring_dma;
/* The addresses of rx/tx-in-place skbuffs. */
- struct ring_info rx_info[RX_RING_SIZE];
- struct ring_info tx_info[TX_RING_SIZE];
+ struct rx_ring_info rx_info[RX_RING_SIZE];
+ struct tx_ring_info tx_info[TX_RING_SIZE];
/* Pointers to completion queues (full pages). I should cache line pad..*/
u8 pad0[100];
struct rx_done_desc *rx_done_q;
@@ -390,16 +514,20 @@
static int card_idx = -1;
static int printed_version = 0;
long ioaddr;
- int drv_flags, io_size = netdrv_tbl[chip_idx].io_size;
+ int drv_flags, io_size;

card_idx++;
option = card_idx < MAX_UNITS ? options[card_idx] : 0;
-
+
if (!printed_version++)
printk(KERN_INFO "%s" KERN_INFO "%s" KERN_INFO "%s",
version1, version2, version3);

+ if (pci_enable_device (pdev))
+ return -EIO;
+
ioaddr = pci_resource_start (pdev, 0);
+ io_size = pci_resource_len (pdev, 0);
if (!ioaddr || ((pci_resource_flags (pdev, 0) & IORESOURCE_MEM) == 0)) {
printk (KERN_ERR "starfire %d: no PCI MEM resources, aborting\n", card_idx);
return -ENODEV;
@@ -410,6 +538,7 @@
printk (KERN_ERR "starfire %d: cannot alloc etherdev, aborting\n", card_idx);
return -ENOMEM;
}
+ SET_MODULE_OWNER(dev);

irq = pdev->irq;

@@ -419,9 +548,6 @@
goto err_out_free_netdev;
}

- if (pci_enable_device (pdev))
- goto err_out_free_res;
-
ioaddr = (long) ioremap (ioaddr, io_size);
if (!ioaddr) {
printk (KERN_ERR "starfire %d: cannot remap 0x%x @ 0x%lx, aborting\n",
@@ -434,6 +560,14 @@
printk(KERN_INFO "%s: %s at 0x%lx, ",
dev->name, netdrv_tbl[chip_idx].name, ioaddr);

+#ifdef ZEROCOPY
+ /* Starfire can do SG and TCP/UDP checksumming */
+ dev->features |= NETIF_F_SG;
+#ifdef HAS_FIRMWARE
+ dev->features |= NETIF_F_IP_CSUM;
+#endif /* HAS_FIRMWARE */
+#endif /* ZEROCOPY */
+
/* Serial EEPROM reads are hidden by the hardware. */
for (i = 0; i < 6; i++)
dev->dev_addr[i] = readb(ioaddr + EEPROMCtrl + 20-i);
@@ -540,19 +674,15 @@

static int netdev_open(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int i, retval;

/* Do we ever need to reset the chip??? */

- MOD_INC_USE_COUNT;
-
retval = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);
- if (retval) {
- MOD_DEC_USE_COUNT;
+ if (retval)
return retval;
- }

/* Disable the Rx and Tx, and reset the chip. */
writel(0, ioaddr + GenCtrl);
@@ -564,7 +694,7 @@
if (np->tx_done_q == 0)
np->tx_done_q = pci_alloc_consistent(np->pci_dev, PAGE_SIZE, &np->tx_done_q_dma);
if (np->rx_done_q == 0)
- np->rx_done_q = pci_alloc_consistent(np->pci_dev, PAGE_SIZE, &np->rx_done_q_dma);
+ np->rx_done_q = pci_alloc_consistent(np->pci_dev, sizeof(struct rx_done_desc) * DONE_Q_SIZE, &np->rx_done_q_dma);
if (np->tx_ring == 0)
np->tx_ring = pci_alloc_consistent(np->pci_dev, PAGE_SIZE, &np->tx_ring_dma);
if (np->rx_ring == 0)
@@ -575,7 +705,7 @@
pci_free_consistent(np->pci_dev, PAGE_SIZE,
np->tx_done_q, np->tx_done_q_dma);
if (np->rx_done_q)
- pci_free_consistent(np->pci_dev, PAGE_SIZE,
+ pci_free_consistent(np->pci_dev, sizeof(struct rx_done_desc) * DONE_Q_SIZE,
np->rx_done_q, np->rx_done_q_dma);
if (np->tx_ring)
pci_free_consistent(np->pci_dev, PAGE_SIZE,
@@ -583,16 +713,32 @@
if (np->rx_ring)
pci_free_consistent(np->pci_dev, PAGE_SIZE,
np->rx_ring, np->rx_ring_dma);
- MOD_DEC_USE_COUNT;
return -ENOMEM;
}

init_ring(dev);
/* Set the size of the Rx buffers. */
- writel((np->rx_buf_sz<<16) | 0xA000, ioaddr + RxDescQCtrl);
-
+ writel((np->rx_buf_sz << RxBufferLenShift) |
+ (0 << RxMinDescrThreshShift) |
+ RxPrefetchMode | RxVariableQ |
+ RxDescSpace4,
+ ioaddr + RxDescQCtrl);
+
+#ifdef ZEROCOPY
+ /* Set Tx descriptor to type 0 and spacing to 64 bytes. */
+ writel((2 << TxHiPriFIFOThreshShift) |
+ (0 << TxPadLenShift) |
+ (4 << TxDMABurstSizeShift) |
+ TxDescSpace64 | TxDescType0,
+ ioaddr + TxDescCtrl);
+#else /* not ZEROCOPY */
/* Set Tx descriptor to type 1 and padding to 0 bytes. */
- writel(0x02000401, ioaddr + TxDescCtrl);
+ writel((2 << TxHiPriFIFOThreshShift) |
+ (0 << TxPadLenShift) |
+ (4 << TxDMABurstSizeShift) |
+ TxDescSpaceUnlim | TxDescType1,
+ ioaddr + TxDescCtrl);
+#endif /* not ZEROCOPY */

#if defined(ADDR_64BITS) && defined(__alpha__)
/* XXX We really need a 64-bit PCI dma interfaces too... -DaveM */
@@ -607,7 +753,24 @@
writel(np->tx_ring_dma, ioaddr + TxRingPtr);

writel(np->tx_done_q_dma, ioaddr + TxCompletionAddr);
- writel(np->rx_done_q_dma, ioaddr + RxCompletionAddr);
+#ifdef full_rx_status
+ writel(np->rx_done_q_dma |
+ RxComplType3 |
+ (0 << RxComplThreshShift),
+ ioaddr + RxCompletionAddr);
+#else /* not full_rx_status */
+#ifdef csum_rx_status
+ writel(np->rx_done_q_dma |
+ RxComplType2 |
+ (0 << RxComplThreshShift),
+ ioaddr + RxCompletionAddr);
+#else /* not csum_rx_status */
+ writel(np->rx_done_q_dma |
+ RxComplType0 |
+ (0 << RxComplThreshShift),
+ ioaddr + RxCompletionAddr);
+#endif /* not csum_rx_status */
+#endif /* not full_rx_status */

if (debug > 1)
printk(KERN_DEBUG "%s: Filling in the station address.\n", dev->name);
@@ -643,15 +806,26 @@
check_duplex(dev, 1);

/* Set the interrupt mask and enable PCI interrupts. */
- writel(IntrRxDone | IntrRxEmpty | IntrRxPCIErr |
- IntrTxDone | IntrTxEmpty | IntrTxPCIErr |
- StatsMax | LinkChange | IntrNormalSummary | IntrAbnormalSummary
- | 0x0010 , ioaddr + IntrEnable);
+ writel(IntrRxDone | IntrRxEmpty | IntrDMAErr |
+ IntrTxDone | IntrStatsMax | IntrLinkChange |
+ IntrNormalSummary | IntrAbnormalSummary |
+ IntrRxGFPDead | IntrNoTxCsum | IntrTxBadID,
+ ioaddr + IntrEnable);
writel(0x00800000 | readl(ioaddr + PCIDeviceConfig),
ioaddr + PCIDeviceConfig);

- /* Enable the Rx and Tx units. */
+#ifdef HAS_FIRMWARE
+ /* Load Rx/Tx firmware into the frame processors */
+ for (i = 0; i < FIRMWARE_RX_SIZE * 2; i++)
+ writel(cpu_to_le32(firmware_rx[i]), ioaddr + RxGfpMem + i * 4);
+ for (i = 0; i < FIRMWARE_TX_SIZE * 2; i++)
+ writel(cpu_to_le32(firmware_tx[i]), ioaddr + TxGfpMem + i * 4);
+ /* Enable the Rx and Tx units, and the Rx/Tx frame processors. */
+ writel(0x003F, ioaddr + GenCtrl);
+#else /* not HAS_FIRMWARE */
+ /* Enable the Rx and Tx units only. */
writel(0x000F, ioaddr + GenCtrl);
+#endif /* not HAS_FIRMWARE */

if (debug > 2)
printk(KERN_DEBUG "%s: Done netdev_open().\n",
@@ -669,7 +843,7 @@

static void check_duplex(struct net_device *dev, int startup)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int new_tx_mode ;

@@ -702,7 +876,7 @@
static void netdev_timer(unsigned long data)
{
struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;
int next_tick = 60*HZ; /* Check before driver release. */

@@ -730,7 +904,7 @@

static void tx_timeout(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
long ioaddr = dev->base_addr;

printk(KERN_WARNING "%s: Transmit timed out, status %8.8x,"
@@ -757,14 +931,14 @@

dev->trans_start = jiffies;
np->stats.tx_errors++;
- return;
+ netif_wake_queue(dev);
}


/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
static void init_ring(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int i;

np->tx_full = 0;
@@ -804,7 +978,14 @@

for (i = 0; i < TX_RING_SIZE; i++) {
np->tx_info[i].skb = NULL;
- np->tx_info[i].mapping = 0;
+ np->tx_info[i].first_mapping = 0;
+#ifdef ZEROCOPY
+ {
+ int j;
+ for (j = 0; j < 6; j++)
+ np->tx_info[i].frag_mapping[j] = 0;
+ }
+#endif /* ZEROCOPY */
np->tx_ring[i].status = 0;
}
return;
@@ -812,8 +993,11 @@

static int start_tx(struct sk_buff *skb, struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
- unsigned entry;
+ struct netdev_private *np = dev->priv;
+ unsigned int entry;
+#ifdef ZEROCOPY
+ int i;
+#endif

/* Caution: the write order is important here, set the field
with the "ownership" bits last. */
@@ -821,42 +1005,102 @@
/* Calculate the next Tx descriptor entry. */
entry = np->cur_tx % TX_RING_SIZE;

+#if defined(ZEROCOPY) && defined(HAS_FIRMWARE) && defined(HAS_BROKEN_FIRMWARE)
+ {
+ int has_bad_length = 0;
+
+ if (skb_first_frag_len(skb) == 1)
+ has_bad_length = 1;
+ else {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (skb_shinfo(skb)->frags[i].size == 1) {
+ has_bad_length = 1;
+ break;
+ }
+ }
+
+ if (has_bad_length)
+ skb_checksum_help(skb);
+ }
+#endif /* ZEROCOPY && HAS_FIRMWARE && HAS_BROKEN_FIRMWARE */
+
np->tx_info[entry].skb = skb;
- np->tx_info[entry].mapping =
- pci_map_single(np->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE);
+ np->tx_info[entry].first_mapping =
+ pci_map_single(np->pci_dev, skb->data, skb_first_frag_len(skb), PCI_DMA_TODEVICE);

- np->tx_ring[entry].addr = cpu_to_le32(np->tx_info[entry].mapping);
+ np->tx_ring[entry].first_addr = cpu_to_le32(np->tx_info[entry].first_mapping);
+#ifdef ZEROCOPY
+ np->tx_ring[entry].first_len = cpu_to_le32(skb_first_frag_len(skb));
+ np->tx_ring[entry].total_len = cpu_to_le32(skb->len);
/* Add "| TxDescIntr" to generate Tx-done interrupts. */
- np->tx_ring[entry].status = cpu_to_le32(skb->len | TxDescID);
+ np->tx_ring[entry].status = cpu_to_le32(TxDescID | TxCRCEn);
+ np->tx_ring[entry].nbufs = cpu_to_le32(skb_shinfo(skb)->nr_frags + 1);
+#else /* not ZEROCOPY */
+ /* Add "| TxDescIntr" to generate Tx-done interrupts. */
+ np->tx_ring[entry].status = cpu_to_le32(skb->len | TxDescID | TxCRCEn | 1 << 16);
+#endif /* not ZEROCOPY */
+
+ if (entry >= TX_RING_SIZE-1) /* Wrap ring */
+ np->tx_ring[entry].status |= cpu_to_le32(TxRingWrap | TxDescIntr);
+
+ /* not ifdef'ed, but shouldn't happen without ZEROCOPY */
+ if (skb->ip_summed == CHECKSUM_HW)
+ np->tx_ring[entry].status |= cpu_to_le32(TxCalTCP);
+
if (debug > 5) {
- printk(KERN_DEBUG "%s: Tx #%d slot %d %8.8x %8.8x.\n",
+#ifdef ZEROCOPY
+ printk(KERN_DEBUG "%s: Tx #%d slot %d status %8.8x nbufs %d len %4.4x/%4.4x.\n",
dev->name, np->cur_tx, entry,
le32_to_cpu(np->tx_ring[entry].status),
- le32_to_cpu(np->tx_ring[entry].addr));
+ le32_to_cpu(np->tx_ring[entry].nbufs),
+ le32_to_cpu(np->tx_ring[entry].first_len),
+ le32_to_cpu(np->tx_ring[entry].total_len));
+#else /* not ZEROCOPY */
+ printk(KERN_DEBUG "%s: Tx #%d slot %d status %8.8x.\n",
+ dev->name, np->cur_tx, entry,
+ le32_to_cpu(np->tx_ring[entry].status));
+#endif /* not ZEROCOPY */
}
+
+#ifdef ZEROCOPY
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *this_frag = &skb_shinfo(skb)->frags[i];
+
+ /* we already have the proper value in entry */
+ np->tx_info[entry].frag_mapping[i] =
+ pci_map_single(np->pci_dev, page_address(this_frag->page) + this_frag->page_offset, this_frag->size, PCI_DMA_TODEVICE);
+
+ np->tx_ring[entry].frag[i].addr = cpu_to_le32(np->tx_info[entry].frag_mapping[i]);
+ np->tx_ring[entry].frag[i].len = cpu_to_le32(this_frag->size);
+ if (debug > 5) {
+ printk(KERN_DEBUG "%s: Tx #%d frag %d len %4.4x.\n",
+ dev->name, np->cur_tx, i,
+ le32_to_cpu(np->tx_ring[entry].frag[i].len));
+ }
+ }
+#endif /* ZEROCOPY */
+
np->cur_tx++;
-#if 1
- if (entry >= TX_RING_SIZE-1) { /* Wrap ring */
- np->tx_ring[entry].status |= cpu_to_le32(TxRingWrap | TxDescIntr);
+
+ if (entry >= TX_RING_SIZE-1) /* Wrap ring */
entry = -1;
- }
-#endif
+ entry++;

/* Non-x86: explicitly flush descriptor cache lines here. */
+ /* Ensure everything is written back above before the transmit is
+ initiated. - Jes */
+ wmb();

/* Update the producer index. */
- writel(++entry, dev->base_addr + TxProducerIdx);
+ writel(entry * (sizeof(struct starfire_tx_desc) / 8), dev->base_addr + TxProducerIdx);

if (np->cur_tx - np->dirty_tx >= TX_RING_SIZE - 1) {
np->tx_full = 1;
netif_stop_queue(dev);
}
+
dev->trans_start = jiffies;

- if (debug > 4) {
- printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n",
- dev->name, np->cur_tx, entry);
- }
return 0;
}

@@ -878,7 +1122,7 @@
#endif

ioaddr = dev->base_addr;
- np = (struct netdev_private *)dev->priv;
+ np = dev->priv;

do {
u32 intr_status = readl(ioaddr + IntrClear);
@@ -919,18 +1163,33 @@
np->stats.tx_packets++;
} else if ((tx_status & 0xe0000000) == 0x80000000) {
struct sk_buff *skb;
+#ifdef ZEROCOPY
+ int i;
+#endif /* ZEROCOPY */
u16 entry = tx_status; /* Implicit truncate */
- entry >>= 3;
+ entry /= sizeof(struct starfire_tx_desc);

skb = np->tx_info[entry].skb;
+ np->tx_info[entry].skb = NULL;
pci_unmap_single(np->pci_dev,
- np->tx_info[entry].mapping,
- skb->len, PCI_DMA_TODEVICE);
+ np->tx_info[entry].first_mapping,
+ skb_first_frag_len(skb),
+ PCI_DMA_TODEVICE);
+ np->tx_info[entry].first_mapping = 0;
+
+#ifdef ZEROCOPY
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ pci_unmap_single(np->pci_dev,
+ np->tx_info[entry].frag_mapping[i],
+ skb_shinfo(skb)->frags[i].size,
+ PCI_DMA_TODEVICE);
+ np->tx_info[entry].frag_mapping[i] = 0;
+ }
+#endif /* ZEROCOPY */

/* Scavenge the descriptor. */
dev_kfree_skb_irq(skb);
- np->tx_info[entry].skb = NULL;
- np->tx_info[entry].mapping = 0;
+
np->dirty_tx++;
}
np->tx_done_q[np->tx_done].status = 0;
@@ -977,7 +1236,7 @@
for clarity and better register allocation. */
static int netdev_rx(struct net_device *dev)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx;
u32 desc_status;

@@ -1015,7 +1274,7 @@
#endif
/* Check if the packet is long enough to accept without copying
to a minimally-sized skbuff. */
- if (pkt_len < rx_copybreak
+ if (PKT_SHOULD_COPY(pkt_len)
&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
skb->dev = dev;
skb_reserve(skb, 2); /* 16 byte align the IP header */
@@ -1037,14 +1296,6 @@
temp = skb_put(skb, pkt_len);
np->rx_info[entry].skb = NULL;
np->rx_info[entry].mapping = 0;
-#ifndef final_version /* Remove after testing. */
- if (le32_to_cpu(np->rx_ring[entry].rxaddr & ~3) != ((unsigned long) temp))
- printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
- "do not match in netdev_rx: %d vs. %p / %p.\n",
- dev->name,
- le32_to_cpu(np->rx_ring[entry].rxaddr),
- skb->head, temp);
-#endif
}
#ifndef final_version /* Remove after testing. */
/* You will want this info for the initial debug. */
@@ -1060,9 +1311,24 @@
skb->data[17]);
#endif
skb->protocol = eth_type_trans(skb, dev);
-#ifdef full_rx_status
- if (le32_to_cpu(np->rx_done_q[np->rx_done].status2) & 0x01000000)
+#if defined(full_rx_status) || defined(csum_rx_status)
+ if (le32_to_cpu(np->rx_done_q[np->rx_done].status2) & 0x01000000) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ /*
+ * This feature doesn't seem to be working, at least
+ * with the two firmware versions I have. If the GFP sees
+ * a fragment, it either ignores it completely, or reports
+ * "bad checksum" on it.
+ *
+ * Maybe I missed something -- corrections are welcome.
+ * Until then, the printk stays. :-) -Ion
+ */
+ else if (le32_to_cpu(np->rx_done_q[np->rx_done].status2) & 0x00400000) {
+ skb->ip_summed = CHECKSUM_HW;
+ skb->csum = le32_to_cpu(np->rx_done_q[np->rx_done].status2) & 0xffff;
+ printk(KERN_DEBUG "%s: checksum_hw, status2 = %x\n", dev->name, np->rx_done_q[np->rx_done].status2);
+ }
#endif
netif_rx(skb);
dev->last_rx = jiffies;
@@ -1107,36 +1373,34 @@

static void netdev_error(struct net_device *dev, int intr_status)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;

- if (intr_status & LinkChange) {
+ if (intr_status & IntrLinkChange) {
printk(KERN_NOTICE "%s: Link changed: Autonegotiation advertising"
" %4.4x partner %4.4x.\n", dev->name,
mdio_read(dev, np->phys[0], 4),
mdio_read(dev, np->phys[0], 5));
check_duplex(dev, 0);
}
- if (intr_status & StatsMax) {
+ if (intr_status & IntrStatsMax) {
get_stats(dev);
}
/* Came close to underrunning the Tx FIFO, increase threshold. */
if (intr_status & IntrTxDataLow)
writel(++np->tx_threshold, dev->base_addr + TxThreshold);
if ((intr_status &
- ~(IntrAbnormalSummary|LinkChange|StatsMax|IntrTxDataLow|1)) && debug)
+ ~(IntrAbnormalSummary|IntrLinkChange|IntrStatsMax|IntrTxDataLow|1)) && debug)
printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n",
dev->name, intr_status);
- /* Hmmmmm, it's not clear how to recover from PCI faults. */
- if (intr_status & IntrTxPCIErr)
+ /* Hmmmmm, it's not clear how to recover from DMA faults. */
+ if (intr_status & IntrDMAErr)
np->stats.tx_fifo_errors++;
- if (intr_status & IntrRxPCIErr)
- np->stats.rx_fifo_errors++;
}

static struct net_device_stats *get_stats(struct net_device *dev)
{
long ioaddr = dev->base_addr;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;

/* This adapter architecture needs no SMP locks. */
np->stats.tx_bytes = readl(ioaddr + 0x57010);
@@ -1241,7 +1505,7 @@

static int mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
u16 *data = (u16 *)&rq->ifr_data;

switch(cmd) {
@@ -1279,7 +1543,7 @@
static int netdev_close(struct net_device *dev)
{
long ioaddr = dev->base_addr;
- struct netdev_private *np = (struct netdev_private *)dev->priv;
+ struct netdev_private *np = dev->priv;
int i;

netif_stop_queue(dev);
@@ -1305,7 +1569,7 @@
for (i = 0; i < 8 /* TX_RING_SIZE is huge! */; i++)
printk(KERN_DEBUG " #%d desc. %8.8x %8.8x -> %8.8x.\n",
i, le32_to_cpu(np->tx_ring[i].status),
- le32_to_cpu(np->tx_ring[i].addr),
+ le32_to_cpu(np->tx_ring[i].first_addr),
le32_to_cpu(np->tx_done_q[i].status));
printk(KERN_DEBUG " Rx ring at %8.8x -> %p:\n",
np->rx_ring_dma, np->rx_done_q);
@@ -1331,18 +1595,30 @@
}
for (i = 0; i < TX_RING_SIZE; i++) {
struct sk_buff *skb = np->tx_info[i].skb;
+#ifdef ZEROCOPY
+ int j;
+#endif /* ZEROCOPY */
if (skb != NULL) {
pci_unmap_single(np->pci_dev,
- np->tx_info[i].mapping,
- skb->len, PCI_DMA_TODEVICE);
+ np->tx_info[i].first_mapping,
+ skb_first_frag_len(skb), PCI_DMA_TODEVICE);
+ np->tx_info[i].first_mapping = 0;
dev_kfree_skb(skb);
+ np->tx_info[i].skb = NULL;
+#ifdef ZEROCOPY
+ for (j = 0; j < 6; j++)
+ if (np->tx_info[i].frag_mapping[j]) {
+ pci_unmap_single(np->pci_dev,
+ np->tx_info[i].frag_mapping[j],
+ skb_shinfo(skb)->frags[j].size,
+ PCI_DMA_TODEVICE);
+ np->tx_info[i].frag_mapping[j] = 0;
+ } else
+ break;
+#endif /* ZEROCOPY */
}
- np->tx_info[i].skb = NULL;
- np->tx_info[i].mapping = 0;
}

- MOD_DEC_USE_COUNT;
-
return 0;
}

@@ -1359,6 +1635,9 @@

unregister_netdev(dev);
iounmap((char *)dev->base_addr);
+
+ release_mem_region(pci_resource_start (pdev, 0),
+ pci_resource_len (pdev, 0));

if (np->tx_done_q)
pci_free_consistent(np->pci_dev, PAGE_SIZE,
--- linux-2.4.vanilla/drivers/net/starfire_firmware.pl Wed Feb 7 17:40:48 2001
+++ linux-2.4-boxter/drivers/net/starfire_firmware.pl Wed Feb 7 16:22:14 2001
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+
+# This script can be used to generate a new starfire_firmware.h
+# from GFP_RX.DAT and GFP_TX.DAT, files included with the DDK
+# and also with the Novell drivers.
+
+open FW, "GFP_RX.DAT" || die;
+open FWH, ">starfire_firmware.h" || die;
+
+printf(FWH "static u32 firmware_rx[] = {\n");
+$counter = 0;
+while ($foo = <FW>) {
+ chomp;
+ printf(FWH " 0x%s, 0x0000%s,\n", substr($foo, 4, 8), substr($foo, 0, 4));
+ $counter++;
+}
+
+close FW;
+open FW, "GFP_TX.DAT" || die;
+
+printf(FWH "};\t/* %d Rx instructions */\n#define FIRMWARE_RX_SIZE %d\n\nstatic u32 firmware_tx[] = {\n", $counter, $counter);
+$counter = 0;
+while ($foo = <FW>) {
+ chomp;
+ printf(FWH " 0x%s, 0x0000%s,\n", substr($foo, 4, 8), substr($foo, 0, 4));
+ $counter++;
+}
+
+close FW;
+printf(FWH "};\t/* %d Tx instructions */\n#define FIRMWARE_TX_SIZE %d\n", $counter, $counter);
+close(FWH);

2001-02-08 04:02:01

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Manfred Spraul wrote:
>
> Jeff Garzik wrote:
> >
> > + SET_MODULE_OWNER(dev);
> >
> > irq = pdev->irq;
> >
>
> One question:
> The code copies 'pdev->irq' into 'dev->irq'.
>
> Is that required, who need 'dev->irq'?
>
> > retval = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);
>
> Can't the driver use?
> retval = request_irq(np->pci_dev->irq)

Sure it can. A PCI driver can completely ignore dev->irq, if it so
desires. However...

Setting dev->irq is really a courtesy, because while the net core code
doesn't use it at all, it is reported to userspace such as ifconfig.
And because the netdev setup code could potentially assign a value to
dev->irq on its own, if the driver does -not- set dev->irq, the value
reported to userspace might incorrect instead of just being zero.

[side note - ifconfig only obtains the lower 16 bits of the
dev->base_addr value, grump grump]

Jeff


--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-08 20:34:21

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
>
> Hi Jeff,
>
> On Wed, 07 Feb 2001 14:57:16 -0500, Jeff Garzik <[email protected]> wrote:
>
> > Here's the patch I have, against vanilla 2.4.2-pre1, with the
> > pci_enable_device preferred changes included...
>
> Well, I decided to bite the bullet and port my zerocopy starfire
> changes to the official tree, properly ifdef'ed. So here it goes,
> the patch was made against 2.4.1 vanilla and includes all the
> fixes from Jeff and myself that were sent to the list so far.

I would prefer that the zerocopy changes stay in DaveM's external patch
until they are ready to be merged. Zerocopy is still changing and being
actively debugged, so it is possible that we might have to patch
starfire.c again with zerocopy updates, before the final patch makes it
to Linus. Let's wait on zerocopy in the main tree..


> I've also added myself as the starfire maintainer -- I hope
> nobody objects.

If you've got the hardware and time, I'm always happy to see someone
step up .. I must confess that I haven't seen much of your work to
date, however.


> +/*
> + * The ia64 doesn't allow for unaligned loads even of integers being
> + * misaligned on a 2 byte boundary. Thus always force copying of
> + * packets as the starfire doesn't allow for misaligned DMAs ;-(
> + * 23/10/2000 - Jes
> + *
> + * Neither does the Alpha. -Ion
> + */
> +#if defined(__ia64__) || defined(__alpha__)
> +#define PKT_SHOULD_COPY(pkt_len) 1
> +#else
> +#define PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak)
> +#endif

Note that I have not yet sent this patch onto Linus for a reason...
Here is Don Becker's comment on the subject:

Donald Becker wrote:
> On Tue, 16 Jan 2001, Jeff Garzik wrote:
> > * IA64 support (Jes)
> Oh, and this is completely bogus.
> This isn't a fix, it's a hack that covers up the real problem.
>
> The align-copy should *never* be required because the alignment differs
> between DIX and E-II encapsulated packets. The machine shouldn't crash
> because someone sends you a different encapsulation type!

> @@ -757,14 +931,14 @@
>
> dev->trans_start = jiffies;
> np->stats.tx_errors++;
> - return;
> + netif_wake_queue(dev);
> }

this has not been sent on to linus/alan because, if you do not empty the
Tx ring on tx_timeout, you should check to see if there is space on the
ring before waking the queue. Otherwise corruption/problems occur...

Jeff


--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-08 21:19:03

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Jeff Garzik wrote:

> I would prefer that the zerocopy changes stay in DaveM's external patch
> until they are ready to be merged.

I would actually prefer to have a single source for all the driver
versions. The 2.2.x version I sent to Alan later on actually compiles on
2.2, 2.4 and 2.4+zerocopy. I just want to test it some more; the 2.4
version I submitted was quite well tested.

> Zerocopy is still changing and being
> actively debugged, so it is possible that we might have to patch
> starfire.c again with zerocopy updates, before the final patch makes it
> to Linus. Let's wait on zerocopy in the main tree..

It's true that zerocopy might change. But remember, zerocopy support in
the driver is not mandatory, and nobody forces you to #define ZEROCOPY. If
the zerocopy proves to be unacceptable for the official kernel, ripping
out the stuff that's #ifdef ZEROCOPY will be a trivial exercise.

> > I've also added myself as the starfire maintainer -- I hope
> > nobody objects.
>
> If you've got the hardware and time, I'm always happy to see someone
> step up ..

.. the hardware, the docs, the time, and the day-to-day duty to maintain
the starfire driver (and the eepro100 driver) for an older version of
BSDI. It's the job that pays my salary...

> I must confess that I haven't seen much of your work to
> date, however.

Oh well, I thought the code would be enough proof. For what it's worth,
I was the main developer for the Linux version of Erez Zadok's stackable
filesystems work. I'm also co-maintainer (with Erez and two more people)
of the am-utils automounter suite, taking care of the Linux port and
currently implementing autofs support for Linux and Solaris.

But hey, if you want to continue maintaining the starfire driver yourself,
I don't really mind.

> > +/*
> > + * The ia64 doesn't allow for unaligned loads even of integers being
> > + * misaligned on a 2 byte boundary. Thus always force copying of
> > + * packets as the starfire doesn't allow for misaligned DMAs ;-(
> > + * 23/10/2000 - Jes
> > + *
> > + * Neither does the Alpha. -Ion
> > + */
> > +#if defined(__ia64__) || defined(__alpha__)
> > +#define PKT_SHOULD_COPY(pkt_len) 1
> > +#else
> > +#define PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak)
> > +#endif
>
> Note that I have not yet sent this patch onto Linus for a reason...
> Here is Don Becker's comment on the subject:
>
> Donald Becker wrote:
> > On Tue, 16 Jan 2001, Jeff Garzik wrote:
> > > * IA64 support (Jes)
> > Oh, and this is completely bogus.
> > This isn't a fix, it's a hack that covers up the real problem.
> >
> > The align-copy should *never* be required because the alignment differs
> > between DIX and E-II encapsulated packets. The machine shouldn't crash
> > because someone sends you a different encapsulation type!

It's not *required* per se, as far as I know both the Alpha and IA64 have
handlers for unaligned access traps. *However*, copying each packet is
definitely better than taking an exception for each packet!

So the box won't crash. But it should behave better with this patch, in
regular operation.

Jes can probably comment some more, it's his change after all. I have
neither alphas nor itaniums, so my comment is based on second-hand
knowledge and on reading the arch code.

> > @@ -757,14 +931,14 @@
> >
> > dev->trans_start = jiffies;
> > np->stats.tx_errors++;
> > - return;
> > + netif_wake_queue(dev);
> > }
>
> this has not been sent on to linus/alan because, if you do not empty the
> Tx ring on tx_timeout, you should check to see if there is space on the
> ring before waking the queue. Otherwise corruption/problems occur...

tx_timeout is completely bogus right now, to be honest. If it gets there,
you might as well just down the interface. So I just applied this part
from your patch, without really thinking about it.

What tx_timeout really needs to do is a full reset and re-initialization
of the chip. Why? Because when the timeout happens, the chip is basically
fubar'ed (usually due to driver bugs, but that's not the point).

It's in the TODO list..

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-08 21:44:12

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
>
> > > +#if defined(__ia64__) || defined(__alpha__)
> > > +#define PKT_SHOULD_COPY(pkt_len) 1
> > > +#else
> > > +#define PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak)
> > > +#endif
> >
> [snip]
>
> It's not *required* per se, as far as I know both the Alpha and IA64 have
> handlers for unaligned access traps. *However*, copying each packet is
> definitely better than taking an exception for each packet!
>

What about changing the default for rx_copybreak instead?
Set it to 1536 on ia64 and Alpha, 0 for the rest.
tulip and eepro100 use that aproach.

--
Manfred

2001-02-08 21:43:45

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
>
> On Thu, 8 Feb 2001, Jeff Garzik wrote:
>
> > I would prefer that the zerocopy changes stay in DaveM's external patch
> > until they are ready to be merged.
>
> I would actually prefer to have a single source for all the driver
> versions. The 2.2.x version I sent to Alan later on actually compiles on
> 2.2, 2.4 and 2.4+zerocopy. I just want to test it some more; the 2.4
> version I submitted was quite well tested.

Well at least let's do it the Linux Kernel Way(tm): separate out the
zerocopy stuff such that there are minimal ifdefs in the code... For
example:

/* add these functions... */
#ifdef ZEROCOPY
static inline setup_txrx_rings(...) { /*...*/ }
#else
static inline setup_txrx_rings(...) { /*...*/ }
#endif

then in the code itself, where the TxRx ring setup occurs now (ie. where
ifdefs exist in the code) simply call the new static inline functions.


> > Zerocopy is still changing and being
> > actively debugged, so it is possible that we might have to patch
> > starfire.c again with zerocopy updates, before the final patch makes it
> > to Linus. Let's wait on zerocopy in the main tree..
>
> It's true that zerocopy might change. But remember, zerocopy support in
> the driver is not mandatory, and nobody forces you to #define ZEROCOPY. If
> the zerocopy proves to be unacceptable for the official kernel, ripping
> out the stuff that's #ifdef ZEROCOPY will be a trivial exercise.

You totally missed my point. It's unclean. If you have some code that
will not work at all in the current tree, it should not be in the
current tree. If Alan or Linus applies a starfire.c patch that includes
ZEROCOPY support while the tree as a whole does not include such
support, you are effectively including a developer-local change in the
global tree. With your patch but without zercopy infrastructure,
defining ZEROCOPY is completely pointless without an additional,
experimental patch.

The #ifdef ZEROCOPY code you added is a classic example of the kind of
code I -remove- from the kernel tree.


> > > I've also added myself as the starfire maintainer -- I hope
> > > nobody objects.
> >
> > If you've got the hardware and time, I'm always happy to see someone
> > step up ..
>
> .. the hardware, the docs, the time, and the day-to-day duty to maintain
> the starfire driver (and the eepro100 driver) for an older version of
> BSDI. It's the job that pays my salary...

excellent :)


> tx_timeout is completely bogus right now, to be honest. If it gets there,
> you might as well just down the interface. So I just applied this part
> from your patch, without really thinking about it.
>
> What tx_timeout really needs to do is a full reset and re-initialization
> of the chip. Why? Because when the timeout happens, the chip is basically
> fubar'ed (usually due to driver bugs, but that's not the point).

agreed

Jeff



--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-08 21:47:25

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Manfred Spraul wrote:

> What about changing the default for rx_copybreak instead?
> Set it to 1536 on ia64 and Alpha, 0 for the rest.
> tulip and eepro100 use that aproach.

That makes a lot of sense, thanks for the suggestion. I'll do so in the
next version.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-08 22:05:35

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Jeff Garzik wrote:

> Well at least let's do it the Linux Kernel Way(tm): separate out the
> zerocopy stuff such that there are minimal ifdefs in the code... For
> example:
>
> /* add these functions... */
> #ifdef ZEROCOPY
> static inline setup_txrx_rings(...) { /*...*/ }
> #else
> static inline setup_txrx_rings(...) { /*...*/ }
> #endif
>
> then in the code itself, where the TxRx ring setup occurs now (ie. where
> ifdefs exist in the code) simply call the new static inline functions.

Hmm. Ok. I'll think about it. Roughly 1/3 of the driver code will be
duplicated if we go this route with the existing structure. I'll try to
make use of a few helper inline functions which are smaller and can be
ifdef'ed without much code duplication.

> The #ifdef ZEROCOPY code you added is a classic example of the kind of
> code I -remove- from the kernel tree.

It's an issue of maintainer convenience vs. esthetics. And (last but not
least) it's also about other people's ability to easily make changes to
the driver, changes they can understand and test. So while in principle I
agree with you, I'm also beginning to understand Donald Becker's
frustration when others remove backward compatibility from his code.

So let's try to find some middle ground, ok? Your first suggestion sounds
reasonable, and hopefully the fate of the zerocopy stuff will be decided
sooner than later.

Stay tuned, there should be another version coming your way sometime
today...

Thanks,
Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-08 22:05:15

by Donald Becker

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Jeff Garzik wrote:

> > Well, I decided to bite the bullet and port my zerocopy starfire
> > changes to the official tree, properly ifdef'ed. So here it goes,
>
> I would prefer that the zerocopy changes stay in DaveM's external patch
> until they are ready to be merged. Zerocopy is still changing and being

Good idea -- I expect that there will be significant interface changes
before the zero-copy code stabilizes.

> > + * The ia64 doesn't allow for unaligned loads even of integers being
> > + * misaligned on a 2 byte boundary. Thus always force copying of
> > + * packets as the starfire doesn't allow for misaligned DMAs ;-(
...
> Note that I have not yet sent this patch onto Linus for a reason...
> Here is Don Becker's comment on the subject:

> > Oh, and this is completely bogus.
> > This isn't a fix, it's a hack that covers up the real problem.
> >
> > The align-copy should *never* be required because the alignment differs
> > between DIX and E-II encapsulated packets. The machine shouldn't crash
> > because someone sends you a different encapsulation type!

This is true for a number of drivers -- triggering the copy-align code
might eliminate the misaligned traps on your local network, but it's not
a solution.

I saw the Adaptec people last week at LinuxWorld. The 2.4.0 starfire
has a number of actual bugs that should be fixed RSN:
The consistency check in the Rx code was broken. Did anyone ever try
the driver after the changes? The test triggers with every received
packet. The easiest patch is to just get rid the consistency checks
inside "#ifndef final_version".

The region resource was not released, requiring a reboot between each
driver test. Trivial fix.

The MII read code is no longer reliable. I spent twenty minutes at
the show, but couldn't figure out the problem. I haven't been able
reproduce the problem locally with my 2.2 code and someone older
hardware.

Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993

2001-02-08 22:16:35

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Donald Becker wrote:

> > > The align-copy should *never* be required because the alignment differs
> > > between DIX and E-II encapsulated packets. The machine shouldn't crash
> > > because someone sends you a different encapsulation type!
>
> This is true for a number of drivers -- triggering the copy-align code
> might eliminate the misaligned traps on your local network, but it's not
> a solution.

Ok, so what *is* a good solution then? I'm not arguing that unaligned
memory access traps should be avoided because they are deadly (they
shouldn't be), but because they are costly.

Or we can just tell people, "hey, don't use this 64-bit PCI card on a real
64-bit system, it's broken by design"? I don't think that's a good
solution either.

The lack of a 64-bit frame descriptor isn't going to help either, if and
when zerocopy makes into the official tree. Using buffer descriptors with
fragmented sk_buffs is *not* fun (and yet that's what I do in 32-bit for
my BSDI driver, but for unrelated reasons).

> I saw the Adaptec people last week at LinuxWorld. The 2.4.0 starfire
> has a number of actual bugs that should be fixed RSN:
> The consistency check in the Rx code was broken. Did anyone ever try
> the driver after the changes? The test triggers with every received
> packet. The easiest patch is to just get rid the consistency checks
> inside "#ifndef final_version".

Done.

> The region resource was not released, requiring a reboot between each
> driver test. Trivial fix.

Done.

> The MII read code is no longer reliable. I spent twenty minutes at
> the show, but couldn't figure out the problem. I haven't been able
> reproduce the problem locally with my 2.2 code and someone older
> hardware.

Yes, I've noticed this too, the PHY doesn't seem to get detected in all
cases, and it's pretty random at that. Other times the same PHY gets
detected multiple times at different addresses.

The good news is that the same code behaves the same on 2.4 and 2.2, so
I think it's not a core kernel issue. I'll try to track it down;
fortunately it doesn't affect card functionality as long as the user
sticks with autonegotiation.

Thanks,
Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 00:09:43

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Ion Badulescu wrote:

> > The MII read code is no longer reliable. I spent twenty minutes at
> > the show, but couldn't figure out the problem. I haven't been able
> > reproduce the problem locally with my 2.2 code and someone older
> > hardware.
>
> Yes, I've noticed this too, the PHY doesn't seem to get detected in all
> cases, and it's pretty random at that. Other times the same PHY gets
> detected multiple times at different addresses.
>
> The good news is that the same code behaves the same on 2.4 and 2.2, so
> I think it's not a core kernel issue. I'll try to track it down;
> fortunately it doesn't affect card functionality as long as the user
> sticks with autonegotiation.

Kicking the chip *hard* when probing can do wonders. :-)

The attached patch fixes MII detection for me, reliably. It's the same
thing my BSDI driver does. The patch is against the previous version I
sent to the list; it applies almost cleanly to 2.4.1-vanilla and the
reject is easy to apply manually.

Full patch (for Jeff) will follow later.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.
-----------------------
--- linux-2.4-boxter/drivers/net/starfire.c Thu Feb 8 16:03:05 2001
+++ linux-2.4-zc/drivers/net/starfire.c Thu Feb 8 16:02:26 2001
@@ -160,6 +160,7 @@
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
+#include <linux/delay.h>
#include <asm/processor.h> /* Processor type for cache alignment. */
#include <asm/bitops.h>
#include <asm/io.h>
@@ -519,6 +520,7 @@
static int printed_version = 0;
long ioaddr;
int drv_flags, io_size;
+ int boguscnt;

card_idx++;
option = card_idx < MAX_UNITS ? options[card_idx] : 0;
@@ -586,8 +588,23 @@
i % 16 != 15 ? " " : "\n");
#endif

+ /* Issue soft reset */
+ writel(0x8000, ioaddr + TxMode);
+ udelay(1000);
+ writel(0, ioaddr + TxMode);
+
/* Reset the chip to erase previous misconfiguration. */
writel(1, ioaddr + PCIDeviceConfig);
+ boguscnt = 1000;
+ while (--boguscnt > 0) {
+ udelay(10);
+ if ((readl(ioaddr + PCIDeviceConfig) & 1) == 0)
+ break;
+ }
+ if (boguscnt == 0)
+ printk("%s: chipset reset never completed!\n", dev->name);
+ /* wait a little longer */
+ udelay(1000);

dev->base_addr = ioaddr;
dev->irq = irq;
@@ -630,14 +647,27 @@

if (drv_flags & CanHaveMII) {
int phy, phy_idx = 0;
+ int mii_status;
for (phy = 0; phy < 32 && phy_idx < 4; phy++) {
- int mii_status = mdio_read(dev, phy, 1);
- if (mii_status != 0xffff && mii_status != 0x0000) {
+ mdio_write(dev, phy, 0, 0x8000);
+ udelay(500);
+ boguscnt = 1000;
+ while (--boguscnt > 0)
+ if ((mdio_read(dev, phy, 0) & 0x8000) == 0)
+ break;
+ if (boguscnt == 0) {
+ printk("%s: PHY reset never completed!\n", dev->name);
+ continue;
+ }
+ mii_status = mdio_read(dev, phy, 1);
+ if (mii_status != 0x0000) {
np->phys[phy_idx++] = phy;
np->advertising = mdio_read(dev, phy, 4);
printk(KERN_INFO "%s: MII PHY found at address %d, status "
"0x%4.4x advertising %4.4x.\n",
dev->name, phy, mii_status, np->advertising);
+ /* there can be only one PHY on-board */
+ break;
}
}
np->mii_cnt = phy_idx;
@@ -663,7 +693,11 @@
/* ??? Should we add a busy-wait here? */
do
result = readl(mdio_addr);
- while ((result & 0xC0000000) != 0x80000000 && --boguscnt >= 0);
+ while ((result & 0xC0000000) != 0x80000000 && --boguscnt > 0);
+ if (boguscnt == 0)
+ return 0;
+ if ((result & 0xffff) == 0xffff)
+ return 0;
return result & 0xffff;
}



2001-02-09 00:37:17

by Donald Becker

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Ion Badulescu wrote:

> On Thu, 8 Feb 2001, Donald Becker wrote:
>
> > > > The align-copy should *never* be required because the alignment differs
> > > > between DIX and E-II encapsulated packets. The machine shouldn't crash
> > > > because someone sends you a different encapsulation type!
> >
> > This is true for a number of drivers -- triggering the copy-align code
> > might eliminate the misaligned traps on your local network, but it's not
> > a solution.
>
> Ok, so what *is* a good solution then? I'm not arguing that unaligned
> memory access traps should be avoided because they are deadly (they
> shouldn't be), but because they are costly.
>
> Or we can just tell people, "hey, don't use this 64-bit PCI card on a real
> 64-bit system, it's broken by design"? I don't think that's a good
> solution either.

This is not a 64 bit PCI issue. It is an issue with the protocol
stack. The IP protocol handling code must expect that the header words
will be misaligned in some circumstances.

It's amusing that a full receive copy is added without any concern, in
the same discussion where zero-copy transmit is treated as a holy grail!

> > The MII read code is no longer reliable. I spent twenty minutes at
> > the show, but couldn't figure out the problem. I haven't been able
> > reproduce the problem locally with my 2.2 code and somewhat older
> > hardware.
>
> Yes, I've noticed this too, the PHY doesn't seem to get detected in all
> cases, and it's pretty random at that. Other times the same PHY gets
> detected multiple times at different addresses.

This might be a transceiver preamble issue with the specific
transceivers on the recent cards. Debugging this type of problem
sometimes requires a D-Oscope on the MII data pins.

Normally I would suspect a timing problem with a very fast machine, but
the Starfire hardware generates its own preamble and clock signals, not
the driver code.

> The good news is that the same code behaves the same on 2.4 and 2.2, so
> I think it's not a core kernel issue. I'll try to track it down;

It's likely easier to use the starfire-diag program from
http://www.scyld.com/diag/index.html

Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993

2001-02-09 00:47:48

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Thu, 8 Feb 2001, Donald Becker wrote:

> > Or we can just tell people, "hey, don't use this 64-bit PCI card on a real
> > 64-bit system, it's broken by design"? I don't think that's a good
> > solution either.
>
> This is not a 64 bit PCI issue.

I know. It was just an ironic comment: we have a card with a 64-bit PCI
bus, we have a 64-bit system which very likely has some 64-bit PCI slots
on its motherboard, perfect match, right? Well, au contraire, the
performance is going to suck big-time, at least for Rx.

> It is an issue with the protocol
> stack. The IP protocol handling code must expect that the header words
> will be misaligned in some circumstances.

I won't get into this...

> It's amusing that a full receive copy is added without any concern, in
> the same discussion where zero-copy transmit is treated as a holy grail!

Amusing? Maybe. Zerocopy will still help with Tx, and with Rx we're just
trying to contain the damage, *with the existent stack*.

> This might be a transceiver preamble issue with the specific
> transceivers on the recent cards. Debugging this type of problem
> sometimes requires a D-Oscope on the MII data pins.
>
> Normally I would suspect a timing problem with a very fast machine, but
> the Starfire hardware generates its own preamble and clock signals, not
> the driver code.

See my previous mail. It turned out to be just a confused chipset.

Thanks,
Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 10:51:11

by Alan Cox

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

> It's amusing that a full receive copy is added without any concern, in
> the same discussion where zero-copy transmit is treated as a holy grail!

For non routing paths its virtually free because the DMA forced the lines
from cache anyway. Basically its a deficiency in the chipset. We don't support
chipsets with alignment rules well on cpus with alignment rules that clash

That shouldnt be a suprise

2001-02-09 19:09:18

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
> On Thu, 8 Feb 2001, Jeff Garzik wrote:
> > The #ifdef ZEROCOPY code you added is a classic example of the kind of
> > code I -remove- from the kernel tree.
>
> It's an issue of maintainer convenience vs. esthetics. And (last but not
> least) it's also about other people's ability to easily make changes to
> the driver, changes they can understand and test. So while in principle I
> agree with you, I'm also beginning to understand Donald Becker's
> frustration when others remove backward compatibility from his code.
>
> So let's try to find some middle ground, ok? Your first suggestion sounds
> reasonable, and hopefully the fate of the zerocopy stuff will be decided
> sooner than later.

I would prefer that zerocopy code remain out of all official kernels
until zerocopy itself is in said kernels. It's experimental code that
simply cannot work in its present form, due to lack of infrastructure in
the general kernel. And being based on experimental code itself, there
is definitely the potential for changes yet.

Remember: we are in a stable series of kernels. This is experimental
code. Maintain a separate branch of development like everyone else.
:) Yes it's a bit more effort, but that's what being a maintainer is
all about. The kernel needs a -stable- starfire.c, let's talk about
adding experimental code later.

BTW, I would suggest looking at Jes' acenic.c as an example of a 2.4
driver that is clean but also [hopefully!] works under 2.2.

Jeff


--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-09 20:07:58

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Fri, 9 Feb 2001, Jeff Garzik wrote:

> I would prefer that zerocopy code remain out of all official kernels
> until zerocopy itself is in said kernels. It's experimental code that
> simply cannot work in its present form, due to lack of infrastructure in
> the general kernel. And being based on experimental code itself, there
> is definitely the potential for changes yet.

Fine.

> BTW, I would suggest looking at Jes' acenic.c as an example of a 2.4
> driver that is clean but also [hopefully!] works under 2.2.

The *only* thing I couldn't solve cleanly is the MOD_{INC,DEC}_USE_COUNT
vs MODULE_SET_OWNER(). And I would really appreciate pointers for that. :)

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 20:11:58

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
> On Fri, 9 Feb 2001, Jeff Garzik wrote:
> > BTW, I would suggest looking at Jes' acenic.c as an example of a 2.4
> > driver that is clean but also [hopefully!] works under 2.2.
>
> The *only* thing I couldn't solve cleanly is the MOD_{INC,DEC}_USE_COUNT
> vs MODULE_SET_OWNER(). And I would really appreciate pointers for that. :)

For 2.2, define SET_MODULE_OWNER to null.

Define STAR_MOD_INC_USE_COUNT and STAR_MOD_DEC_USE_COUNT. For 2.4,
these are null. For 2.2, these point to MOD_{INC,DEC}_USE_COUNT.

Jeff



--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-09 20:21:41

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Fri, 9 Feb 2001, Jeff Garzik wrote:

> For 2.2, define SET_MODULE_OWNER to null.
>
> Define STAR_MOD_INC_USE_COUNT and STAR_MOD_DEC_USE_COUNT. For 2.4,
> these are null. For 2.2, these point to MOD_{INC,DEC}_USE_COUNT.

... and use both SET_MODULE_OWNER and STAR_MOD_*_USE_COUNT. It's along the
lines of what I was thinking -- though I don't think it's very clean.

Thanks a lot for the suggestion.

And one more question: is 2.2.x compatibility stuff acceptable in a 2.4
driver, given that all that stuff is in *one* #ifdef and not sprinkled
throughout the file?

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 20:27:21

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Ion Badulescu wrote:
> ... and use both SET_MODULE_OWNER and STAR_MOD_*_USE_COUNT. It's along the
> lines of what I was thinking -- though I don't think it's very clean.

It's about the best you can do, considering the 'no ifdefs in raw'
axiom.. Better suggestions are certainly welcome.


> And one more question: is 2.2.x compatibility stuff acceptable in a 2.4
> driver, given that all that stuff is in *one* #ifdef and not sprinkled
> throughout the file?

I have no problem with such overall, as long as the 2.2 section is
self-contained and mostly out of the mainline code.

Jeff


--
Jeff Garzik | "You see, in this world there's two kinds of
Building 1024 | people, my friend: Those with loaded guns
MandrakeSoft | and those who dig. You dig." --Blondie

2001-02-09 21:43:20

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Jeff" == Jeff Garzik <[email protected]> writes:

Jeff> Donald Becker wrote:
>> On Tue, 16 Jan 2001, Jeff Garzik wrote: > * IA64 support (Jes) Oh,
>> and this is completely bogus. This isn't a fix, it's a hack that
>> covers up the real problem.
>>
>> The align-copy should *never* be required because the alignment
>> differs between DIX and E-II encapsulated packets. The machine
>> shouldn't crash because someone sends you a different encapsulation
>> type!

The ia64 kernel has gotten mis aligned load support, but it's slow as
a dog so we really want to copy the packet every time anyway when the
header is not aligned. If people send out 802.3 headers or other crap
on Ethernet then it's just too bad.

Jes

2001-02-09 21:45:10

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Manfred" == Manfred Spraul <[email protected]> writes:

Manfred> Ion Badulescu wrote:
>> > > +#if defined(__ia64__) || defined(__alpha__) > > +#define
>> PKT_SHOULD_COPY(pkt_len) 1 > > +#else > > +#define
>> PKT_SHOULD_COPY(pkt_len) (pkt_len < rx_copybreak) > > +#endif >
>> [snip]
>>
>> It's not *required* per se, as far as I know both the Alpha and
>> IA64 have handlers for unaligned access traps. *However*, copying
>> each packet is definitely better than taking an exception for each
>> packet!

Manfred> What about changing the default for rx_copybreak instead?
Manfred> Set it to 1536 on ia64 and Alpha, 0 for the rest. tulip and
Manfred> eepro100 use that aproach.

Inefficient, my patch will make the unused code path disappear during
compilation, what you suggest results in an extra branch and unused
code.

The eepro100 and tulip drivers really should be changed to use the
same trick.

Jes

2001-02-09 21:53:10

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 9 Feb 2001, Jes Sorensen wrote:

> Manfred> What about changing the default for rx_copybreak instead?
> Manfred> Set it to 1536 on ia64 and Alpha, 0 for the rest. tulip and
> Manfred> eepro100 use that aproach.
>
> Inefficient, my patch will make the unused code path disappear during
> compilation, what you suggest results in an extra branch and unused
> code.

Yes, but I'd rather let people turn off the always-copy behavior by simply
changing rx_copybreak. The unused code is not really that much of a deal,
it's only a few lines.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 22:56:58

by Donald Becker

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 9 Feb 2001, Jes Sorensen wrote:

> >>>>> "Jeff" == Jeff Garzik <[email protected]> writes:
> Jeff> Donald Becker wrote:
> >> On Tue, 16 Jan 2001, Jeff Garzik wrote: > * IA64 support (Jes) Oh,
> >> and this is completely bogus. This isn't a fix, it's a hack that
> >> covers up the real problem.
> >>
> >> The align-copy should *never* be required because the alignment
> >> differs between DIX and E-II encapsulated packets. The machine
> >> shouldn't crash because someone sends you a different encapsulation
> >> type!
>
> The ia64 kernel has gotten mis aligned load support, but it's slow as
> a dog so we really want to copy the packet every time anyway when the
> header is not aligned. If people send out 802.3 headers or other crap
> on Ethernet then it's just too bad.

Note the word "required", meaning "must be done" vs. "recommended"
meaning "should be done".

The initial issue was a comment in a starfire patch that claimed an IA64
bug had been fixed. The copy breakpoint change might have improved
performance by doing a copy-align, but it didn't fix a bug.

That performance tradeoff was already anticipated: the 'rx_copybreak'
value that was changed was a module parameter, not a constant. That
allows a module-load-time tradeoff, based the specific implementation,
of copying the received packet or accepting a few unaligned loads of the
usually small IP header. See the comments in starfire.c, as well as
several other bus-master drivers.


Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993

2001-02-09 23:33:01

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Fri, 9 Feb 2001, Alan Cox wrote:

> > It's amusing that a full receive copy is added without any concern, in
> > the same discussion where zero-copy transmit is treated as a holy grail!
>
> For non routing paths its virtually free because the DMA forced the lines
> from cache anyway.

Are you actually sure about this? I thought DMA from PCI devices reached
the main memory without polluting the L2 cache. Otherwise any large DMA
transfer would kill the cache (think frame grabbers...)

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-09 23:36:51

by Alan

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

> > For non routing paths its virtually free because the DMA forced the lines
> > from cache anyway.
>
> Are you actually sure about this? I thought DMA from PCI devices reached
> the main memory without polluting the L2 cache. Otherwise any large DMA
> transfer would kill the cache (think frame grabbers...)

DMA to main memory normally invalidates those lines in the CPU cache rather
than the cache snooping and updating its view of them.

2001-02-10 09:50:32

by Gérard Roudier

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.



On Fri, 9 Feb 2001, Alan Cox wrote:

> > > For non routing paths its virtually free because the DMA forced the lines
> > > from cache anyway.
> >
> > Are you actually sure about this? I thought DMA from PCI devices reached
> > the main memory without polluting the L2 cache. Otherwise any large DMA
> > transfer would kill the cache (think frame grabbers...)
>
> DMA to main memory normally invalidates those lines in the CPU cache rather
> than the cache snooping and updating its view of them.

In PCI, it is the Memory Write and Invalidate PCI transaction that is
intended to allow core-logics to optimize DMA this way. For normal Memory
Write PCI transactions or when the core-logic is aliasing MWI to MW, the
snooping may well happen. All that stuff, very probably, varies a lot
depending on the core-logic.

As we know, in normal PCI, the target is not told about the transaction
length prior to the bursting of the data. This makes difficult for a core
logic to use cache invalidation rather than dma snooping when a normal MW
is used, thus the invention of MWI.

G?rard.

2001-02-10 14:48:54

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

Hi Jes,

I read through your acenic driver and noticed that you replaced
spinlocks with bitops.

Is that a good idea? I always avoid bitops and replace them with
spinlocks:

* On uniprocessor they are obviously slower.
* on SMP i386 spin_lock() / spin_unlock() is faster than
test_and_set_bit()/clear_bit(): the spinlock operations have a
direction, and thus no memory barrier is required in spin_unlock,
Intel's default memory ordering is sufficient. clear_bit() doesn't know
that it will be used to end a protected area, thus it needs a full
memory barrier.

* on ia64 spinlocks are probably faster, and it seems that clear_bit()
instead of spin_unlock() might even cause races:
spin_unlock() needs a 'release' memory barrier, but clear_bit() contains
an 'acquire' memory barrier.

I only see 2 advantages for bitops:
* you can avoid disabling local interrupts in hard_tx_xmit() or other
bottom half handlers, but often you only need the disabled interrupts
for a few instructions.
* you won't spin - but spinning should be rare, or you can use
spin_trylock().

--
Manfred

2001-02-12 18:55:36

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Donald" == Donald Becker <[email protected]> writes:

Donald> On 9 Feb 2001, Jes Sorensen wrote:
>> The ia64 kernel has gotten mis aligned load support, but it's slow
>> as a dog so we really want to copy the packet every time anyway
>> when the header is not aligned. If people send out 802.3 headers or
>> other crap on Ethernet then it's just too bad.

Donald> Note the word "required", meaning "must be done"
Donald> vs. "recommended" meaning "should be done".

Donald> The initial issue was a comment in a starfire patch that
Donald> claimed an IA64 bug had been fixed. The copy breakpoint
Donald> change might have improved performance by doing a copy-align,
Donald> but it didn't fix a bug.

I agree it was a bug, and yes it has been fixed.

Donald> That performance tradeoff was already anticipated: the
Donald> 'rx_copybreak' value that was changed was a module parameter,
Donald> not a constant. That allows a module-load-time tradeoff,
Donald> based the specific implementation, of copying the received
Donald> packet or accepting a few unaligned loads of the usually small
Donald> IP header. See the comments in starfire.c, as well as several
Donald> other bus-master drivers.

In this case it just results in a performance degradation for 99% of
the usage. What about making the change so it is optimized away unless
IPX is enabled?

Jes

2001-02-12 18:56:26

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Ion" == Ion Badulescu <[email protected]> writes:

Ion> On 9 Feb 2001, Jes Sorensen wrote:
>> Inefficient, my patch will make the unused code path disappear
>> during compilation, what you suggest results in an extra branch and
>> unused code.

Ion> Yes, but I'd rather let people turn off the always-copy behavior
Ion> by simply changing rx_copybreak. The unused code is not really
Ion> that much of a deal, it's only a few lines.

However, it is in the hot path code where it hurts the most.

Jes

2001-02-12 19:06:46

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "G?rard" == G?rard Roudier <[email protected]> writes:

G?rard> On Fri, 9 Feb 2001, Alan Cox wrote:

>> DMA to main memory normally invalidates those lines in the CPU
>> cache rather than the cache snooping and updating its view of them.

G?rard> In PCI, it is the Memory Write and Invalidate PCI transaction
G?rard> that is intended to allow core-logics to optimize DMA this
G?rard> way. For normal Memory Write PCI transactions or when the
G?rard> core-logic is aliasing MWI to MW, the snooping may well
G?rard> happen. All that stuff, very probably, varies a lot depending
G?rard> on the core-logic.

In fact one has to look out for this and disable the feature in some
cases. On the acenic not disabling Memory Write and Invalidate costs
~20% on performance on some systems.

Jes

2001-02-13 13:07:48

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 12 Feb 2001, Jes Sorensen wrote:
> >>>>> "G?rard" == G?rard Roudier <[email protected]> writes:
> G?rard> In PCI, it is the Memory Write and Invalidate PCI transaction
> G?rard> that is intended to allow core-logics to optimize DMA this
> G?rard> way. For normal Memory Write PCI transactions or when the
> G?rard> core-logic is aliasing MWI to MW, the snooping may well
> G?rard> happen. All that stuff, very probably, varies a lot depending
> G?rard> on the core-logic.
>
> In fact one has to look out for this and disable the feature in some
> cases. On the acenic not disabling Memory Write and Invalidate costs
> ~20% on performance on some systems.

And in another message, On Mon, 12 Feb 2001, David S. Miller wrote:
> 3) The acenic/gbit performance anomalies have been cured
> by reverting the PCI mem_inval tweaks.


Just to be clear, acenic should or should not use MWI?

And can a general rule be applied here? Newer Tulip hardware also
has the ability to enable/disable MWI usage, IIRC.

Jeff




2001-02-13 20:30:09

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Tue, 13 Feb 2001 07:06:44 -0600 (CST), Jeff Garzik <[email protected]> wrote:

> On 12 Feb 2001, Jes Sorensen wrote:
>> In fact one has to look out for this and disable the feature in some
>> cases. On the acenic not disabling Memory Write and Invalidate costs
>> ~20% on performance on some systems.
>
> And in another message, On Mon, 12 Feb 2001, David S. Miller wrote:
>> 3) The acenic/gbit performance anomalies have been cured
>> by reverting the PCI mem_inval tweaks.
>
> Just to be clear, acenic should or should not use MWI?
>
> And can a general rule be applied here? Newer Tulip hardware also
> has the ability to enable/disable MWI usage, IIRC.

And so do eepro100 and starfire. On the eepro100 we're enabling MWI
unconditionally, and on the starfire we disable it unconditionally...

I should probably take a look at acenic's use of PCI_COMMAND_INVALIDATE
to see when it gets activated. Some benchmarking would probably help,
too -- maybe later today.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-14 01:36:21

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 12 Feb 2001, Jes Sorensen wrote:

> Ion> Yes, but I'd rather let people turn off the always-copy behavior
> Ion> by simply changing rx_copybreak. The unused code is not really
> Ion> that much of a deal, it's only a few lines.
>
> However, it is in the hot path code where it hurts the most.

I couldn't measure any difference, really. And for one extra branch, I
really wouldn't expect a measurable difference..

Not even defining final_version, which removes a *lot* more conditional
branches from the hot path, makes any measurable difference in the CPU
utilization.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.


2001-02-14 02:05:43

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On Tue, 13 Feb 2001 12:29:16 -0800, Ion Badulescu <[email protected]> wrote:
> On Tue, 13 Feb 2001 07:06:44 -0600 (CST), Jeff Garzik <[email protected]> wrote:
>
>> On 12 Feb 2001, Jes Sorensen wrote:
>>> In fact one has to look out for this and disable the feature in some
>>> cases. On the acenic not disabling Memory Write and Invalidate costs
>>> ~20% on performance on some systems.
>>
>> And in another message, On Mon, 12 Feb 2001, David S. Miller wrote:
>>> 3) The acenic/gbit performance anomalies have been cured
>>> by reverting the PCI mem_inval tweaks.
>>
>> Just to be clear, acenic should or should not use MWI?

With the zerocopy patch, acenic always disables MWI by default.

>> And can a general rule be applied here? Newer Tulip hardware also
>> has the ability to enable/disable MWI usage, IIRC.
>
> And so do eepro100 and starfire. On the eepro100 we're enabling MWI
> unconditionally, and on the starfire we disable it unconditionally...
>
> I should probably take a look at acenic's use of PCI_COMMAND_INVALIDATE
> to see when it gets activated. Some benchmarking would probably help,
> too -- maybe later today.

I did some testing with starfire, and the results are inconclusive --
at least on my P-III is makes absolutely no difference. Does it make
a difference on other architectures? sparc64, ia64 maybe?

I should probably rephrase this: MWI makes no difference on i386, but
it is claimed that using MWI *reduces* performance on some systems.
Are there any systems on which MWI *increases* performance?

I've added some code to the starfire driver that allows changing the
use of MWI at module load time, just in case. By default, it activates
it.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-14 05:26:26

by Donald Becker

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 12 Feb 2001, Jes Sorensen wrote:

> >>>>> "Donald" == Donald Becker <[email protected]> writes:
>
> Donald> On 9 Feb 2001, Jes Sorensen wrote:
> >> The ia64 kernel has gotten mis aligned load support, but it's slow
> >> as a dog so we really want to copy the packet every time anyway
> >> when the header is not aligned. If people send out 802.3 headers or
> >> other crap on Ethernet then it's just too bad.
>
> Donald> Note the word "required", meaning "must be done"
> Donald> vs. "recommended" meaning "should be done".
>
> Donald> The initial issue was a comment in a starfire patch that
> Donald> claimed an IA64 bug had been fixed. The copy breakpoint
> Donald> change might have improved performance by doing a copy-align,
> Donald> but it didn't fix a bug.
>
> I agree it was a bug, and yes it has been fixed.

There was not a bug in the driver. The bug was/is in the protocol handling
code. The protocol handling code *must* be able to handle unaligned IP
headers.

> Donald> That performance tradeoff was already anticipated: the
> Donald> 'rx_copybreak' value that was changed was a module parameter,
..
> In this case it just results in a performance degradation for 99% of
> the usage. What about making the change so it is optimized away unless
> IPX is enabled?

???
- It's not just IPX hosts that send 802.3 headers.
- While a good initial value might depend on the architecture, the
best setting is processor implementation and environment dependent.
Those details are not known at compile time.
- The code path cost of a module option is only a compare and a
conditional branch.

Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993

2001-02-14 12:39:29

by Alan Cox

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

> There was not a bug in the driver. The bug was/is in the protocol handling
> code. The protocol handling code *must* be able to handle unaligned IP
> headers.

It does. It does so on IA64 now as well. The only architecture which has troubles
with alignment on IP frames now is ARM2

2001-02-14 12:50:30

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device

On Wed, 14 Feb 2001, Alan Cox wrote:
> > There was not a bug in the driver. The bug was/is in the protocol handling
> > code. The protocol handling code *must* be able to handle unaligned IP
> > headers.

> It does. It does so on IA64 now as well. The only architecture which has troubles
> with alignment on IP frames now is ARM2

So the IA64-specific PKT_CAN_COPY code in starfire can go away
completely? Jes, can you test such w/ the latest kernel and starfire,
less your IA64-specific code?

The starfire update has never gone to Linus because this issue was
unresolved...

Jeff




2001-02-14 12:54:40

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device

On Wed, 14 Feb 2001, Jeff Garzik wrote:

> On Wed, 14 Feb 2001, Alan Cox wrote:
> > It does. It does so on IA64 now as well. The only architecture which has troubles
> > with alignment on IP frames now is ARM2
>
> So the IA64-specific PKT_CAN_COPY code in starfire can go away
> completely? Jes, can you test such w/ the latest kernel and starfire,
> less your IA64-specific code?

The way I understand it, IA64 and Alpha cope with it, but at the expense
of taking an exception for each packet -- so it's not worth it.

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-14 13:10:54

by Alan

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device

> The way I understand it, IA64 and Alpha cope with it, but at the expense
> of taking an exception for each packet -- so it's not worth it.

You want to copy_checksum the frame on these platforms, or better yet use
a decent network card that can start the frame on odd word alignment. You need
either the CPU or card to be able to handle misaligns efficiently.

2001-02-14 13:38:46

by Ion Badulescu

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device

On Wed, 14 Feb 2001, Alan Cox wrote:

> > The way I understand it, IA64 and Alpha cope with it, but at the expense
> > of taking an exception for each packet -- so it's not worth it.
>
> You want to copy_checksum the frame on these platforms,

That's what we're doing...

> or better yet use
> a decent network card that can start the frame on odd word alignment. You need
> either the CPU or card to be able to handle misaligns efficiently.

Oh well. It's not too bad as long as Rx traffic is kept to a minimum,
because Tx is not affected. So use it for anon ftp servers or web servers.
:)

Ion

--
It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.

2001-02-14 15:37:10

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Donald" == Donald Becker <[email protected]> writes:

Donald> On 12 Feb 2001, Jes Sorensen wrote:
>> In this case it just results in a performance degradation for 99%
>> of the usage. What about making the change so it is optimized away
>> unless IPX is enabled?

Donald> ??? - It's not just IPX hosts that send 802.3 headers. -
Donald> While a good initial value might depend on the architecture,
Donald> the best setting is processor implementation and environment
Donald> dependent. Those details are not known at compile time. -
Donald> The code path cost of a module option is only a compare and a
Donald> conditional branch.

What else is sending out 802.3 frames these days? I really don't care
about IPX when it comes to performance.

I am just advocating that we optimize for the common case which is DIX
frames and not 802.3.

Jes

2001-02-14 15:43:11

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Jeff" == Jeff Garzik <[email protected]> writes:

Jeff> On 12 Feb 2001, Jes Sorensen wrote:
>> 3) The acenic/gbit performance anomalies have been cured by
>> reverting the PCI mem_inval tweaks.

Jeff> Just to be clear, acenic should or should not use MWI?

Jeff> And can a general rule be applied here? Newer Tulip hardware
Jeff> also has the ability to enable/disable MWI usage, IIRC.

AceNIC always used to do this until the ZC patches appeared. It's a
recommendation from the hardware designers so I figure it's a bug in
the AceNIC hardware. I can probably go dig up the details on this, but
it's hidden somewhere deep down, ie. it's been ages since I looked at
it last.

Jes

2001-02-14 15:56:31

by Petr Vandrovec

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

On 14 Feb 01 at 16:35, Jes Sorensen wrote:
> >>>>> "Donald" == Donald Becker <[email protected]> writes:
> Donald> On 12 Feb 2001, Jes Sorensen wrote:

> Donald> ??? - It's not just IPX hosts that send 802.3 headers. -
> Donald> While a good initial value might depend on the architecture,
> Donald> the best setting is processor implementation and environment
> Donald> dependent. Those details are not known at compile time. -
> Donald> The code path cost of a module option is only a compare and a
> Donald> conditional branch.
>
> What else is sending out 802.3 frames these days? I really don't care
> about IPX when it comes to performance.
>
> I am just advocating that we optimize for the common case which is DIX
> frames and not 802.3.

Pardon me, but IPX in 802.3 and IPX in DIX are exactly same frames
on wire, except that IPX/802.3 contains frame length in bytes
0x0C/0x0D, while IPX/DIX contains 0x8137 here. They have same length,
and same length of media header, so I really do not understand.

If you are talking about encapsulation which is known as `ethernet_802.2'
in IPX world, then it is true, it has odd bytes in header. But nobody sane
except Appletalk uses 802.2 now... Our Suns already died due to this couple
of years ago ;-)

And as Ethernet SNAP has 8byte long header, it should be safe too, unless
architecture requires 16byte alignment - so only odd 802.2 should
be baned.
Best regards,
Petr Vandrovec
[email protected]

2001-02-14 21:12:37

by Gérard Roudier

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.



On Tue, 13 Feb 2001, Ion Badulescu wrote:

> On Tue, 13 Feb 2001 12:29:16 -0800, Ion Badulescu <[email protected]> wrote:
> > On Tue, 13 Feb 2001 07:06:44 -0600 (CST), Jeff Garzik <[email protected]> wrote:
> >
> >> On 12 Feb 2001, Jes Sorensen wrote:
> >>> In fact one has to look out for this and disable the feature in some
> >>> cases. On the acenic not disabling Memory Write and Invalidate costs
> >>> ~20% on performance on some systems.
> >>
> >> And in another message, On Mon, 12 Feb 2001, David S. Miller wrote:
> >>> 3) The acenic/gbit performance anomalies have been cured
> >>> by reverting the PCI mem_inval tweaks.
> >>
> >> Just to be clear, acenic should or should not use MWI?
>
> With the zerocopy patch, acenic always disables MWI by default.
>
> >> And can a general rule be applied here? Newer Tulip hardware also
> >> has the ability to enable/disable MWI usage, IIRC.
> >
> > And so do eepro100 and starfire. On the eepro100 we're enabling MWI
> > unconditionally, and on the starfire we disable it unconditionally...
> >
> > I should probably take a look at acenic's use of PCI_COMMAND_INVALIDATE
> > to see when it gets activated. Some benchmarking would probably help,
> > too -- maybe later today.
>
> I did some testing with starfire, and the results are inconclusive --
> at least on my P-III is makes absolutely no difference. Does it make
> a difference on other architectures? sparc64, ia64 maybe?
>
> I should probably rephrase this: MWI makes no difference on i386, but
> it is claimed that using MWI *reduces* performance on some systems.
> Are there any systems on which MWI *increases* performance?

I have read several data sheets about Intel PCI-HOST bridges and they all
were said to alias PCI MWI to normal PCI MEMORY WRITE transactions.
This matches your observations just fine.
Even when MWI is handled as MW, the PCI master is required to transfer
entire cache lines and this cannot be bad for performances. But this
should probably not make significant difference with normal MW.

Btw, your rephrasing looks improper to me. The processor is not involved
in the handling of MWI., especially when the MWI targets the memory. It is
the PCI-HOST bridge that must be considered here. What about ServerWorks
chipset ? Hmmm... I would be glad to have docs about these ones. :(

The MWI is intended to allow optimizations based on cache lines
invalidations rather than snooping. The target (or bridge) can perfectly
elect to handle the MWI as a normal MW and so, performance should not be
significantly lowered using MWI. But nothing is perfect, as we know.

The MWI is interesting for PCI throughput optimization but the MEMORY READ
LINE and MEMORY READ MULTIPLE transactions are a lot more interesting, in
my opinion. WRITEs can be posted (buffered), but in order to stream data
from memory (prefetchable) the bridge can do a better work when it knows
the intention of the PCI MASTER. All bridges should take in considerations
hints associated with MRL and MRM. IIRC, Intel bridges do.

> I've added some code to the starfire driver that allows changing the
> use of MWI at module load time, just in case. By default, it activates
> it.

You should also play with MRL and MRM, in my opinion.

G?rard.

2001-02-15 16:11:10

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "Petr" == Petr Vandrovec <[email protected]> writes:

Petr> On 14 Feb 01 at 16:35, Jes Sorensen wrote:
>> What else is sending out 802.3 frames these days? I really don't
>> care about IPX when it comes to performance.
>>
>> I am just advocating that we optimize for the common case which is
>> DIX frames and not 802.3.

Petr> Pardon me, but IPX in 802.3 and IPX in DIX are exactly same
Petr> frames on wire, except that IPX/802.3 contains frame length in
Petr> bytes 0x0C/0x0D, while IPX/DIX contains 0x8137 here. They have
Petr> same length, and same length of media header, so I really do not
Petr> understand.

Petr> If you are talking about encapsulation which is known as
Petr> `ethernet_802.2' in IPX world, then it is true, it has odd bytes
Petr> in header. But nobody sane except Appletalk uses 802.2
Petr> now... Our Suns already died due to this couple of years ago ;-)

My point is that you rarely see Ethernet frames with 802.3 except for
places running IPX.

Jes

2001-02-17 21:37:32

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.


Jeff Garzik writes:
> And in another message, On Mon, 12 Feb 2001, David S. Miller wrote:
> > 3) The acenic/gbit performance anomalies have been cured
> > by reverting the PCI mem_inval tweaks.
>
>
> Just to be clear, acenic should or should not use MWI?
>
> And can a general rule be applied here? Newer Tulip hardware also
> has the ability to enable/disable MWI usage, IIRC.

I think this is an Acenic specific issue. The second processor on the
Acenic board is only there to work around bugs in their DMA
controller.

Later,
David S. Miller
[email protected]

2001-02-19 11:03:33

by Jes Sorensen

[permalink] [raw]
Subject: Re: [PATCH] starfire reads irq before pci_enable_device.

>>>>> "David" == David S Miller <[email protected]> writes:

David> I think this is an Acenic specific issue. The second processor
David> on the Acenic board is only there to work around bugs in their
David> DMA controller.

It wasn't put there for that reason. It was intended for better work
;-)

Jes