2002-06-12 12:41:34

by Zhang Fuxin

[permalink] [raw]
Subject: NAPI for eepro100

--- eepro100-napi.c Wed Jun 12 17:11:38 2002
+++ eepro100-napi-proc.c Wed Jun 12 17:33:51 2002
@@ -119,6 +119,10 @@

#define CONFIG_EEPRO100_NAPI

+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#endif
+
MODULE_AUTHOR("Maintainer: Andrey V. Savochkin <[email protected]>");
MODULE_DESCRIPTION("Intel i82557/i82558/i82559 PCI EtherExpressPro driver");
MODULE_LICENSE("GPL");
@@ -516,6 +520,10 @@
unsigned long alloc_fail;
unsigned long long poll_cycles;

+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc_parent;
+#endif
+
#ifdef CONFIG_NET_FASTROUTE
unsigned long fastroute_hit;
unsigned long fastroute_success;
@@ -582,6 +590,11 @@
static void enable_rx_and_rxnobuf_ints(struct net_device *dev);
static void disable_rx_and_rxnobuf_ints(struct net_device *dev);

+#ifdef CONFIG_PROC_FS
+int __devinit speedo_create_proc_subdir(struct net_device *sp);
+void speedo_remove_proc_subdir(struct net_device *sp);
+#endif
+
#endif


@@ -883,6 +896,14 @@
#ifdef CONFIG_EEPRO100_NAPI
dev->poll = speedo_poll;
dev->quota = dev->weight = RX_RING_SIZE;
+
+#ifdef CONFIG_PROC_FS
+ if (speedo_create_proc_subdir(dev) < 0) {
+ printk(KERN_ERR "Failed to create proc directory for %s\n",
+ dev->name);
+ }
+#endif
+
#endif
return 0;
}
@@ -1885,6 +1906,354 @@
return 1; /* not_done */
}

+#ifdef CONFIG_PROC_FS
+/* adapted from intel's e100 code */
+static struct proc_dir_entry *adapters_proc_dir = 0;
+
+static void speedo_proc_cleanup(void);
+static unsigned char speedo_init_proc_dir(void);
+
+#define ADAPTERS_PROC_DIR "eepro100"
+#define WRITE_BUF_MAX_LEN 20
+#define READ_BUF_MAX_LEN 256
+#define SPEEDO_PE_LEN 25
+
+#define sp_off(off) (unsigned long)(offsetof(struct speedo_private, off))
+
+typedef struct _speedo_proc_entry {
+ char *name;
+ read_proc_t *read_proc;
+ write_proc_t *write_proc;
+ unsigned long offset; /* offset into sp. ~0 means no value, pass NULL. */
+} speedo_proc_entry;
+
+static int
+generic_read(char *page, char **start, off_t off, int count, int *eof, int len)
+{
+ if (len <= off + count)
+ *eof = 1;
+
+ *start = page + off;
+ len -= off;
+ if (len > count)
+ len = count;
+
+ if (len < 0)
+ len = 0;
+
+ return len;
+}
+
+static int
+read_ulong(char *page, char **start, off_t off,
+ int count, int *eof, unsigned long l)
+{
+ int len;
+
+ len = sprintf(page, "%lu\n", l);
+
+ return generic_read(page, start, off, count, eof, len);
+}
+
+static int
+read_gen_ulong(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ unsigned long val = 0;
+
+ if (data)
+ val = *((unsigned long *) data);
+
+ return read_ulong(page, start, off, count, eof, val);
+}
+
+static int
+read_ulonglong(char *page, char **start, off_t off,
+ int count, int *eof, unsigned long long ll)
+{
+ int len;
+
+ len = sprintf(page, "%llu\n", ll);
+
+ return generic_read(page, start, off, count, eof, len);
+}
+
+static int
+read_gen_ulonglong(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ unsigned long val = 0;
+
+ if (data)
+ val = *((unsigned long long *) data);
+
+ return read_ulonglong(page, start, off, count, eof, val);
+}
+
+static int
+set_debug(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+
+{
+ if (speedo_debug == 1)
+ speedo_debug = 6;
+ else
+ speedo_debug = 1;
+ return count;
+}
+
+static int
+_speedo_show_state(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+
+ struct net_device *dev = (struct net_device *)data;
+
+ speedo_show_state(dev);
+
+ return count;
+}
+
+static speedo_proc_entry speedo_proc_list[] = {
+ {"set_debug", 0, set_debug, ~0},
+ {"show_state", 0, _speedo_show_state, ~0},
+ {"poll_switch",read_gen_ulong,0,sp_off(poll_switch)},
+ {"failed_poll_switch",read_gen_ulong,0,sp_off(failed_poll_switch)},
+ {"done_poll",read_gen_ulong,0,sp_off(done_poll)},
+ {"notdone_poll",read_gen_ulong,0,sp_off(notdone_poll)},
+ {"empty_poll",read_gen_ulong,0,sp_off(empty_poll)},
+ {"soft_reset_count",read_gen_ulong,0,sp_off(soft_reset_count)},
+ {"rx_resume_count",read_gen_ulong,0,sp_off(rx_resume_count)},
+ {"alloc_fail",read_gen_ulong,0,sp_off(alloc_fail)},
+ {"poll_cycles",read_gen_ulonglong,0,sp_off(poll_cycles)},
+ {"fastroute_hit",read_gen_ulonglong,0,sp_off(fastroute_hit)},
+ {"fastroute_success",read_gen_ulonglong,0,sp_off(fastroute_success)},
+ {"fastroute_defer",read_gen_ulonglong,0,sp_off(fastroute_defer)},
+ {"", 0, 0, 0}
+};
+
+static int
+read_info(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+ struct speedo_private *sp = data;
+ speedo_proc_entry *pe;
+ int tmp;
+ void *val;
+ int len = 0;
+
+ for (pe = speedo_proc_list; pe->name[0]; pe++) {
+ if (pe->name[0] == '\n') {
+ len += sprintf(page + len, "\n");
+ continue;
+ }
+
+ if (pe->read_proc) {
+ if ((len + READ_BUF_MAX_LEN + SPEEDO_PE_LEN + 1) >=
+ PAGE_SIZE)
+ break;
+
+ if (pe->offset != ~0)
+ val = ((char *) sp) + pe->offset;
+ else
+ val = NULL;
+
+ len += sprintf(page + len, "%-"
+ __MODULE_STRING(SPEEDO_PE_LEN)
+ "s ", pe->name);
+ len += pe->read_proc(page + len, start, 0,
+ READ_BUF_MAX_LEN + 1, &tmp, val);
+ }
+ }
+
+ return generic_read(page, start, off, count, eof, len);
+}
+
+static struct proc_dir_entry * __devinit
+create_proc_rw(char *name, void *data, struct proc_dir_entry *parent,
+ read_proc_t * read_proc, write_proc_t * write_proc)
+{
+ struct proc_dir_entry *pdep;
+ mode_t mode = S_IFREG;
+
+ if (write_proc) {
+ mode |= S_IWUSR;
+ if (read_proc) {
+ mode |= S_IRUSR;
+ }
+
+ } else if (read_proc) {
+ mode |= S_IRUGO;
+ }
+
+ if (!(pdep = create_proc_entry(name, mode, parent)))
+ return NULL;
+
+ pdep->read_proc = read_proc;
+ pdep->write_proc = write_proc;
+ pdep->data = data;
+ return pdep;
+}
+
+void
+speedo_remove_proc_subdir(struct net_device *dev)
+{
+ struct speedo_private *sp = (struct speedo_private *)dev->priv;
+ speedo_proc_entry *pe;
+ char info[256];
+ int len;
+
+ /* If our root /proc dir was not created, there is nothing to remove */
+ if (adapters_proc_dir == NULL) {
+ return;
+ }
+
+ len = strlen(dev->name);
+ strncpy(info, dev->name, sizeof (info));
+ strncat(info + len, ".info", sizeof (info) - len);
+
+ if (sp->proc_parent) {
+ for (pe = speedo_proc_list; pe->name[0]; pe++) {
+ if (pe->name[0] == '\n')
+ continue;
+
+ remove_proc_entry(pe->name, sp->proc_parent);
+ }
+
+ remove_proc_entry(dev->name, adapters_proc_dir);
+ sp->proc_parent = NULL;
+ }
+
+ remove_proc_entry(info, adapters_proc_dir);
+
+ /* try to remove the main /proc dir, if it's empty */
+ speedo_proc_cleanup();
+}
+
+int __devinit
+speedo_create_proc_subdir(struct net_device *dev)
+{
+ struct speedo_private *sp = (struct speedo_private *)dev->priv;
+ struct proc_dir_entry *dev_dir;
+ speedo_proc_entry *pe;
+ char info[256];
+ int len;
+ void *data;
+
+ /* create the main /proc dir if needed */
+ if (!adapters_proc_dir) {
+ if (!speedo_init_proc_dir())
+ return -ENOMEM;
+ }
+
+ strncpy(info, dev->name, sizeof (info));
+ len = strlen(info);
+ strncat(info + len, ".info", sizeof (info) - len);
+
+ /* info */
+ if (!(create_proc_rw(info, sp, adapters_proc_dir, read_info, 0))) {
+ speedo_proc_cleanup();
+ return -ENOMEM;
+ }
+
+ dev_dir = create_proc_entry(dev->name, S_IFDIR,
+ adapters_proc_dir);
+ sp->proc_parent = dev_dir;
+
+ if (!dev_dir) {
+ speedo_remove_proc_subdir(dev);
+ return -ENOMEM;
+ }
+
+ for (pe = speedo_proc_list; pe->name[0]; pe++) {
+ if (pe->name[0] == '\n')
+ continue;
+
+ if (pe->offset != ~0)
+ data = ((char *) sp) + pe->offset;
+ else
+ data = dev;
+
+ if (!(create_proc_rw(pe->name, data, dev_dir,
+ pe->read_proc, pe->write_proc))) {
+ speedo_remove_proc_subdir(dev);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************
+ * Name: speedo_init_proc_dir
+ *
+ * Description: This routine creates the top-level /proc directory for the
+ * driver in /proc/net
+ *
+ * Arguments: none
+ *
+ * Returns: true on success, false on fail
+ *
+ ***************************************************************************/
+static unsigned char
+speedo_init_proc_dir(void)
+{
+ int len;
+
+ /* first check if adapters_proc_dir already exists */
+ len = strlen(ADAPTERS_PROC_DIR);
+ for (adapters_proc_dir = proc_net->subdir;
+ adapters_proc_dir; adapters_proc_dir = adapters_proc_dir->next) {
+
+ if ((adapters_proc_dir->namelen == len) &&
+ (!memcmp(adapters_proc_dir->name, ADAPTERS_PROC_DIR, len)))
+ break;
+ }
+
+ if (!adapters_proc_dir)
+ adapters_proc_dir =
+ create_proc_entry(ADAPTERS_PROC_DIR, S_IFDIR, proc_net);
+
+ if (!adapters_proc_dir)
+ return 0;
+
+ return 1;
+}
+
+/****************************************************************************
+ * Name: speedo_proc_cleanup
+ *
+ * Description: This routine clears the top-level /proc directory, if empty.
+ *
+ * Arguments: none
+ *
+ * Returns: none
+ *
+ ***************************************************************************/
+static void
+speedo_proc_cleanup(void)
+{
+ struct proc_dir_entry *de;
+
+ if (adapters_proc_dir == NULL) {
+ return;
+ }
+
+ /* check if subdir list is empty before removing adapters_proc_dir */
+ for (de = adapters_proc_dir->subdir; de; de = de->next) {
+ /* ignore . and .. */
+ if (*(de->name) != '.')
+ break;
+ }
+
+ if (de)
+ return;
+
+ remove_proc_entry(ADAPTERS_PROC_DIR, proc_net);
+ adapters_proc_dir = NULL;
+}
+
+#endif /* CONFIG_PROC_FS */
+
#endif /* NAPI */

static int
@@ -2474,6 +2843,9 @@

unregister_netdev(dev);

+#if defined(CONFIG_EEPRO100_NAPI) && defined(CONFIG_PROC_FS)
+ speedo_remove_proc_subdir(dev);
+#endif
release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1));
release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));


Attachments:
eepro100-mips.patch (3.38 kB)
eepro100-napi.patch (9.42 kB)
eepro100-proc.patch (9.77 kB)
Download all attachments

2002-06-19 11:51:07

by Bill Davidsen

[permalink] [raw]
Subject: Re: NAPI for eepro100

On Thu, 13 Jun 2002, Samuel Maftoul wrote:

> Maybe a bit off topic, but we (at my work) are using plenty of eepro100
> cards with both drivers ( e100 and eepro100 )(shipped with dell
> machines, and others).
> We have lot of problem with these card: from link autonegociation to the
> really frequent cmd_timeout.
> We expreienced some freezes, slowdowns, problems with copying from NFS
> to a Firwire disk ( systematic cmd_timeout at about 250 MB).

Yes, this would be better in the cosl.networking, but a quick answer since
it seems kernel related. I had problems with these until recent kernels.
The e100 driver helped in some cases, but other issues were reported. I
don't really have any problems now, on news servers which get ~250GB/day
in and push ~700 out (yes, they run 70-80Mbit all day).

This may be related to the blessing of the new scheduler.

> Do you have any advice ? should I test eepro100 NAPI driver ?
> I've try to play with ethtool(chang some eepro100 bits , like the
> "sleeping" one ...

We noticed long ago that different blades on the same switch would work
right with either auto at both ends or 100TX forced at both ends. Don;t
know why, just look at ifconfig reports for collisions and if you see them
change the settings.

> I have quitely the same card at home wich doesn't make any problem ( I
> noticed some cmd_timeout when I changed my hub).
> Is this hub related ? Is there a standart way autonegociation is working
> ( we use mostly cisco switches, are they compliant?).

You have said both "hub" and "switch" in this paragraph, trying to run
a hub full deplex will cause problems. I have no idea what a "switching
hub" means, I see them around cheap, but not from Cisco.

> We are actually trying to force 10FD or 100FD any new installed card
> because we think this is the best way to avoid performances problem ...

We have never found the slightest way to predect if auto or forced 100TX
works with any given blade in any given switch. But it doesn't change, so
get it right and it's fixed. See above, look for collisions, try auto
first. No collisions, it's as good as it gets.

--
bill davidsen <[email protected]>
CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.

2002-06-12 23:03:31

by Jeff Garzik

[permalink] [raw]
Subject: Re: NAPI for eepro100

Zhang Fuxin wrote:
> hi,all
> Recently i've converted eepro100 driver to use napi,in order to improve
> network performance of my poor 150M mips machine. It does eliminate
> the interrupt live lock seen before,maintaining a peak throughput under
> heavy load.
> In case anybody are interested,i post the patches to the list. They are
> 3 incremental patchs:
> eepro100-napi.patch is against 2.5.20 eepro100.c and provide basic
> napi support

Nifty, I'll take a look at this.


> eepro100-proc.patch is proc file system support adapted from intel's
> e100 driver. I am using it for debugging.
> eepro100-mips.patch is mips specific patch to make it work(well) for
> my mips
> platform.


Just FWIW I'm not gonna apply these... for the 'proc' patch, that either
needs to be moved to ethtool, or we should make a filesystem for net
drivers that exports procfs-like inodes. for the 'mips' patch, it looks
like the arch maintainer(s) need to fix the PCI DMA support...

Jeff



2002-06-12 23:10:02

by David Miller

[permalink] [raw]
Subject: Re: NAPI for eepro100

From: Jeff Garzik <[email protected]>
Date: Wed, 12 Jun 2002 19:00:00 -0400

for the 'mips' patch, it looks
like the arch maintainer(s) need to fix the PCI DMA support...

No, it's worse than that.

See how non-consistent memory is used by the eepro100 driver
for descriptor bits? The skb->tail bits?

That is very problematic.

2002-06-12 23:21:19

by Jeff Garzik

[permalink] [raw]
Subject: Re: NAPI for eepro100

David S. Miller wrote:
> From: Jeff Garzik <[email protected]>
> Date: Wed, 12 Jun 2002 19:00:00 -0400
>
> for the 'mips' patch, it looks
> like the arch maintainer(s) need to fix the PCI DMA support...
>
> No, it's worse than that.
>
> See how non-consistent memory is used by the eepro100 driver
> for descriptor bits? The skb->tail bits?
>
> That is very problematic.


Oh crap, you're right... eepro100 in general does funky stuff with the
way packets are handled, mainly due to the need to issue commands to the
NIC engine instead of the normal per-descriptor owner bit way of doing
things.

Well, I accept patches to that clean eepro100 up... I'm not terribly
motivated to clean it up myself, as we have e100 and an e100 maintainer
we can beat on if such uglies arise :)

Jeff



2002-06-12 23:38:12

by David Miller

[permalink] [raw]
Subject: Re: NAPI for eepro100

From: Jeff Garzik <[email protected]>
Date: Wed, 12 Jun 2002 19:17:58 -0400

Oh crap, you're right... eepro100 in general does funky stuff with the
way packets are handled, mainly due to the need to issue commands to the
NIC engine instead of the normal per-descriptor owner bit way of doing
things.

The question is, do the descriptor bits have to live right before
the RX packet data buffer or can other schemes be used?

2002-06-13 02:25:36

by Donald Becker

[permalink] [raw]
Subject: Re: NAPI for eepro100

On Wed, 12 Jun 2002, David S. Miller wrote:
> From: Jeff Garzik <[email protected]>
> Oh crap, you're right... eepro100 in general does funky stuff with the
> way packets are handled, mainly due to the need to issue commands to the
> NIC engine instead of the normal per-descriptor owner bit way of doing
> things.

The eepro100 has a unique design in many different aspects.

> The question is, do the descriptor bits have to live right before
> the RX packet data buffer or can other schemes be used?

With the current driver structure, yes, the descriptor words must be
immediately before the packet data. You can use other Rx and Tx
structures/modes to avoid this, but they use less efficient memory access.
For instance, the current Tx structure allows transmitting a packet with
a single PCI burst, rather than multiple transfers.


--
Donald Becker [email protected]
Scyld Computing Corporation http://www.scyld.com
410 Severn Ave. Suite 210 Second Generation Beowulf Clusters
Annapolis MD 21403 410-990-9993

2002-06-13 07:16:03

by Samuel Maftoul

[permalink] [raw]
Subject: Re: NAPI for eepro100

On Wed, Jun 12, 2002 at 10:25:22PM -0400, Donald Becker wrote:
> On Wed, 12 Jun 2002, David S. Miller wrote:
> > From: Jeff Garzik <[email protected]>
> > Oh crap, you're right... eepro100 in general does funky stuff with the
> > way packets are handled, mainly due to the need to issue commands to the
> > NIC engine instead of the normal per-descriptor owner bit way of doing
> > things.
>
> The eepro100 has a unique design in many different aspects.
>
> > The question is, do the descriptor bits have to live right before
> > the RX packet data buffer or can other schemes be used?
>
> With the current driver structure, yes, the descriptor words must be
> immediately before the packet data. You can use other Rx and Tx
> structures/modes to avoid this, but they use less efficient memory access.
> For instance, the current Tx structure allows transmitting a packet with
> a single PCI burst, rather than multiple transfers.
Maybe a bit off topic, but we (at my work) are using plenty of eepro100
cards with both drivers ( e100 and eepro100 )(shipped with dell
machines, and others).
We have lot of problem with these card: from link autonegociation to the
really frequent cmd_timeout.
We expreienced some freezes, slowdowns, problems with copying from NFS
to a Firwire disk ( systematic cmd_timeout at about 250 MB).

Do you have any advice ? should I test eepro100 NAPI driver ?
I've try to play with ethtool(chang some eepro100 bits , like the
"sleeping" one ...

I have quitely the same card at home wich doesn't make any problem ( I
noticed some cmd_timeout when I changed my hub).
Is this hub related ? Is there a standart way autonegociation is working
( we use mostly cisco switches, are they compliant?).

We are actually trying to force 10FD or 100FD any new installed card
because we think this is the best way to avoid performances problem ...

Thanks for any advice.
Sam

2002-06-13 08:48:20

by Andrey Savochkin

[permalink] [raw]
Subject: Re: NAPI for eepro100

On Wed, Jun 12, 2002 at 04:05:32PM -0700, David S. Miller wrote:
> From: Jeff Garzik <[email protected]>
> Date: Wed, 12 Jun 2002 19:00:00 -0400
>
> for the 'mips' patch, it looks
> like the arch maintainer(s) need to fix the PCI DMA support...
>
> No, it's worse than that.
>
> See how non-consistent memory is used by the eepro100 driver
> for descriptor bits? The skb->tail bits?
>
> That is very problematic.

What's the problem?
If it isn't allowed to do, then what is the meaning of PCI_DMA_BIDIRECTIONAL
mappings?

Andrey

2002-06-13 08:51:31

by David Miller

[permalink] [raw]
Subject: Re: NAPI for eepro100

From: Andrey Savochkin <[email protected]>
Date: Thu, 13 Jun 2002 12:57:53 +0400

On Wed, Jun 12, 2002 at 04:05:32PM -0700, David S. Miller wrote:
> No, it's worse than that.
>
> See how non-consistent memory is used by the eepro100 driver
> for descriptor bits? The skb->tail bits?
>
> That is very problematic.

What's the problem?
If it isn't allowed to do, then what is the meaning of PCI_DMA_BIDIRECTIONAL
mappings?

It's slow. Not wrong, just inefficient.

Descriptors were meant to be done using consistent mappings, not
"pci_map_*()"'d memory. The latter is meant to be used for long
linear DMA transfers to/from the device. It is not meant for things
the cpu pokes small bits of data in and out of, that is what
consistent DMA memory is for.