2008-08-20 12:05:41

by Mel Gorman

[permalink] [raw]
Subject: Major stalls due to commit 7c5026aa9b81dd45df8d3f4e0be73e485976a8b6

I had the pleasure of getting hold of one of these machines
http://www.terrasoftsolutions.com/products/powerstation/ and after some
tinkering installed 2.6.27-rc3. However, it was way slower and jerkier
(keyboard input for example would stop processing for up to 2 seconds)
than the distribution kernel. Network is a bit useless and kernel builds
went from about 4 minutes to about 12 even when built locally. I also noted
with NO_HZ enabled that a soft-lockup would be reported in a timer related
to tg3. The card in question is

0001:02:04.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5780 Gigabit Ethernet (rev 10)
Subsystem: IBM Device 0329
Flags: bus master, 66MHz, medium devsel, latency 64, IRQ 19
Memory at b8100000 (64-bit, non-prefetchable) [size=64K]
Memory at b8110000 (64-bit, non-prefetchable) [size=64K]
Expansion ROM at <ignored> [disabled]
Capabilities: [40] PCI-X non-bridge device
Capabilities: [48] Power Management version 2
Capabilities: [50] Vital Product Data <?>
Capabilities: [58] Message Signalled Interrupts: Mask- 64bit+ Queue=0/3 Enable-
Kernel driver in use: tg3
Kernel modules: tg3

I bisected part of the problem to the commit below and reverted it in
2.6.27-rc3. Much of the jerkiness is gone as well as the soft-lockup.
Kernel builds are still a little slower but I'm guessing that is
something else.

From: Matt Carlson <[email protected]>
Date: Fri, 2 May 2008 16:49:29 -0700
Subject: [PATCH] tg3: Add link state reporting to UMP firmware

All variants of the 5714, 5715, and 5780 offer a feature called the
"Universal Management Port". This feature is implemented in firmware
and is largely transparent to the driver, except...

It turns out that the UMP firmware needs to know the current status
of the link. Because the firmware cannot touch the PHY registers while
the driver is in control of the device, it needs the driver to report
link status changes through an additional handshaking mechanism.
Without this handshake, it has been observed in the field that the UMP
firmware will not operate correctly.

This patch implements the new handshake with the UMP firmware. Since
the handshake uses the same mechanism ASF heartbeats use, code was
added to detect and wait for completion of a pending previous event.

Signed-off-by: Matt Carlson <[email protected]>
Signed-off-by: Michael Chan <[email protected]>
Signed-off-by: David S. Miller <[email protected]>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index bf376b3..3ba6c52 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1656,12 +1656,76 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state)
return 0;
}

+/* tp->lock is held. */
+static void tg3_wait_for_event_ack(struct tg3 *tp)
+{
+ int i;
+
+ /* Wait for up to 2.5 milliseconds */
+ for (i = 0; i < 250000; i++) {
+ if (!(tr32(GRC_RX_CPU_EVENT) & GRC_RX_CPU_DRIVER_EVENT))
+ break;
+ udelay(10);
+ }
+}
+
+/* tp->lock is held. */
+static void tg3_ump_link_report(struct tg3 *tp)
+{
+ u32 reg;
+ u32 val;
+
+ if (!(tp->tg3_flags2 & TG3_FLG2_5780_CLASS) ||
+ !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF))
+ return;
+
+ tg3_wait_for_event_ack(tp);
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_LINK_UPDATE);
+
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 14);
+
+ val = 0;
+ if (!tg3_readphy(tp, MII_BMCR, &reg))
+ val = reg << 16;
+ if (!tg3_readphy(tp, MII_BMSR, &reg))
+ val |= (reg & 0xffff);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, val);
+
+ val = 0;
+ if (!tg3_readphy(tp, MII_ADVERTISE, &reg))
+ val = reg << 16;
+ if (!tg3_readphy(tp, MII_LPA, &reg))
+ val |= (reg & 0xffff);
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 4, val);
+
+ val = 0;
+ if (!(tp->tg3_flags2 & TG3_FLG2_MII_SERDES)) {
+ if (!tg3_readphy(tp, MII_CTRL1000, &reg))
+ val = reg << 16;
+ if (!tg3_readphy(tp, MII_STAT1000, &reg))
+ val |= (reg & 0xffff);
+ }
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 8, val);
+
+ if (!tg3_readphy(tp, MII_PHYADDR, &reg))
+ val = reg << 16;
+ else
+ val = 0;
+ tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 12, val);
+
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= GRC_RX_CPU_DRIVER_EVENT;
+ tw32_f(GRC_RX_CPU_EVENT, val);
+}
+
static void tg3_link_report(struct tg3 *tp)
{
if (!netif_carrier_ok(tp->dev)) {
if (netif_msg_link(tp))
printk(KERN_INFO PFX "%s: Link is down.\n",
tp->dev->name);
+ tg3_ump_link_report(tp);
} else if (netif_msg_link(tp)) {
printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n",
tp->dev->name,
@@ -1679,6 +1743,7 @@ static void tg3_link_report(struct tg3 *tp)
"on" : "off",
(tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) ?
"on" : "off");
+ tg3_ump_link_report(tp);
}
}

@@ -5500,19 +5565,17 @@ static void tg3_stop_fw(struct tg3 *tp)
if ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
!(tp->tg3_flags3 & TG3_FLG3_ENABLE_APE)) {
u32 val;
- int i;
+
+ /* Wait for RX cpu to ACK the previous event. */
+ tg3_wait_for_event_ack(tp);

tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_PAUSE_FW);
val = tr32(GRC_RX_CPU_EVENT);
- val |= (1 << 14);
+ val |= GRC_RX_CPU_DRIVER_EVENT;
tw32(GRC_RX_CPU_EVENT, val);

- /* Wait for RX cpu to ACK the event. */
- for (i = 0; i < 100; i++) {
- if (!(tr32(GRC_RX_CPU_EVENT) & (1 << 14)))
- break;
- udelay(1);
- }
+ /* Wait for RX cpu to ACK this event. */
+ tg3_wait_for_event_ack(tp);
}
}

@@ -7402,14 +7465,16 @@ static void tg3_timer(unsigned long __opaque)
if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
u32 val;

+ tg3_wait_for_event_ack(tp);
+
tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX,
FWCMD_NICDRV_ALIVE3);
tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 4);
/* 5 seconds timeout */
tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, 5);
val = tr32(GRC_RX_CPU_EVENT);
- val |= (1 << 14);
- tw32(GRC_RX_CPU_EVENT, val);
+ val |= GRC_RX_CPU_DRIVER_EVENT;
+ tw32_f(GRC_RX_CPU_EVENT, val);
}
tp->asf_counter = tp->asf_multiplier;
}
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index bf387ff..0404f93 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1429,6 +1429,7 @@
#define GRC_LCLCTRL_AUTO_SEEPROM 0x01000000
#define GRC_TIMER 0x0000680c
#define GRC_RX_CPU_EVENT 0x00006810
+#define GRC_RX_CPU_DRIVER_EVENT 0x00004000
#define GRC_RX_TIMER_REF 0x00006814
#define GRC_RX_CPU_SEM 0x00006818
#define GRC_REMOTE_RX_CPU_ATTN 0x0000681c
@@ -1676,6 +1677,7 @@
#define FWCMD_NICDRV_IPV6ADDR_CHG 0x00000004
#define FWCMD_NICDRV_FIX_DMAR 0x00000005
#define FWCMD_NICDRV_FIX_DMAW 0x00000006
+#define FWCMD_NICDRV_LINK_UPDATE 0x0000000c
#define FWCMD_NICDRV_ALIVE2 0x0000000d
#define FWCMD_NICDRV_ALIVE3 0x0000000e
#define NIC_SRAM_FW_CMD_LEN_MBOX 0x00000b7c

--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab


2008-08-20 15:07:33

by Michael Chan

[permalink] [raw]
Subject: Re: Major stalls due to commit 7c5026aa9b81dd45df8d3f4e0be73e485976a8b6

Mel Gorman wrote:

> I had the pleasure of getting hold of one of these machines
> http://www.terrasoftsolutions.com/products/powerstation/ and
> after some
> tinkering installed 2.6.27-rc3. However, it was way slower and jerkier
> (keyboard input for example would stop processing for up to 2 seconds)
> than the distribution kernel. Network is a bit useless and
> kernel builds
> went from about 4 minutes to about 12 even when built
> locally. I also noted
> with NO_HZ enabled that a soft-lockup would be reported in a
> timer related
> to tg3. The card in question is
>

This has been fixed a few days ago by the patch:

tg3: Fix firmware event timeouts

in the net-2.6 git tree. Please try that to see if it fixes
the problem. Thanks.

2008-08-20 15:57:29

by Mel Gorman

[permalink] [raw]
Subject: Re: Major stalls due to commit 7c5026aa9b81dd45df8d3f4e0be73e485976a8b6

On (20/08/08 08:06), Michael Chan didst pronounce:
> Mel Gorman wrote:
>
> > I had the pleasure of getting hold of one of these machines
> > http://www.terrasoftsolutions.com/products/powerstation/ and
> > after some
> > tinkering installed 2.6.27-rc3. However, it was way slower and jerkier
> > (keyboard input for example would stop processing for up to 2 seconds)
> > than the distribution kernel. Network is a bit useless and
> > kernel builds
> > went from about 4 minutes to about 12 even when built
> > locally. I also noted
> > with NO_HZ enabled that a soft-lockup would be reported in a
> > timer related
> > to tg3. The card in question is
> >
>
> This has been fixed a few days ago by the patch:
>
> tg3: Fix firmware event timeouts
>
> in the net-2.6 git tree. Please try that to see if it fixes
> the problem. Thanks.
>

I've pasted that patch below for the convenience of anyone watching. It
doesn't apply cleanly to 2.6.27-rc3 but the merge is obvious. It does fix
the problem and, as this affects 2.6.26, you should consider pushing it
to stable if you are not doing so already. Thanks.


From 4ba526ced990f4d61ee8d65fe8a6f0745e8e455c Mon Sep 17 00:00:00 2001
From: Matt Carlson <[email protected]>
Date: Fri, 15 Aug 2008 14:10:04 -0700
Subject: [PATCH] tg3: Fix firmware event timeouts

The git commit 7c5026aa9b81dd45df8d3f4e0be73e485976a8b6 ("tg3: Add
link state reporting to UMP firmware") introduced code that waits for
previous firmware events to be serviced before attempting to submit a
new event. Unfortunately that patch contained a bug that caused the
driver to wait 2.5 seconds, rather than 2.5 milliseconds as intended.
This patch fixes that bug.

This bug revealed that not all firmware versions service driver events
though. Since we do not know which versions of the firmware do and don't
service these events, the driver needs some way to minimize the effects
of the delay. This patch solves the problem by recording a jiffies
timestamp when it submits an event to the hardware. If the jiffies
counter shows that 2.5 milliseconds have already passed, a wait is not
needed and the driver can proceed to submit a new event.

Signed-off-by: Matt Carlson <[email protected]>
Signed-off-by: Michael Chan <[email protected]>
Signed-off-by: David S. Miller <[email protected]>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index e952b91..c26011e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1020,15 +1020,43 @@ static void tg3_mdio_fini(struct tg3 *tp)
}

/* tp->lock is held. */
+static inline void tg3_generate_fw_event(struct tg3 *tp)
+{
+ u32 val;
+
+ val = tr32(GRC_RX_CPU_EVENT);
+ val |= GRC_RX_CPU_DRIVER_EVENT;
+ tw32_f(GRC_RX_CPU_EVENT, val);
+
+ tp->last_event_jiffies = jiffies;
+}
+
+#define TG3_FW_EVENT_TIMEOUT_USEC 2500
+
+/* tp->lock is held. */
static void tg3_wait_for_event_ack(struct tg3 *tp)
{
int i;
+ unsigned int delay_cnt;
+ long time_remain;
+
+ /* If enough time has passed, no wait is necessary. */
+ time_remain = (long)(tp->last_event_jiffies + 1 +
+ usecs_to_jiffies(TG3_FW_EVENT_TIMEOUT_USEC)) -
+ (long)jiffies;
+ if (time_remain < 0)
+ return;

- /* Wait for up to 2.5 milliseconds */
- for (i = 0; i < 250000; i++) {
+ /* Check if we can shorten the wait time. */
+ delay_cnt = jiffies_to_usecs(time_remain);
+ if (delay_cnt > TG3_FW_EVENT_TIMEOUT_USEC)
+ delay_cnt = TG3_FW_EVENT_TIMEOUT_USEC;
+ delay_cnt = (delay_cnt >> 3) + 1;
+
+ for (i = 0; i < delay_cnt; i++) {
if (!(tr32(GRC_RX_CPU_EVENT) & GRC_RX_CPU_DRIVER_EVENT))
break;
- udelay(10);
+ udelay(8);
}
}

@@ -1077,9 +1105,7 @@ static void tg3_ump_link_report(struct tg3 *tp)
val = 0;
tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 12, val);

- val = tr32(GRC_RX_CPU_EVENT);
- val |= GRC_RX_CPU_DRIVER_EVENT;
- tw32_f(GRC_RX_CPU_EVENT, val);
+ tg3_generate_fw_event(tp);
}

static void tg3_link_report(struct tg3 *tp)
@@ -5953,6 +5979,7 @@ static int tg3_chip_reset(struct tg3 *tp)
tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg);
if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) {
tp->tg3_flags |= TG3_FLAG_ENABLE_ASF;
+ tp->last_event_jiffies = jiffies;
if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS)
tp->tg3_flags2 |= TG3_FLG2_ASF_NEW_HANDSHAKE;
}
@@ -5966,15 +5993,12 @@ static void tg3_stop_fw(struct tg3 *tp)
{
if ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
!(tp->tg3_flags3 & TG3_FLG3_ENABLE_APE)) {
- u32 val;
-
/* Wait for RX cpu to ACK the previous event. */
tg3_wait_for_event_ack(tp);

tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_PAUSE_FW);
- val = tr32(GRC_RX_CPU_EVENT);
- val |= GRC_RX_CPU_DRIVER_EVENT;
- tw32(GRC_RX_CPU_EVENT, val);
+
+ tg3_generate_fw_event(tp);

/* Wait for RX cpu to ACK this event. */
tg3_wait_for_event_ack(tp);
@@ -7864,8 +7888,6 @@ static void tg3_timer(unsigned long __opaque)
if (!--tp->asf_counter) {
if ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
!(tp->tg3_flags3 & TG3_FLG3_ENABLE_APE)) {
- u32 val;
-
tg3_wait_for_event_ack(tp);

tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX,
@@ -7873,9 +7895,8 @@ static void tg3_timer(unsigned long __opaque)
tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 4);
/* 5 seconds timeout */
tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, 5);
- val = tr32(GRC_RX_CPU_EVENT);
- val |= GRC_RX_CPU_DRIVER_EVENT;
- tw32_f(GRC_RX_CPU_EVENT, val);
+
+ tg3_generate_fw_event(tp);
}
tp->asf_counter = tp->asf_multiplier;
}
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 3772349..f5b8cab 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2432,7 +2432,10 @@ struct tg3 {
struct tg3_ethtool_stats estats;
struct tg3_ethtool_stats estats_prev;

+ union {
unsigned long phy_crc_errors;
+ unsigned long last_event_jiffies;
+ };

u32 rx_offset;
u32 tg3_flags;
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab