2004-09-29 22:46:30

by white phoenix

[permalink] [raw]
Subject: nforce2 bugs?

I'm wondering if any of the older nforce2 chipset bugs have been fixed
in recent kernels. I've seen a number of nforce2 kernel patches
floating around. Can someone tell me what some of these do? lol.

---------------------------nforce2-disconnect-quirk.patch:------------------------------------------------------------

[x86] fix lockups with APIC support on nForce2

Add PCI quirk to disable Halt Disconnect and Stop Grant Disconnect
(based on athcool program by Osamu Kayasono).

Spotted by Prakash K. Cheemplavam <[email protected]>
and Mathieu <[email protected]>.

arch/i386/pci/fixup.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+)

diff -puN arch/i386/pci/fixup.c~nforce2-disconnect-quirk arch/i386/pci/fixup.c
--- linux-2.6.0-test11/arch/i386/pci/fixup.c~nforce2-disconnect-quirk 2003-12-08
00:09:56.480294672 +0100
+++ linux-2.6.0-test11-root/arch/i386/pci/fixup.c 2003-12-08
00:09:56.484294064 +0100
@@ -187,6 +187,22 @@ static void __devinit pci_fixup_transpar
dev->transparent = 1;
}

+/*
+ * Halt Disconnect and Stop Grant Disconnect (bit 4 at offset 0x6F)
+ * must be disabled when APIC is used (or lockups will happen).
+ */
+static void __devinit pci_fixup_nforce2_disconnect(struct pci_dev *d)
+{
+ u8 t;
+
+ pci_read_config_byte(d, 0x6F, &t);
+ if (t & 0x10) {
+ printk(KERN_INFO "PCI: disabling nForce2 Halt Disconnect"
+ " and Stop Grant Disconnect\n");
+ pci_write_config_byte(d, 0x6F, (t & 0xef));
+ }
+}
+
struct pci_fixup pcibios_fixups[] = {
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx
},
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx
},
@@ -205,5 +221,6 @@ struct pci_fixup pcibios_fixups[] = {
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug
},
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810
},
{ PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge
},
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2_disconnect
},
{ 0 }
};

_

----------------------------------nforce2-idleC1halt-rd-2.6.5.patch---------------------------------------------------

--- linux-2.6.5/arch/i386/kernel/process.c.orig 2004-04-04
13:36:10.000000000 +1000
+++ linux-2.6.5/arch/i386/kernel/process.c 2004-04-15 20:41:13.000000000 +1000
@@ -47,10 +47,13 @@
#include <asm/irq.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
+#if defined(CONFIG_X86_UP_APIC)
+#include <asm/apic.h>
+#endif

#include <linux/irq.h>
#include <linux/err.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -98,10 +101,34 @@ void default_idle(void)
local_irq_enable();
}
}

/*
+ * We use this to avoid nforce2 lockups
+ * Reduces frequency of C1 disconnects
+ */
+static void c1halt_idle(void)
+{
+ if (!hlt_counter && current_cpu_data.hlt_works_ok) {
+ local_irq_disable();
+#if defined(CONFIG_X86_UP_APIC)
+ /* only hlt disconnect if more than 1.6% of apic interval remains */
+ extern int enable_local_apic;
+ if(!need_resched() && (enable_local_apic < 0 ||
+ (apic_read(APIC_TMCCT) > (apic_read(APIC_TMICT)>>6)))) {
+#else
+ /* just adds a little delay to assist in back to back disconnects */
+ if(!need_resched()) {
+#endif
+ ndelay(600); /* helps nforce2 but adds 0.6us hard int latency */
+ safe_halt(); /* nothing better to do until we wake up */
+ } else {
+ local_irq_enable();
+ }
+ }
+}
+/*
* On SMP it's slightly faster (but much more power-consuming!)
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
static void poll_idle (void)
@@ -135,20 +162,18 @@ static void poll_idle (void)
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
* low exit latency (ie sit in a loop waiting for
* somebody to say that they'd like to reschedule)
*/
+static void (*idle)(void);
void cpu_idle (void)
{
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
- void (*idle)(void) = pm_idle;
-
if (!idle)
- idle = default_idle;
-
+ idle = pm_idle ? pm_idle : default_idle;
irq_stat[smp_processor_id()].idle_timestamp = jiffies;
idle();
}
schedule();
}
@@ -199,16 +224,18 @@ void __init select_idle_routine(const st

static int __init idle_setup (char *str)
{
if (!strncmp(str, "poll", 4)) {
printk("using polling idle threads.\n");
- pm_idle = poll_idle;
+ idle = poll_idle;
} else if (!strncmp(str, "halt", 4)) {
printk("using halt in idle threads.\n");
- pm_idle = default_idle;
+ idle = default_idle;
+ } else if (!strncmp(str, "C1halt", 6)) {
+ printk("using C1 halt disconnect friendly idle threads.\n");
+ idle = c1halt_idle;
}
-
return 1;
}

__setup("idle=", idle_setup);

--------------------------------------------nforce2-ioapic-rd-2.6.5.patch-----------------------------------------------

--- linux-2.6.5/arch/i386/kernel/io_apic.c.orig 2004-04-16
00:20:54.000000000 +1000
+++ linux-2.6.5/arch/i386/kernel/io_apic.c 2004-04-15 20:24:18.000000000 +1000
@@ -2179,10 +2179,13 @@ static inline void check_timer(void)

if (pin1 != -1) {
/*
* Ok, does IRQ0 through the IOAPIC work?
*/
+ extern int acpi_skip_timer_override;
+ if(acpi_skip_timer_override)
+ timer_ack=0;
unmask_IO_APIC_irq(0);
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
setup_nmi();

--------------------------------------------nforce-apic-tack.patch--------------------------------------------------------

--- linux-2.6.0/arch/i386/kernel/apic.c 2003-12-18 12:59:58.000000000 +1000
+++ linux-2.6.0-rd/arch/i386/kernel/apic.c 2003-12-21 12:39:28.000000000 +1000
@@ -1070,10 +1070,17 @@ inline void smp_local_timer_interrupt(st
* we can take more than 100K local irqs per second on a 100 MHz P5.
*/
}

/*
+ * Athlon nforce2 R.D.
+ * preset timer ack mode if desired
+ * e.g. static int apic_timerack = 2;
+*/
+static int apic_timerack;
+
+/*
* Local APIC timer interrupt. This is the most natural way for doing
* local interrupts, but local timer interrupts can be emulated by
* broadcast interrupts too. [in case the hw doesn't support APIC timers]
*
* [ if a single-CPU system runs an SMP kernel then we call the local
@@ -1088,10 +1095,54 @@ void smp_apic_timer_interrupt(struct pt_
* the NMI deadlock-detector uses this.
*/
irq_stat[cpu].apic_timer_irqs++;

/*
+ * Athlon nforce2 timer ack delay. Ross Dickson.
+ * works around issue of hard lockups in code location
+ * where linux exposes underlying system timing fault?
+ * hopefully manufacturers will fix it soon.
+ * We leave C1 disconnect bit alone as bios/SMM wants?
+ */
+ if(apic_timerack) {
+ if(apic_timerack==1) {
+ /* v1 timer ack delay, inline delay version
+ * on AMDXP & nforce2 chipset we use at least 500ns
+ * try to scale delay time with cpu speed.
+ * safe all cpu cores?
+ */
+ ndelay((cpu_khz >> 12)+200); /* don't ack too soon or hard lockup */
+ } else {
+ static unsigned int passno, safecnt;
+ /* v2 timer ack delay, timeout version, more efficient
+ * on AMDXP & nforce2 chipset we need 800ns?
+ * from timer irq start to apic irq ack, read apic timer,
+ * may be unsafe for thoroughbred cores?
+ */
+ if(!passno) { /* calculate timing */
+ safecnt = apic_read(APIC_TMICT) -
+ ( (800UL * apic_read(APIC_TMICT) ) /
+ (1000000000UL/HZ) );
+ printk("..APIC TIMER ack delay, reload:%lu, safe:%u\n",
+ apic_read(APIC_TMICT), safecnt);
+ passno++;
+ }
+#if APIC_DEBUG
+ if(passno<12) {
+ unsigned int at1 = apic_read(APIC_TMCCT);
+ if( passno > 1 )
+ Dprintk("..APIC TIMER ack delay, predelay count:%u \n", at1 );
+ passno++;
+ }
+# endif
+ /* delay only if required */
+ while( apic_read(APIC_TMCCT) > safecnt )
+ ndelay(100);
+ }
+ }
+
+ /*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
*/
ack_APIC_irq();
/*
@@ -1157,10 +1208,28 @@ asmlinkage void smp_error_interrupt(void
smp_processor_id(), v , v1);
irq_exit();
}

/*
+* Athlon nforce2 timer ack delay. R.D.
+* kernel arg apic_tack=[012]
+* 0 off, 1 always delay, 2 timeout
+*/
+static int __init setup_apic_timerack(char *str)
+{
+ int tack;
+
+ get_option(&str, &tack);
+
+ if ( tack < 0 || tack > 2 )
+ return 0;
+ apic_timerack = tack;
+ return 1;
+}
+__setup("apic_tack=", setup_apic_timerack);
+
+/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
int __init APIC_init_uniprocessor (void)
{
--- CUT HERE ---

io-apic edge:
--- CUT HERE ---
--- linux-2.6.0/arch/i386/kernel/io_apic.c 2003-12-18 12:58:39.000000000 +1000
+++ linux-2.6.0-rd/arch/i386/kernel/io_apic.c 2003-12-20
21:41:52.000000000 +1000
@@ -2123,12 +2123,56 @@ static inline void check_timer(void)
check_nmi_watchdog();
}
return;
}
clear_IO_APIC_pin(0, pin1);
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC
INTIN%d\n",pin1);
+ }
+
+#if defined(CONFIG_ACPI_BOOT) && defined(CONFIG_X86_UP_IOAPIC)
+ /* for nforce2 try vector 0 on pin0
+ * Note 8259a is already masked, also by default
+ * the io_apic_set_pci_routing call disables the 8259 irq 0
+ * so we must be connected directly to the 8254 timer if this works
+ * Note2: this violates the above comment re Subtle but works!
+ */
+ printk(KERN_INFO "..TIMER: Is timer irq0 connected to IO-APIC INTIN0? ...\n");
+ if (pin1 != -1) {
+ extern spinlock_t i8259A_lock;
+ unsigned long flags;
+ int tok, saved_timer_ack = timer_ack;
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ io_apic_set_pci_routing ( 0, 0, 0, 0, 0); /* connect pin */
+ unmask_IO_APIC_irq(0);
+ timer_ack = 0;
+
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ spin_lock_irqsave(&i8259A_lock, flags);
+ Dprintk("..TIMER 8259A ints disabled?, imr1:%02x, imr2:%02x\n",
inb(0x21), inb(0xA1));
+ tok = timer_irq_works();
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ if (tok) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ check_nmi_watchdog();
+ }
+ printk(KERN_INFO "..TIMER: works OK on IO-APIC INTIN0 irq0\n" );
+ return;
+ }
+ /* failed */
+ timer_ack = saved_timer_ack;
+ clear_IO_APIC_pin(0, 0);
+ io_apic_set_pci_routing ( 0, pin1, 0, 0, 0);
+ printk(KERN_ERR "..MP-BIOS: 8254 timer not connected to IO-APIC INTIN0\n");
}
+#endif

printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
if (pin2 != -1) {
printk("\n..... (found pin %d) ...", pin2);
/*

-----------------------------------more-nforce-fixes.patch----------------------------------------------------------------

===== Documentation/kernel-parameters.txt 1.44 vs edited =====
--- 1.44/Documentation/kernel-parameters.txt Mon Mar 22 16:03:22 2004
+++ edited/Documentation/kernel-parameters.txt Wed Apr 21 15:28:12 2004
@@ -122,6 +122,10 @@

acpi_serialize [HW,ACPI] force serialization of AML methods

+ acpi_skip_timer_override [HW,ACPI]
+ Recognize and ignore IRQ0/pin2 Interrupt Override.
+ For broken nForce2 BIOS resulting in XT-PIC timer.
+
ad1816= [HW,OSS]
Format: <io>,<irq>,<dma>,<dma2>
See also Documentation/sound/oss/AD1816.
===== arch/i386/kernel/dmi_scan.c 1.57 vs edited =====
--- 1.57/arch/i386/kernel/dmi_scan.c Fri Apr 16 22:03:06 2004
+++ edited/arch/i386/kernel/dmi_scan.c Wed Apr 21 18:29:35 2004
@@ -540,6 +540,19 @@
#endif

/*
+ * early nForce2 reference BIOS shipped with a
+ * bogus ACPI IRQ0 -> pin2 interrupt override -- ignore it
+ */
+static __init int ignore_timer_override(struct dmi_blacklist *d)
+{
+ extern int acpi_skip_timer_override;
+ printk(KERN_NOTICE "%s detected: BIOS IRQ0 pin2 override"
+ " will be ignored\n", d->ident);
+
+ acpi_skip_timer_override = 1;
+ return 0;
+}
+/*
* Process the DMI blacklists
*/

@@ -944,6 +957,37 @@
MATCH(DMI_BOARD_VENDOR, "IBM"),
MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
NO_MATCH, NO_MATCH }},
+
+/*
+ * Systems with nForce2 BIOS timer override bug
+ * add Albatron KM18G Pro
+ * add DFI NFII 400-AL
+ * add Epox 8RGA+
+ * add Shuttle AN35N
+ */
+ { ignore_timer_override, "Abit NF7-S v2", {
+ MATCH(DMI_BOARD_VENDOR, "http://www.abit.com.tw/"),
+ MATCH(DMI_BOARD_NAME, "NF7-S/NF7,NF7-V (nVidia-nForce2)"),
+ MATCH(DMI_BIOS_VERSION, "6.00 PG"),
+ MATCH(DMI_BIOS_DATE, "03/24/2004") }},
+
+ { ignore_timer_override, "Asus A7N8X v2", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "A7N8X2.0"),
+ MATCH(DMI_BIOS_VERSION, "ASUS A7N8X2.0 Deluxe ACPI BIOS Rev 1007"),
+ MATCH(DMI_BIOS_DATE, "10/06/2003") }},
+
+ { ignore_timer_override, "Asus A7N8X-X", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "A7N8X-X"),
+ MATCH(DMI_BIOS_VERSION, "ASUS A7N8X-X ACPI BIOS Rev 1007"),
+ MATCH(DMI_BIOS_DATE, "10/07/2003") }},
+
+ { ignore_timer_override, "Shuttle SN41G2", {
+ MATCH(DMI_BOARD_VENDOR, "Shuttle Inc"),
+ MATCH(DMI_BOARD_NAME, "FN41"),
+ MATCH(DMI_BIOS_VERSION, "6.00 PG"),
+ MATCH(DMI_BIOS_DATE, "01/14/2004") }},
#endif // CONFIG_ACPI_BOOT

#ifdef CONFIG_ACPI_PCI
===== arch/i386/kernel/setup.c 1.115 vs edited =====
--- 1.115/arch/i386/kernel/setup.c Fri Apr 2 07:21:43 2004
+++ edited/arch/i386/kernel/setup.c Wed Apr 21 15:28:12 2004
@@ -614,6 +614,9 @@
else if (!memcmp(from, "acpi_sci=low", 12))
acpi_sci_flags.polarity = 3;

+ else if (!memcmp(from, "acpi_skip_timer_override", 24))
+ acpi_skip_timer_override = 1;
+
#ifdef CONFIG_X86_LOCAL_APIC
/* disable IO-APIC */
else if (!memcmp(from, "noapic", 6))
===== arch/i386/kernel/acpi/boot.c 1.58 vs edited =====
--- 1.58/arch/i386/kernel/acpi/boot.c Tue Apr 20 20:54:03 2004
+++ edited/arch/i386/kernel/acpi/boot.c Wed Apr 21 15:28:13 2004
@@ -62,6 +62,7 @@

acpi_interrupt_flags acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;

#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -327,6 +328,12 @@
acpi_sci_ioapic_setup(intsrc->global_irq,
intsrc->flags.polarity, intsrc->flags.trigger);
return 0;
+ }
+
+ if (acpi_skip_timer_override &&
+ intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
}

mp_override_legacy_irq (
===== include/asm-i386/acpi.h 1.18 vs edited =====
--- 1.18/include/asm-i386/acpi.h Tue Mar 30 17:05:19 2004
+++ edited/include/asm-i386/acpi.h Wed Apr 21 15:28:14 2004
@@ -118,6 +118,7 @@
#ifdef CONFIG_X86_IO_APIC
extern int skip_ioapic_setup;
extern int acpi_irq_to_vector(u32 irq); /* deprecated in favor of
acpi_gsi_to_irq */
+extern int acpi_skip_timer_override;

static inline void disable_ioapic_setup(void)
{


2004-09-29 23:21:46

by Alan

[permalink] [raw]
Subject: Re: nforce2 bugs?

On Mer, 2004-09-29 at 23:42, white phoenix wrote:
> [x86] fix lockups with APIC support on nForce2

Looks reasonable (anyone from Nvidia care to ack any of these)

> Add PCI quirk to disable Halt Disconnect and Stop Grant Disconnect
> (based on athcool program by Osamu Kayasono).

Is this always safe - if not why does the BIOS not do it.


APIC one I don't have enough background on

2004-09-30 00:32:21

by white phoenix

[permalink] [raw]
Subject: Re: nforce2 bugs?

It would be nice to have some of these in the mainstream kernel, if they
are legit bugs. I see one of those patches fixes the timer. The nforce2
timer isn't connected to pin0, so it falls back to XT-PIC unless I add
"acpi_skip_timer_override" to the kernel parameters.
2.6.8.1 oopses on me very rarely with something about irq not syncing,
which may be related to some of these nforce2 quirks. 2.6.9 is just a
mess; it always freezes on me.

2004-09-30 09:25:13

by Prakash K. Cheemplavam

[permalink] [raw]
Subject: Re: nforce2 bugs?

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Alan Cox schrieb:
| On Mer, 2004-09-29 at 23:42, white phoenix wrote:
|
|>[x86] fix lockups with APIC support on nForce2
|
|
| Looks reasonable (anyone from Nvidia care to ack any of these)

As far as I could see, none of the posted patches are needed, or rather
the correct one(s) is already included in the kernel. The older ones
were workarounds, not needed anymore, thus obsolete.

The only problem is the apic timer thing. It just gets activated if the
correct BIOS Version is found (see the dmi scan thingie). So I just pass
acpi_skip_timer_override to the kernel to be sure.

|>Add PCI quirk to disable Halt Disconnect and Stop Grant Disconnect
|>(based on athcool program by Osamu Kayasono).
|
|
| Is this always safe - if not why does the BIOS not do it.

It is safe but makes your CPU hotter. Thus the real fix just changes the
disconnect interval (or the like). Look into arch/i386/pci/fixup.c and
search for nforce2.

Prakash

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFBW9EjxU2n/+9+t5gRAhKPAKDHnBuJs9bN4ZeQwCa9r4hu3woTcgCfbWmB
4yz7q8RBHXeodlkrpwYUH8w=
=Ipiu
-----END PGP SIGNATURE-----

2004-09-30 14:24:46

by Maciej W. Rozycki

[permalink] [raw]
Subject: Re: nforce2 bugs?

On Thu, 30 Sep 2004, Prakash K. Cheemplavam wrote:

> The only problem is the apic timer thing. It just gets activated if the
> correct BIOS Version is found (see the dmi scan thingie). So I just pass
> acpi_skip_timer_override to the kernel to be sure.

There appears to be another timer problem, too -- at least for some
boards the system timer (the 8254 PIT) has a noisy output. When routed to
an I/O APIC input it makes the system time go fast enough the NTP daemon
isn't able to compensate (it's a few minutes per day fast). The problem
goes away when routing it to the 8259A PIC, presumably because the 8259A
inputs are not "sticky" in the edge-triggered mode -- at the worst you
only get spurious interrupts reported in /proc/interrupts in the "ERR"
counter.

An nVidia feedback would be appreciated. Allen?

Maciej

2004-09-30 15:43:48

by Andy Currid

[permalink] [raw]
Subject: RE: nforce2 bugs?


I'm taking a look at the patches discussed in other recent emails on the
list, but I'm curious about the timer issue that Maciej notes here. In
systems running in IOAPIC mode where this problem has been observed, is
ACPI enabled?

I strongly suspect that it is. Some BIOSes on nForce systems contain an
incorrect INT override for the timer interrupt in their ACPI tables,
indicating that in IOAPIC mode the timer interrupts on IRQ2 rather than
IRQ0. The kernel honors the override, then notices the timer interrupt
isn't working and subsequently rescues the situation by configuring the
timer in ExtInt mode. That recovers the timer interrupt but I suspect
that configuration may be responsible for the "noisy" behavior (it's a
faulty configuration).

The workaround for the faulty override in IOAPIC/ACPI mode is to specify
acpi_skip_timer_override as a boot parameter.

If anyone has a system that is exhibiting the noisy behavior, I'd be
interested to hear if this workaround addresses the problem. I haven't
seen this specific behavior on my own nForce2 systems.

Regards

Andy
--
Andy Currid, NVIDIA Corporation
[email protected] 408 566 6743


> -----Original Message-----
> From: [email protected]
> [mailto:[email protected]] On Behalf Of
> Maciej W. Rozycki
> Sent: Thursday, September 30, 2004 07:24
> To: Prakash K. Cheemplavam; Allen Martin
> Cc: Alan Cox; white phoenix; Linux Kernel Mailing List
> Subject: Re: nforce2 bugs?
>
>
> On Thu, 30 Sep 2004, Prakash K. Cheemplavam wrote:
>
> > The only problem is the apic timer thing. It just gets
> activated if the
> > correct BIOS Version is found (see the dmi scan thingie).
> So I just pass
> > acpi_skip_timer_override to the kernel to be sure.
>
> There appears to be another timer problem, too -- at least for some
> boards the system timer (the 8254 PIT) has a noisy output.
> When routed to
> an I/O APIC input it makes the system time go fast enough the
> NTP daemon
> isn't able to compensate (it's a few minutes per day fast).
> The problem
> goes away when routing it to the 8259A PIC, presumably
> because the 8259A
> inputs are not "sticky" in the edge-triggered mode -- at the worst you
> only get spurious interrupts reported in /proc/interrupts in the "ERR"
> counter.
>
> An nVidia feedback would be appreciated. Allen?
>
> Maciej
> -
> To unsubscribe from this list: send the line "unsubscribe
> linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2004-09-30 16:59:59

by Maciej W. Rozycki

[permalink] [raw]
Subject: RE: nforce2 bugs?

On Thu, 30 Sep 2004, Andy Currid wrote:

> I'm taking a look at the patches discussed in other recent emails on the
> list, but I'm curious about the timer issue that Maciej notes here. In
> systems running in IOAPIC mode where this problem has been observed, is
> ACPI enabled?

One I can test a bit has indeed ACPI enabled.

> I strongly suspect that it is. Some BIOSes on nForce systems contain an
> incorrect INT override for the timer interrupt in their ACPI tables,
> indicating that in IOAPIC mode the timer interrupts on IRQ2 rather than
> IRQ0. The kernel honors the override, then notices the timer interrupt
> isn't working and subsequently rescues the situation by configuring the
> timer in ExtInt mode. That recovers the timer interrupt but I suspect
> that configuration may be responsible for the "noisy" behavior (it's a
> faulty configuration).

The firmware (BIOS) reports I/O APIC interrupts correctly on this box --
there's no override for IRQ0. Timer interrupts work correctly in the
ExtInt mode. They only fail in the I/O APIC mode.

Older reports from the list show exactly the same problem, e.g.
"http://www.uwsg.iu.edu/hypermail/linux/kernel/0404.1/0739.html", which is
probably one of the earliest references to the clock skew problem with I/O
APIC routing. As I believe both the 8254 and 8259A and the I/O APIC are
internal to the chipset, I doubt that can be a problem specific to board
design; it may be a firmware fault, though, such as an initialization bug.
As Ross used to maintain temporary workarounds for nforce2 problems, he
may be able to comment on what reports he received. Ross?

Maciej

2004-10-01 03:40:46

by Ross Dickson

[permalink] [raw]
Subject: Re: nforce2 bugs?

On Friday 01 October 2004 02:59, Maciej W. Rozycki wrote:
> On Thu, 30 Sep 2004, Andy Currid wrote:
>
> > I'm taking a look at the patches discussed in other recent emails on the
> > list, but I'm curious about the timer issue that Maciej notes here. In
> > systems running in IOAPIC mode where this problem has been observed, is
> > ACPI enabled?
>
> One I can test a bit has indeed ACPI enabled.
>
> > I strongly suspect that it is. Some BIOSes on nForce systems contain an
> > incorrect INT override for the timer interrupt in their ACPI tables,
> > indicating that in IOAPIC mode the timer interrupts on IRQ2 rather than
> > IRQ0. The kernel honors the override, then notices the timer interrupt
> > isn't working and subsequently rescues the situation by configuring the
> > timer in ExtInt mode. That recovers the timer interrupt but I suspect
> > that configuration may be responsible for the "noisy" behavior (it's a
> > faulty configuration).

It is indeed the other way around, the clock skew occurs on some Mobos
with timer interrupts routed to the IntIn0 and to my knowledge is not evident
in the ExtInt routing mode.
The downside of using the ExtInt routing mode is that you can no longer use
nmi_watchdog=1 because it only works on nforce2 boards with IntIn0 timer routing.

>
> The firmware (BIOS) reports I/O APIC interrupts correctly on this box --
> there's no override for IRQ0. Timer interrupts work correctly in the
> ExtInt mode. They only fail in the I/O APIC mode.
>
> Older reports from the list show exactly the same problem, e.g.
> "http://www.uwsg.iu.edu/hypermail/linux/kernel/0404.1/0739.html", which is
> probably one of the earliest references to the clock skew problem with I/O
> APIC routing. As I believe both the 8254 and 8259A and the I/O APIC are
> internal to the chipset, I doubt that can be a problem specific to board
> design; it may be a firmware fault, though, such as an initialization bug.
> As Ross used to maintain temporary workarounds for nforce2 problems, he
> may be able to comment on what reports he received. Ross?

An earlier Thread on the Topic of the time skew with I/O Apic routing:
http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-01/3129.html

A couple of Skewing Mobos Involved:
Abit NF7-S V2.0 motherboard.
A7N8X Deluxe mobo/Athlon

Maybe they are using the same revision of non GPU nforce2 silicon?
I personally never had any clock skew but I have only used Mobos with graphics
onboard, several Albatron KM18G and an Epox 8RGA+

-Ross



>
> Maciej
>
>
>

2004-10-01 11:17:40

by Jesse Stockall

[permalink] [raw]
Subject: Re: nforce2 bugs?


> An earlier Thread on the Topic of the time skew with I/O Apic routing:
> http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-01/3129.html
>
> A couple of Skewing Mobos Involved:
> Abit NF7-S V2.0 motherboard.
> A7N8X Deluxe mobo/Athlon
>

I noticed that my MSI K7N2 Delta-ILSR was off by 15 minutes yesterday.

Jesse

--
Jesse Stockall <[email protected]>

2004-10-02 02:13:05

by Jesse Allen

[permalink] [raw]
Subject: Re: nforce2 bugs?

Alan Cox wrote:
> On Mer, 2004-09-29 at 23:42, white phoenix wrote:
> > [x86] fix lockups with APIC support on nForce2
>
> Looks reasonable (anyone from Nvidia care to ack any of these)
>
> > Add PCI quirk to disable Halt Disconnect and Stop Grant Disconnect
> > (based on athcool program by Osamu Kayasono).
>
> Is this always safe - if not why does the BIOS not do it.

An older Nvidia reference BIOS has a bug. Nvidia provided this information on
it:

http://marc.theaimsgroup.com/?l=linux-kernel&m=108362246902784&w=2

A more appropriate patch was merged. It was similar to this:

http://marc.theaimsgroup.com/?l=linux-kernel&m=108362608309197&w=2

So disconnect lockups should not happen anymore. He may be hitting a rarer
nforce2 bug.

For me, an issue with noise on the timer remains.

Ross Dickson wrote:
> A couple of Skewing Mobos Involved:
> Abit NF7-S V2.0 motherboard.
> A7N8X Deluxe mobo/Athlon

Shuttle AN35N Ultra V1.1

I have not tried the newest BIOS release. I have the one that fixes
disconnect timings.

> Maybe they are using the same revision of non GPU nforce2 silicon?
> I personally never had any clock skew but I have only used Mobos with
> graphics onboard, several Albatron KM18G and an Epox 8RGA+

Well, my motherboard has no integrated GPU as well.

Andy Currid wrote:
> In systems running in IOAPIC mode where this problem has been observed, is
> ACPI enabled?

I have ACPI enabled. I don't explicitly set acpi_skip_timer_override.

Jesse

bash-3.00$ cat /proc/interrupts
CPU0
0: 16741454 IO-APIC-edge timer
1: 9300 IO-APIC-edge i8042
7: 0 IO-APIC-edge parport0
8: 324750 IO-APIC-edge rtc
9: 0 IO-APIC-level acpi
14: 12284 IO-APIC-edge ide0
15: 11 IO-APIC-edge ide1
16: 92 IO-APIC-level aic7xxx
17: 6156 IO-APIC-level CMI8738
19: 290981 IO-APIC-level radeon@pci:0000:03:00.0
20: 2 IO-APIC-level ehci_hcd
21: 0 IO-APIC-level NVidia nForce2, ohci_hcd
22: 1925390 IO-APIC-level eth0, ohci_hcd
NMI: 0
LOC: 16698953
ERR: 0
MIS: 0

shortened dmesg follows:

DMI 2.2 present.
Shuttle AN35N detected: BIOS IRQ0 pin2 override will be ignored
ACPI: RSDP (v000 Nvidia ) @ 0x000f6f70
ACPI: RSDT (v001 Nvidia AWRDACPI 0x42302e31 AWRD 0x00000000) @ 0x0fff3000
ACPI: FADT (v001 Nvidia AWRDACPI 0x42302e31 AWRD 0x00000000) @ 0x0fff3040
ACPI: MADT (v001 Nvidia AWRDACPI 0x42302e31 AWRD 0x00000000) @ 0x0fff7880
ACPI: DSDT (v001 NVIDIA AWRDACPI 0x00001000 MSFT 0x0100000e) @ 0x00000000
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
Processor #0 6:10 APIC version 16
ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
ACPI: IOAPIC (id[0x02] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 2, version 17, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: BIOS IRQ0 pin2 override ignored.
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 14 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 15 high edge)
ACPI: IRQ9 used by override.
ACPI: IRQ14 used by override.
ACPI: IRQ15 used by override.
Enabling APIC mode: Flat. Using 1 I/O APICs
Using ACPI (MADT) for SMP configuration information
Built 1 zonelists
Kernel command line: auto BOOT_IMAGE=Linux ro root=301

(cut)

ENABLING IO-APIC IRQs
..TIMER: vector=0x31 pin1=0 pin2=-1