Replace use of outb to "unused" diagnostic port 0x80 for time delay
with udelay based time delay on x86_64 architecture machines. Fix for
bugs 9511 and 6307 in bugzilla, plus bugs reported in
bugzilla.redhat.com.
Derived from suggestion (that didn't compile) by Pavel Machek, and
tested, also based on measurements of typical timings of out's
collated by Rene Herman from many in the community.
This patch fixes a number of bugs known to cause problems on HP
Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
when hwclock is used to show or set the time. Also, it potentially
improves bus utilization on SMP machines, by using a waiting process
that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
i386 family fixes (completely parallel) were not included, considering
that such machines might involve more risk of problems on legacy machines.
Signed-off-by: David P. Reed <[email protected]>
Index: linux-2.6/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-2.6/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-2.6/include/asm/io_64.h
===================================================================
--- linux-2.6.orig/include/asm/io_64.h
+++ linux-2.6/include/asm/io_64.h
@@ -1,6 +1,7 @@
#ifndef _ASM_IO_H
#define _ASM_IO_H
+#include <linux/delay.h>
/*
* This file contains the definitions for the x86 IO instructions
@@ -15,19 +16,7 @@
* mistake somewhere.
*/
-/*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
- *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- * Linus
- */
-
- /*
- * Bit simplified and optimized by Jan Hubicka
+/* Bit simplified and optimized by Jan Hubicka
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
*
* isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
@@ -35,36 +24,36 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
+/* the following delays are really conservative, at least for modern
machines */
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
__SLOW_DOWN_IO __SLOW_DOWN_IO
+#define _IOPORT_PAUSE_DELAY 10
#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#define _IOPORT_PAUSE_DELAY 2
#endif
/*
* Talk about misusing macros..
*/
-#define __OUT1(s,x) \
+#define __OUT1(s, x) \
static inline void out##s(unsigned x value, unsigned short port) {
-#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
-
-#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value),
"Nd" (port));} \
+#define __OUT2(s, s1, s2) \
+ __asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value),
"Nd" \
+(port));
+
+#define __OUT(s, s1, x) \
+__OUT1(s, x) __OUT2(s, s1, "w") } \
+ __OUT1(s##_p, x) __OUT2(s, s1, "w") udelay(_IOPORT_PAUSE_DELAY); } \
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
-#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+#define __IN2(s, s1, s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd"
(port));
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd"
(port) ,##i ); return _v; } \
+#define __IN(s, s1) \
+__IN1(s) __IN2(s, s1, "w") return _v; } \
+ __IN1(s##_p) __IN2(s, s1, "w") udelay(_IOPORT_PAUSE_DELAY); return _v; } \
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned
long count) \
On Dec 13, 2007 6:59 PM, David P. Reed <[email protected]> wrote:
> Replace use of outb to "unused" diagnostic port 0x80 for time delay
> with udelay based time delay on x86_64 architecture machines. Fix for
> bugs 9511 and 6307 in bugzilla, plus bugs reported in
> bugzilla.redhat.com.
>
> Derived from suggestion (that didn't compile) by Pavel Machek, and
> tested, also based on measurements of typical timings of out's
> collated by Rene Herman from many in the community.
>
> This patch fixes a number of bugs known to cause problems on HP
> Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
> when hwclock is used to show or set the time. Also, it potentially
> improves bus utilization on SMP machines, by using a waiting process
> that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
>
> i386 family fixes (completely parallel) were not included, considering
> that such machines might involve more risk of problems on legacy machines.
>
> Signed-off-by: David P. Reed <[email protected]>
>
> Index: linux-2.6/arch/x86/boot/compressed/misc_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/boot/compressed/misc_64.c
> +++ linux-2.6/arch/x86/boot/compressed/misc_64.c
> @@ -269,10 +269,10 @@ static void putstr(const char *s)
> RM_SCREEN_INFO.orig_y = y;
>
> pos = (x + cols * y) * 2; /* Update cursor position */
> - outb_p(14, vidport);
> - outb_p(0xff & (pos >> 9), vidport+1);
> - outb_p(15, vidport);
> - outb_p(0xff & (pos >> 1), vidport+1);
> + outb(14, vidport);
> + outb(0xff & (pos >> 9), vidport+1);
> + outb(15, vidport);
> + outb(0xff & (pos >> 1), vidport+1);
> }
>
> static void* memset(void* s, int c, unsigned n)
> Index: linux-2.6/include/asm/io_64.h
> ===================================================================
> --- linux-2.6.orig/include/asm/io_64.h
> +++ linux-2.6/include/asm/io_64.h
include/asm-x86/io_64.h ?
YH
From: Pavel Machek <[email protected]>
32-bit part of the port 0x80 delay replacement.
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..9abc215 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
+#include <linux/delay.h>
/*
* This file contains the definitions for the x86 IO instructions
@@ -17,17 +18,6 @@
* mistake somewhere.
*/
-/*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
- *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- * Linus
- */
-
/*
* Bit simplified and optimized by Jan Hubicka
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
@@ -252,7 +242,7 @@ static inline void flush_write_buffers(void)
static inline void native_io_delay(void)
{
- asm volatile("outb %%al,$0x80" : : : "memory");
+ udelay(2);
}
#if defined(CONFIG_PARAVIRT)
"David P. Reed" <[email protected]> writes:
>
> i386 family fixes (completely parallel) were not included, considering
> that such machines might involve more risk of problems on legacy machines.
They're needed because lots of people for some reason still boot 32bit kernels
on 64bit machines.
> +#define __OUT(s, s1, x) \
> +__OUT1(s, x) __OUT2(s, s1, "w") } \
> + __OUT1(s##_p, x) __OUT2(s, s1, "w") udelay(_IOPORT_PAUSE_DELAY); } \
With the additional call this should be completely out of line now to save
code size. Similar for the in variant.
-Andi
Andi Kleen wrote:
> "
> With the additional call this should be completely out of line now to save
> code size. Similar for the in variant.
>
>
>
Sure. Want me to make a new patch with the _p routines out-of-line?
* David P. Reed <[email protected]> wrote:
> Replace use of outb to "unused" diagnostic port 0x80 for time delay
> with udelay based time delay on x86_64 architecture machines. Fix for
> bugs 9511 and 6307 in bugzilla, plus bugs reported in
> bugzilla.redhat.com.
>
> Derived from suggestion (that didn't compile) by Pavel Machek, and
> tested, also based on measurements of typical timings of out's
> collated by Rene Herman from many in the community.
>
> This patch fixes a number of bugs known to cause problems on HP
> Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
> when hwclock is used to show or set the time. Also, it potentially
> improves bus utilization on SMP machines, by using a waiting process
> that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
>
> i386 family fixes (completely parallel) were not included, considering
> that such machines might involve more risk of problems on legacy
> machines.
wow, cool fix! (I remember that there were other systems as well that
are affected by port 0x80 muckery - i thought we had removed port 0x80
accesses long ago.)
how about the simpler fix below, as a first-level approach? We can then
remove the _p in/out sequences after this.
this is also something for v2.6.24 merging.
Ingo
----------------------------->
Subject: x86: fix in/out_p delays
From: Ingo Molnar <[email protected]>
Debugged by David P. Reed <[email protected]>.
Do not use port 0x80, it can cause crashes, see:
http://bugzilla.kernel.org/show_bug.cgi?id=6307
http://bugzilla.kernel.org/show_bug.cgi?id=9511
instead of just removing _p postfixes en masse, let's just first
remove the 0x80 port usage, then remove any unnecessary _p io ops
gradually. It's more debuggable this way.
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/misc_32.c | 8 ++++----
arch/x86/boot/compressed/misc_64.c | 8 ++++----
arch/x86/kernel/quirks.c | 9 +++++++++
include/asm-x86/io_32.h | 5 +----
include/asm-x86/io_64.h | 5 +----
5 files changed, 19 insertions(+), 16 deletions(-)
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -275,10 +275,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/quirks.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/quirks.c
+++ linux-x86.q/arch/x86/kernel/quirks.c
@@ -6,6 +6,15 @@
#include <asm/hpet.h>
+/*
+ * Some legacy devices need delays for IN/OUT sequences. Most are
+ * probably not needed but it's the safest to just do this short delay:
+ */
+void native_io_delay(void)
+{
+ udelay(1);
+}
+
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,10 +35,7 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
* Ingo Molnar <[email protected]> wrote:
> wow, cool fix! (I remember that there were other systems as well that
> are affected by port 0x80 muckery - i thought we had removed port 0x80
> accesses long ago.)
>
> how about the simpler fix below, as a first-level approach? We can
> then remove the _p in/out sequences after this.
>
> this is also something for v2.6.24 merging.
updated patch attached. (from the MakeItBuild'n'Stuff dept)
Ingo
-------------->
Subject: x86: fix in/out_p delays
From: Ingo Molnar <[email protected]>
Debugged by David P. Reed <[email protected]>.
Do not use port 0x80, it can cause crashes, see:
http://bugzilla.kernel.org/show_bug.cgi?id=6307
http://bugzilla.kernel.org/show_bug.cgi?id=9511
instead of just removing _p postfixes en masse, let's just first
remove the 0x80 port usage, then remove any unnecessary _p io ops
gradually. It's more debuggable this way.
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/misc_32.c | 8 ++++----
arch/x86/boot/compressed/misc_64.c | 8 ++++----
arch/x86/kernel/quirks.c | 10 ++++++++++
include/asm-x86/io_32.h | 5 +----
include/asm-x86/io_64.h | 5 +----
5 files changed, 20 insertions(+), 16 deletions(-)
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -275,10 +275,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/quirks.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/quirks.c
+++ linux-x86.q/arch/x86/kernel/quirks.c
@@ -3,9 +3,19 @@
*/
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
+/*
+ * Some legacy devices need delays for IN/OUT sequences. Most are
+ * probably not needed but it's the safest to just do this short delay:
+ */
+void native_io_delay(void)
+{
+ udelay(1);
+}
+
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,10 +35,7 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
On 14-12-07 14:15, Ingo Molnar wrote:
> wow, cool fix! (I remember that there were other systems as well that
> are affected by port 0x80 muckery - i thought we had removed port 0x80
> accesses long ago.)
>
> how about the simpler fix below, as a first-level approach? We can then
> remove the _p in/out sequences after this.
Your version does the same thing that the version from Pavel/David does for
32-bit at least.
> +/*
> + * Some legacy devices need delays for IN/OUT sequences. Most are
> + * probably not needed but it's the safest to just do this short delay:
> + */
> +void native_io_delay(void)
> +{
> + udelay(1);
> +}
Also note the thread(s) on LKML where 2 us was decided to be a nicely
conservative value:
http://lkml.org/lkml/2007/12/12/309
Also see: http://lkml.org/lkml/2007/12/14/72
And also: http://lkml.org/lkml/2007/12/12/221
As such, please wait a bit for a fuller resolution. We're still discussing this.
Rene.
* Ingo Molnar <[email protected]> wrote:
> updated patch attached. (from the MakeItBuild'n'Stuff dept)
the one below is against current upstream. (previous ones were against
x86.git)
Ingo
------------------->
Subject: x86: fix in/out_p delays
From: Ingo Molnar <[email protected]>
Debugged by David P. Reed <[email protected]>.
Do not use port 0x80, it can cause crashes, see:
http://bugzilla.kernel.org/show_bug.cgi?id=6307
http://bugzilla.kernel.org/show_bug.cgi?id=9511
instead of just removing _p postfixes en masse, let's just first
remove the 0x80 port usage, then remove any unnecessary _p io ops
gradually. It's more debuggable this way.
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/misc_32.c | 8 ++++----
arch/x86/boot/compressed/misc_64.c | 8 ++++----
arch/x86/kernel/quirks.c | 10 ++++++++++
include/asm-x86/io_32.h | 5 +----
include/asm-x86/io_64.h | 14 +++++---------
5 files changed, 24 insertions(+), 21 deletions(-)
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/quirks.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/quirks.c
+++ linux-x86.q/arch/x86/kernel/quirks.c
@@ -3,9 +3,19 @@
*/
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
+/*
+ * Some legacy devices need delays for IN/OUT sequences. Most are
+ * probably not needed but it's the safest to just do this short delay:
+ */
+void native_io_delay(void)
+{
+ udelay(1);
+}
+
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,7 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
+extern void native_io_delay(void);
/*
* Talk about misusing macros..
@@ -54,7 +48,8 @@ __asm__ __volatile__ ("out" #s " %" s1 "
#define __OUT(s,s1,x) \
__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); \
+native_io_delay(); } \
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
@@ -64,7 +59,8 @@ __asm__ __volatile__ ("in" #s " %" s2 "1
#define __IN(s,s1,i...) \
__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; \
+native_io_delay(); } \
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
* Rene Herman <[email protected]> wrote:
> On 14-12-07 14:15, Ingo Molnar wrote:
>
>> wow, cool fix! (I remember that there were other systems as well that are
>> affected by port 0x80 muckery - i thought we had removed port 0x80
>> accesses long ago.)
>>
>> how about the simpler fix below, as a first-level approach? We can
>> then remove the _p in/out sequences after this.
>
> Your version does the same thing that the version from Pavel/David
> does for 32-bit at least.
well, if you carefully look at the code it's not the "same" but a
similar but cleaner thing - it moves this quirk out of a common include
file. I take back the "simpler" characterisation - my patch indeed ended
up being almost the same as David's.
>> +/*
>> + * Some legacy devices need delays for IN/OUT sequences. Most are
>> + * probably not needed but it's the safest to just do this short delay:
>> + */
>> +void native_io_delay(void)
>> +{
>> + udelay(1);
>> +}
>
> Also note the thread(s) on LKML where 2 us was decided to be a nicely
> conservative value:
yep, i have updated the delay to 2 usecs. The latest patch is below, as
queued up in x86.git. (not yet queued up for .24 - it's pending testing
and more feedback, etc.)
Ingo
----------------->
Subject: x86: fix in_p/out_p crashes
From: David P. Reed <[email protected]>
Do not use port 0x80, it can cause crashes, see:
http://bugzilla.kernel.org/show_bug.cgi?id=6307
http://bugzilla.kernel.org/show_bug.cgi?id=9511
Replace use of outb to "unused" diagnostic port 0x80 for time delay
with udelay based time delay on x86_64 architecture machines. Fix for
bugs 9511 and 6307 in bugzilla, plus bugs reported in
bugzilla.redhat.com.
Derived from suggestion (that didn't compile) by Pavel Machek, and
tested, also based on measurements of typical timings of out's
collated by Rene Herman from many in the community.
This patch fixes a number of bugs known to cause problems on HP
Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
when hwclock is used to show or set the time. Also, it potentially
improves bus utilization on SMP machines, by using a waiting process
that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
[ [email protected]: minor restructuring, 32-bit support. ]
Signed-off-by: David P. Reed <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/misc_32.c | 8 ++++----
arch/x86/boot/compressed/misc_64.c | 8 ++++----
arch/x86/kernel/quirks.c | 10 ++++++++++
include/asm-x86/io_32.h | 5 +----
include/asm-x86/io_64.h | 14 +++++---------
5 files changed, 24 insertions(+), 21 deletions(-)
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/quirks.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/quirks.c
+++ linux-x86.q/arch/x86/kernel/quirks.c
@@ -3,9 +3,19 @@
*/
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
+/*
+ * Some legacy devices need delays for IN/OUT sequences. Most are
+ * probably not needed but it's the safest to just do this short delay:
+ */
+void native_io_delay(void)
+{
+ udelay(2);
+}
+
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,7 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
+extern void native_io_delay(void);
/*
* Talk about misusing macros..
@@ -54,7 +48,8 @@ __asm__ __volatile__ ("out" #s " %" s1 "
#define __OUT(s,s1,x) \
__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); \
+native_io_delay(); } \
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
@@ -64,7 +59,8 @@ __asm__ __volatile__ ("in" #s " %" s2 "1
#define __IN(s,s1,i...) \
__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; \
+native_io_delay(); } \
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
On 14-12-07 15:03, Ingo Molnar wrote:
> yep, i have updated the delay to 2 usecs. The latest patch is below, as
> queued up in x86.git. (not yet queued up for .24 - it's pending testing
> and more feedback, etc.)
Yes, I'd like feedback on the initial value thing:
http://lkml.org/lkml/2007/12/14/72
and Alan's comments here:
http://lkml.org/lkml/2007/12/12/221
And as to testing -- good luck finding a machine that cares at all ;-)
Rene.
* Rene Herman <[email protected]> wrote:
> And as to testing -- good luck finding a machine that cares at all ;-)
actually, there's a whole lot more testing angle to a change like this
than ancient boxes.
Ingo
* Rene Herman <[email protected]> wrote:
> --- a/init/main.c
> +++ b/init/main.c
> @@ -229,10 +229,9 @@ static int __init obsolete_checksetup(char *line)
> }
>
> /*
> - * This should be approx 2 Bo*oMips to start (note initial shift), and will
> - * still work even if initially too large, it will just take slightly longer
> + * Initial value roughly corresponds to a 1 GHz CPU
> */
> -unsigned long loops_per_jiffy = (1<<12);
> +unsigned long loops_per_jiffy = 1000000000 / HZ;
>
> EXPORT_SYMBOL(loops_per_jiffy);
this is a factor of ~2400 increase - this will take an eternity to boot
on any older CPU.
Ingo
On 14-12-07 15:23, Ingo Molnar wrote:
> * Rene Herman <[email protected]> wrote:
>
>> --- a/init/main.c
>> +++ b/init/main.c
>> @@ -229,10 +229,9 @@ static int __init obsolete_checksetup(char *line)
>> }
>>
>> /*
>> - * This should be approx 2 Bo*oMips to start (note initial shift), and will
>> - * still work even if initially too large, it will just take slightly longer
>> + * Initial value roughly corresponds to a 1 GHz CPU
>> */
>> -unsigned long loops_per_jiffy = (1<<12);
>> +unsigned long loops_per_jiffy = 1000000000 / HZ;
>>
>> EXPORT_SYMBOL(loops_per_jiffy);
>
> this is a factor of ~2400 increase - this will take an eternity to boot
> on any older CPU.
Only any outb_p's used before loops_per_jiffy is calibrated are affected.
This pre-calibration thing is what's historically held this change back (it's
been discussed dozens of times before). At 4096, not any machine is going to
have an appreciable delay before calibration when switching from the outb to
0x80.
Rene.
> > updated patch attached. (from the MakeItBuild'n'Stuff dept)
>
> the one below is against current upstream. (previous ones were against
> x86.git)
the last version is the one below. Pending further discussion and
testing. And David, i nominate your fix as the coolest Linux kernel fix
of 2007 :-)
Ingo
-------------------------------->
Subject: x86: fix in_p/out_p crashes
From: David P. Reed <[email protected]>
Do not use port 0x80, it can cause crashes, see:
http://bugzilla.kernel.org/show_bug.cgi?id=6307
http://bugzilla.kernel.org/show_bug.cgi?id=9511
Replace use of outb to "unused" diagnostic port 0x80 for time delay
with udelay based time delay on x86_64 architecture machines. Fix for
bugs 9511 and 6307 in bugzilla, plus bugs reported in
bugzilla.redhat.com.
Derived from suggestion (that didn't compile) by Pavel Machek, and
tested, also based on measurements of typical timings of out's
collated by Rene Herman from many in the community.
This patch fixes a number of bugs known to cause problems on HP
Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
when hwclock is used to show or set the time. Also, it potentially
improves bus utilization on SMP machines, by using a waiting process
that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
[ [email protected]: minor restructuring, 32-bit support. ]
Signed-off-by: David P. Reed <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/boot/compressed/misc_32.c | 8 ++++----
arch/x86/boot/compressed/misc_64.c | 8 ++++----
arch/x86/kernel/quirks.c | 10 ++++++++++
include/asm-x86/io_32.h | 5 +----
include/asm-x86/io_64.h | 14 +++++---------
5 files changed, 24 insertions(+), 21 deletions(-)
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/quirks.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/quirks.c
+++ linux-x86.q/arch/x86/kernel/quirks.c
@@ -3,9 +3,19 @@
*/
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
+/*
+ * Some legacy devices need delays for IN/OUT sequences. Most are
+ * probably not needed but it's the safest to just do this short delay:
+ */
+void native_io_delay(void)
+{
+ udelay(2);
+}
+
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,7 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
+extern void native_io_delay(void);
/*
* Talk about misusing macros..
@@ -54,7 +48,8 @@ __asm__ __volatile__ ("out" #s " %" s1 "
#define __OUT(s,s1,x) \
__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); \
+native_io_delay(); } \
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
@@ -64,7 +59,8 @@ __asm__ __volatile__ ("in" #s " %" s2 "1
#define __IN(s,s1,i...) \
__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; \
+native_io_delay(); } \
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
* Rene Herman <[email protected]> wrote:
>>> /*
>>> - * This should be approx 2 Bo*oMips to start (note initial shift), and will
>>> - * still work even if initially too large, it will just take slightly longer
>>> + * Initial value roughly corresponds to a 1 GHz CPU
>>> */
>>> -unsigned long loops_per_jiffy = (1<<12);
>>> +unsigned long loops_per_jiffy = 1000000000 / HZ;
>>>
>>> EXPORT_SYMBOL(loops_per_jiffy);
>>
>> this is a factor of ~2400 increase - this will take an eternity to boot on
>> any older CPU.
>
> Only any outb_p's used before loops_per_jiffy is calibrated are
> affected.
yes - but there are a couple of early udelays, which would thus be
affected.
> This pre-calibration thing is what's historically held this change back
> (it's been discussed dozens of times before). At 4096, not any machine
> is going to have an appreciable delay before calibration when
> switching from the outb to 0x80.
i dont think this should matter: old systems that truly _need_ the ISA
delay will be slow enough to not trip up. (nor are they really affected
by these early delays - the delays were more for crappy ISA devices that
get initialized later down, when the delay loop is already calibrated)
modern systems learned to depend on the PCI write posting side-effects
of port 0x80 activities - those wont be helped by this initialization
change either. That is a far more serious concern.
Ingo
On 14-12-07 15:46, Ingo Molnar wrote:
> * Rene Herman <[email protected]> wrote:
>
>>>> /*
>>>> - * This should be approx 2 Bo*oMips to start (note initial shift), and will
>>>> - * still work even if initially too large, it will just take slightly longer
>>>> + * Initial value roughly corresponds to a 1 GHz CPU
>>>> */
>>>> -unsigned long loops_per_jiffy = (1<<12);
>>>> +unsigned long loops_per_jiffy = 1000000000 / HZ;
>>>>
>>>> EXPORT_SYMBOL(loops_per_jiffy);
>>> this is a factor of ~2400 increase - this will take an eternity to boot on
>>> any older CPU.
>> Only any outb_p's used before loops_per_jiffy is calibrated are
>> affected.
>
> yes - but there are a couple of early udelays, which would thus be
> affected.
True. At the moment though they're just always not delaying anywhere close to
the intended amount (on anything with more than 2 bogomips). Pre-calibration
all this stuff is just broken it seems.
>> This pre-calibration thing is what's historically held this change back
>> (it's been discussed dozens of times before). At 4096, not any machine
>> is going to have an appreciable delay before calibration when
>> switching from the outb to 0x80.
>
> i dont think this should matter: old systems that truly _need_ the ISA
> delay will be slow enough to not trip up. (nor are they really affected
> by these early delays - the delays were more for crappy ISA devices that
> get initialized later down, when the delay loop is already calibrated)
8253 (DMAC) and 8254 (PIT) have been reported in earlier versions of the
thread. By Alan, I believe.
Rene.
David P. Reed wrote:
> Replace use of outb to "unused" diagnostic port 0x80 for time delay
> with udelay based time delay on x86_64 architecture machines. Fix for
> bugs 9511 and 6307 in bugzilla, plus bugs reported in
> bugzilla.redhat.com.
>
> Derived from suggestion (that didn't compile) by Pavel Machek, and
> tested, also based on measurements of typical timings of out's
> collated by Rene Herman from many in the community.
>
> This patch fixes a number of bugs known to cause problems on HP
> Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
> when hwclock is used to show or set the time. Also, it potentially
> improves bus utilization on SMP machines, by using a waiting process
> that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
>
kvm will forward a virtual machine's writes to port 0x80 to the real
port. The reason is that the write is much faster than exiting and
emulating it; the difference is measurable when compiling kernels.
Now if the cause is simply writing to port 0x80, then we must stop doing
that. But if the reason is the back-to-back writes, then we can keep
it, since the other writes will be trapped by kvm and emulated. Do you
know which is the case?
--
Any sufficiently difficult bug is indistinguishable from a feature.
Ingo Molnar wrote:
>
> wow, cool fix! (I remember that there were other systems as well that
> are affected by port 0x80 muckery - i thought we had removed port 0x80
> accesses long ago.)
>
> how about the simpler fix below, as a first-level approach? We can then
> remove the _p in/out sequences after this.
>
I believe this will suffer from the issue that was raised: this will use
udelay() long before loop calibration (and no, we can't just "be
conservative" since there is no "conservative" value we can use.)
Worse, I suspect that at least the PIT, which may need to be used for
udelay calibration, is one of the devices that may be affected. I have
seen the Verilog for a contemporary chipset, and it can only access the
PIT once per microsecond -- this actually has to do with the definition
of the PIT; some of the PIT operations are ill-defined if allowed at a
higher frequency than the PIT clock.
-hpa
On 14-12-07 19:02, H. Peter Anvin wrote:
> I believe this will suffer from the issue that was raised: this will use
> udelay() long before loop calibration (and no, we can't just "be
> conservative" since there is no "conservative" value we can use.)
>
> Worse, I suspect that at least the PIT, which may need to be used for
> udelay calibration, is one of the devices that may be affected. I have
> seen the Verilog for a contemporary chipset, and it can only access the
> PIT once per microsecond -- this actually has to do with the definition
> of the PIT; some of the PIT operations are ill-defined if allowed at a
> higher frequency than the PIT clock.
Was reported before indeed:
http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-09/5764.html
Rene.
> > i dont think this should matter: old systems that truly _need_ the ISA
> > delay will be slow enough to not trip up. (nor are they really affected
> > by these early delays - the delays were more for crappy ISA devices that
> > get initialized later down, when the delay loop is already calibrated)
>
> 8253 (DMAC) and 8254 (PIT) have been reported in earlier versions of the
> thread. By Alan, I believe.
They've been seen to be problems up to PII era machines. I'm not aware of
any newer than that with this problem. We also don't need to touch the
DMAC that early anyway that I can see - just the PIT.
In fact if we have a fast processor we have a TSC and APIC so we don't
need the PIT ?
Alan
Alan Cox wrote:
>>> i dont think this should matter: old systems that truly _need_ the ISA
>>> delay will be slow enough to not trip up. (nor are they really affected
>>> by these early delays - the delays were more for crappy ISA devices that
>>> get initialized later down, when the delay loop is already calibrated)
>> 8253 (DMAC) and 8254 (PIT) have been reported in earlier versions of the
>> thread. By Alan, I believe.
>
> They've been seen to be problems up to PII era machines. I'm not aware of
> any newer than that with this problem. We also don't need to touch the
> DMAC that early anyway that I can see - just the PIT.
>
> In fact if we have a fast processor we have a TSC and APIC so we don't
> need the PIT ?
Well, the TSC may be unstable and the APIC may be disabled.
-hpa
On Fri 2007-12-14 18:36:26, Alan Cox wrote:
> > > i dont think this should matter: old systems that truly _need_ the ISA
> > > delay will be slow enough to not trip up. (nor are they really affected
> > > by these early delays - the delays were more for crappy ISA devices that
> > > get initialized later down, when the delay loop is already calibrated)
> >
> > 8253 (DMAC) and 8254 (PIT) have been reported in earlier versions of the
> > thread. By Alan, I believe.
>
> They've been seen to be problems up to PII era machines. I'm not aware of
> any newer than that with this problem. We also don't need to touch the
> DMAC that early anyway that I can see - just the PIT.
>
> In fact if we have a fast processor we have a TSC and APIC so we don't
> need the PIT ?
It is still good to be able to disable APIC/TSC. Neither are
particularly reliable time sources.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
> Ingo Molnar wrote:
>> wow, cool fix! (I remember that there were other systems as well that are
>> affected by port 0x80 muckery - i thought we had removed port 0x80
>> accesses long ago.)
>> how about the simpler fix below, as a first-level approach? We can then
>> remove the _p in/out sequences after this.
>
> I believe this will suffer from the issue that was raised: this will use
> udelay() long before loop calibration (and no, we can't just "be
> conservative" since there is no "conservative" value we can use.)
?? Just initialize bogomips to 6GHz equivalent... and we are fine
until 6GHz cpus come out.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Pavel Machek wrote:
> On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
>> Ingo Molnar wrote:
>>> wow, cool fix! (I remember that there were other systems as well that are
>>> affected by port 0x80 muckery - i thought we had removed port 0x80
>>> accesses long ago.)
>>> how about the simpler fix below, as a first-level approach? We can then
>>> remove the _p in/out sequences after this.
>> I believe this will suffer from the issue that was raised: this will use
>> udelay() long before loop calibration (and no, we can't just "be
>> conservative" since there is no "conservative" value we can use.)
>
> ?? Just initialize bogomips to 6GHz equivalent... and we are fine
> until 6GHz cpus come out.
How long will that take to boot on a 386?
-hpa
On Fri, 14 Dec 2007 14:13:46 -0800
"H. Peter Anvin" <[email protected]> wrote:
> Pavel Machek wrote:
> > On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
> >> Ingo Molnar wrote:
> >>> wow, cool fix! (I remember that there were other systems as well that are
> >>> affected by port 0x80 muckery - i thought we had removed port 0x80
> >>> accesses long ago.)
> >>> how about the simpler fix below, as a first-level approach? We can then
> >>> remove the _p in/out sequences after this.
> >> I believe this will suffer from the issue that was raised: this will use
> >> udelay() long before loop calibration (and no, we can't just "be
> >> conservative" since there is no "conservative" value we can use.)
> >
> > ?? Just initialize bogomips to 6GHz equivalent... and we are fine
> > until 6GHz cpus come out.
>
> How long will that take to boot on a 386?
Well the dumb approach to fix that would seem to be to initialise it to
cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
Alan
Avi Kivity wrote:
> kvm will forward a virtual machine's writes to port 0x80 to the real
> port. The reason is that the write is much faster than exiting and
> emulating it; the difference is measurable when compiling kernels.
>
> Now if the cause is simply writing to port 0x80, then we must stop
> doing that. But if the reason is the back-to-back writes, then we can
> keep it, since the other writes will be trapped by kvm and emulated.
> Do you know which is the case?
>
As for kvm, I don't have enough info to know anything about that. Is
there a test you'd like me to try?
I think you are also asking if the crash on these laptops is caused only
by back-to-back writes. Actually, it doesn't seem to matter if they are
back to back. I can cause the crash if the writes to 80 are very much
spread out in time - it seems only to matter how many of them get
executed - almost as if there is a buffer overflow. (And of course if
you do back to back writes to other ports that are apparently fully
unused, such as 0xED on my machine, no crash occurs).
I believe (though no one seems to have confirming documentation from the
chipset or motherboard vendor) that port 80 is actually functional for
some unknown function on these machines. (They do respond to "in"
instructions faster than a bus cycle abort does - more evidence).
I searched the DSDT to see if there is any evidence of an ACPI use for
this port, but found nothing.
David P. Reed wrote:
>
> I believe (though no one seems to have confirming documentation from the
> chipset or motherboard vendor) that port 80 is actually functional for
> some unknown function on these machines. (They do respond to "in"
> instructions faster than a bus cycle abort does - more evidence).
>
This is normal. IN from port 0x80 is used by the DMA address map chip.
As far as I understand, there are other laptops with the same chipset
which don't have this problem, so it's likely either a motherboard or
firmware issue. My guess is that they probably let debugging code out
in the field (trap port 0x80 in SMM, and then try to output it on some
debugging bus.)
-hpa
Just a thought for a way to fix the "very early" timing needed to set up
udelay to work in a way that works on all machines. Perhaps we haven't
exploited the BIOS enough: The PC BIOS since the PC AT (286) has
always had a standard "countdown timer" way to delay for n microseconds,
which as far as I know still works. This can be used to measure the
speed of a delay loop, without using ANY in/out instructions directly
(the ones in the BIOS are presumably correctly delayed...).
So first thing in the boot sequence, one can calibrate a timing loop
using this technique, and save the value to be used for all the "early"
stuff.
Here's skeleton code from old ASM code I found lying around in my
archives to use BIOS to measure how many unrolled short jumps can
execute in 10 msec. Note that it can run without knowing anything
whatsoever about port timing.
haltbyte db 0
calibrate:
les bx,haltbyte ; address of halt flag into es:bx
mov ax,8300h
sub cx,cx
mov dx,10000 ; 10 msec. in cx:dx
int 15h
jc bad
sub dx,dx
sub cx,cx ; decrement counter in dx:cx
tloop:
jmp short $+2 ; 10 short jmps
jmp short $+2
jmp short $+2
jmp short $+2
jmp short $+2
jmp short $+2
jmp short $+2
jmp short $+2
jmp short $+2
test haltbyte
loopz tloop
jnz done
dec dx
jnz tloop
" overflowed 32 bits ... never happens, cancel BIOS event wait.
mov ax,8301h
int 15h
jmp error
done:
mov ax,cx
negl
" here dx:ax contains 32 bit loop count corresponding to 10 msec.
ret ; return 32-bit value
Doc on function 83h of int 15h should be available online.
Alan Cox wrote:
> On Fri, 14 Dec 2007 14:13:46 -0800
> "H. Peter Anvin" <[email protected]> wrote:
>
>
>> Pavel Machek wrote:
>>
>>> On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
>>>
>>>> Ingo Molnar wrote:
>>>>
>>>>> wow, cool fix! (I remember that there were other systems as well that are
>>>>> affected by port 0x80 muckery - i thought we had removed port 0x80
>>>>> accesses long ago.)
>>>>> how about the simpler fix below, as a first-level approach? We can then
>>>>> remove the _p in/out sequences after this.
>>>>>
>>>> I believe this will suffer from the issue that was raised: this will use
>>>> udelay() long before loop calibration (and no, we can't just "be
>>>> conservative" since there is no "conservative" value we can use.)
>>>>
>>> ?? Just initialize bogomips to 6GHz equivalent... and we are fine
>>> until 6GHz cpus come out.
>>>
>> How long will that take to boot on a 386?
>>
>
> Well the dumb approach to fix that would seem to be to initialise it to
>
> cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
>
> Alan
>
>
David P. Reed wrote:
> Just a thought for a way to fix the "very early" timing needed to set up
> udelay to work in a way that works on all machines. Perhaps we haven't
> exploited the BIOS enough: The PC BIOS since the PC AT (286) has
> always had a standard "countdown timer" way to delay for n microseconds,
> which as far as I know still works. This can be used to measure the
> speed of a delay loop, without using ANY in/out instructions directly
> (the ones in the BIOS are presumably correctly delayed...).
If we enter from the 32-bit entrypoint, we already don't have access to
the BIOS, though.
-hpa
* H. Peter Anvin <[email protected]> wrote:
> I believe this will suffer from the issue that was raised: this will
> use udelay() long before loop calibration (and no, we can't just "be
> conservative" since there is no "conservative" value we can use.)
>
> Worse, I suspect that at least the PIT, which may need to be used for
> udelay calibration, is one of the devices that may be affected. I
> have seen the Verilog for a contemporary chipset, and it can only
> access the PIT once per microsecond -- this actually has to do with
> the definition of the PIT; some of the PIT operations are ill-defined
> if allowed at a higher frequency than the PIT clock.
i think the native_io_delay() in quirks.c signals the obvious solution:
a DMI (or otherwise) driven quirk that activates a port 0x80 based delay
on such boards. Combined with an iodelay=port80 boot option as well
perhaps, just in case someone hits a system that is not blacklisted yet.
This way such crazy broken hardware can be mapped correctly - like we
map such quirks in every other case. Perhaps even do this workaround on
the PIT driver level. Instead of perpetuating the superstition of port
80 forever.
Ingo
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 8a322c9..c95d313 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -222,6 +222,19 @@ void __init hpet_time_init(void)
time_init_hook();
}
+static void port_io_delay(void)
+{
+ asm volatile ("outb %%al, $0x80": : : "memory");
+}
+
+static void udelay_io_delay(void)
+{
+ udelay(2);
+}
+
+void (*native_io_delay)(void) = port_io_delay;
+EXPORT_SYMBOL(native_io_delay);
+
/*
* This is called directly from init code; we must delay timer setup in the
* HPET case as we can't make the decision to turn on HPET this early in the
@@ -233,5 +246,7 @@ void __init hpet_time_init(void)
void __init time_init(void)
{
tsc_init();
+ if (!tsc_disable)
+ native_io_delay = udelay_io_delay;
late_time_init = choose_time_init();
}
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..1b73f49 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,7 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void (*native_io_delay)(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Hello,
On Fri, 14 Dec 2007 23:29:55 +0000
Alan Cox <[email protected]> wrote:
> On Fri, 14 Dec 2007 14:13:46 -0800
> "H. Peter Anvin" <[email protected]> wrote:
>
> > Pavel Machek wrote:
> > > On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
> >
> > How long will that take to boot on a 386?
>
> Well the dumb approach to fix that would seem to be to initialise it to
>
> cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
Just an idea : from what I've read, the problem (port 80 hanging) only occurs
on 'modern' machines... So why not :
- use port 80 for old CPUs (PII, PIII) where it has never really been
a problem,
- use the cpu->family to do a best match for CPU freq
thus we could avoid increasing boot time too much...
Paul
On 15-12-07 09:08, Paul Rolland wrote:
> Hello,
>
> On Fri, 14 Dec 2007 23:29:55 +0000
> Alan Cox <[email protected]> wrote:
>
>> On Fri, 14 Dec 2007 14:13:46 -0800
>> "H. Peter Anvin" <[email protected]> wrote:
>>
>>> Pavel Machek wrote:
>>>> On Fri 2007-12-14 10:02:57, H. Peter Anvin wrote:
>>> How long will that take to boot on a 386?
>> Well the dumb approach to fix that would seem to be to initialise it to
>>
>> cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
>
> Just an idea : from what I've read, the problem (port 80 hanging) only occurs
> on 'modern' machines... So why not :
> - use port 80 for old CPUs (PII, PIII) where it has never really been
> a problem,
> - use the cpu->family to do a best match for CPU freq
> thus we could avoid increasing boot time too much...
Yes, just posted a Patch-For-Comments that switches on the availability of a
TSC (tsc_init sets tsc_disable also for !cpu_has_tsc) which would mean that
only really old stuff would be using the outb still. A TSC is really all we
need to have a sensible udelay().
Rene.
* Rene Herman <[email protected]> wrote:
> The issue is -- how do you safely replace the outb pre-loops_per_jiffy
> calibration? I'm currently running with the attached hack (not
> submitted, only for 32-bit and discussion) the idea of which might be
> the best we can do?
how about doing a known-NOP chipset cycle? For example:
inb(PIC_SLAVE_IMR)
? I.e. instead of trying to find an unused port, lets try to find a
known-used platform register that has no side-effects if read. Use it
unconditionally during early bootup and change it to an udelay after
calibration. (or use it after early bootup too if a boot parameter has
been specified) Or something like this.
Ingo
On 15-12-07 14:27, Ingo Molnar wrote:
> * Rene Herman <[email protected]> wrote:
>
>> The issue is -- how do you safely replace the outb pre-loops_per_jiffy
>> calibration? I'm currently running with the attached hack (not
>> submitted, only for 32-bit and discussion) the idea of which might be
>> the best we can do?
>
> how about doing a known-NOP chipset cycle? For example:
>
> inb(PIC_SLAVE_IMR)
An inb is annoying in that it clobbers register al (well, with an inline
native_io_delay it is at least) and more importantly -- the timing of this
is going to vary wildly. We really want a register that is effectively
guaranteed to be unused so that it dies on ISA/LPC or we might get _much_
faster PCI only decodes. Even reading port 0x80 itself varies wildly:
http://lkml.org/lkml/2007/12/12/309
> ? I.e. instead of trying to find an unused port, lets try to find a
> known-used platform register that has no side-effects if read. Use it
> unconditionally during early bootup and change it to an udelay after
> calibration. (or use it after early bootup too if a boot parameter has
> been specified) Or something like this.
It's really going to have to be a known _un_used register and (the write
direction of) port 0x80 is used exactly for that reason. Port 0xed is a
known "alternate diagnostic port" used by Phoenix BIOSes at least but Peter
Anvin reported trouble with that one -- probably for the outb direction but
assuming that means something was in fact responding, we'd have the same
timing problem.
I believe we have two "good" options:
1) port 0xed was tested by the current reporter and found to be safe (and
provide slow enough timing). If DMI based quirk hacks are available soon
enough we can switch 0x80 to 0xed based on it. Are they?
2) the thing I posted in the message replied to where immediately after
tsc_init() (which is before the PIT init) we switch to udelay() if we have a
TSC, which is of course anything modern.
Rene.
On Sat, 15 Dec 2007 14:27:25 +0100
Ingo Molnar <[email protected]> wrote:
>
> * Rene Herman <[email protected]> wrote:
>
> > The issue is -- how do you safely replace the outb pre-loops_per_jiffy
> > calibration? I'm currently running with the attached hack (not
> > submitted, only for 32-bit and discussion) the idea of which might be
> > the best we can do?
>
> how about doing a known-NOP chipset cycle? For example:
>
> inb(PIC_SLAVE_IMR)
It needs to be a different chip to the main one (or macrocell anyway) - so
PIC for PIT and vice versa. However, since we know 0x80 works for
everything on the planet but this one species of laptop, which seems to
need a firmware update, it's a very high-risk approach.
Alan
I understand the risks of such a fundamental change, and it may be only
a minor concern, but I did also point out that using an unused port
causes the bus to be tied up for a microsecond or two, which matters on
a fast SMP machine.
Of course all the other concerns you guys are worrying about are really
important. I don't want to break anybody's running systems... I'd like
to see my machine run smoothly, and all the other machines that may or
may not have this problem (google "hwclock freeze" to see that I'm far
from alone - I just have persevered in "bisecting" this problem with
kernel tweaks for months, whereas the others did not or did not know how).
By the way, this laptop is really nice for Linux in lots of ways. Dual
drives, so I set it up with software RAID for reliability, dual 64-bit
processors, fast 3D graphics, etc. Great battery life. Just one last
kernel issue.
I also note that current machines like the problem machine have ACPI, and
maybe those would be the ones that vendors might start to define port 80
to mean something. As I noted, it /seems/ to be only when ACPI is turned
on that this problem happens on my machine - that's when the OS starts
to be involved in servicing various things, so it suggests that maybe
things change about port 80's unknown function on these machines when
ACPI is servicing the system management code (that's not something I
have been able to verify).
My belief is that my machine has some device that is responding to port
80 by doing something. And that something requires some other program
to "service" port 80 in some way. But it sure would be nice to know.
I can't personally sand off the top of the chipset to put probes into it
- so my normal approach of putting a logic analyzer on the bus doesn't work.
PS: If I have time, I may try to build Rene's port 80 test for Windows
and run it under WinXP on this machine (I still have a crappy little
partition that boots it). If it freezes the same way, it's almost
certain a design "feature", and if it doesn't freeze, we might suspect
that there is compensating logic in either Windows ACPI code or some way
that windows "sets up" the machine.
Alan Cox wrote:
> On Sat, 15 Dec 2007 14:27:25 +0100
> Ingo Molnar <[email protected]> wrote:
>
>
>> * Rene Herman <[email protected]> wrote:
>>
>>
>>> The issue is -- how do you safely replace the outb pre-loops_per_jiffy
>>> calibration? I'm currently running with the attached hack (not
>>> submitted, only for 32-bit and discussion) the idea of which might be
>>> the best we can do?
>>>
>> how about doing a known-NOP chipset cycle? For example:
>>
>> inb(PIC_SLAVE_IMR)
>>
>
> It needs to be a different chip to the main one (or macrocell anyway) - so
> PIC for PIT and vice versa. However since we know 0x80 works for
> everything on the planet but this one species of laptop, which seems to
> need a firmware update, it's a very high risk approach.
>
> Alan
>
>
This change seems rather unlikely for 2.6.24 at this point (high risk),
but could be good for 2.6.25.
One thing it should probably have for the early going,
is a simple way to turn it on/off at boot time,
so that we don't have people "stuck" unable to run
the test kernels should something weird happen.
Alan / David / Ingo,
What do you think of the idea of a *temporary* boot flag for this,
something like port80=on/off (pick a suitable name) ?
Cheers
H. Peter Anvin wrote:
> David P. Reed wrote:
>> Just a thought for a way to fix the "very early" timing needed to set
>> up udelay to work in a way that works on all machines. Perhaps we
>> haven't exploited the BIOS enough: The PC BIOS since the PC AT
>> (286) has always had a standard "countdown timer" way to delay for n
>> microseconds, which as far as I know still works. This can be used
>> to measure the speed of a delay loop, without using ANY in/out
>> instructions directly (the ones in the BIOS are presumably correctly
>> delayed...).
>
> If we enter from the 32-bit entrypoint, we already don't have access
> to the BIOS, though.
>
My understanding is that Linux starts in real mode, and uses the
BIOS for such things as reading the very first image.
arch/x86/boot/main.c seems to use BIOS calls, and one can do what I
wrote in C or asm. Good place to measure the appropriate delay timing,
and pass it on forward. That's what I was suggesting, which is why I
copied the ASM routine from my old code listing as I did.
> My understanding is that Linux starts in real mode, and uses the
> BIOS for such things as reading the very first image.
Not always. We may enter from 32bit in some cases, and we may also not
have a PC BIOS in the first place.
> a minor concern, but I did also point out that using an unused port
> causes the bus to be tied up for a microsecond or two, which matters on
> a fast SMP machine.
And I did point out I'd found locking cases that may be relying upon this.
> I also note that current machines like the problem machine have ACPI, and
> maybe those would be the ones that vendors might start to define port 80
> to mean something. As I noted, it /seems/ to be only when ACPI is turned
Port 0x80 means debug. You appear to have a laptop with some kind of
buggy firmware that wants a BIOS update. Everyone uses 0x80 for debug -
it's in the chipset hardware quite often.
> My belief is that my machine has some device that is responding to port
> 80 by doing something. And that something requires some other program
> to "service" port 80 in some way. But it sure would be nice to know.
> I can't personally sand off the top of the chipset to put probes into it
> - so my normal approach of putting a logic analyzer on the bus doesn't work.
Almost certainly a SMI trap.
> PS: If I have time, I may try to build Rene's port 80 test for Windows
> and run it under WinXP on this machine
That would be very interesting.
Alan
Rene Herman wrote:
>
> It's really going to have to be a known _un_used register and (the write
> direction of) port 0x80 is used exactly for that reason. Port 0xed is a
> known "alternate diagnostic port" used by Phoenix BIOSes at least but
> Peter Anvin reported trouble with that one -- probably for the outb
> direction but assuming that means something was in fact responding, we'd
> have the same timing problem.
>
Yes, for the outbound direction.
> I believe we have two "good" options:
>
> 1) port 0xed was tested by the current reporter and found to be safe
> (and provide slow enough timing). If DMI based quirk hacks are
> available soon enough we can switch 0x80 to 0xed based on it. Are they?
DMI is just a data structure parked in memory, so it should at least be
theoretically possible to get to it.
-hpa
Paul Rolland wrote:
> Just an idea : from what I've read, the problem (port 80 hanging) only occurs
> on 'modern' machines...
It happens on *one single* "modern" machine...
Let's keep that in perspective.
-hpa
Rene Herman wrote:
>
> Yes, just posted a Patch-For-Comments that switches on the availability
> of a TSC (tsc_init sets tsc_disable also for !cpu_has_tsc) which would
> mean that only really old stuff would be using the outb still. A TSC is
> really all we need to have a sensible udelay().
>
Uhm, no. You have no clue what the speed of the TSC is until you have
been able to calibrate it against a fixed timesource - like the PIT.
-hpa
On Sat 2007-12-15 12:26:26, H. Peter Anvin wrote:
> Paul Rolland wrote:
>> Just an idea : from what I've read, the problem (port 80 hanging) only
>> occurs
>> on 'modern' machines...
>
> It happens on *one single* "modern" machine...
>
> Let's keep that in perspective.
It hurts on other machines (like debug LEDs being useless), and it may
be incorrect as soon as you insert a LEDs-on-port-0x80 PCI card.
No, it is not critical but yes, I'd like to see it fixed.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Fri 2007-12-14 15:23:55, Ingo Molnar wrote:
>
> * Rene Herman <[email protected]> wrote:
>
> > --- a/init/main.c
> > +++ b/init/main.c
> > @@ -229,10 +229,9 @@ static int __init obsolete_checksetup(char *line)
> > }
> >
> > /*
> > - * This should be approx 2 Bo*oMips to start (note initial shift), and will
> > - * still work even if initially too large, it will just take slightly longer
> > + * Initial value roughly corresponds to a 1 GHz CPU
> > */
> > -unsigned long loops_per_jiffy = (1<<12);
> > +unsigned long loops_per_jiffy = 1000000000 / HZ;
> >
> > EXPORT_SYMBOL(loops_per_jiffy);
>
> this is a factor of ~2400 increase - this will take an eternity to boot
> on any older CPU.
I don't think we are using outb_p before loops_per_jiffy is
initialized -- I believe I'd see oopsen if we did. Factor 2400
increase is bad, but if it only converts 10x 1usec delay into 10x
24msec delay, it is not _that_ bad.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Fri 2007-12-14 14:15:03, Ingo Molnar wrote:
>
> * David P. Reed <[email protected]> wrote:
>
> > Replace use of outb to "unused" diagnostic port 0x80 for time delay
> > with udelay based time delay on x86_64 architecture machines. Fix for
> > bugs 9511 and 6307 in bugzilla, plus bugs reported in
> > bugzilla.redhat.com.
> >
> > Derived from suggestion (that didn't compile) by Pavel Machek, and
> > tested, also based on measurements of typical timings of out's
> > collated by Rene Herman from many in the community.
> >
> > This patch fixes a number of bugs known to cause problems on HP
> > Pavilion dv9000z and dv6000z laptops - in the form of solid freezes
> > when hwclock is used to show or set the time. Also, it potentially
> > improves bus utilization on SMP machines, by using a waiting process
> > that doesn't tie up the ISA/LPC bus for 1 or 2 microseconds.
> >
> > i386 family fixes (completely parallel) were not included, considering
> > that such machines might involve more risk of problems on legacy
> > machines.
>
> wow, cool fix! (I remember that there were other systems as well that
> are affected by port 0x80 muckery - i thought we had removed port 0x80
> accesses long ago.)
>
> how about the simpler fix below, as a first-level approach? We can then
> remove the _p in/out sequences after this.
>
> this is also something for v2.6.24 merging.
As much as I like this patch, I do not think it is suitable for
.24. Too risky, I'd say.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Pavel Machek wrote:
>>
>> this is also something for v2.6.24 merging.
>
> As much as I like this patch, I do not think it is suitable for
> .24. Too risky, I'd say.
>
No kidding! We're talking about removing a hack that has been
successful on thousands of pieces of hardware over 15 years because it
breaks ONE machine.
If this should be done at all it should be done in the most careful
manner possible. 2.6.25 would be an aggressive schedule.
-hpa
commit 4a7e75776c648102488a89dbfad516448830ab1a
Author: Rene Herman <[email protected]>
Date: Sun Dec 16 00:24:32 2007 +0100
foo
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..1a2a856
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,53 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+#define IO_DELAY(port) asm volatile ("outb %%al, %0" : : "N" ((port)))
+
+static void standard_io_delay(void)
+{
+ IO_DELAY(0x80);
+}
+
+static void alternate_io_delay(void)
+{
+ IO_DELAY(0xed);
+}
+
+void (*native_io_delay)(void) = standard_io_delay;
+
+void slow_down_io(void) {
+ native_io_delay();
+#ifdef REALLY_SLOW_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
+#endif
+}
+EXPORT_SYMBOL(slow_down_io);
+
+static int __init dmi_alternate_io_delay(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay.\n", id->ident);
+ native_io_delay = alternate_io_delay;
+ return 0;
+}
+
+static struct dmi_system_id __initdata alternate_io_delay_dmi_table[] = {
+ {
+ .callback = dmi_alternate_io_delay,
+ .ident = "Gigabyte GA-7IXE4",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
+ DMI_MATCH(DMI_BOARD_NAME, "7IXE4")
+ }
+ },
+ {
+ }
+};
+
+void __init io_delay_init(void)
+{
+ dmi_check_system(alternate_io_delay_dmi_table);
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..bf352e3 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,24 +250,13 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void io_delay_init(void);
+extern void (*native_io_delay)(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
#else
-
-static inline void slow_down_io(void) {
- native_io_delay();
-#ifdef REALLY_SLOW_IO
- native_io_delay();
- native_io_delay();
- native_io_delay();
-#endif
-}
-
+extern void slow_down_io(void);
#endif
#ifdef CONFIG_X86_NUMAQ
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..486a110 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,8 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
+extern void io_delay_init(void);
+extern void slow_down_io(void);
/*
* Talk about misusing macros..
@@ -50,21 +45,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Rene Herman wrote:
>
> I hope this is considered half-way correct/sane (note by the way that
> it's not a good idea to switch a "native_io_delay_port" value since
> plugging in a variable port would clobber register dx for every outb_p,
> which would then have to be reloaded for the next outb again). Comments
> appreciated.
>
That actually wouldn't be that big of a deal. Switching values in and
out of registers is dirt cheap (and MUCH cheaper than an indirect
function call) -- of course, if there is a reason to do it for
paravirtualization then that's fine; we're talking about something that
makes even the slowest CPU operation look speedy anyhow.
If in*_p and out*_p are out-of-lined then %dx would be dead anyway, and
so there is even less reason to deal with it.
In theory we could use an alternatives section and patch the
instruction, too; seems like way overkill, though.
Note, however, that your code doesn't deal with io_delay()'s in the boot
code (arch/x86/boot) at all, nor (obviously) io_delay()'s in boot
loaders. In the boot code, access to DMI data is NOT available (we
can't even use the INT 15h mover if we want to continue to support Loadlin.)
In the boot code, io_delay() is used to slow down accesses to the KBC,
interrupt controller, INT13h logic, and the NMI gate, and to provide a
fixed delay during A20 stabilization.
-hpa
H. Peter Anvin wrote:
>
> Note, however, that your code doesn't deal with io_delay()'s in the boot
> code (arch/x86/boot) at all, nor (obviously) io_delay()'s in boot
> loaders. In the boot code, access to DMI data is NOT available (we
> can't even use the INT 15h mover if we want to continue to support
> Loadlin.)
>
Correction: DMI data are at least supposedly available via the PNPBIOS
calls specified in the SMBIOS spec.
-hpa
Here we go.
# dmidecode -s baseboard-manufacturer
Quanta
# dmidecode -s baseboard-product-name
30B9
There do seem to be other systems, besides mine, that have the same
problem. I think it's pretty likely that other machines that have this
problem are Quanta machines, since Quanta is one of the primary ODM's
that does HP laptops. Don't know about the product-name being common
with the HP dv6000z family, which is another one reported to have this
problem. We could try to ask all the reporters of hwclock freezes to
report their results from dmidecode.
Rene Herman wrote:
> On 15-12-07 21:27, H. Peter Anvin wrote:
>
>> Rene Herman wrote:
>>>
>>> Yes, just posted a Patch-For-Comments that switches on the
>>> availability of a TSC (tsc_init sets tsc_disable also for
>>> !cpu_has_tsc) which would mean that only really old stuff would be
>>> using the outb still. A TSC is really all we need to have a sensible
>>> udelay().
>>
>> Uhm, no. You have no clue what the speed of the TSC is until you
>> have been able to calibrate it against a fixed timesource - like the
>> PIT.
>
> Yes. Hnng. Okay, this is going nowhere in a hurry, so back to the very
> first suggestion in this thread. How about this? This allows to switch
> from port 0x80 to port 0xed based on DMI.
>
> David: I plugged in my own DMI values for testing, but obviously yours
> are needed. The values that are needed are retrieved by the
> "dmidecode" program which you will probably have installed (it might
> be in an sbin directory) or will be able to install through whatever
> package manager you use.
>
> dmidecode -s baseboard-manufacturer
> dmidecode -s baseboard-product-name
>
> are the values you should plug into the .matches field in the
> dmi_system_id struct in this. It would be great if you could do that,
> test, and post back with those values. .ident should be a nice human
> name.
>
> It's been tested on x86-32 and seems to work fine. It's not been
> tested on x86-64 but seems to stand a fair chance of working similarly.
>
> It of course remains possible to switch to a udelay() based method
> later on anyway but with all the pre-calibration trouble, this might
> be the lowest risk method in the short run.
>
> This is partly based on previous patches by Pavel Machek and David P.
> Reed.
>
> I hope this is considered half-way correct/sane (note by the way that
> it's not a good idea to switch a "native_io_delay_port" value since
> plugging in a variable port would clobber register dx for every
> outb_p, which would then have to be reloaded for the next outb again).
> Comments appreciated.
>
> Signed-off-by: Rene Herman <[email protected]>
>
> arch/x86/boot/compressed/misc_32.c | 8 ++---
> arch/x86/boot/compressed/misc_64.c | 8 ++---
> arch/x86/kernel/Makefile_32 | 2 -
> arch/x86/kernel/Makefile_64 | 2 -
> arch/x86/kernel/io_delay.c | 53
> +++++++++++++++++++++++++++++++++++++
> arch/x86/kernel/setup_32.c | 2 +
> arch/x86/kernel/setup_64.c | 2 +
> include/asm-x86/io_32.h | 17 ++---------
> include/asm-x86/io_64.h | 23 ++++++----------
>
* H. Peter Anvin <[email protected]> wrote:
> Paul Rolland wrote:
>> Just an idea : from what I've read, the problem (port 80 hanging) only occurs
>> on 'modern' machines...
>
> It happens on *one single* "modern" machine...
>
> Let's keep that in perspective.
two or three i think (and an unknown of others where "random,
unexplained freezes" were thought to be hw borkage), but yeah, it's
still a very low proportion.
Ingo
* H. Peter Anvin <[email protected]> wrote:
> Pavel Machek wrote:
>>>
>>> this is also something for v2.6.24 merging.
>>
>> As much as I like this patch, I do not think it is suitable for
>> .24. Too risky, I'd say.
>>
>
> No kidding! We're talking about removing a hack that has been
> successful on thousands of pieces of hardware over 15 years because it
^----[*]
> breaks ONE machine.
[*] "- none of which needs it anymore -"
there, fixed it for you ;-)
So let's keep this in perspective: this is a hack that only helps on a
very low number of systems. (the PIT of one PII era chipset is known to
be affected)
unfortunately this hack's side-effects are mis-used by an unknown number
of drivers to mask PCI posting bugs. We want to figure out those bugs
(safely and carefully) and we want to remove this hack from modern
machines that don't need it. Doing anything else would be superstition.
anyway, we likely won't be doing anything about this in .24.
Ingo
commit a17ccb1964b53fd4ab00d501b7f229a9a6cf91d1
Author: Rene Herman <[email protected]>
Date: Sun Dec 16 13:36:39 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailinglist, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but still leaves problem 2.
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigid testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as David P. Reed reported the problem was already gone after using the
udelay(2) version of this. He moreover reported that booting with
"acpi=off" also fixed things and seeing as how ACPI isn't touched
until after this DMI based I/O port switch I believe it's safe to
leave the ones in the boot code be.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..d889c43
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,54 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Some machines get upset at port 0x80 writes which we use as
+ * an I/O delay. Allow for a DMI based override to alternate
+ * port 0xed.
+ */
+#define STANDARD_IO_DELAY_PORT 0x80
+#define ALTERNATE_IO_DELAY_PORT 0xed
+
+static unsigned short io_delay_port = STANDARD_IO_DELAY_PORT;
+
+void native_io_delay(void)
+{
+ asm volatile ("outb %%al, %w0" : : "d" (io_delay_port));
+}
+
+void slow_down_io(void) {
+ native_io_delay();
+#ifdef REALLY_SLOW_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
+#endif
+}
+EXPORT_SYMBOL(slow_down_io);
+
+static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+ io_delay_port = ALTERNATE_IO_DELAY_PORT;
+ return 0;
+}
+
+static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+ {
+ .callback = dmi_alternate_io_delay_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ }
+};
+
+void __init io_delay_init(void)
+{
+ dmi_check_system(alternate_io_delay_port_dmi_table);
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..5d4e5e5 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,24 +250,13 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void io_delay_init(void);
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
#else
-
-static inline void slow_down_io(void) {
- native_io_delay();
-#ifdef REALLY_SLOW_IO
- native_io_delay();
- native_io_delay();
- native_io_delay();
-#endif
-}
-
+extern void slow_down_io(void);
#endif
#ifdef CONFIG_X86_NUMAQ
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..486a110 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,8 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
-
-#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
+extern void io_delay_init(void);
+extern void slow_down_io(void);
/*
* Talk about misusing macros..
@@ -50,21 +45,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
* Rene Herman <[email protected]> wrote:
> Any ACKs, NAKs or further comments from others in this thread also
> welcome.
looks good to me. Could you please also provide three more controls that
i suggested earlier:
- a boot option enabling/disabling the udelay based code
- a .config method of enabling/disabling the udelay based code
- a sysctl to toggle it
if we want to clean this all up we'll need as many controls as possible.
Ingo
Rene Herman wrote:
>
> Well, I suppose. With stuff inline, constantly reloading dx also bloats
> things up a bit but yes, out of line who cares. Do you think this
> version is better?
>
It probably comes down to which version is bigger (you probably also
want to try uninlining.)
>> In the boot code, io_delay() is used to slow down accesses to the KBC,
>> interrupt controller, INT13h logic, and the NMI gate, and to provide a
>> fixed delay during A20 stabilization.
>
> Thanks for the heads up (also saw the SMBIOS update to this) but those
> don't seem to be a problem in fact. David Reed has been running with the
> simple udelay(2) version of this and reported no more hangs. He moreover
> reported no trouble after booting with "acpi=off" meaning that things
> seem to be fine pre-acpi which the boot code (and this io_delay_init)
> is. So I believe we get to ignore those.
Okay, so there is something inside ACPI which tickles this. Which
lends further credibility to the idea that it's activating a debugging
hack, probably inside the SuperIO/system controller chip.
It would be interesting to know exactly which part of ACPI triggers
this. I bet it is a reference to system controller namespace.
-hpa
Ingo Molnar wrote:
> * H. Peter Anvin <[email protected]> wrote:
>
>> Pavel Machek wrote:
>>>> this is also something for v2.6.24 merging.
>>> As much as I like this patch, I do not think it is suitable for
>>> .24. Too risky, I'd say.
>>>
>> No kidding! We're talking about removing a hack that has been
>> successful on thousands of pieces of hardware over 15 years because it
> ^----[*]
>> breaks ONE machine.
>
> [*] "- none of which needs it anymore -"
>
> there, fixed it for you ;-)
>
> So lets keep this in perspective: this is a hack that only helps on a
> very low number of systems. (the PIT of one PII era chipset is known to
> be affected)
Yes, but the status quo has been *tested* on thousands of systems and is
known to work. Thus, changing it puts things into unknown territory,
even if only a small number of machines actually need the current
configuration.
Heck, there are only a small number of 386/486 machines still in
operation and being actively updated.
> unfortunately this hack's side-effects are mis-used by an unknown number
> of drivers to mask PCI posting bugs. We want to figure out those bugs
> (safely and carefully) and we want to remove this hack from modern
> machines that dont need it. Doing anything else would be superstition.
>
> anyway, we likely wont be doing anything about this in .24.
Again, 24 is "right out". 25 is a "maybe", IMO. Rene's fix could be an
exception, since it is a DMI-keyed workaround for a specific machine and
doesn't change behaviour in general.
-hpa
The process of safely making delicate changes here is beyond my
responsibility as just a user - believe me, I'm not suggesting that a
risky fix be put in .24. I can patch my own kernels, and I can even
share an unofficial patch with others for now, or suggest that Fedora
and Ubuntu add it to their downstream.
May I make a small suggestion, though. If the decision is that a DMI-keyed
switch from out-80 to udelay(2) gets put in, perhaps there should also
be a way for people to test their own configuration for the underlying
problem made available as a script. Though it is a "hack", all you
need to freeze a problem system is to run a loop doing about 1000 "cat
/dev/nvram > /dev/null" commands. If that leads to a freeze, one might
ask to have the motherboard added to the DMI-key list.
H. Peter Anvin wrote:
> Ingo Molnar wrote:
>> * H. Peter Anvin <[email protected]> wrote:
>>
>>> Pavel Machek wrote:
>>>>> this is also something for v2.6.24 merging.
>>>> As much as I like this patch, I do not think it is suitable for
>>>> .24. Too risky, I'd say.
>>>>
>>> No kidding! We're talking about removing a hack that has been
>>> successful on thousands of pieces of hardware over 15 years because it
>> ^----[*]
>>> breaks ONE machine.
>>
>> [*] "- none of which needs it anymore -"
>>
>> there, fixed it for you ;-)
>>
>> So lets keep this in perspective: this is a hack that only helps on a
>> very low number of systems. (the PIT of one PII era chipset is known
>> to be affected)
>
> Yes, but the status quo has been *tested* on thousands of systems and
> is known to work. Thus, changing it puts things into unknown
> territory, even if only a small number of machines actually need the
> current configuration.
>
> Heck, there are only a small number of 386/486 machines still in
> operation and being actively updated.
>
>> unfortunately this hack's side-effects are mis-used by an unknown
>> number of drivers to mask PCI posting bugs. We want to figure out
>> those bugs (safely and carefully) and we want to remove this hack
>> from modern machines that dont need it. Doing anything else would be
>> superstition.
>>
>> anyway, we likely wont be doing anything about this in .24.
>
> Again, 24 is "right out". 25 is a "maybe", IMO. Rene's fix could be
> an exception, since it is a DMI-keyed workaround for a specific
> machine and doesn't change behaviour in general.
>
> -hpa
>
Rene Herman wrote:
> David: I've plugged in your DMI values in this. Could you perhaps test
> this to confirm that it works for you?
>
Will test it by tomorrow morning.
Hi!
> The process of safely making delicate changes here is beyond my
> responsibility as just a user - believe me, I'm not suggesting that a risky
> fix be put in .24. I can patch my own kernels, and I can even share an
> unofficial patch with others for now, or suggest that Fedora and Ubuntu add
> it to their downstream.
>
> May I make a small suggestion, though. If the decision is a DMI-keyed
> switch from out-80 to udelay(2) gets put in, perhaps there should also be
> a way for people to test their own configuration for the underlying problem
> made available as a script. Though it is a "hack", all you need to freeze
> a problem system is to run a loop doing about 1000 "cat /dev/nvram >
> /dev/null" commands. If that leads to a freeze, one might ask to have the
> motherboard added to the DMI-key list.
Can you freeze it by catting /dev/rtc, too? That may be significant,
because that is readable for group audio (at least on some
systems)... which would smell like "small security hole" to me.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Pavel Machek wrote:
> Hi!
>
>> The process of safely making delicate changes here is beyond my
>> responsibility as just a user - believe me, I'm not suggesting that a risky
>> fix be put in .24. I can patch my own kernels, and I can even share an
>> unofficial patch with others for now, or suggest that Fedora and Ubuntu add
>> it to their downstream.
>>
>> May I make a small suggestion, though. If the decision is a DMI-keyed
>> switch from out-80 to udelay(2) gets put in, perhaps there should also be
>> a way for people to test their own configuration for the underlying problem
>> made available as a script. Though it is a "hack", all you need to freeze
>> a problem system is to run a loop doing about 1000 "cat /dev/nvram >
>> /dev/null" commands. If that leads to a freeze, one might ask to have the
>> motherboard added to the DMI-key list.
>
> Can you freeze it by catting /dev/rtc, too? That may be significant,
> because that is readable for group audio (at least on some
> systems)... which would smell like "small security hole" to me.
> Pavel
Heck, on my system (Fedora 7), it's mode 644...
-hpa
David P. Reed wrote:
> PS: If I have time, I may try to build Rene's port 80 test for Windows
> and run it under WinXP on this machine (I still have a crappy little
> partition that boots it). If it freezes the same way, it's almost
> certain a design "feature", and if it doesn't freeze, we might suspect
> that there is compensating logic in either Windows ACPI code or some way
> that windows "sets up" the machine.
You'd have to replace the iopl call with an equivalent one for Windows
(seems like NtSetInformationProcess(ProcessUserModeIOPL) might do what
you need).
--
Robert Hancock Saskatoon, SK, Canada
To email, remove "nospam" from [email protected]
Home Page: http://www.roberthancock.com/
Ingo Molnar wrote:
> * H. Peter Anvin <[email protected]> wrote:
>
>> Pavel Machek wrote:
>>>> this is also something for v2.6.24 merging.
>>> As much as I like this patch, I do not think it is suitable for
>>> .24. Too risky, I'd say.
>>>
>> No kidding! We're talking about removing a hack that has been
>> successful on thousands of pieces of hardware over 15 years because it
> ^----[*]
>> breaks ONE machine.
>
> [*] "- none of which needs it anymore -"
>
> there, fixed it for you ;-)
>
> So lets keep this in perspective: this is a hack that only helps on a
> very low number of systems. (the PIT of one PII era chipset is known to
> be affected)
>
> unfortunately this hack's side-effects are mis-used by an unknown number
> of drivers to mask PCI posting bugs. We want to figure out those bugs
> (safely and carefully) and we want to remove this hack from modern
> machines that dont need it. Doing anything else would be superstition.
Are there any known examples of such drivers? It doesn't seem to
make much sense... PCI IO writes are not posted on any known system (the
spec allows them to be posted in the host bus bridge, but if they were
they could only be flushed by a read, not a write) and PCI MMIO writes
are only guaranteed to flush by doing a read from that device, not by
other random port accesses. I suppose using the _p versions of port
accesses might happen to mask such problems on certain machines..
--
Robert Hancock Saskatoon, SK, Canada
To email, remove "nospam" from [email protected]
Home Page: http://www.roberthancock.com/
commit 5001121e449040aa9cc021f69bfb191662c13004
Author: Rene Herman <[email protected]>
Date: Sun Dec 16 13:36:39 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailing list, but that approach has
two problems.
First, before loops_per_jiffy calibration (which happens after PIT init,
while some implementations of the PIT are themselves among the historically
problematic devices that need the delay), udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but...
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigorous testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This also introduces a command-line parameter "io_delay" to override
the DMI based choice again:
io_delay=<standard|alternate>
where "standard" means using the standard port 0x80 and "alternate"
port 0xed.
At the request of Ingo Molnar this retains the udelay method as a
config (CONFIG_UDELAY_IO_DELAY) and command-line ("io_delay=udelay")
choice for testing purposes as well.
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as David P. Reed reported the problem was already gone after using the
udelay version. He moreover reported that booting with "acpi=off" also
fixed things and seeing as how ACPI isn't touched until after this DMI
based I/O port switch I believe it's safe to leave the ones in the boot
code be.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 33121d6..9dce154 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -785,6 +785,14 @@ and is between 256 and 4096 characters. It is defined in the file
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ standard
+ Standard port 0x80 delay
+ alternate
+ Alternate port 0xed delay
+ udelay
+ Simple two microsecond delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 761ca7b..40aba67 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -112,4 +112,13 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config UDELAY_IO_DELAY
+ bool "Delay I/O through udelay instead of outb"
+ depends on DEBUG_KERNEL
+ help
+ Make inb_p/outb_p use udelay() based delays by default. Please note
+ that udelay() does not have the same bus-level side-effects that
+ the normal outb based delay does meaning this could cause drivers
+ to change behaviour and/or bugs to surface.
+
endmenu
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..4d955e7
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,106 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80 needed for certain HP laptops
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static void standard_io_delay(void)
+{
+ asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD));
+}
+
+static void alternate_io_delay(void)
+{
+ asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT));
+}
+
+/*
+ * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't
+ * have the bus-level side-effects that outb does
+ */
+#define IO_DELAY_USECS 2
+
+/*
+ * High on a hill was a lonely goatherd
+ */
+static void udelay_io_delay(void)
+{
+ udelay(IO_DELAY_USECS);
+}
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static void (*io_delay)(void) = standard_io_delay;
+#else
+static void (*io_delay)(void) = udelay_io_delay;
+#endif
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ */
+void native_io_delay(void)
+{
+ io_delay();
+}
+EXPORT_SYMBOL(native_io_delay);
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+ io_delay = alternate_io_delay;
+ return 0;
+}
+
+static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+ {
+ .callback = dmi_alternate_io_delay_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ }
+};
+
+static int __initdata io_delay_override;
+
+void __init io_delay_init(void)
+{
+ if (!io_delay_override)
+ dmi_check_system(alternate_io_delay_port_dmi_table);
+}
+#endif
+
+static int __init io_delay_param(char *s)
+{
+ if (!s)
+ return -EINVAL;
+
+ if (!strcmp(s, "standard"))
+ io_delay = standard_io_delay;
+ else if (!strcmp(s, "alternate"))
+ io_delay = alternate_io_delay;
+ else if (!strcmp(s, "udelay"))
+ io_delay = udelay_io_delay;
+ else
+ return -EINVAL;
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+ io_delay_override = 1;
+#endif
+ return 0;
+}
+
+early_param("io_delay", io_delay_param);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..a8d25c3 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,14 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
+#else
+static inline void io_delay_init(void)
{
- asm volatile("outb %%al,$0x80" : : : "memory");
}
+#endif
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..5bebaf9 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,24 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
+#else
+static inline void io_delay_init(void)
+{
+}
+#endif
+extern void native_io_delay(void);
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +61,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
On 16-12-07 22:42, H. Peter Anvin wrote:
> It probably comes down to which version is bigger (you probably also
> want to try uninlining.)
slow_down_io() sort of needs to stay inline due to the REALLY_SLOW_IO thing.
That stuff could use a cleanup, but that would be a different patch.
>> Thanks for the heads up (also saw the SMBIOS update to this) but those
>> don't seem to be a problem in fact. David Reed has been running with
>> the simple udelay(2) version of this and reported no more hangs. He
>> moreover reported no trouble after booting with "acpi=off" meaning
>> that things seem to be fine pre-acpi which the boot code (and this
>> io_delay_init) is. So I believe we get to ignore those.
>
> Okay, so there is something inside ACPI which tickles this. Which
> brings further credibility that it's activating a debugging hack,
> probably inside the SuperIO/system controller chip.
>
> It would be interesting to know exactly which part of ACPI triggers
> this. I bet it is a reference to system controller namespace.
Do you expect a BIOS update to be able to fix it? If so, I guess any DMI
hack should take BIOS version into account.
Rene.
On 16-12-07 22:43, H. Peter Anvin wrote:
> Again, 24 is "right out". 25 is a "maybe", IMO. Rene's fix could be an
> exception, since it is a DMI-keyed workaround for a specific machine and
> doesn't change behaviour in general.
I've not much opinion on the schedule as I've not the problem but yes, it's
intended as the low risk option.
Rene.
Rene Herman wrote:
>
> Do you expect a BIOS update to be able to fix it? If so, I guess any DMI
> hack should take BIOS version into account.
>
Hard to know without knowing what it is.
-hpa
commit 5001121e449040aa9cc021f69bfb191662c13004
Author: Rene Herman <[email protected]>
Date: Sun Dec 16 13:36:39 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailing list, but that approach has
two problems.
First, before loops_per_jiffy calibration (which happens after PIT init,
while some implementations of the PIT are themselves among the historically
problematic devices that need the delay), udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but...
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigorous testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This also introduces a command-line parameter "io_delay" to override
the DMI based choice again:
io_delay=<standard|alternate>
where "standard" means using the standard port 0x80 and "alternate"
port 0xed.
At the request of Ingo Molnar this retains the udelay method as a
config (CONFIG_UDELAY_IO_DELAY) and command-line ("io_delay=udelay")
choice for testing purposes as well.
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as David P. Reed reported the problem was already gone after using the
udelay version. He moreover reported that booting with "acpi=off" also
fixed things and seeing as how ACPI isn't touched until after this DMI
based I/O port switch I believe it's safe to leave the ones in the boot
code be.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 33121d6..9dce154 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -785,6 +785,14 @@ and is between 256 and 4096 characters. It is defined in the file
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ standard
+ Standard port 0x80 delay
+ alternate
+ Alternate port 0xed delay
+ udelay
+ Simple two microsecond delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 761ca7b..40aba67 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -112,4 +112,13 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config UDELAY_IO_DELAY
+ bool "Delay I/O through udelay instead of outb"
+ depends on DEBUG_KERNEL
+ help
+ Make inb_p/outb_p use udelay() based delays by default. Please note
+ that udelay() does not have the same bus-level side-effects that
+ the normal outb based delay does meaning this could cause drivers
+ to change behaviour and/or bugs to surface.
+
endmenu
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..4d955e7
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,106 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80 needed for certain HP laptops
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static void standard_io_delay(void)
+{
+ asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD));
+}
+
+static void alternate_io_delay(void)
+{
+ asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT));
+}
+
+/*
+ * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't
+ * have the bus-level side-effects that outb does
+ */
+#define IO_DELAY_USECS 2
+
+/*
+ * Delay with udelay(IO_DELAY_USECS) instead of writing to an I/O port
+ */
+static void udelay_io_delay(void)
+{
+ udelay(IO_DELAY_USECS);
+}
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static void (*io_delay)(void) = standard_io_delay;
+#else
+static void (*io_delay)(void) = udelay_io_delay;
+#endif
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ */
+void native_io_delay(void)
+{
+ io_delay();
+}
+EXPORT_SYMBOL(native_io_delay);
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+ io_delay = alternate_io_delay;
+ return 0;
+}
+
+static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+ {
+ .callback = dmi_alternate_io_delay_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ }
+};
+
+static int __initdata io_delay_override;
+
+void __init io_delay_init(void)
+{
+ if (!io_delay_override)
+ dmi_check_system(alternate_io_delay_port_dmi_table);
+}
+#endif
+
+static int __init io_delay_param(char *s)
+{
+ if (!s)
+ return -EINVAL;
+
+ if (!strcmp(s, "standard"))
+ io_delay = standard_io_delay;
+ else if (!strcmp(s, "alternate"))
+ io_delay = alternate_io_delay;
+ else if (!strcmp(s, "udelay"))
+ io_delay = udelay_io_delay;
+ else
+ return -EINVAL;
+
+#ifndef CONFIG_UDELAY_IO_DELAY
+ io_delay_override = 1;
+#endif
+ return 0;
+}
+
+early_param("io_delay", io_delay_param);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..a8d25c3 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,14 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
+#else
+static inline void io_delay_init(void)
{
- asm volatile("outb %%al,$0x80" : : : "memory");
}
+#endif
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..5bebaf9 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,24 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#ifndef CONFIG_UDELAY_IO_DELAY
+extern void io_delay_init(void);
+#else
+static inline void io_delay_init(void)
+{
+}
+#endif
+extern void native_io_delay(void);
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +61,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Rene Herman wrote:
> On 17-12-07 00:12, David P. Reed wrote:
>
>> Rene Herman wrote:
>>> David: I've plugged in your DMI values in this. Could you perhaps
>>> test this to confirm that it works for you?
>>>
>> Will test it by tomorrow morning.
>
> Might as well test the new version then. Ingo Molnar requested a few
> changes and this fixes a couple of problems as well.
>
As far as I can tell, the code still uses udelay() before calibration if
io_delay=udelay?
Just so we're clear on that...
-hpa
Rene Herman wrote:
> On 16-12-07 16:22, Ingo Molnar wrote:
>
>> looks good to me. Could you please also provide three more controls
>> that i suggested earlier:
>>
>> - a boot option enabling/disabling the udelay based code
>> - a .config method of enabling/disabling the udelay based code
>> - a sysctl to toggle it
>>
>> if we want to clean this all up we'll need as many controls as possible.
>
> This version does the boot and the .config option but not the sysctl. It
> makes for clumsy code and I don't believe it provides for much added
> value as soon as you have the boot option. I am moreover not completely
> confident about things such as paravirt liking the possibility of the
> native_io_delay being changed out from under them at unpredictable times.
>
Incidentally, I had the thought earlier today that port 0xf0 might be a
suitable delay port. It is used only by the 387-emulating-a-287 hack
for IRQ 13, which Linux doesn't use on 486+.
-hpa
On 17-12-07 03:04, H. Peter Anvin wrote:
> Rene Herman wrote:
>> On 17-12-07 00:12, David P. Reed wrote:
>>
>>> Rene Herman wrote:
>>>> David: I've plugged in your DMI values in this. Could you perhaps
>>>> test this to confirm that it works for you?
>>>>
>>> Will test it by tomorrow morning.
>>
>> Might as well test the new version then. Ingo Molnar requested a few
>> changes and this fixes a couple of problems as well.
>>
>
> As far as I can tell, the code still uses udelay() before calibration if
> io_delay=udelay?
>
> Just so we're clear on that...
Yes. This patch is explicitly about the alternate port and not about udelay.
As discussed (and changelogged), the calibration is just one problem, with
PCI posting and possible SMP races being the other ones. Ingo Molnar wanted it as
a debugging thing already though.
Once we start discussing udelay() again I believe we should go with the
simple per CPU-Family loops_per_jiffy initialization to fix that first
problem (and I guess I could hack that in now) but then the bigger problem
remains and will need a fair amount of testing at least and mostly on
machines that are by now gathering dust in a few basements...
Rene
On 17-12-07 03:05, H. Peter Anvin wrote:
> Incidentally, I had the thought earlier today that port 0xf0 might be a
> suitable delay port. It is used only by the 387-emulating-a-287 hack
> for IRQ 13, which Linux doesn't use on 486+.
rene@7ixe4:~/src/port80$ su -c ./port80
cycles: out 2400, in 2400
rene@7ixe4:~/src/port80$ su -c ./portf0
cycles: out 2400, in 2400
(Duron 1300)
I suppose you mean using it instead of port 0x80 always and not just as an
alternate port? For the latter 0xed is alright I guess...
Rene.
Rene Herman wrote:
> On 17-12-07 03:05, H. Peter Anvin wrote:
>
>> Incidentally, I had the thought earlier today that port 0xf0 might be
>> a suitable delay port. It is used only by the 387-emulating-a-287
>> hack for IRQ 13, which Linux doesn't use on 486+.
>
> rene@7ixe4:~/src/port80$ su -c ./port80
> cycles: out 2400, in 2400
> rene@7ixe4:~/src/port80$ su -c ./portf0
> cycles: out 2400, in 2400
>
> (Duron 1300)
>
> I suppose you mean using it instead of port 0x80 always and not just as
> an alternate port? For the latter 0xed is alright I guess...
>
Well, we probably should leave the possibility in to use 0x80 -- for one
thing, we need to use 0x80 on 386, and there is always the possibility
that the switch will have different timing properties on some or all
machines.
Note that this doesn't require that a machine actually implements port
0xf0 for FERR/IGNNE, it just requires that they don't use it for
something else.
I would be rather inclined to try using port 0xf0 by default as long as
family > 3[*] (with fallback to port 0x80) at least experimentally (-mm).
We *might* even be able to use port 0xf0 unconditionally in the setup
code, since we're not using the FPU there (the only FPU instructions in
the setup code are there to detect the FPU.)
One thing: although I believe most actual implementations of port 0xf0
implement it as a strobe alone (data is ignored), all documentation I've
found, including "The Undocumented PC" specifically says "write 0x00 to
this port." This *could* mean there are platforms which use other
values than 0x00 for other hacks.
-hpa
[*] The following statements are equivalent:
- family > 3.
- CR0.NE is settable.
- EFLAGS.AC is settable.
Rene Herman wrote:
> On 17-12-07 03:05, H. Peter Anvin wrote:
>
>> Incidentally, I had the thought earlier today that port 0xf0 might be
>> a suitable delay port. It is used only by the 387-emulating-a-287
>> hack for IRQ 13, which Linux doesn't use on 486+.
>
> rene@7ixe4:~/src/port80$ su -c ./port80
> cycles: out 2400, in 2400
> rene@7ixe4:~/src/port80$ su -c ./portf0
> cycles: out 2400, in 2400
>
> (Duron 1300)
>
> I suppose you mean using it instead of port 0x80 always and not just as
> an alternate port? For the latter 0xed is alright I guess...
>
FWIW, the criterion used in the kernel for when to use IRQ 13 is:
/*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.
*/
if (boot_cpu_data.hard_math && !cpu_has_fpu)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
setup_irq(FPU_IRQ, &fpu_irq);
In that case we can't actually use port 0xF0 (it is, however, safe to
use it during setup, specifically before we can take our first FPU
exception.)
-hpa
* Rene Herman <[email protected]> wrote:
> On 16-12-07 16:22, Ingo Molnar wrote:
>
>> looks good to me. Could you please also provide three more controls that i
>> suggested earlier:
>>
>> - a boot option enabling/disabling the udelay based code
>> - a .config method of enabling/disabling the udelay based code
>> - a sysctl to toggle it
>>
>> if we want to clean this all up we'll need as many controls as possible.
>
> This version does the boot and the .config option but not the sysctl.
> It makes for clumsy code and I don't believe it provides for much
> added value as soon as you have the boot option. I am moreover not
> completely confident about things such as paravirt liking the
> possibility of the native_io_delay being changed out from under them
> at unpredictable times.
>
> So how is this? Also fixes a few problems with the previous version.
thanks Rene! I've added your patch to x86.git. I changed a few things
on top of it; see the additional changelog and delta patch below.
Ingo
------------>
- add the io_delay=none method
- make each method selectable from the kernel config
- simplify the delay code a bit by getting rid of an indirect function call
- add the /proc/sys/kernel/io_delay_type sysctl
- change 'standard' and 'alternate' to 0x80 and 0xed
- make the io delay config not depend on CONFIG_DEBUG_KERNEL
---
Documentation/kernel-parameters.txt | 12 ++--
arch/x86/Kconfig.debug | 79 +++++++++++++++++++++++++--
arch/x86/kernel/io_delay.c | 103 ++++++++++++++++--------------------
include/asm-x86/io_32.h | 2
include/asm-x86/io_64.h | 2
kernel/sysctl.c | 9 +++
6 files changed, 138 insertions(+), 69 deletions(-)
Index: linux-x86.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-x86.q.orig/Documentation/kernel-parameters.txt
+++ linux-x86.q/Documentation/kernel-parameters.txt
@@ -786,12 +786,14 @@ and is between 256 and 4096 characters.
then look in the higher range.
io_delay= [X86-32,X86-64] I/O delay method
- standard
- Standard port 0x80 delay
- alternate
- Alternate port 0xed delay
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
udelay
- Simple two microsecond delay
+ Simple two-microsecond delay
+ none
+ No delay
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
Index: linux-x86.q/arch/x86/Kconfig.debug
===================================================================
--- linux-x86.q.orig/arch/x86/Kconfig.debug
+++ linux-x86.q/arch/x86/Kconfig.debug
@@ -112,13 +112,78 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config UDELAY_IO_DELAY
- bool "Delay I/O through udelay instead of outb"
- depends on DEBUG_KERNEL
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+ int
+ default "0"
+
+config IO_DELAY_TYPE_0XED
+ int
+ default "1"
+
+config IO_DELAY_TYPE_UDELAY
+ int
+ default "2"
+
+config IO_DELAY_TYPE_NONE
+ int
+ default "3"
+
+choice
+ prompt "IO delay type"
+ default IO_DELAY_0X80
+
+config IO_DELAY_0X80
+ bool "port 0x80 based port-IO delay [recommended]"
+ help
+ This is the traditional Linux IO delay used for in/out_p.
+ It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+ bool "port 0xed based port-IO delay"
help
- Make inb_p/outb_p use udelay() based delays by default. Please note
- that udelay() does not have the same bus-level side-effects that
- the normal outb based delay does meaning this could cause drivers
- to change behaviour and/or bugs to surface.
+ Use port 0xed as the IO delay. This frees up port 0x80 which is
+ often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+ bool "udelay based port-IO delay"
+ help
+ Use udelay(2) as the IO delay method. This provides the delay
+ while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+ bool "no port-IO delay"
+ help
+ No port-IO delay. Will break on old boxes that require port-IO
+ delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_NONE
+endif
endmenu
Index: linux-x86.q/arch/x86/kernel/io_delay.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/io_delay.c
+++ linux-x86.q/arch/x86/kernel/io_delay.c
@@ -1,5 +1,9 @@
/*
* I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
@@ -8,98 +12,83 @@
#include <linux/dmi.h>
#include <asm/io.h>
-/*
- * Allow for a DMI based override of port 0x80 needed for certain HP laptops
- */
-#define IO_DELAY_PORT_STD 0x80
-#define IO_DELAY_PORT_ALT 0xed
-
-static void standard_io_delay(void)
-{
- asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD));
-}
-
-static void alternate_io_delay(void)
-{
- asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT));
-}
-
-/*
- * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't
- * have the bus-level side-effects that outb does
- */
-#define IO_DELAY_USECS 2
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
-/*
- * High on a hill was a lonely goatherd
- */
-static void udelay_io_delay(void)
-{
- udelay(IO_DELAY_USECS);
-}
-
-#ifndef CONFIG_UDELAY_IO_DELAY
-static void (*io_delay)(void) = standard_io_delay;
-#else
-static void (*io_delay)(void) = udelay_io_delay;
-#endif
+static int __initdata io_delay_override;
/*
* Paravirt wants native_io_delay to be a constant.
*/
void native_io_delay(void)
{
- io_delay();
+ switch (io_delay_type) {
+ default:
+ case CONFIG_IO_DELAY_TYPE_0X80:
+ asm volatile ("outb %al, $0x80");
+ break;
+ case CONFIG_IO_DELAY_TYPE_0XED:
+ asm volatile ("outb %al, $0xed");
+ break;
+ case CONFIG_IO_DELAY_TYPE_UDELAY:
+ /*
+ * 2 usecs is an upper-bound for the outb delay but
+ * note that udelay doesn't have the bus-level
+ * side-effects that outb does, nor does udelay() have
+ * precise timings during very early bootup (the delays
+ * are shorter until calibrated):
+ */
+ udelay(2);
+ break;
+ case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ }
}
EXPORT_SYMBOL(native_io_delay);
-#ifndef CONFIG_UDELAY_IO_DELAY
-static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
- printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
- io_delay = alternate_io_delay;
+ printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+
return 0;
}
-static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = {
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
{
- .callback = dmi_alternate_io_delay_port,
+ .callback = dmi_io_delay_0xed_port,
.ident = "HP Pavilion dv9000z",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
DMI_MATCH(DMI_BOARD_NAME, "30B9")
}
},
- {
- }
+ { }
};
-static int __initdata io_delay_override;
-
void __init io_delay_init(void)
{
if (!io_delay_override)
- dmi_check_system(alternate_io_delay_port_dmi_table);
+ dmi_check_system(io_delay_0xed_port_dmi_table);
}
-#endif
static int __init io_delay_param(char *s)
{
if (!s)
return -EINVAL;
- if (!strcmp(s, "standard"))
- io_delay = standard_io_delay;
- else if (!strcmp(s, "alternate"))
- io_delay = alternate_io_delay;
+ if (!strcmp(s, "0x80"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ else if (!strcmp(s, "0xed"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
else if (!strcmp(s, "udelay"))
- io_delay = udelay_io_delay;
+ io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ else if (!strcmp(s, "none"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
else
return -EINVAL;
-#ifndef CONFIG_UDELAY_IO_DELAY
io_delay_override = 1;
-#endif
return 0;
}
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -259,6 +259,8 @@ static inline void io_delay_init(void)
#endif
extern void native_io_delay(void);
+extern int io_delay_type;
+
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
#else
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -44,6 +44,8 @@ static inline void io_delay_init(void)
#endif
extern void native_io_delay(void);
+extern int io_delay_type;
+
static inline void slow_down_io(void)
{
native_io_delay();
Index: linux-x86.q/kernel/sysctl.c
===================================================================
--- linux-x86.q.orig/kernel/sysctl.c
+++ linux-x86.q/kernel/sysctl.c
@@ -53,6 +53,7 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -683,6 +684,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
* Ingo Molnar <[email protected]> wrote:
> > So how is this? Also fixes a few problems with the previous version.
>
> thanks Rene! I've added your patch to x86.git. I changed a few things
> on top of it; see the additional changelog and delta patch below.
here's an updated rollup patch, against 2.6.24-rc4. David, could you
please try this? This should work out of box on your system, without any
boot option or other tweak needed.
Ingo
------------------------->
Subject: x86: provide a DMI based port 0x80 I/O delay override.
From: Rene Herman <[email protected]>
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailing list, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but...
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigorous testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This also introduces a command-line parameter "io_delay" to override
the DMI based choice again:
io_delay=<0x80|0xed|udelay|none>
where 0x80 means using the standard port 0x80 and 0xed means the
alternate port 0xed.
All these methods can also be selected via the kernel .config,
and can be runtime tuned via /proc/sys/kernel/io_delay_type (for
debugging purposes).
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
[ [email protected]:
- add the io_delay=none method
- make each method selectable from the kernel config
- eliminate the indirect function calls
- add the /proc/sys/kernel/io_delay_type sysctl
- change 'standard' and 'alternate' to 0x80 and 0xed
- make the io delay config not depend on CONFIG_DEBUG_KERNEL ]
Signed-off-by: Rene Herman <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
Documentation/kernel-parameters.txt | 10 +++
arch/x86/Kconfig.debug | 74 ++++++++++++++++++++++++++++
arch/x86/boot/compressed/misc_32.c | 8 +--
arch/x86/boot/compressed/misc_64.c | 8 +--
arch/x86/kernel/Makefile_32 | 2
arch/x86/kernel/Makefile_64 | 2
arch/x86/kernel/io_delay.c | 95 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/setup_32.c | 2
arch/x86/kernel/setup_64.c | 2
include/asm-x86/io_32.h | 8 +--
include/asm-x86/io_64.h | 29 ++++++----
kernel/sysctl.c | 9 +++
12 files changed, 224 insertions(+), 25 deletions(-)
Index: linux-x86.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-x86.q.orig/Documentation/kernel-parameters.txt
+++ linux-x86.q/Documentation/kernel-parameters.txt
@@ -785,6 +785,16 @@ and is between 256 and 4096 characters.
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
+ udelay
+ Simple two-microsecond delay
+ none
+ No delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
Index: linux-x86.q/arch/x86/Kconfig.debug
===================================================================
--- linux-x86.q.orig/arch/x86/Kconfig.debug
+++ linux-x86.q/arch/x86/Kconfig.debug
@@ -112,4 +112,78 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+ int
+ default "0"
+
+config IO_DELAY_TYPE_0XED
+ int
+ default "1"
+
+config IO_DELAY_TYPE_UDELAY
+ int
+ default "2"
+
+config IO_DELAY_TYPE_NONE
+ int
+ default "3"
+
+choice
+ prompt "IO delay type"
+ default IO_DELAY_0X80
+
+config IO_DELAY_0X80
+ bool "port 0x80 based port-IO delay [recommended]"
+ help
+ This is the traditional Linux IO delay used for in/out_p.
+ It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+ bool "port 0xed based port-IO delay"
+ help
+ Use port 0xed as the IO delay. This frees up port 0x80 which is
+ often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+ bool "udelay based port-IO delay"
+ help
+ Use udelay(2) as the IO delay method. This provides the delay
+ while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+ bool "no port-IO delay"
+ help
+ No port-IO delay. Will break on old boxes that require port-IO
+ delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_NONE
+endif
+
endmenu
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/Makefile_32
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_32
+++ linux-x86.q/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/Makefile_64
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_64
+++ linux-x86.q/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/io_delay.c
===================================================================
--- /dev/null
+++ linux-x86.q/arch/x86/kernel/io_delay.c
@@ -0,0 +1,113 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/* Active delay method; one of the CONFIG_IO_DELAY_TYPE_* values. */
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
+
+/* Set by io_delay_param(); makes io_delay_init() skip the DMI quirk table. */
+static int __initdata io_delay_override;
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ *
+ * Perform one I/O delay using the method selected by io_delay_type. Unknown
+ * values fall back to the port 0x80 write.
+ */
+void native_io_delay(void)
+{
+	switch (io_delay_type) {
+	default:
+	case CONFIG_IO_DELAY_TYPE_0X80:
+		asm volatile ("outb %al, $0x80");
+		break;
+	case CONFIG_IO_DELAY_TYPE_0XED:
+		asm volatile ("outb %al, $0xed");
+		break;
+	case CONFIG_IO_DELAY_TYPE_UDELAY:
+		/*
+		 * 2 usecs is an upper-bound for the outb delay but
+		 * note that udelay doesn't have the bus-level
+		 * side-effects that outb does, nor does udelay() have
+		 * precise timings during very early bootup (the delays
+		 * are shorter until calibrated):
+		 */
+		udelay(2);
+		break;
+	case CONFIG_IO_DELAY_TYPE_NONE:
+		break;
+	}
+}
+EXPORT_SYMBOL(native_io_delay);
+
+/* DMI match callback: switch the delay method to the port 0xed write. */
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
+{
+	printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
+	io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+
+	return 0;
+}
+
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+	{
+		.callback = dmi_io_delay_0xed_port,
+		.ident = "HP Pavilion dv9000z",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B9")
+		}
+	},
+	{ }
+};
+
+/*
+ * Apply the DMI quirk table unless io_delay_param() recorded an override.
+ */
+void __init io_delay_init(void)
+{
+	if (!io_delay_override)
+		dmi_check_system(io_delay_0xed_port_dmi_table);
+}
+
+/*
+ * Parse io_delay=<0x80|0xed|udelay|none>. Returns -EINVAL for a missing or
+ * unrecognized value and leaves the current setting untouched in that case.
+ */
+static int __init io_delay_param(char *s)
+{
+	/* The option may be given without a value, in which case s is NULL. */
+	if (!s)
+		return -EINVAL;
+
+	if (!strcmp(s, "0x80"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+	else if (!strcmp(s, "0xed"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+	else if (!strcmp(s, "udelay"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+	else if (!strcmp(s, "none"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+	else
+		return -EINVAL;
+
+	io_delay_override = 1;
+	return 0;
+}
+
+early_param("io_delay", io_delay_param);
Index: linux-x86.q/arch/x86/kernel/setup_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_32.c
+++ linux-x86.q/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
Index: linux-x86.q/arch/x86/kernel/setup_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_64.c
+++ linux-x86.q/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,10 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
+
+extern int io_delay_type;
+extern void io_delay_init(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,20 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void native_io_delay(void);
+extern int io_delay_type;
+extern void io_delay_init(void);
+
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +57,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Index: linux-x86.q/kernel/sysctl.c
===================================================================
--- linux-x86.q.orig/kernel/sysctl.c
+++ linux-x86.q/kernel/sysctl.c
@@ -53,6 +53,7 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -683,6 +684,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
On 17-12-07 11:57, Ingo Molnar wrote:
> thanks Rene! I've added your patch to x86.git. I changed a few things
> ontop of it, see the additional changelog and delta patch below.
"appropriated it", more. Definitely not going to forgive you for deleting
that comment.
> void native_io_delay(void)
> {
> - io_delay();
> + switch (io_delay_type) {
That's the clumsy bit. native_io_delay() used to be an inline outb, now it's
a switch. Yes, sure, versus an indirect call it's not actually worse, except
as an uglification.
> -#ifndef CONFIG_UDELAY_IO_DELAY
> -static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
> +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
> {
> - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
> - io_delay = alternate_io_delay;
> + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
> + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
> +
> return 0;
> }
This isn't correct. DMI shouldn't override the CONFIG choice or someone with
matching DMI will have a defective CONFIG option. That's why I put all of it
inside #ifndef.
Rene.
On 17-12-07 04:35, H. Peter Anvin wrote:
> Well, we probably should leave the possibility in to use 0x80 -- for one
> thing, we need to use 0x80 on 386, and there is always the possibility
> that the switch will have different timing properties on some or all
> machines.
>
> Note that this doesn't require that a machine actually implements port
> 0xf0 for FERR/IGNNE, it just requires that they don't use it for
> something else.
>
> I would be rather inclined to try using port 0xf0 by default as long as
> family > 3[*] (with fallback to port 0x80) at least experimentally (-mm).
Possible timing differences would be what worries me. 0x80 is well-known for
its delay purposes, and frankly, I don't believe that one type of machine
having a problem, which may very well have to be categorized as a possibly
BIOS-fixable bug, is enough ground for switching everyone over to a different
port.
It's enough ground to look at not doing outputs at all AFAIC but that's more
due to the outb being somewhat cheesy to start with which using a different
port wouldn't change. But, on the other hand:
> We *might* even be able to use port 0xf0 unconditionally in the setup
> code, since we're not using the FPU there (the only FPU instructions in
> the setup code are there to detect the FPU.)
>
> One thing: although I believe most actual implementations of port 0xf0
> implement it as a strobe alone (data is ignored), all documentation I've
> found, including "The Undocumented PC" specifically says "write 0x00 to
> this port." This *could* mean there are platforms which use other
> values than 0x00 for other hacks.
The Intel PIIX/PIIX3 datasheet confirms that the data is of no consequence,
but yes, most documentation talks about 0.
The PIIX/PIIX3 datasheet also says that both reads and writes flow through
to the ISA bus, while for port 0x80 only writes do, and reads do not.
I do not know how universal that is, but _reading_ port 0xf0 might in fact
be sensible then? And should even work on a 386/387 pair? (I have a 386/387
in fact, although I'd need to dig it up).
Versus the out it has the al clobber disadvantage, but given that we by now
seem to be talking about out-of-line switch() native_io_delays anyway,
that's not much of a problem anymore...
> [*] The following statements are equivalent:
> - family > 3.
> - CR0.NE is settable.
> - EFLAGS.AC is settable.
For the boot code, I gather (which could I suppose then also plug in the
delay port in the zero page or somewhere for use by the kernel proper? I
don't know how/if these bits communicate).
But, well, _reading_ port 0xf0 sounds like a promising across-the-board and
low-risk replacement _if_ the PIIX/PIIX3 behaviour is as guaranteed as the
port 0x80 behaviour...
Rene.
* Rene Herman <[email protected]> wrote:
> On 17-12-07 11:57, Ingo Molnar wrote:
>
>> thanks Rene! I've added your patch to x86.git. I changed a few things
>> ontop of it, see the additional changelog and delta patch below.
>
> "appropriated it", more. [...]
huh?
> [...] Definitely not going to forgive you for deleting that comment.
Do you mean:
+/*
+ * High on a hill was a lonely goatherd
+ */
?
>> void native_io_delay(void)
>> {
>> - io_delay();
>> + switch (io_delay_type) {
>
> That's the clumsy bit. native_io_delay() used to be an inline outb,
> now it's a switch. Yes, sure, versus an indirect call it's not
> actually worse, except as an uglification.
the switch enables the sysctl. I don't see the callback as in any way
cleaner. (in fact it made things more inflexible.)
>> -#ifndef CONFIG_UDELAY_IO_DELAY
>> -static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
>> +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
>> {
>> - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
>> - io_delay = alternate_io_delay;
>> + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
>> + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
>> +
>> return 0;
>> }
>
> This isn't correct. DMI shouldn't override the CONFIG choice or
> someone with matching DMI will have a defective CONFIG option. That's
> why I put all of it inside #ifndef.
no, the DMI quirk is just that: a quirk that makes boxes work. The DMI
quirk takes precedence over just about any .config default, except an
explicit boot-commandline override.
Ingo
On 17-12-07 14:09, Ingo Molnar wrote:
>>> -#ifndef CONFIG_UDELAY_IO_DELAY
>>> -static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id)
>>> +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
>>> {
>>> - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
>>> - io_delay = alternate_io_delay;
>>> + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
>>> + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
>>> +
>>> return 0;
>>> }
>> This isn't correct. DMI shouldn't override the CONFIG choice or
>> someone with matching DMI will have a defective CONFIG option. That's
>> why I put all of it inside #ifndef.
>
> no, the DMI quirk is just that: a quirk that makes boxes work. The DMI
> quirk takes precedence over just about any .config default, except an
> explicit boot-commandline override.
No, most definitely not. Having the user select udelay or none through the
kernel config and then the kernel deciding "ah, you know what, I'll know
better and use port access anyway" is _utterly_ broken behaviour. Software
needs to listen to its master.
Rene.
On Mon 2007-12-17 14:22:26, Rene Herman wrote:
> On 17-12-07 14:09, Ingo Molnar wrote:
>
>>>> -#ifndef CONFIG_UDELAY_IO_DELAY
>>>> -static int __init dmi_alternate_io_delay_port(const struct
>>>> dmi_system_id *id)
>>>> +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id
>>>> *id)
>>>> {
>>>> - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
>>>> - io_delay = alternate_io_delay;
>>>> + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
>>>> + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
>>>> +
>>>> return 0;
>>>> }
>>> This isn't correct. DMI shouldn't override the CONFIG choice or someone
>>> with matching DMI will have a defective CONFIG option. That's why I put
>>> all of it inside #ifndef.
>> no, the DMI quirk is just that: a quirk that makes boxes work. The DMI
>> quirk takes precedence over just about any .config default, except an
>> explicit boot-commandline override.
>
> No, most definitely not. Having the user select udelay or none through the
> kernel config and then the kernel deciding "ah, you know what, I'll know
> better and use port access anyway" is _utterly_ broken behaviour. Software
> needs to listen to its master.
That's what command line is for. Ingo is right here.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 17-12-07 14:31, Pavel Machek wrote:
> On Mon 2007-12-17 14:22:26, Rene Herman wrote:
>> On 17-12-07 14:09, Ingo Molnar wrote:
>>
>>>>> -#ifndef CONFIG_UDELAY_IO_DELAY
>>>>> -static int __init dmi_alternate_io_delay_port(const struct
>>>>> dmi_system_id *id)
>>>>> +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id
>>>>> *id)
>>>>> {
>>>>> - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
>>>>> - io_delay = alternate_io_delay;
>>>>> + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
>>>>> + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
>>>>> +
>>>>> return 0;
>>>>> }
>>>> This isn't correct. DMI shouldn't override the CONFIG choice or someone
>>>> with matching DMI will have a defective CONFIG option. That's why I put
>>>> all of it inside #ifndef.
>>> no, the DMI quirk is just that: a quirk that makes boxes work. The DMI
>>> quirk takes precedence over just about any .config default, except an
>>> explicit boot-commandline override.
>> No, most definitely not. Having the user select udelay or none through the
>> kernel config and then the kernel deciding "ah, you know what, I'll know
>> better and use port access anyway" is _utterly_ broken behaviour. Software
>> needs to listen to its master.
>
> That's what command line is for. Ingo is right here.
No. The kernel shouldn't provide defective config options.
Rene.
Rene Herman wrote:
> No, most definitely not. Having the user select udelay or none through
> the kernel config and then the kernel deciding "ah, you know what,
> I'll know better and use port access anyway" is _utterly_ broken
> behaviour. Software needs to listen to its master.
>
When acting as an ordinary user, the .config is beyond my control
(except on Gentoo). It is in control of the distro (Fedora, Ubuntu,
... but perhaps not Gentoo). I think the distro guys want a default
behavior that is set in .config, with quirk overrides being done when
needed. And of course the user in his/her boot params gets the final say.
About to start building and testing. It will take a few hours.
Ingo Molnar wrote:
> here's an updated rollup patch, against 2.6.24-rc4. David, could you
> please try this? This should work out of box on your system, without any
> boot option or other tweak needed.
>
>
>
On 17-12-07 14:32, David P. Reed wrote:
> Rene Herman wrote:
>> No, most definitely not. Having the user select udelay or none through
>> the kernel config and then the kernel deciding "ah, you know what,
>> I'll know better and use port access anyway" is _utterly_ broken
>> behaviour. Software needs to listen to its master.
>>
> When acting as an ordinary user, the .config is beyond my control
> (except on Gentoo). It is in control of the distro (Fedora, Ubuntu,
> ... but perhaps not Gentoo). I think the distro guys want a default
> behavior that is set in .config, with quirk overrides being done when
> needed. And of course the user in his/her boot params gets the final say.
Yes, and when the user/distributor specifically selected udelay or none as
an I/O delay method it makes no sense whatsoever to have the kernel override
that again -- the DMI hack only fixes something for the default case, when
_no_ specific choice had been made (which the current setup can't express
but mine did).
I feel particularly strongly (always) about that "listen to its master" bit.
The kernel does not know better than whoever configured it, even when it does.
Rene.
* David P. Reed <[email protected]> wrote:
> Rene Herman wrote:
>> No, most definitely not. Having the user select udelay or none through the
>> kernel config and then the kernel deciding "ah, you know what, I'll know
>> better and use port access anyway" is _utterly_ broken behaviour. Software
>> needs to listen to its master.
>
> When acting as an ordinary user, the .config is beyond my control
> (except on Gentoo). It is in control of the distro (Fedora, Ubuntu,
> ... but perhaps not Gentoo). I think the distro guys want a default
> behavior that is set in .config, with quirk overrides being done when
> needed. And of course the user in his/her boot params gets the final
> say.
yeah, that's exactly the thinking. Distros basically set general policy,
but a quirk is (almost) always specific and correct enough to override
that. We could perhaps refine this by directing the quirk to only be
applied if the current type is 0x80 - because in that case we know that
it's definitely not going to work. I.e. something like the small patch
below?
Ingo
---
arch/x86/kernel/io_delay.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
Index: linux-x86.q/arch/x86/kernel/io_delay.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/io_delay.c
+++ linux-x86.q/arch/x86/kernel/io_delay.c
@@ -47,8 +47,11 @@ EXPORT_SYMBOL(native_io_delay);
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
- printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
+ id->ident);
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ }
return 0;
}
* Robert Hancock <[email protected]> wrote:
>> unfortunately this hack's side-effects are mis-used by an unknown
>> number of drivers to mask PCI posting bugs. We want to figure out
>> those bugs (safely and carefully) and we want to remove this hack
>> from modern machines that dont need it. Doing anything else would be
>> superstition.
>
> Are there any such examples known of such drivers? It doesn't seem to
> make much sense.. PCI IO writes are not posted on any known system
> (the spec allows them to be posted in the host bus bridge, but if they
> were they could only be flushed by a read, not a write) and PCI MMIO
> writes are only guaranteed to flush by doing a read from that device,
> not by other random port accesses. I suppose using the _p versions of
> port accesses might happen to mask such problems on certain machines..
yeah, that's the fear - that timing sensitivities or outright races are
hidden via _p() uses. It's a bit like the BKL - nobody really knows why
it's still needed in some places but there's "fear" that "stuff might
break" so removal is very slow. So we should get rid of all _p() uses,
by either removing them (concluding that the _p() was not needed), or by
adding in an udelay(2) (documenting that the device indeed relies on the
delay from the host side) or by adding whatever posting/flushing is
needed. That will gradually reduce the amount of code that uses _p()
methods, and will improve the quality of the kernel.
Ingo
I don't think we should be offering udelay based delays at this point.
There are a lot of drivers to fix first. This is just one trivial example
...
--- drivers/watchdog/wdt.c~ 2007-12-17 15:58:49.000000000 +0000
+++ drivers/watchdog/wdt.c 2007-12-17 15:58:49.000000000 +0000
@@ -70,6 +70,8 @@
static int io=0x240;
static int irq=11;
+static DEFINE_SPINLOCK(wdt_lock);
+
module_param(io, int, 0);
MODULE_PARM_DESC(io, "WDT io port (default=0x240)");
module_param(irq, int, 0);
@@ -109,6 +111,8 @@
static int wdt_start(void)
{
+ unsigned long flags;
+ spin_lock_irqsave(&wdt_lock, flags);
inb_p(WDT_DC); /* Disable watchdog */
wdt_ctr_mode(0,3); /* Program CTR0 for Mode 3:
Square Wave Generator */ wdt_ctr_mode(1,2); /* Program
CTR1 for Mode 2: Rate Generator */ @@ -117,6 +121,7 @@
wdt_ctr_load(1,wd_heartbeat); /* Heartbeat */
wdt_ctr_load(2,65535); /* Length of reset pulse */
outb_p(0, WDT_DC); /* Enable watchdog */
+ spin_unlock_irqrestore(&wdt_lock, flags);
return 0;
}
@@ -128,9 +133,12 @@
static int wdt_stop (void)
{
+ unsigned long flags;
+ spin_lock_irqsave(&wdt_lock, flags);
/* Turn the card off */
inb_p(WDT_DC); /* Disable watchdog */
wdt_ctr_load(2,0); /* 0 length reset pulses now */
+ spin_unlock_irqrestore(&wdt_lock, flags);
return 0;
}
@@ -143,11 +151,14 @@
static int wdt_ping(void)
{
+ unsigned long flags;
+ spin_lock_irqsave(&wdt_lock, flags);
/* Write a watchdog value */
inb_p(WDT_DC); /* Disable watchdog */
wdt_ctr_mode(1,2); /* Re-Program CTR1 for Mode 2:
Rate Generator */ wdt_ctr_load(1,wd_heartbeat); /* Heartbeat */
outb_p(0, WDT_DC); /* Enable watchdog */
+ spin_unlock_irqrestore(&wdt_lock, flags);
return 0;
}
@@ -182,7 +193,12 @@
static int wdt_get_status(int *status)
{
- unsigned char new_status=inb_p(WDT_SR);
+ unsigned char new_status;
+ unsigned long flags;
+
+ spinlock_irqsave(&wdt_lock, flags);
+ new_status = inb_p(WDT_SR);
+ spin_unlock_irqrestore(&wdt_lock, flags);
*status=0;
if (new_status & WDC_SR_ISOI0)
@@ -214,8 +230,12 @@
static int wdt_get_temperature(int *temperature)
{
- unsigned short c=inb_p(WDT_RT);
+ unsigned short c;
+ unsigned long flags;
+ spinlock_irqsave(&wdt_lock, flags);
+ c=inb_p(WDT_RT);
+ spin_unlock_irqrestore(&wdt_lock, flags);
*temperature = (c * 11 / 15) + 7;
return 0;
}
@@ -237,7 +257,10 @@
* Read the status register see what is up and
* then printk it.
*/
- unsigned char status=inb_p(WDT_SR);
+ unsigned char status;
+
+ spin_lock(&wdt_lock);
+ status = inb_p(WDT_SR);
printk(KERN_CRIT "WDT status %d\n", status);
@@ -265,6 +288,7 @@
printk(KERN_CRIT "Reset in 5ms.\n");
#endif
}
+ spin_unlock(&wdt_lock);
return IRQ_HANDLED;
}
* Alan Cox <[email protected]> wrote:
> I don't think we should be offering udelay based delays at this point.
> There are a lot of drivers to fix first. This is just one trivial
> example
>
> ...
>
> --- drivers/watchdog/wdt.c~ 2007-12-17 15:58:49.000000000 +0000
> +++ drivers/watchdog/wdt.c 2007-12-17 15:58:49.000000000 +0000
> @@ -70,6 +70,8 @@
> static int io=0x240;
> static int irq=11;
>
> +static DEFINE_SPINLOCK(wdt_lock);
> +
> module_param(io, int, 0);
> MODULE_PARM_DESC(io, "WDT io port (default=0x240)");
> module_param(irq, int, 0);
> @@ -109,6 +111,8 @@
>
> static int wdt_start(void)
> {
> + unsigned long flags;
> + spin_lock_irqsave(&wdt_lock, flags);
> inb_p(WDT_DC); /* Disable watchdog */
> wdt_ctr_mode(0,3); /* Program CTR0 for Mode 3:
a really stupid question, in what way does:
inb_p(WDT_DC);
work better than:
inb(WDT_DC);
delay(2);
?
(i'm not suggesting you are wrong, this detail just fails to click at
the moment.)
Ingo
Rene Herman wrote:
>
> I do not know how universal that is, but _reading_ port 0xf0 might in
> fact be sensible then? And should even work on a 386/387 pair? (I have a
> 386/387 in fact, although I'd need to dig it up).
>
No. Someone might have used 0xf0 as a readonly port for other uses.
-hpa
On Fri, 14 Dec 2007, David P. Reed wrote:
> Avi Kivity wrote:
>> kvm will forward a virtual machine's writes to port 0x80 to the real
>> port. The reason is that the write is much faster than exiting and
>> emulating it; the difference is measurable when compiling kernels.
>>
>> Now if the cause is simply writing to port 0x80, then we must stop
>> doing that. But if the reason is the back-to-back writes, when we can
>> keep it, since the other writes will be trapped by kvm and emulated.
>> Do you which is the case?
>>
> As for kvm, I don't have enough info to know anything about that. Is
> there a test you'd like me to try?
>
> I think you are also asking if the crash on these laptops is caused only
> by back-to-back writes. Actually, it doesn't seem to matter if they are
> back to back. I can cause the crash if the writes to 80 are very much
> spread out in time - it seems only to matter how many of them get
> executed - almost as if there is a buffer overflow. (And of course if
> you do back to back writes to other ports that are apparently fully
> unused, such as 0xED on my machine, no crash occurs).
>
> I believe (though no one seems to have confirming documentation from the
> chipset or motherboard vendor) that port 80 is actually functional for
> some unknown function on these machines. (They do respond to "in"
> instructions faster than a bus cycle abort does - more evidence).
>
> I searched the DSDT to see if there is any evidence of an ACPI use for
> this port, but found nothing.
>
>
Attached is a patch that changes the outs to ins on port 0x80.
I did NOT let gcc decide what to do about modified registers.
Instead, the code saves/restores EAX itself so that all of the
times (whatever they are) are the same.
The code works and is running here. I also patched a very early
version (2.4.26) running on a 400 MHz i486 with a real ISA
bus (Adaptec AHA1453). It works too.
David, will you please try it on your machine. Maybe reading
from the port is less harmful than writing.
Cheers,
Dick Johnson
Penguin : Linux version 2.6.22.1 on an i686 machine (5588.27 BogoMips).
My book : http://www.AbominableFirebug.com/
_
****************************************************************
The information transmitted in this message is confidential and may be privileged. Any review, retransmission, dissemination, or other use of this information by persons or entities other than the intended recipient is prohibited. If you are not the intended recipient, please notify Analogic Corporation immediately - by replying to this message or by sending an email to [email protected] - and destroy all copies of this information, including any attachments, without reading or disclosing them.
Thank you.
On 17-12-07 19:14, linux-os (Dick Johnson) wrote:
> Attached is a patch that changes the outs to ins on port 0x80.
No, that isn't useful. Only a write is "guaranteed" to make it to ISA/LPC,
meaning the timing for a read varies wildly. See the in/out cycle results
posted earlier. I was also reading the Intel PIIX(3) chipset datasheet today,
which specifically mentions that only writes flow through to ISA; reads do not.
Rene.
Ingo -
I finished testing the rolled up patch that you provided. It seems to
work just fine. Thank you for putting this all together and persevering
in this long and complex discussion.
Here are the results, on the offending laptop, using 2.6.24-rc5 plus
that one patch.
First: booted with normal boot parameters (no io_delay=):
According to dmesg, 0xed is used.
hwclock ran fine, hundreds of times.
my shell script loop doing "cat /dev/nvram > /dev/null" ran fine,
several times.
Running Rene's "port 80" speed test ran fine once, then froze the
system hard. (expected)
Second: booted with io_delay=0x80, several tests, rebooting after freezes:
hwclock froze system hard. (this is the problem that drove me to
find this bug).
my shell script loop froze system hard.
Third: booted with io_delay=none:
hwclock ran fine, also hundreds of times.
my shell script loop ran fine several times.
Running Rene's port80 test ran fine twice, froze system hard on
third try.
Fourth: booted with io_delay=udelay:
hwclock ran fine, also hundreds of times.
my shell script loop ran fine several times.
Running Rene's port80 test ran fine, froze system hard on second try.
Analysis:
patch works fine, and default to 0xed seems super conservative.
I will probably use the boot parameter io_delay=none, because I
don't seem to have any I/O
devices that require any delays - and this way I can find any that do.
Still wondering:
what the heck is going on with port 80 on my laptop motherboard.
Clearly it "does something".
I will in my spare time continue investigating, though having a
reliable system is GREAT.
H. Peter Anvin wrote:
> Rene Herman wrote:
>>
>> I do not know how universal that is, but _reading_ port 0xf0 might in
>> fact be sensible then? And should even work on a 386/387 pair? (I
>> have a 386/387 in fact, although I'd need to dig it up).
>>
>
> No. Someone might have used 0xf0 as a readonly port for other uses.
>
As support: port 80 on the reporter's (my) HP dv9000z laptop clearly
responds to reads differently than "unused" ports. In particular, an
inb takes 1/2 the elapsed time compared to a read to "known" unused port
0xed - 792 tsc ticks for port 80 compared to about 1450 tsc ticks for
port 0xed and other unused ports (tsc at 800 MHz).
David P. Reed wrote:
>
> Still wondering:
>
> what the heck is going on with port 80 on my laptop motherboard.
> Clearly it "does something".
> I will in my spare time continue investigating, though having a
> reliable system is GREAT.
>
Almost guaranteed to be some kind of debugging hack, probably
implemented either in the SuperIO chip or in SMM (or both). When some
sort of log buffer fills up, the system dies.
-hpa
David P. Reed wrote:
> H. Peter Anvin wrote:
>> Rene Herman wrote:
>>>
>>> I do not know how universal that is, but _reading_ port 0xf0 might in
>>> fact be sensible then? And should even work on a 386/387 pair? (I
>>> have a 386/387 in fact, although I'd need to dig it up).
>>>
>>
>> No. Someone might have used 0xf0 as a readonly port for other uses.
>>
> As support: port 80 on the reporter's (my) HP dv9000z laptop clearly
> responds to reads differently than "unused" ports. In particular, an
> inb takes 1/2 the elapsed time compared to a read to "known" unused port
> 0xed - 792 tsc ticks for port 80 compared to about 1450 tsc ticks for
> port 0xed and other unused ports (tsc at 800 MHz).
>
Any timings for port 0xf0 (write zero), out of curiosity?
-hpa
commit e5f4d11c2470550500e8d8b798d902f2fe07b5c4
Author: Rene Herman <[email protected]>
Date: Mon Dec 17 21:23:55 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailinglist, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but still leaves:
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally various drivers are racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigid testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as tested by David P. Reed. He moreover reported that booting with
"acpi=off" also fixed things and seeing as how ACPI isn't touched
until after this DMI based I/O port switch leaving the ones in the
boot code be is safe.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..e736bab
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,48 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static unsigned short io_delay_port __read_mostly = IO_DELAY_PORT_STD;
+
+void native_io_delay(void)
+{
+ asm volatile ("outb %%al, %w0" : : "d" (io_delay_port));
+}
+EXPORT_SYMBOL(native_io_delay);
+
+static int __init dmi_io_delay_port_alt(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+ io_delay_port = IO_DELAY_PORT_ALT;
+ return 0;
+}
+
+static struct dmi_system_id __initdata dmi_io_delay_port_alt_table[] = {
+ {
+ .callback = dmi_io_delay_port_alt,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ }
+};
+
+void __init io_delay_init(void)
+{
+ dmi_check_system(dmi_io_delay_port_alt_table);
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..690b8f4 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,8 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void io_delay_init(void);
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..b2d4994 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,18 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void io_delay_init(void);
+extern void native_io_delay(void);
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +55,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Rene Herman wrote:
> On 17-12-07 17:12, Alan Cox wrote:
>
>> I don't think we should be offering udelay based delays at this point.
>> There are a lot of drivers to fix first. This is just one trivial example
>
> I agree. This thread's too full of people calling this outb method a
> dumb hack. It's a well-known legacy PC thing and while in practice the
> udelay might be a functional replacement for a majority of cases (save
> the races you are finding) a delay proportional to the bus speed makes
> great sense certainly when talking to hardware that itself runs
> proportional to the bus speed for example.
>
> So, really, how about just sticking in this minimal version for now?
> Only switches the port to 0xed based on DMI and is all that is needed to
> fix the actual problem. This should be minimal and no-risk enough that
> it could also go to .24 if people want it to. It'll fix a few HP laptops
> (I'll try and get/verify the dv6000z DMI strings as well).
>
I think keeping the command-line option available is a good thing,
though. If nothing else, it is something very quick we can ask other
people to try if they seem to have similar problems.
Other than that, this alternate-port patch is a low-impact patch not
affecting hardware not on the blacklist, which makes it appropriate for
2.6.24 IMO.
-hpa
H. Peter Anvin wrote:
> David P. Reed wrote:
>> As support: port 80 on the reporter's (my) HP dv9000z laptop clearly
>> responds to reads differently than "unused" ports. In particular, an
>> inb takes 1/2 the elapsed time compared to a read to "known" unused
>> port 0xed - 792 tsc ticks for port 80 compared to about 1450 tsc
>> ticks for port 0xed and other unused ports (tsc at 800 MHz).
>>
>
> Any timings for port 0xf0 (write zero), out of curiosity?
>
Here's a bunch of data:
port 0xF0: cycles: out 919, in 933
port 0xed: cycles: out 2541, in 2036
port 0x70: cycles: out n/a, in 934
port 0x80: cycles: out 1424, in 795
AMD Turion 64x2 TL-60 CPU running at 800 MHz, nVidia MCP51 chipset,
Quanta motherboard. Running 2.6.24-rc5 with Ingo's patch so inb_p, etc.
use port 0xed.
Note that I can run the port 80 test once, the second time I get the
hard freeze. I didn't try writing to port 70 from userspace - that
one's dangerous, but the reading of it was included for a timing typical
of a chipset supported device. These are all pretty consistent.
I find the "read" timing from 0x80 verrrrry interesting. The write
timing is also interesting, being faster than an unused port.
On 15-12-07 00:29, Alan Cox wrote:
>>> ?? Just initialize bogomips to 6GHz equivalent... and we are fine
>>> until 6GHz cpus come out.
>> How long will that take to boot on a 386?
>
> Well the dumb approach to fix that would seem to be to initialise it to
>
> cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
By the way, you have a 300 MHz 486? I believe 3 -> 40, 4 -> 133, 5 -> 233
would be good? And I'm not really sure about the etc. P6 has a large range
again...
Rene.
David P. Reed wrote:
>
> Note that I can run the port 80 test once, the second time I get the
> hard freeze. I didn't try writing to port 70 from userspace - that
> one's dangerous, but the reading of it was included for a timing typical
> of a chipset supported device. These are all pretty consistent.
>
> I find the "read" timing from 0x80 verrrrry interesting. The write
> timing is also interesting, being faster than an unused port.
>
Once again: reading from port 0x80 goes to the DMA page device.
-hpa
* David P. Reed <[email protected]> wrote:
> Ingo -
>
> I finished testing the rolled up patch that you provided. It seems to work
> just fine. Thank you for putting this all together and persevering in this
> long and complex discussion.
> Here are the results, on the offending laptop, using 2.6.24-rc5 plus that
> one patch.
>
> First: booted with normal boot parameters (no io_delay=):
>
> According to dmesg, 0xed is used.
>
> hwclock ran fine, hundreds of times.
> my shell script loop doing "cat /dev/nvram > /dev/null" ran fine,
> several times.
> Running Rene's "port 80" speed test ran fine once, then froze the system
> hard. (expected)
>
> Second: booted with io_delay=0x80, several tests, rebooting after freezes:
>
> hwclock froze system hard. (this is the problem that drove me to find
> this bug).
> my shell script loop froze system hard.
>
> Third: booted with io_delay=none:
>
> hwclock ran fine, also hundreds of times.
> my shell script loop ran fine several times.
> Running rene's port80 test ran fine twice, froze system hard on third
> try.
>
> Fourth: booted with io_delay=udelay:
>
> hwclock ran fine, also hundreds of times.
> my shell script loop ran fine several times.
> Running Rene's port80 test ran fine, froze system hard on second try.
>
> Analysis:
>
> patch works fine, and default to 0xed seems super conservative. I
> will probably use the boot parameter io_delay=none, because I don't
> seem to have any I/O
> devices that require any delays - and this way I can find any that
> do.
great, and thanks for the extensive testing! I've added this line to the
patch:
Tested-by: "David P. Reed" <[email protected]>
if you don't mind.
Ingo
> responds to reads differently than "unused" ports. In particular, an
> inb takes 1/2 the elapsed time compared to a read to "known" unused port
> 0xed - 792 tsc ticks for port 80 compared to about 1450 tsc ticks for
> port 0xed and other unused ports (tsc at 800 MHz).
Well at least we know where the port is now - that's too fast for an LPC
bus device, so it must be an SMI trap.
Only easy way to find out is to use the debugging event counters and see
how many instruction cycles are issued as part of the 0x80 port. If it's
surprisingly high then you've got a firmware bug and can go spank HP.
commit c83008ff40e95f89407807cb122127c5444b3bc4
Author: Rene Herman <[email protected]>
Date: Mon Dec 17 21:23:55 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailing list, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but still leaves:
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally various drivers are racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigorous testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
An early boot parameter to make the choice manually (and override any
possible DMI based decision) is also provided:
io_delay=standard|alternate
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as tested by David P. Reed. He moreover reported that booting with
"acpi=off" also fixed things and seeing as how ACPI isn't touched
until after this DMI based I/O port switch leaving the ones in the
boot code be is safe.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 33121d6..ff66cf4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -785,6 +785,13 @@ and is between 256 and 4096 characters. It is defined in the file
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay port
+ standard
+ Use the 0x80 standard I/O delay port (default)
+ alternate
+ Use the 0xed alternate I/O delay port
+
+ Use the
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..5029e7a
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,69 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static unsigned short io_delay_port __read_mostly = IO_DELAY_PORT_STD;
+
+void native_io_delay(void)
+{
+ asm volatile ("outb %%al, %w0" : : "d" (io_delay_port));
+}
+EXPORT_SYMBOL(native_io_delay);
+
+static int __init dmi_io_delay_port_alt(const struct dmi_system_id *id)
+{
+ printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident);
+ io_delay_port = IO_DELAY_PORT_ALT;
+ return 0;
+}
+
+static struct dmi_system_id __initdata dmi_io_delay_port_alt_table[] = {
+ {
+ .callback = dmi_io_delay_port_alt,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ }
+};
+
+static int __initdata io_delay_override;
+
+static int __init io_delay_param(char *s)
+{
+ if (!s)
+ return -EINVAL;
+
+ if (!strcmp(s, "standard"))
+ io_delay_port = IO_DELAY_PORT_STD;
+ else if (!strcmp(s, "alternate"))
+ io_delay_port = IO_DELAY_PORT_ALT;
+ else
+ return -EINVAL;
+
+ io_delay_override = 1;
+ return 0;
+}
+
+early_param("io_delay", io_delay_param);
+
+void __init io_delay_init(void)
+{
+ if (!io_delay_override)
+ dmi_check_system(dmi_io_delay_port_alt_table);
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..690b8f4 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,8 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void io_delay_init(void);
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..b2d4994 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,18 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void io_delay_init(void);
+extern void native_io_delay(void);
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +55,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Rene Herman wrote:
>
> Well, yes, I guess that does make sense. It's back again. Named the
> choices "standard" and "alternate" again as I feel "0x80" and "0xed"
> suggest they're free values a bit too much but if anyone feels strongly
> about it, so be it.
>
They ARE -- or really, should be, free values (0xeb and 0xf0 are other
reasonable values, for example.)
-hpa
* Rene Herman <[email protected]> wrote:
> On 17-12-07 17:12, Alan Cox wrote:
>
>> I don't think we should be offering udelay based delays at this point.
>> There are a lot of drivers to fix first. This is just one trivial example
>
> I agree. This thread's too full of people calling this outb method a
> dumb hack. It's a well-known legacy PC thing and while in practice the
> udelay might be a functional replacement for a majority of cases (save
> the races you are finding) a delay proportional to the bus speed makes
> great sense certainly when talking to hardware that itself runs
> proportional to the bus speed for example.
>
> So, really, how about just sticking in this minimal version for now?
> Only switches the port to 0xed based on DMI and is all that is needed
> to fix the actual problem. This should be minimal and no-risk enough
> that it could also go to .24 if people want it to. It'll fix a few HP
> laptops (I'll try and get/verify the dv6000z DMI strings as well).
>
> Ingo?
>
> Signed-off-by: Rene Herman <[email protected]>
hm, i see this as a step backwards from the pretty flexible patch that
David already tested. (and which also passed a few hundred bootup tests
on my x86 test-grid)
Ingo
* H. Peter Anvin <[email protected]> wrote:
> Rene Herman wrote:
>>
>> Well, yes, I guess that does make sense. It's back again. Named the
>> choices "standard" and "alternate" again as I feel "0x80" and "0xed"
>> suggest they're free values a bit too much but if anyone feels
>> strongly about it, so be it.
>
> They ARE -- or really, should be, free values (0xeb and 0xf0 are other
> reasonable values, for example.)
yeah. We've got the variant below for now, tested by David. We can still
change things later on if the need arises.
Ingo
-------------->
Subject: x86: provide a DMI based port 0x80 I/O delay override.
From: Rene Herman <[email protected]>
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailing list, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but...
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally some drivers may be racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigorous testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
This also introduces a command-line parameter "io_delay" to override
the DMI based choice again:
io_delay=<0x80|0xed|udelay|none>
where 0x80 means using the standard port 0x80 and 0xed means the
alternate port 0xed.
All these methods can also be selected via the kernel .config,
and can be runtime tuned via /proc/sys/kernel/io_delay_type (for
debugging purposes).
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
[ [email protected]:
- add the io_delay=none method
- make each method selectable from the kernel config
- eliminate the indirect function calls
- add the /proc/sys/kernel/io_delay_type sysctl
- change 'standard' and 'alternate' to 0x80 and 0xed
- make the io delay config not depend on CONFIG_DEBUG_KERNEL ]
Signed-off-by: Rene Herman <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: "David P. Reed" <[email protected]>
---
Documentation/kernel-parameters.txt | 10 +++
arch/x86/Kconfig.debug | 74 +++++++++++++++++++++++++++
arch/x86/boot/compressed/misc_32.c | 8 +-
arch/x86/boot/compressed/misc_64.c | 8 +-
arch/x86/kernel/Makefile_32 | 2
arch/x86/kernel/Makefile_64 | 2
arch/x86/kernel/io_delay.c | 98 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/setup_32.c | 2
arch/x86/kernel/setup_64.c | 2
include/asm-x86/io_32.h | 8 +-
include/asm-x86/io_64.h | 29 ++++++----
kernel/sysctl.c | 9 +++
12 files changed, 227 insertions(+), 25 deletions(-)
Index: linux-x86.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-x86.q.orig/Documentation/kernel-parameters.txt
+++ linux-x86.q/Documentation/kernel-parameters.txt
@@ -785,6 +785,16 @@ and is between 256 and 4096 characters.
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
+ udelay
+ Simple two microseconds delay
+ none
+ No delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
Index: linux-x86.q/arch/x86/Kconfig.debug
===================================================================
--- linux-x86.q.orig/arch/x86/Kconfig.debug
+++ linux-x86.q/arch/x86/Kconfig.debug
@@ -112,4 +112,78 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+ int
+ default "0"
+
+config IO_DELAY_TYPE_0XED
+ int
+ default "1"
+
+config IO_DELAY_TYPE_UDELAY
+ int
+ default "2"
+
+config IO_DELAY_TYPE_NONE
+ int
+ default "3"
+
+choice
+ prompt "IO delay type"
+ default IO_DELAY_0X80
+
+config IO_DELAY_0X80
+ bool "port 0x80 based port-IO delay [recommended]"
+ help
+ This is the traditional Linux IO delay used for in/out_p.
+ It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+ bool "port 0xed based port-IO delay"
+ help
+ Use port 0xed as the IO delay. This frees up port 0x80 which is
+ often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+ bool "udelay based port-IO delay"
+ help
+ Use udelay(2) as the IO delay method. This provides the delay
+ while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+ bool "no port-IO delay"
+ help
+ No port-IO delay. Will break on old boxes that require port-IO
+ delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_NONE
+endif
+
endmenu
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/Makefile_32
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_32
+++ linux-x86.q/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/Makefile_64
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_64
+++ linux-x86.q/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/io_delay.c
===================================================================
--- /dev/null
+++ linux-x86.q/arch/x86/kernel/io_delay.c
@@ -0,0 +1,98 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
+
+static int __initdata io_delay_override;
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ *
+ * Perform one I/O delay using the strategy currently selected in
+ * io_delay_type.  Any value that is not one of the known
+ * CONFIG_IO_DELAY_TYPE_* constants is handled like the port 0x80
+ * strategy.
+ */
+void native_io_delay(void)
+{
+	switch (io_delay_type) {
+	default:
+	case CONFIG_IO_DELAY_TYPE_0X80:
+		/* dummy write to port 0x80 */
+		asm volatile ("outb %al, $0x80");
+		break;
+	case CONFIG_IO_DELAY_TYPE_0XED:
+		/* dummy write to port 0xed */
+		asm volatile ("outb %al, $0xed");
+		break;
+	case CONFIG_IO_DELAY_TYPE_UDELAY:
+		/*
+		 * 2 usecs is an upper-bound for the outb delay but
+		 * note that udelay doesn't have the bus-level
+		 * side-effects that outb does, nor does udelay() have
+		 * precise timings during very early bootup (the delays
+		 * are shorter until calibrated):
+		 */
+		udelay(2);
+		break;
+	case CONFIG_IO_DELAY_TYPE_NONE:
+		/* no delay at all */
+		break;
+	}
+}
+EXPORT_SYMBOL(native_io_delay);
+
+/*
+ * DMI callback: move the I/O delay from port 0x80 to port 0xed on a
+ * matching system.  A delay type other than the 0x80 one is left
+ * untouched.  Always returns 0.
+ */
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
+{
+	if (io_delay_type != CONFIG_IO_DELAY_TYPE_0X80)
+		return 0;
+
+	printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", id->ident);
+	io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+
+	return 0;
+}
+
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ *
+ * io_delay_init() hands this table to dmi_check_system().  The one
+ * real entry matches on board vendor "Quanta" and board name "30B9"
+ * and names dmi_io_delay_0xed_port() as its callback.  The empty
+ * last entry marks the end of the table.
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ { }
+};
+
+/*
+ * Run the DMI quirk table, unless an "io_delay=" boot parameter was
+ * accepted (io_delay_override set), in which case the table is not
+ * consulted.
+ */
+void __init io_delay_init(void)
+{
+	if (io_delay_override)
+		return;
+
+	dmi_check_system(io_delay_0xed_port_dmi_table);
+}
+
+/*
+ * Handler for the "io_delay=" early boot parameter.
+ *
+ * Accepted values are "0x80", "0xed", "udelay" and "none"; each one
+ * selects the corresponding CONFIG_IO_DELAY_TYPE_* value.  On success
+ * io_delay_override is set, which makes io_delay_init() skip the DMI
+ * quirk table, and 0 is returned.
+ *
+ * Returns -EINVAL for an unknown value, and also when no value was
+ * supplied at all (s == NULL), instead of passing NULL to strcmp().
+ */
+static int __init io_delay_param(char *s)
+{
+	if (!s)
+		return -EINVAL;
+
+	if (!strcmp(s, "0x80"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+	else if (!strcmp(s, "0xed"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+	else if (!strcmp(s, "udelay"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+	else if (!strcmp(s, "none"))
+		io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+	else
+		return -EINVAL;
+
+	io_delay_override = 1;
+	return 0;
+}
+
+early_param("io_delay", io_delay_param);
Index: linux-x86.q/arch/x86/kernel/setup_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_32.c
+++ linux-x86.q/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
Index: linux-x86.q/arch/x86/kernel/setup_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_64.c
+++ linux-x86.q/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,10 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
+
+extern int io_delay_type;
+extern void io_delay_init(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,20 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void native_io_delay(void);
+extern int io_delay_type;
+extern void io_delay_init(void);
+
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +57,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Index: linux-x86.q/kernel/sysctl.c
===================================================================
--- linux-x86.q.orig/kernel/sysctl.c
+++ linux-x86.q/kernel/sysctl.c
@@ -53,6 +53,7 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -683,6 +684,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
commit c12c7a47b9af87e8d867d5aa0ca5c6bcdd2463da
Author: Rene Herman <[email protected]>
Date: Mon Dec 17 21:23:55 2007 +0100
x86: provide a DMI based port 0x80 I/O delay override.
Certain (HP) laptops experience trouble from our port 0x80 I/O delay
writes. This patch provides for a DMI based switch to the "alternate
diagnostic port" 0xed (as used by some BIOSes as well) for these.
David P. Reed confirmed that port 0xed works for him and provides a
proper delay. The symptoms of _not_ working are a hanging machine,
with "hwclock" use being a direct trigger.
Earlier versions of this attempted to simply use udelay(2), with the
2 being a value tested to be a nicely conservative upper-bound with
help from many on the linux-kernel mailinglist, but that approach has
two problems.
First, pre-loops_per_jiffy calibration (which is post PIT init while
some implementations of the PIT are actually one of the historically
problematic devices that need the delay) udelay() isn't particularly
well-defined. We could initialise loops_per_jiffy conservatively (and
based on CPU family so as to not unduly delay old machines) which
would sort of work, but still leaves:
Second, delaying isn't the only effect that a write to port 0x80 has.
It's also a PCI posting barrier which some devices may be explicitly
or implicitly relying on. Alan Cox did a survey and found evidence
that additionally various drivers are racy on SMP without the bus
locking outb.
Switching to an inb() makes the timing too unpredictable and as such,
this DMI based switch should be the safest approach for now. Any more
invasive changes should get more rigid testing first. It's moreover
only very few machines with the problem and a DMI based hack seems
to fit that situation.
An early boot parameter to make the choice manually (and override any
possible DMI based decision) is also provided:
io_delay=standard|alternate
This does not change the io_delay() in the boot code which is using
the same port 0x80 I/O delay but those do not appear to be a problem
as tested by David P. Reed. He moreover reported that booting with
"acpi=off" also fixed things and seeing as how ACPI isn't touched
until after this DMI based I/O port switch leaving the ones in the
boot code be is safe.
The DMI strings from David's HP Pavilion dv9000z are in there already
and we need to get/verify the DMI info from other machines with the
problem, notably the HP Pavilion dv6000z.
This patch is partly based on earlier patches from Pavel Machek and
David P. Reed.
Signed-off-by: Rene Herman <[email protected]>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 33121d6..6948e25 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -785,6 +785,12 @@ and is between 256 and 4096 characters. It is defined in the file
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay port
+ standard
+ Use the 0x80 standard I/O delay port (default)
+ alternate
+ Use the 0xed alternate I/O delay port
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c
index b74d60d..288e162 100644
--- a/arch/x86/boot/compressed/misc_32.c
+++ b/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c
index 6ea015a..43e5fcc 100644
--- a/arch/x86/boot/compressed/misc_64.c
+++ b/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a7bc93c..0cc1981 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 5a88890..08a68f0 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
new file mode 100644
index 0000000..5029e7a
--- /dev/null
+++ b/arch/x86/kernel/io_delay.c
@@ -0,0 +1,69 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+/*
+ * Allow for a DMI based override of port 0x80
+ */
+#define IO_DELAY_PORT_STD 0x80
+#define IO_DELAY_PORT_ALT 0xed
+
+static unsigned short io_delay_port __read_mostly = IO_DELAY_PORT_STD;
+
+/*
+ * Perform one I/O delay: a byte write to the currently selected
+ * delay port (io_delay_port).  No value is loaded into %al first.
+ */
+void native_io_delay(void)
+{
+	const unsigned short port = io_delay_port;
+
+	asm volatile ("outb %%al, %w0" : : "d" (port));
+}
+EXPORT_SYMBOL(native_io_delay);
+
+/*
+ * DMI callback: switch the I/O delay to the alternate port on a
+ * matching system and log the fact.  Always returns 0.
+ */
+static int __init dmi_io_delay_port_alt(const struct dmi_system_id *id)
+{
+	io_delay_port = IO_DELAY_PORT_ALT;
+	printk(KERN_NOTICE "%s: using alternate I/O delay port\n",
+	       id->ident);
+
+	return 0;
+}
+
+/*
+ * Systems that get the alternate I/O delay port; checked by
+ * io_delay_init().  The empty last entry marks the end of the table.
+ */
+static struct dmi_system_id __initdata dmi_io_delay_port_alt_table[] = {
+	{
+		.callback	= dmi_io_delay_port_alt,
+		.ident		= "HP Pavilion dv9000z",
+		.matches	= {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+			DMI_MATCH(DMI_BOARD_NAME, "30B9")
+		}
+	},
+	{ }
+};
+
+static int __initdata io_delay_override;
+
+/*
+ * Handler for the "io_delay=" early boot parameter.
+ *
+ * "standard" selects IO_DELAY_PORT_STD and "alternate" selects
+ * IO_DELAY_PORT_ALT.  On success io_delay_override is set and 0 is
+ * returned.  A missing or unknown value yields -EINVAL and changes
+ * nothing.
+ */
+static int __init io_delay_param(char *s)
+{
+	unsigned short port;
+
+	if (!s)
+		return -EINVAL;
+
+	if (strcmp(s, "standard") == 0)
+		port = IO_DELAY_PORT_STD;
+	else if (strcmp(s, "alternate") == 0)
+		port = IO_DELAY_PORT_ALT;
+	else
+		return -EINVAL;
+
+	io_delay_port = port;
+	io_delay_override = 1;
+
+	return 0;
+}
+
+early_param("io_delay", io_delay_param);
+
+/*
+ * Run the DMI table for the alternate delay port, unless an
+ * "io_delay=" boot parameter was accepted (io_delay_override set).
+ */
+void __init io_delay_init(void)
+{
+	if (io_delay_override)
+		return;
+
+	dmi_check_system(dmi_io_delay_port_alt_table);
+}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index e1e18c3..6c3a3b4 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();;
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 30d94d1..ec976ed 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index fe881cd..690b8f4 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -250,10 +250,8 @@ static inline void flush_write_buffers(void)
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void io_delay_init(void);
+extern void native_io_delay(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..b2d4994 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,18 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void io_delay_init(void);
+extern void native_io_delay(void);
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +55,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
On 17-12-07 22:40, H. Peter Anvin wrote:
> Rene Herman wrote:
>>
>> Well, yes, I guess that does make sense. It's back again. Named the
>> choices "standard" and "alternate" again as I feel "0x80" and "0xed"
>> suggest they're free values a bit too much but if anyone feels
>> strongly about it, so be it.
>>
>
> They ARE -- or really, should be, free values (0xeb and 0xf0 are other
> reasonable values, for example.)
I was afraid someone would say that. Making a random port available is fine
for testing purposes but a fairly dangerous thing to do generally. For a
minimal version at -rc4 time, I believe sticking with 0x80 and 0xed is best.
Lots of time during .25 to go wild...
Rene.
* Rene Herman <[email protected]> wrote:
>>> Signed-off-by: Rene Herman <[email protected]>
>>
>> hm, i see this as a step backwards from the pretty flexible patch
>> that David already tested. (and which also passed a few hundred
>> bootup tests on my x86 test-grid)
>
> Please see Alan's comment that udelay (and none) shouldn't yet be
> provided as a choice. It opens race windows in drivers even when it
> works in practice on most setups. The version with "udelay" and "none"
> is not minimal, not low risk and certainly not .24 material.
huh? By default we still use port 0x80. Any udelay is non-default and
needs the user to explicitly switch to it. But it enables us to debug
any suspected drivers by asking testers to: "please try this driver with
io_delay=udelay, does it still work fine?". So those extra options are
quite sensible. If you have any real technical arguments against that
then please let us know.
Ingo
On 17-12-07 22:56, Ingo Molnar wrote:
> * Rene Herman <[email protected]> wrote:
>
>>>> Signed-off-by: Rene Herman <[email protected]>
>>> hm, i see this as a step backwards from the pretty flexible patch
>>> that David already tested. (and which also passed a few hundred
>>> bootup tests on my x86 test-grid)
>> Please see Alan's comment that udelay (and none) shouldn't yet be
>> provided as a choice. It opens race windows in drivers even when it
>> works in practice on most setups. The version with "udelay" and "none"
>> is not minimal, not low risk and certainly not .24 material.
>
> huh? By default we still use port 0x80. Any udelay is non-default and
> needs the user to explicitly switch to it. But it enables us to debug
> any suspected drivers by asking testers to: "please try this driver with
> io_delay=udelay, does it still work fine?". So those extra options are
> quite sensible. If you have any real technical arguments against that
> then please let us know.
Ingo, have lots of fun playing with yourself, but remove my sign off from
anything with the udelay and none methods.
Rene.
Besides the two reports of freezes on bugzilla.kernel.org (9511, 6307),
the following two bug reports on bugzilla.redhat.com are almost
certainly due to the same cause (imo, of course): 245834, 227234.
Ubuntu launchpad bug 158849 also seems to report the same problem, for
an HP dv6258se 64-bit machine.
Also this one:
http://www.mail-archive.com/[email protected]/msg10321.html
If you want to collect dmidecode data from these folks, perhaps we might
get a wider sense of what categories of machines are affected. They all
seem to be recent HP and Compaq AMD64 laptops, probably all Quanta
motherboards.
On Dec 14 2007 14:13, H. Peter Anvin wrote:
>>
>> ?? Just initialize bogomips to 6GHz equivalent... and we are fine
>> until 6GHz cpus come out.
>
> How long will that take to boot on a 386?
>
Load it up in bochs and have a look at the wallclock. I think that is a
good estimate when you have no real 386 nearby.
On Dec 15 2007 17:46, Alan Cox wrote:
>
>> My understanding is that the linux starts in real mode, and uses the
>> BIOS for such things as reading the very first image.
>
>Not always. We may enter from 32bit in some cases, and we may also not
>have a PC BIOS in the first place.
Computers without a PC BIOS (I'm trying to think of something, e.g.
the typical SUN sparc64 box) should have other means of accessing a
clocksource, no?
Jan Engelhardt wrote:
> On Dec 15 2007 17:46, Alan Cox wrote:
>>> My understanding is that the linux starts in real mode, and uses the
>>> BIOS for such things as reading the very first image.
>> Not always. We may enter from 32bit in some cases, and we may also not
>> have a PC BIOS in the first place.
>
> Computers without a PC BIOS (I'm trying to think of something, e.g.
> the typical SUN sparc64 box) should have other means of accessing a
> clocksource, no?
We were talking about x86 here, though.
Even on x86 we sometimes run from the 32-bit entrypoint.
-hpa
On Mon 2007-12-17 22:04:19, Rene Herman wrote:
> On 15-12-07 00:29, Alan Cox wrote:
>
>>>> ?? Just initialize bogomips to 6GHz equivalent... and we are fine
>>>> until 6GHz cpus come out.
>>> How long will that take to boot on a 386?
>> Well the dumb approach to fix that would seem to be to initialise it to
>> cpu->family 3 -> 50MHz 4 -> 300Mhz 5-> etc...
>
> By the way, you have a 300 MHz 486? I believe 3 -> 40, 4 -> 133, 5 -> 233
> would be good? And I'm not really sure about the etc. P6 has a large range
> again...
Some nexgen 5x86 boxes were pretty fast, still could not do 486... so
family 3 iirc.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> > By the way, you have a 300 MHz 486? I believe 3 -> 40, 4 -> 133, 5 -> 233
> > would be good? And I'm not really sure about the etc. P6 has a large range
> > again...
>
> Some nexgen 5x86 boxes were pretty fast, still could not do 486... so
> family 3 iirc.
300MHz 486 -> Nat Semi Geode.
NextGen as you say are 386 - 586 depending on the BIOS hypercode but I
believe lack WP even in > 386 mode.
Alan
On Tue, Dec 18, 2007 at 12:06:08AM +0000, Alan Cox wrote:
> 300MHz 486 -> Nat Semi Geode.
>
> NextGen as you say are 386 - 586 depending on the BIOS hypercode but I
> believe lack WP even in > 386 mode.
Geode identifies itself as family 5 though. It may prefer 486 code but
it's still family 5. Well Geode GX, SCx200 and LX that is. I imagine
the Geode NX would be family 6.
--
Len Sorensen
David P. Reed wrote:
> Avi Kivity wrote:
>> kvm will forward a virtual machine's writes to port 0x80 to the real
>> port. The reason is that the write is much faster than exiting and
>> emulating it; the difference is measurable when compiling kernels.
>>
>> Now if the cause is simply writing to port 0x80, then we must stop
>> doing that. But if the reason is the back-to-back writes, then we
>> can keep it, since the other writes will be trapped by kvm and
>> emulated. Do you know which is the case?
>>
> As for kvm, I don't have enough info to know anything about that. Is
> there a test you'd like me to try?
>
I have a test, but I see that it is broken for mainline. I'll update it
eventually, but...
> I think you are also asking if the crash on these laptops is caused
> only by back-to-back writes. Actually, it doesn't seem to matter if
> they are back to back. I can cause the crash if the writes to 80 are
> very much spread out in time - it seems only to matter how many of
> them get executed - almost as if there is a buffer overflow. (And of
> course if you do back to back writes to other ports that are
> apparently fully unused, such as 0xED on my machine, no crash occurs).
>
> I believe (though no one seems to have confirming documentation from
> the chipset or motherboard vendor) that port 80 is actually functional
> for some unknown function on these machines. (They do respond to
> "in" instructions faster than a bus cycle abort does - more evidence).
That seems to be sufficient evidence for me to remove port 0x80
pass-through from kvm and emulate it instead. Given that port 80 writes
take 1 microsecond, and that an in-kernel exit handler takes a similar
amount of time, there won't be any significant performance loss.
--
error compiling committee.c: too many arguments to function
On Sun 2007-12-16 15:34:58, H. Peter Anvin wrote:
> Pavel Machek wrote:
>> Hi!
>>> The process of safely making delicate changes here is beyond my
>>> responsibility as just a user - believe me, I'm not suggesting that a
>>> risky fix be put in .24. I can patch my own kernels, and I can even
>>> share an unofficial patch with others for now, or suggest that Fedora and
>>> Ubuntu add it to their downstream.
>>>
>>> May I make a small suggestion, though. If the decision is a DMI-keyed
>>> switch from out-80 to udelay(2) gets put in, perhaps there should also
>>> be a way for people to test their own configuration for the underlying
>>> problem made available as a script. Though it is a "hack", all you need
>>> to freeze a problem system is to run a loop doing about 1000 "cat
>>> /dev/nvram > /dev/null" commands. If that leads to a freeze, one might
>>> ask to have the motherboard added to the DMI-key list.
>> Can you freeze it by catting /dev/rtc, too? That may be significant,
>> because that is readable for group audio (at least on some
>> systems)... which would smell like "small security hole" to me.
>
> Heck, on my system (Fedora 7), it's mode 644...
Ok, time to CC security team, I'd say.
Problem is, that some AMD64x2 nVidia laptops crash on port 0x80
access... which is easily user-triggerable by using /dev/rtc. If it is
644 on Fedora, I guess we have a problem.
Otoh, it is "only" a denial of service, and it can probably be
attributed to "buggy hardware". Is that still relevant for security team?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
Alan Cox wrote:
>> responds to reads differently than "unused" ports. In particular, an
>> inb takes 1/2 the elapsed time compared to a read to "known" unused port
>> 0xed - 792 tsc ticks for port 80 compared to about 1450 tsc ticks for
>> port 0xed and other unused ports (tsc at 800 MHz).
>>
>
> Well at least we know where the port is now - that's too fast for an LPC
> bus device, so it must be an SMI trap.
>
> Only easy way to find out is to use the debugging event counters and see
> how many instruction cycles are issued as part of the 0x80 port. If it's
> surprisingly high then you've got a firmware bug and can go spank HP.
>
>
Alan, thank you for the pointers. I have been doing variations on this
testing theme for a while - I get intrigued by a good debugging
challenge, and after all it's my machine...
Two relevant new data points, and then some more suggestions:
1. It appears to be a real port. SMI traps are not happening in the
normal outb to 80. Hundreds of them execute perfectly with the expected
instruction counts. If I can trace the particular event that creates
the hard freeze (getting really creative, here) and stop before the
freeze disables the entire computer, I will. That may be an SMI, or
perhaps any other kind of interrupt or exception. Maybe someone knows
how to safely trace through an impending SMI while doing printk's or
something?
2. It appears to be the standard POST diagnostic port. On a whim, I
disassembled my DSDT code, and studied it more closely. It turns out
that there are a bunch of "Store(..., DBUG)" instructions scattered
throughout, and when you look at what DBUG is defined as, it is defined
as an IO Port at IO address DBGP, which is a 1-byte value = 0x80. So
the ACPI BIOS thinks it has something to do with debugging. There's a
little strangeness here, however, because the value sent to the port
occasionally has something to do with arguments to the ACPI operations
relating to sleep and wakeup ... could just be that those arguments are
distinctive.
In thinking about this, I recognize a couple of things. ACPI is telling
us something when it declares a reference to port 80 in its code. It's
not telling us the function of this port on this machine, but it is
telling us that it is being used by the BIOS. This could be a reason
to put out a printk warning message... 'warning: port 80 is used by
ACPI BIOS - if you are experiencing problems, you might try an alternate
means of iodelay.'
Second, it seems likely that there are one of two possible reasons that
the port 80 writes cause hang/freezes:
1. buffer overflow in such a device.
2. there is some "meaning" to certain byte values being written (the
_PTS and _WAK use of arguments that come from callers to store into port
80 makes me suspicious.) That might mean that the freeze happens only
when certain values are written, or when they are written closely in
time to some other action - being used to communicate something to the
SMM code). If there is some race in when Linux's port 80 writes happen
that happen to change the meaning of a request to the hardware or to
SMM, then we could be rarely stepping on
[attached the DSDT.dsl file fyi]
Alan Cox wrote:
>> responds to reads differently than "unused" ports. In particular, an
>> inb takes 1/2 the elapsed time compared to a read to "known" unused port
>> 0xed - 792 tsc ticks for port 80 compared to about 1450 tsc ticks for
>> port 0xed and other unused ports (tsc at 800 MHz).
>>
>
> Well at least we know where the port is now - that's too fast for an LPC
> bus device, so it must be an SMI trap.
>
> Only easy way to find out is to use the debugging event counters and see
> how many instruction cycles are issued as part of the 0x80 port. If it's
> surprisingly high then you've got a firmware bug and can go spank HP.
>
>
Alan, thank you for the pointers. I have been doing variations on this
testing theme for a while - I get intrigued by a good debugging
challenge, and after all it's my machine...
Two relevant new data points, and then some more suggestions:
1. It appears to be a real port. SMI traps are not happening in the
normal outb to 80. Hundreds of them execute perfectly with the expected
instruction counts. If I can trace the particular event that creates
the hard freeze (getting really creative, here) and stop before the
freeze disables the entire computer, I will. That may be an SMI, or
perhaps any other kind of interrupt or exception. Maybe someone knows
how to safely trace through an impending SMI while doing printk's or
something?
2. It appears to be the standard POST diagnostic port. On a whim, I
disassembled my DSDT code, and studied it more closely. It turns out
that there are a bunch of "Store(..., DBUG)" instructions scattered
throughout, and when you look at what DBUG is defined as, it is defined
as an IO Port at IO address DBGP, which is a 1-byte value = 0x80. So
the ACPI BIOS thinks it has something to do with debugging. There's a
little strangeness here, however, because the value sent to the port
occasionally has something to do with arguments to the ACPI operations
relating to sleep and wakeup ... could just be that those arguments are
distinctive.
In thinking about this, I recognize a couple of things. ACPI is telling
us something when it declares a reference to port 80 in its code. It's
not telling us the function of this port on this machine, but it is
telling us that it is being used by the BIOS. This could be a reason
to put out a printk warning message... 'warning: port 80 is used by
ACPI BIOS - if you are experiencing problems, you might try an alternate
means of iodelay.'
Second, it seems likely that there are one of two possible reasons that
the port 80 writes cause hang/freezes:
1. buffer overflow in such a device.
2. there is some "meaning" to certain byte values being written (the
_PTS and _WAK use of arguments that come from callers to store into port
80 makes me suspicious.) That might mean that the freeze happens only
when certain values are written, or when they are written closely in
time to some other action - being used to communicate something to the
SMM code). If there is some race in when Linux's port 80 writes happen
that happen to change the meaning of a request to the hardware or to
SMM, then we could be rarely stepping on
> 80 makes me suspicious.) That might mean that the freeze happens only
> when certain values are written, or when they are written closely in
> time to some other action - being used to communicate something to the
> SMM code). If there is some race in when Linux's port 80 writes happen
> that happen to change the meaning of a request to the hardware or to
> SMM, then we could be rarely stepping on
That does imply some muppet 'extended' the debug interface for power
management on your laptop. Also pretty much proves that for such systems
we do have to move from port 0x80 to another delay approach.
Ingo - the fact that so many ISA bus devices need _p to mean "ISA bus
clocks" says to me we should keep the _p port 0x80 using variant for old
systems/device combinations (eg ISA ethernet cards) which won't show up
in any problem system (we know this from 15 odd years of testing), but
stop using it for PCI and embedded devices on modern systems.
Alan
Hi!
> 1. It appears to be a real port. SMI traps are not happening in the
> normal outb to 80. Hundreds of them execute perfectly with the expected
> instruction counts. If I can trace the particular event that creates
> the hard freeze (getting really creative, here) and stop before the
> freeze disables the entire computer, I will. That may be an SMI, or
> perhaps any other kind of interrupt or exception. Maybe someone knows
> how to safely trace through an impending SMI while doing printk's or
> something?
>
> 2. It appears to be the standard POST diagnostic port. On a whim, I
> disassembled my DSDT code, and studied it more closely. It turns out
> that there are a bunch of "Store(..., DBUG)" instructions scattered
> throughout, and when you look at what DBUG is defined as, it is defined
> as an IO Port at IO address DBGP, which is a 1-byte value = 0x80. So
> the ACPI BIOS thinks it has something to do with debugging. There's a
> little strangeness here, however, because the value sent to the port
> occasionally has something to do with arguments to the ACPI operations
> relating to sleep and wakeup ... could just be that those arguments are
> distinctive.
Maybe someone just left debugging code in production?
> In thinking about this, I recognize a couple of things. ACPI is telling
> us something when it declares a reference to port 80 in its code. It's
> not telling us the function of this port on this machine, but it is
> telling us that it is being used by the BIOS. This could be a reason
> to put out a printk warning message... 'warning: port 80 is used by
> ACPI BIOS - if you are experiencing problems, you might try an alternate
> means of iodelay.'
>
> Second, it seems likely that there are one of two possible reasons that
> the port 80 writes cause hang/freezes:
>
> 1. buffer overflow in such a device.
>
> 2. there is some "meaning" to certain byte values being written (the
> _PTS and _WAK use of arguments that come from callers to store into port
> 80 makes me suspicious.) That might mean that the freeze happens only
> when certain values are written, or when they are written closely in
> time to some other action - being used to communicate something to the
There's nothing easier than always writing 0 to the 0x80 to check if
it hangs in such case...?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
* Alan Cox <[email protected]> wrote:
> Ingo - the fact that so many ISA bus devices need _p to mean "ISA bus
> clocks" says to me we should keep the _p port 0x80 using variant for
> old systems/device combinations (eg ISA ethernet cards) which won't
> show up in any problem system (we know this from 15 odd years of
> testing), but stop using it for PCI and embedded devices on modern
> systems.
yes, ISA is fragile, and no way do we want to remove the delay, but are
there strong counter-arguments against doing the clean thing and adding
an udelay(2) (or udelay(1)) to replace those _p() uses in ISA drivers?
That removes the global effect once and forever. Initially for
standalone drivers without early bootup functionality, not platform
drivers that might need to run before we have calibrated udelay.
if someone runs a fresh new kernel on an ancient device then timings
_will_ change a bit, no matter what we do. Alignments change, the
compiler output will change (old compilers get deprecated so a new
compiler might have to be picked), cache effects change - and this is
inevitable. The important thing is to not eliminate the delays - but we
sure dont have to keep them cycle accurate (we couldnt even if we wanted
to). The only way to get the _exact same_ behavior is to not change the
kernel at all.
Ingo
On Tue, 1 Jan 2008 17:43:38 +0100
Ingo Molnar <[email protected]> wrote:
> if someone runs a fresh new kernel on an ancient device then timings
> _will_ change a bit, no matter what we do. Alignments change, the
> compiler output will change (old compilers get deprecated so a new
> compiler might have to be picked), cache effects change - and this is
> inevitable. The important thing is to not eliminate the delays - but
> we sure dont have to keep them cycle accurate (we couldnt even if we
> wanted to). The only way to get the _exact same_ behavior is to not
> change the kernel at all.
What I'm afraid of is that udelay will be significantly slower, which
might hit anything that does a lot of gettimeofday calls (poking at the
PIT timer) on embedded 386/486 systems. On the other hand, those
systems might not want to upgrade to 2.6 anyway.
And why do people keep buying HP hardware? HP seem to be quite
Linux-unfriendly on the desktop [1] and on their laptops. Apparently
HP doesn't even bother to try Linux on any of their non-server systems.
[1] Try running Linux on a HP DC7700 machine, there seems to be a lot
of magic stuff in those machines that doesn't work well with Linux.
They had some ACPI crap that stopped FC7 from booting without a lot of
magic PCI access options and audio still does not work.
/Christer
Alan Cox wrote:
> That does imply some muppet 'extended' the debug interface for power
> management on your laptop. Also pretty much proves that for such systems
> we do have to move from port 0x80 to another delay approach.
>
Alan - in googling around the net yesterday looking for SuperIO chipsets
that claim to support port 80, I have found that "blade" servers from
companies like IBM and HP *claim* to have a system for monitoring port
80 diagnostic codes and sending them to the "drawer" management
processor through a management backplane. This is a little puzzling,
because you'd think they would have noticed port 80 issues, since they
run Linux in their systems. Maybe not hangs, but it seems unhelpful to
have a lot of noise spewing over a bus that is supposed to provide
"management" diagnostics. Anyway, what I did not find was whether there
was a particular chipset that provided that port 80 feature on those
machines. However, if it's a common "cell" in a design, it may have
leaked into the notebook market chipsets too.
Anyone know if the Linux kernels used on blade servers have been patched
to not do the port 80 things? I don't think this would break anything
there, but it might have been a helpful patch for their purposes. I
don't do blades personally or at work (I focus on mobile devices these
days, and my personal servers are discrete), so I have no knowledge.
It could be that the blade servers have BIOSes that don't do POST codes
over port 80, but send them directly to the "drawer" management bus, of
course.
Pavel Machek wrote:
>> 2. there is some "meaning" to certain byte values being written (the
>> _PTS and _WAK use of arguments that come from callers to store into port
>> 80 makes me suspicious.) That might mean that the freeze happens only
>> when certain values are written, or when they are written closely in
>> time to some other action - being used to communicate something to the
>>
>
> There's nothing easier than always writing 0 to the 0x80 to check if
> it hangs in such case...?
> Pavel
>
>
I did try that. Machine in question does hang when you write 0 to 0x80
in a loop a few thousand times. This particular suspicion was that the
problem was caused by the following sort of thing (it's a multi-cpu
system...)
First, some ACPI code writes "meaningful value" X to port 80 that is
sort of a "parameter" to whatever follows. Just because the DSDT
disassembly *calls* it the DBUG port doesn't mean it is *only* used for
debugging. We (Linux) use it for timing delays, after all...
then Linux driver writes some random value (!=X) including zero to port 80.
then ACPI writes some other values that cause SMI or some other thing to
happen,
There are experiments that are not so simple that could rule this
particular guess out. I have them on my queue of experiments I might
try (locking out ACPI). Of course if the BIOS were GPL, we could look
at the comments, etc...
I may today pull the laptop apart to see if I can see what chips are on
it, besides the nvidia chipset and the processor. That might give a
clue as to what SuperIO or other logic chips are there.
> there strong counter-arguments against doing the clean thing and adding
> an udelay(2) (or udelay(1)) to replace those _p() uses in ISA drivers?
#1 udelay has to be for the worst case bus clock (6MHz) while the device
may be at 10MHz or even 12MHz ISA. So it slows stuff down
unnecessarily - and stuff that really, really is slow enough as is.
#2 Most of the ancient wind up relics with ISA bus don't have a tsc so
their udelay value is kind of iffy.
#3 Not changing it is the lowest risk for a lot of the old ISA code that
never occurs on newer boxes
If we have an isa_inb_p() as a specific statement of "I am doing an ISA
bus dependent delay on ancient crap hardware" then we can avoid the risk
of breakage. We wouldn't use it for non ISA, and certainly not for stuff
like chipset logic which requires a more thorough fix as it occurs on all
kinds of boxes.
> _will_ change a bit, no matter what we do. Alignments change, the
> compiler output will change (old compilers get deprecated so a new
> compiler might have to be picked), cache effects change - and this is
> inevitable. The important thing is to not eliminate the delays - but we
> sure dont have to keep them cycle accurate (we couldnt even if we wanted
> to). The only way to get the _exact same_ behavior is to not change the
> kernel at all.
ISA bus cycles are *slow*, the subtle processor cache and gcc triggered
timing changes are lost in the noise.
Alan
> 80 diagnostic codes and sending them to the "drawer" management
> processor through a management backplane. This is a little puzzling,
> because you'd think they would have noticed port 80 issues, since they
> run Linux in their systems. Maybe not hangs, but it seems unhelpful to
Most of the chipsets let you turn it on and off so presumably the BIOS
turns it off before running Linux. That's certainly done by several
chipsets and we recently had a bug where a BIOS forgot to turn them off
which confused someone's parallel port devices.
> Anyone know if the Linux kernels used on blade servers have been patched
> to not do the port 80 things? I don't think this would break anything
I'm not aware of such, or requests for them.
Alan
* Christer Weinigel <[email protected]> wrote:
> What I'm afraid is that udelay will be significantly slower, [...]
why should it be significantly slower?
Ingo
* Alan Cox <[email protected]> wrote:
> > there strong counter-arguments against doing the clean thing and
> > adding an udelay(2) (or udelay(1)) to replace those _p() uses in ISA
> > drivers?
>
> #1 udelay has to be for the worst case bus clock (6MHz) while the
> #device may be at 10Mhz or even 12MHz ISA. So it slows it down stuff
> unneccessarily- and stuff that really really is slow enough as is.
udelay is supposed to be reliable. If someone runs a new kernel and has
no TSC (which might happen even on modern hardware or with notsc) _and_
finds that udelay is not calibrated well enough then that's a kernel bug
we want to fix.
> #2 Most of the ancient wind up relics with ISA bus don't have a tsc so
> their udelay value is kind of iffy.
iffy in what way? Again, we might be hiding real udelay bugs.
> #3 Not changing it is the lowest risk for a lot of the old ISA code
> #that never occurs on newer boxes
Not changing the kernel _at all_ is what is the "lowest risk" option. If
the kernel is changed, it should be tested - and if we have a buggy
udelay, that should be fixed - because it could cause many other bugs in
other drivers.
yes, there are always risks in changing something, but using udelay is a
common-sense consolidation of code.
> > _will_ change a bit, no matter what we do. Alignments change, the
> > compiler output will change (old compilers get deprecated so a new
> > compiler might have to be picked), cache effects change - and this
> > is inevitable. The important thing is to not eliminate the delays -
> > but we sure dont have to keep them cycle accurate (we couldnt even
> > if we wanted to). The only way to get the _exact same_ behavior is
> > to not change the kernel at all.
>
> ISA bus cycles are *slow*, the subtle processor cache and gcc
> triggered timing changes are lost in the noise.
gcc triggered timing changes can easily add up to a LOT more -
especially if a loop is involved and especially on older hardware.
Remember, 1 microsecond is just a handful of instructions on real old
hardware. The kernel's timings are _not_ immutable, never were, never
will be.
Ingo
On Tue, 1 Jan 2008 19:46:59 +0100
Ingo Molnar <[email protected]> wrote:
>
> * Christer Weinigel <[email protected]> wrote:
>
> > What I'm afraid is that udelay will be significantly slower, [...]
>
> why should it be significantly slower?
out 80h, al is only two bytes. Any alternative that has been suggested
in this discussion will use more space. mov dx, alt_port; out dx, al
will be larger, a function call will definitely be a lot larger. People
have been making changes to the kernel to save a couple of hundred
bytes of text size.
On old hardware (or anything with an ISA bus which I'd guess includes
the Geode SCx200 SoC which is basically a MediaGX processor, a
southbridge and an ISA bus with a Super I/O chip on it) an out to 80h
will use exactly one ISA cycle. A call to udelay will need a margin,
so it will be slightly slower. And that's assuming that you can find
out the speed of the ISA bus, if you can't you'll have to assume the
slowest possible bus (6 MHz I guess) which will be a lot slower.
I don't know if the difference in code size or the udelay will be
significantly slower, but I think it might be.
And to take the MediaGX as an example, the TSC is not usable on that
CPU, so Linux has to use the PIT timer for gettimeofday. As I wrote
in a different post, I believe the PIT on the SCx200 needs outb_p to
work reliably. So if outb_p becomes significantly slower that will
affect a critical path on a very common embedded CPU.
I'm not sure what Alan meant with his comments about locking, but if
changing outb_p to use an udelay means that we have to add locking,
that is also going to affect the code size and speed.
/Christer
On 01-01-08 20:35, Christer Weinigel wrote:
> On Tue, 1 Jan 2008 19:46:59 +0100
> Ingo Molnar <[email protected]> wrote:
>
>> * Christer Weinigel <[email protected]> wrote:
>>
>>> What I'm afraid is that udelay will be significantly slower, [...]
>> why should it be significantly slower?
>
> out 80h, al is only two bytes. Any alternative that has been suggested
> in this discussion will use more space. mov dx, alt_port; out dx, al
> will be larger, a function call will definitely be a lot larger. People
> have been making changes to the kernel to save a couple of hundred
> bytes of text size.
>
> On old hardware (or anything with an ISA bus which I'd guess includes
> the Geode SCx200 SoC which is basically a MediaGX processor, a
> southbridge and an ISA bus with a Super I/O chip on it) an out to 80h
> will use exactly one ISA cycle.
Not to disagree with the point but more like 8 (1 us at 8 MHz). It's the
timeout property.
> A call to udelay will need a margin,
> so it will be slightly slower. And that's assuming that you can find
> out the speed of the ISA bus, if you can't you'll have to assume the
> slowest possible bus (6 MHz I guess) which will be a lot slower.
>
> I don't know if the difference in code size or the udelay will be
> significantly slower, but I think it might be.
There's also the bit about microseconds being very loosely defined pre
loops_per_jiffy calibration. Per CPU-family init helps somewhat but
certainly for family 6 (Pentium Pro, II, III -- lots of hardware with ISA
busses therefore) speeds vary quite a bit still.
> And to take the MediaGX as an example, the TSC is not usable on that
> CPU, so Linux has to use the PIT timer for gettimeofday. As I wrote
> in a different post, I believe the PIT on the SCx200 needs outb_p to
> work reliably. So if outb_p becomes significantly slower that will
> affect a critical path on a very common embedded CPU.
>
> I'm not sure what Alan meant with his comments about locking, but if
> changing outb_p to use an udelay means that we have to add locking,
> that is also going to affect the code size and speed.
Explained here:
http://lkml.org/lkml/2007/12/30/136
However, that's not an argument. Missing locking is a bug, and current outb
I/O delay use hiding it doesn't change that.
Rene.
On Tue, 1 Jan 2008 19:45:24 +0100
Ingo Molnar <[email protected]> wrote:
>
> * Alan Cox <[email protected]> wrote:
>
> > > there strong counter-arguments against doing the clean thing and
> > > adding an udelay(2) (or udelay(1)) to replace those _p() uses in
> > > ISA drivers?
> >
> > #1 udelay has to be for the worst case bus clock (6MHz) while the
> > #device may be at 10Mhz or even 12MHz ISA. So it slows it down stuff
> > unneccessarily- and stuff that really really is slow enough as is.
>
> udelay is supposed to be reliable. If someone runs a new kernel and
> has no TSC (which might happen even on modern hardware or with notsc)
> _and_ finds that udelay is not calibrated well enough then that's a
> kernel bug we want to fix.
How do you find out the speed of the ISA bus? AFAIK there is no
standardized way to do that. On the Geode SC2200 the ISA bus speed is
usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
30/4=7.5MHz, but with no external ISA devices it's possible to
overclock the ISA bus to /3 to run it at 11MHz or so. But without
poking at some CPU and southbridge specific registers to find out the
PCI bus speed and the ISA bus divisor you can't really tell.
So if you do udelay based on a 6MHz clock (I think you can safely
assume that any 386 based system runs the ISA bus at least that fast)
you'll waste at least 30% and maybe even 100% more time for the delay
after every _p call.
/Christer
On Tue, 01 Jan 2008 20:59:20 +0100
Rene Herman <[email protected]> wrote:
> On 01-01-08 20:35, Christer Weinigel wrote:
>
> > On old hardware (or anything with an ISA bus which I'd guess
> > includes the Geode SCx200 SoC which is basically a MediaGX
> > processor, a southbridge and an ISA bus with a Super I/O chip on
> > it) an out to 80h will use exactly one ISA cycle.
>
> Not to disagree with the point but more like 8 (1 us at 8 MHz). It's
> the timeout property.
Ah, sorry, you're right of course.
> > I'm not sure what Alan meant with his comments about locking, but if
> > changing outb_p to use an udelay means that we have to add locking,
> > that is also going to affect the code size and speed.
>
> Explained here:
>
> http://lkml.org/lkml/2007/12/30/136
>
> However, that's not an argument. Missing locking is a bug, and
> current outb I/O delay use hiding it doesn't change that.
Thanks, I had missed that one.
Regarding Alan's comment:
>For that matter does anyone actually have video cards old enough for us
>to care actually still in use with Linux today ?
I'm afraid that some PC104 systems may still use ancient video cards.
/Christer
* Christer Weinigel <[email protected]> wrote:
> On Tue, 1 Jan 2008 19:46:59 +0100
> Ingo Molnar <[email protected]> wrote:
>
> >
> > * Christer Weinigel <[email protected]> wrote:
> >
> > > What I'm afraid is that udelay will be significantly slower, [...]
> >
> > why should it be significantly slower?
>
> out 80h, al is only two bytes. Any alternative that has been
> suggested in this discussion will use more space. mov dx, alt_port;
> out dx, al will be larger, a function call will definitely be a lot
> larger. People have been making changes to the kernel to save a couple
> of hundred bytes of text size.
i've done dozens of patches that saved much less of text size, so yes, i
very much care about code size. But it has been stated in this thread
that most of the _p() API uses in the kernel today are bogus. So
eventually getting rid of the bogus ones will be a net code size
_reduction_. (But even that is beside the point, we prefer clean and
easier to maintain code.)
> I don't know if the difference in code size or the udelay will be
> significantly slower, but I think it might be.
ok, "I dont know but it might be slower" is a perfectly fine statement
instead of your original "it will be slower".
Ingo
> > #1 udelay has to be for the worst case bus clock (6MHz) while the
> > #device may be at 10Mhz or even 12MHz ISA. So it slows it down stuff
> > unneccessarily- and stuff that really really is slow enough as is.
>
> udelay is supposed to be reliable. If someone runs a new kernel and has
> no TSC (which might happen even on modern hardware or with notsc) _and_
> finds that udelay is not calibrated well enough then that's a kernel bug
> we want to fix.
You miss the point entirely. The delay is in bus clocks not CPU clocks,
not tsc clocks not PIT clocks, and it is permitted to vary by a factor of
two. So you'll worst case halve the speed of network packet up/download
even if your udelay is accurate.
> > #2 Most of the ancient wind up relics with ISA bus don't have a tsc so
> > their udelay value is kind of iffy.
>
> iffy in what way? Again, we might be hiding real udelay bugs.
As you say - it's only a few instructions so small udelays tend to be
inaccurate - overlong.
> yes, there are always risks in changing something, but using udelay is a
> common-sense consolidation of code.
Not for ISA bus hardware. For chipset logic, for PCI yes - for ISA stuff
no. It's all about ISA clocks not wall clocks.
Alan
David P. Reed wrote:
> Alan, thank you for the pointers. I have been doing variations on this
> testing theme for a while - I get intrigued by a good debugging
> challenge, and after all it's my machine...
>
> Two relevant new data points, and then some more suggestions:
>
> 1. It appears to be a real port. SMI traps are not happening in the
> normal outb to 80. Hundreds of them execute perfectly with the expected
> instruction counts. If I can trace the particular event that creates
> the hard freeze (getting really creative, here) and stop before the
> freeze disables the entire computer, I will. That may be an SMI, or
> perhaps any other kind of interrupt or exception. Maybe someone knows
> how to safely trace through an impending SMI while doing printk's or
> something?
>
> 2. It appears to be the standard POST diagnostic port. On a whim, I
> disassembled my DSDT code, and studied it more closely. It turns out
> that there are a bunch of "Store(..., DBUG)" instructions scattered
> throughout, and when you look at what DBUG is defined as, it is defined
> as an IO Port at IO address DBGP, which is a 1-byte value = 0x80. So
> the ACPI BIOS thinks it has something to do with debugging. There's a
> little strangeness here, however, because the value sent to the port
> occasionally has something to do with arguments to the ACPI operations
> relating to sleep and wakeup ... could just be that those arguments are
> distinctive.
>
Dumb question: if you change your iodelay function so it always writes
zero to port 0x80, does it start working?
-hpa
Alan Cox wrote:
>> 80 makes me suspicious.) That might mean that the freeze happens only
>> when certain values are written, or when they are written closely in
>> time to some other action - being used to communicate something to the
>> SMM code). If there is some race in when Linux's port 80 writes happen
>> that happen to change the meaning of a request to the hardware or to
>> SMM, then we could be rarely stepping on
>
> That does imply some muppet 'extended' the debug interface for power
> management on your laptop. Also pretty much proves that for such systems
> we do have to move from port 0x80 to another delay approach.
>
> Ingo - the fact that so many ISA bus devices need _p to mean "ISA bus
> clocks" says to me we should keep the _p port 0x80 using variant for old
> systems/device combinations (eg ISA ethernet cards) which won't show up
> in any problem system (we know this from 15 odd years of testing), but
> stop using it for PCI and embedded devices on modern systems.
>
I have mentioned this before... I think writing zero to port 0xf0 would
be an acceptable pause interface (to the extent where we need an I/O
port) except on 386 with 387 present; on those systems we can fall back
to 0x80.
-hpa
Christer Weinigel wrote:
>
> out 80h, al is only two bytes. Any alternative that has been suggested
> in this discussion will use more space. mov dx, alt_port; out dx, al
> will be larger, a function call will definitely be a lot larger. People
> have been making changes to the kernel to save a couple of hundred
> bytes of text size.
>
If text size becomes a problem in this case, then we can use an
alternatives-like mechanism to fix up the kernel. However,
realistically this probably should be a function call *combined with*
the out and in; that reduces the impact somewhat.
-hpa
> How do you find out the speed of the ISA bus? AFAIK there is no
> standardized way to do that. On the Geode SC2200 the ISA bus speed is
It is per chipset magic registers. Fun fun fun
> usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
> 30/4=7.5MHz, but with no external ISA devices it's possible to
> overclock the ISA bus to /3 to run it at 11MHz or so. But without
12MHz is valid for ISA although not a good idea - even IBM issued some
systems with 12MHz ISA before discovering many vendors had assumed 8 was
it.
Christer Weinigel wrote:
>
>> For that matter does anyone actually have video cards old enough for us
>> to care actually still in use with Linux today ?
>
> I'm afraid that some PC104 systems may still use ancient video cards.
>
PC/104 is actual ISA, not even LPC...
-hpa
> very much care about code size. But it has been stated in this thread
> that most of the _p() API uses in the kernel today are bogus. So
You missed a word "wrongly". It has been "wrongly stated"
I've been going through the ISA cases which are the majority. Generally
speaking they are correct. We have a couple of "interesting" PCI users
who most definitely want udelay() or removal of _p. We have various
chipset cases which want looking at in detail. The ISA drivers however
are both the main user and mostly right.
> ok, "I dont know but it might be slower" is a perfectly fine statement
> instead of your original "it will be slower".
If you use wall clock timings it will be slower.
Alan
/* gcc -W -Wall -O2 -o portime portime.c */
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/io.h>
#define LOOPS 10000
/*
 * Read the CPU time-stamp counter.
 *
 * RDTSC leaves the high 32 bits of the counter in EDX and the low 32 bits
 * in EAX; the two halves are combined into a single 64-bit cycle count.
 *
 * The asm is marked volatile: an asm with outputs and no inputs may
 * otherwise be merged with an identical one or moved by the optimizer,
 * which would make successive readings meaningless for timing.
 *
 * "static inline" guarantees a definition exists even when the call is not
 * inlined (a plain C99 "inline" definition emits no out-of-line copy).
 *
 * Returns: the current time-stamp counter value.
 */
static inline uint64_t rdtsc(void)
{
	uint32_t hi, lo;

	__asm__ __volatile__("rdtsc" : "=d" (hi), "=a" (lo));
	return (uint64_t)hi << 32 | lo;
}
/*
 * Execute CPUID, used here as a serializing instruction to fence the
 * instruction being timed.
 *
 * EAX is loaded with leaf 0 before the instruction so that every call
 * executes the same CPUID leaf; leaving EAX undefined lets the leaf (and
 * possibly the instruction's cost) depend on whatever ran before.  CPUID
 * overwrites EAX, EBX, ECX and EDX, so EAX is an in/out operand and the
 * other three are listed as clobbers.  The "memory" clobber additionally
 * stops the compiler from moving memory accesses across the call.
 */
static inline void serialize(void)
{
	uint32_t leaf = 0;

	__asm__ __volatile__("cpuid"
			     : "+a" (leaf)
			     :
			     : "ebx", "ecx", "edx", "memory");
}
/*
 * Average cost, in TSC cycles, of one timed operation.
 *
 * start/end bracket a loop of LOOPS iterations; overhead is the cycle count
 * of the same loop with no operation in it.  If the measured loop happened
 * to run no slower than the baseline (measurement noise), 0 is returned
 * instead of letting the unsigned subtraction wrap to a huge value.
 */
static uint64_t cycles_per_op(uint64_t start, uint64_t end, uint64_t overhead)
{
	uint64_t elapsed = end - start;

	return elapsed > overhead ? (elapsed - overhead) / LOOPS : 0;
}

/*
 * Time port I/O instructions with the time-stamp counter.
 *
 * Five loops of LOOPS iterations each are timed back to back:
 *   1. two serialize() calls only (baseline overhead),
 *   2. outb to port 0x80,
 *   3. inb from port 0x80,
 *   4. inb from port 0x61,
 *   5. outb of zero to port 0xf0.
 * The baseline is subtracted from each of the other loops and the average
 * cycles per operation is printed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if iopl(3) is refused.
 */
int main(void)
{
	uint64_t tsc0, tsc1, tsc2, tsc3, tsc4, tsc5;
	uint64_t overhead;
	uint64_t out80, in80, in61, outf0;
	int i;

	/* Raise the I/O privilege level so in/out (and cli/sti) are allowed. */
	if (iopl(3) < 0) {
		perror("iopl");
		return EXIT_FAILURE;
	}

	/*
	 * Disable interrupts for the duration of the measurement.
	 * NOTE(review): newer kernels reportedly no longer let iopl(3) grant
	 * cli/sti to user space (they fault instead) - confirm against
	 * iopl(2) for the kernel this is run on.
	 */
	__asm__ __volatile__("cli");

	/* Baseline: the two serializing instructions alone. */
	tsc0 = rdtsc();
	for (i = 0; i < LOOPS; i++) {
		serialize();
		serialize();
	}

	/* outb to port 0x80; writes whatever value AL currently holds. */
	tsc1 = rdtsc();
	for (i = 0; i < LOOPS; i++) {
		serialize();
		__asm__ __volatile__("outb %al, $0x80");
		serialize();
	}

	/* inb from port 0x80. */
	tsc2 = rdtsc();
	for (i = 0; i < LOOPS; i++) {
		serialize();
		__asm__ __volatile__("inb $0x80, %%al" : : : "al");
		serialize();
	}

	/* inb from port 0x61. */
	tsc3 = rdtsc();
	for (i = 0; i < LOOPS; i++) {
		serialize();
		__asm__ __volatile__("inb $0x61, %%al" : : : "al");
		serialize();
	}

	/* outb of zero to port 0xf0. */
	tsc4 = rdtsc();
	for (i = 0; i < LOOPS; i++) {
		serialize();
		__asm__ __volatile__("outb %b0, $0xf0" : : "a" (0));
		serialize();
	}
	tsc5 = rdtsc();

	__asm__ __volatile__("sti");

	overhead = tsc1 - tsc0;
	out80 = cycles_per_op(tsc1, tsc2, overhead);
	in80 = cycles_per_op(tsc2, tsc3, overhead);
	in61 = cycles_per_op(tsc3, tsc4, overhead);
	outf0 = cycles_per_op(tsc4, tsc5, overhead);

	/* uint64_t need not be unsigned long long; cast to match %llu. */
	printf("out 0x80: %llu cycles\n", (unsigned long long)out80);
	printf("in 0x80: %llu cycles\n", (unsigned long long)in80);
	printf("in 0x61: %llu cycles\n", (unsigned long long)in61);
	printf("out 0xf0: %llu cycles\n", (unsigned long long)outf0);

	return EXIT_SUCCESS;
}
On Tue, 1 Jan 2008 22:01:43 +0100
Ingo Molnar <[email protected]> wrote:
> > out 80h, al is only two bytes. Any alternative that has been
> > suggested in this discussion will use more space. mov dx,
> > alt_port; out dx, al will be larger, a function call will
> > definitely be a lot larger. People have been making changes to the
> > kernel to save a couple of hundred bytes of text size.
>
> i've done dozens of patches that saved much less of text size, so
> yes, i very much care about code size. But it has been stated in this
> thread that most of the _p() API uses in the kernel today are bogus.
> So eventually getting rid of the bogus ones will be a net code size
> _reduction_. (But even that is besides the point, we prefer clean and
> easier to maintain code.)
And once again, the _p in the code that talks to the PIT is very much
non-bogus. And it is a critical path that's called a lot. The i8253
PIT and the i8259 interrupt controller are probably the only ones that
are relevant on a modern machine, and it seems that even some fairly
modern chipsets have limitations on how fast you can drive them.
BTW, I just checked the Intel M8253 data sheet (dead tree variant), and
it says under A.C Characteristics, READ CYCLE:
Recovery Time Between /READ and Any Other Control Signal: 1 us
So at least for the original M8253 a udelay(1) might be more
appropriate than outb_p, since the delay is not expressed in clock
cycles but absolute time.
The data sheet for the Intel M8259A says:
End of /RD to Next Command: 300 ns
End of /WR to Next Command: 370 ns
On the other hand, I don't know how all the i8253/i8259 clones or the
numerous variants of Super I/O chips behave. It wouldn't surprise me
if some Super I/O chip uses the ISA bus clock to latch the values
internally so that the delay is dependent on the bus frequency instead.
> > I don't know if the difference in code size or the udelay will be
> > significantly slower, but I think it might be.
>
> ok, "I dont know but it might be slower" is a perfectly fine
> statement instead of your original "it will be slower".
I didn't say that, I said I'm afraid it will be slower. :-)
/Christer
On 01-01-08 22:42, Christer Weinigel wrote:
> The data sheet for the Intel M8259A says:
>
> End of /RD to Next Command: 300 ns
> End of /WR to Next Command: 370 ns
>
> On the other hand, I don't know how all the i8253/i8259 clones or the
> numerous variants of Super I/O chips behave. It wouldn't surprise me
> if some Super I/O chip uses the ISA bus clock to latch the values
> internally so that the delay is dependent on the bus frequency instead.
I wouldn't even be surprised if most all would...
Rene.
Rene Herman wrote:
> On 01-01-08 22:15, H. Peter Anvin wrote:
>
>> I have mentioned this before... I think writing zero to port 0xf0
>> would be an acceptable pause interface (to the extent where we need an
>> I/O port) except on 386 with 387 present; on those systems we can fall
>> back to 0x80.
>
> PII 400 / Intel 440 BX (PIIX4):
>
> rene@6bap:~/port80$ su -c ./portime
> out 0x80: 544 cycles
> in 0x80: 254 cycles
> in 0x61: 254 cycles
> out 0xf0: 544 cycles
>
> The Intel PIIX/PIIX3 datasheet specifically mentions that both reads and
> writes at 0xf0 "flow through to the ISA bus".
>
> However, more complete, it says:
>
> "Writing to this register causes the PIIX/PIIX3 to assert IGNNE#. The
> PIIX/PIIX3 also negates IRQ13 (internal to the PIIX). Note that IGNNE#
> is not asserted unless FERR# is active. Reads/writes flow through to the
> ISA bus".
>
> We don't want the side-effects, do we?
>
Yes, we do. It's exactly this side effect which makes this safer than
either 0x80 or 0xED -- it's a port that *guaranteed* can't be reclaimed
for other purposes without breaking MS-DOS compatibility.
It's specifically a side effect *we don't care about*, except in the
by-now-somewhat-exotic case of 386+387 (where we indeed can't use it
once user code has touched the FPU -- but we can fall back to 0x80 on
those, a very small number of systems.) 486+ doesn't use this interface
under Linux, since Linux uses the proper exception path on those
processors. If Compaq had wired up the proper signals on the first 386
PC motherboards, we wouldn't have cared about it on the 386 either.
-hpa
Christer Weinigel wrote:
>
> And once again, the _p in the code that talks to the PIT is very much
> non-bogus. And it is a critical path that's called a lot. The i8253
> PIT and the i8259 interrupt controller are probably the only ones that
> are relevant on a modern machine, and it seems that even some fairly
> modern chipsets have limitations on how fast you can drive them.
>
I actually analyzed the case of the PIT in the case of the
implementation of a real chipset. In our case, running the PIT at
1.19318 MHz when the rest of the chipset core was running at 100 MHz
introduced a huge amount of extra complexity and we really wanted to get
rid of it. As it turns out, the PIT interface is ill-defined if run at
a higher frequency; you can get undefined values as a result of a write
followed by a read if there is no intervening PIT clock, which of course
in the standard interface never happens. So in the end, we had to build
all the synchronizers, backpressure controls and other crap that went
along with an additional clock domain.
As a result of that experience, I really don't think you will *ever* see
a PIT that runs at a modern frequency.
Building a 100 MHz PIC, however, was not a problem, and being able to
sink accesses at full speed meant we didn't have to implement flow control.
-hpa
/* gcc -W -Wall -o smsw smsw.c */
#include <stdio.h>
#include <stdint.h>
/*
 * Read the machine status word with SMSW and print it in hex.
 *
 * Per the Intel SDM the MSW is the low 16 bits of CR0, so this lets an
 * unprivileged program inspect flags such as CR0.NE.
 */
int main(void)
{
	uint16_t msw;

	/* __asm__ rather than asm so this also builds with -std=c99/c11 */
	__asm__ ("smsw %0" : "=r" (msw));
	printf("msw: %#hx\n", msw);

	return 0;
}
Rene Herman wrote:
>>
>> Yes, we do. It's exactly this side effect which makes this safer than
>> either 0x80 or 0xED -- it's a port that *guaranteed* can't be
>> reclaimed for other purposes without breaking MS-DOS compatibility.
>
> I see that with CR0.NE set (*) we indeed don't care about IGNNE#...
>
> However, I'm worried about this comment in arch/x86/kernel/i8259_32.c
>
> ===
> /*
> * New motherboards sometimes make IRQ 13 be a PCI interrupt,
> * so allow interrupt sharing.
> */
> ===
>
> Is it really safe to just blindly negate IRQ13 on everything out there,
> from regular PC through funky embedded thingies?
>
It's not any IRQ 13, it's IRQ 13 from the FPU.
-hpa
On Tue, 01 Jan 2008 13:21:47 -0800
"H. Peter Anvin" <[email protected]> wrote:
> Christer Weinigel wrote:
> >
> > out 80h, al is only two bytes. Any alternative that has been
> > suggested in this discussion will use more space. mov dx,
> > alt_port; out dx, al will be larger, a function call will
> > definitely be a lot larger. People have been making changes to the
> > kernel to save a couple of hundred bytes of text size.
>
> If text size becomes a problem in this case, then we can use an
> alternatives-like mechanism to fix up the kernel. However,
> realistically this probably should be a function call *combined with*
> the out and in; that reduces the impact somewhat.
That's a very good point. So for the PIT it should be possible to have
two clocksources, one with the _p and one without, that one can switch
between with a kernel command line option. So there shouldn't be any
slowdown at all due to that.
The i8259 init code is not time critical, so should be able to use a
"reasonable" delay.
Besides the above there are only a handful of _p uses outside of real
ISA device drivers, and those should not be relevant for a modern PC
unless somebody wants to use an 8390 based PCMCIA card, but we could
tell them "don't do that then".
But I'd better shut up and let Alan continue on his review of the _p
use in the drivers.
/Christer
On 01-01-08 23:39, H. Peter Anvin wrote:
>>> Yes, we do. It's exactly this side effect which makes this safer
>>> than either 0x80 or 0xED -- it's a port that *guaranteed* can't be
>>> reclaimed for other purposes without breaking MS-DOS compatibility.
>>
>> I see that with CR0.NE set (*) we indeed don't care about IGNNE#...
>>
>> However, I'm worried about this comment in arch/x86/kernel/i8259_32.c
>>
>> ===
>> /*
>> * New motherboards sometimes make IRQ 13 be a PCI interrupt,
>> * so allow interrupt sharing.
>> */
>> ===
>>
>> Is it really safe to just blindly negate IRQ13 on everything out
>> there, from regular PC through funky embedded thingies?
>
> It's not any IRQ 13, it's IRQ 13 from the FPU.
Well, on the PIIX it is and I guess on anything where it's _not_ fully
internal an 0xf0 write wouldn't have any effect on IRQ13...
When you earlier mentioned this it seemed 0xed switched on DMI would be good
enough, but well.
Alan, do you have an opinion on the port 0xf0 write? It should probably
still be combined with a replacement/deletion for new machines due to the
bus-locking "bad for real-time" thing you mentioned earlier but in the short
run it could be a fairly low-impact replacement on anything except a 386+387
We should do another timing measurement survey and it makes for slightly
worse code if we indeed feel it's not safe enough to write anything other
than 0, but otherwise it's quite minimal.
Rene.
> Besides the above there are only a handful of _p uses outside of real
> ISA device drivers, and those should not be relevant for a modern PC
> unless somebody wants to use an 8390 based PCMCIA card, but we could
> tell them "don't do that then".
We need to build 8390.c twice anyway - once for PCI once for ISA with the
_p changes whichever way it gets done. PCMCIA can use whichever we decide
is right. Anyone know if PCMCIA is guaranteed to be 8MHz ?
On Tue, 1 Jan 2008 23:12:50 +0000
Alan Cox <[email protected]> wrote:
> > Besides the above there are only a handful of _p uses outside of
> > real ISA device drivers, and those should not be relevant for a
> > modern PC unless somebody wants to use an 8390 based PCMCIA card,
> > but we could tell them "don't do that then".
>
> We need to build 8390.c twice anyway - once for PCI once for ISA with
> the _p changes whichever way it gets done. PCMCIA can use whichever
> we decide is right. Anyone know if PCMCIA is guaranteed to be 8MHz ?
It's not. It's perfectly ok to drive a PCMCIA bus slower than that,
IIRC we used a much slower clock speed than that on a StrongARM
platform I worked a couple of years ago.
The PCMCIA CIS (Card information services) allows the following device
speeds: 100, 150, 200 and 250 ns. The memory card spec also allows 600
and 300 ns. The standard I/O card cycle speed is 255 ns. I believe
that is "the shortest access time for a read/write cycle", and I can't
tell if that is comparable to one ISA clock cycles or if it's
comparable to 8 ISA bus cycles.
On the other hand, there is no clock line in a PCMCIA connector, so for
PCMCIA devices any delays should be absolute times, or based on some
clock that is internal to the card. How that fits with the 8390 data
sheet talking about bus clocks, I don't know.
/Christer
On 02-01-08 00:11, Rene Herman wrote:
> On 01-01-08 23:39, H. Peter Anvin wrote:
>
>>>> Yes, we do. It's exactly this side effect which makes this safer
>>>> than either 0x80 or 0xED -- it's a port that *guaranteed* can't be
>>>> reclaimed for other purposes without breaking MS-DOS compatibility.
>>>
>>> I see that with CR0.NE set (*) we indeed don't care about IGNNE#...
>>>
>>> However, I'm worried about this comment in arch/x86/kernel/i8259_32.c
>>>
>>> ===
>>> /*
>>> * New motherboards sometimes make IRQ 13 be a PCI interrupt,
>>> * so allow interrupt sharing.
>>> */
>>> ===
>>>
>>> Is it really safe to just blindly negate IRQ13 on everything out
>>> there, from regular PC through funky embedded thingies?
>>
>> It's not any IRQ 13, it's IRQ 13 from the FPU.
>
> Well, on the PIIX it is and I guess on anything where it's _not_ fully
> internal an 0xf0 write wouldn't have any effect on IRQ13...
>
> When you earlier mentioned this it seemed 0xed switched on DMI would be
> good enough, but well.
>
> Alan, do you have an opinion on the port 0xf0 write? It should probably
> still be combined with a replacement/deletion for new machines due to
> the bus-locking "bad for real-time" thing you mentioned earlier but in
> the short run it could be a fairly low-impact replacement on anything
> except a 386+387
>
> We should do another timing measurement survey and it makes for
> slightly worse code if we indeed feel it's not safe enough to write
> anything other than 0, but otherwise it's quite minimal.
Thinking about this, my main worry about 0xf0 as a 0x80 replacement would be
systems that have elected to _not_ let port 0xf0 writes flow through to ISA
changing the timing-characteristics. Given that it's a known port, someone
may have elected to just keep it fully internal.
Up to now the datasheets I've read do put it on ISA...
Rene.
On Wed, 02 Jan 2008 00:11:54 +0100
Rene Herman <[email protected]> wrote:
> Well, on the PIIX it is and I guess on anything where it's _not_
> fully internal an 0xf0 write wouldn't have any effect on IRQ13...
>
> When you earlier mentioned this it seemed 0xed switched on DMI would
> be good enough, but well.
>
> Alan, do you have an opinion on the port 0xf0 write? It should
> probably still be combined with a replacement/deletion for new
> machines due to the bus-locking "bad for real-time" thing you
> mentioned earlier but in the short run it could be a fairly
> low-impact replacement on anything except a 386+387
Both 0xed and 0xf0 are mapped to internal functions on the AMD Elan
SC400 processor. It is an AMD 486 based system on a chip and since AMD
just knew that it would never have a math coprocessor, they reused the
0xf0-0xf2 range for the PCMCIA controller. I guess the AMD Elan SC500
will have similar problems.
I seem to recall that back when I was working with the Elan SC400
(sometime around 1998?) there were discussions about finding an
alternate delay port because outb to 0x80 messed up the debug port. I
think the Elan stopped those discussions because just about every port
on the Elan was reused for some alternate purpose.
/Christer
On 02-01-08 01:55, Christer Weinigel wrote:
> On Wed, 02 Jan 2008 00:11:54 +0100
> Rene Herman <[email protected]> wrote:
>
>> Well, on the PIIX it is and I guess on anything where it's _not_
>> fully internal an 0xf0 write wouldn't have any effect on IRQ13...
>>
>> When you earlier mentioned this it seemed 0xed switched on DMI would
>> be good enough, but well.
>>
>> Alan, do you have an opinion on the port 0xf0 write? It should
>> probably still be combined with a replacement/deletion for new
>> machines due to the bus-locking "bad for real-time" thing you
>> mentioned earlier but in the short run it could be a fairly
>> low-impact replacement on anything except a 386+387
>
> Both 0xed and 0xf0 are mapped to internal functions on the AMD Elan
> SC400 processor. It is an AMD 486 based system on a chip and since AMD
> just knew that it would never have a math coprocessor, they reused the
> 0xf0-0xf2 range for the PCMCIA controller. I guess the AMD Elan SC500
> will have similar problems.
>
> I seem to recall that back when I was working with the Elan SC400
> (sometime around 1998?) there were discussions about finding an
> alternate delay port because outb to 0x80 messed up the debug port. I
> think the Elan stopped those discussions because just about every port
> on the Elan was reused for some alternate purpose.
Okay, thanks much. So 0xf0 would be unuseable on 386+387 and AMD Elan SC400
and could possibly change timing on an unknown number of systems due to not
being put on the bus.
0x80 only fails for some recent HP laptops instead so it seems there would
be not enough cause to go with 0xf0 instead of 0x80 as the default choice;
if we're quirking around machines anyway it might as well be the DMI based
quirking currently suggested.
Rene.
Christer Weinigel wrote:
>
> Both 0xed and 0xf0 are mapped to internal functions on the AMD Elan
> SC400 processor. It is an AMD 486 based system on a chip and since AMD
> just knew that it would never have a math coprocessor, they reused the
> 0xf0-0xf2 range for the PCMCIA controller. I guess the AMD Elan SC500
> will have similar problems.
>
> I seem to recall that back when I was working with the Elan SC400
> (sometime around 1998?) there were discussions about finding an
> alternate delay port because outb to 0x80 messed up the debug port. I
> think the Elan stopped those discussions because just about every port
> on the Elan was reused for some alternate purpose.
>
Yeah, the Elan is not supportable anyway without a CONFIG option (it's
broken in so many ways), so it doesn't really apply. It's a fuckwit design.
-hpa
* Alan Cox <[email protected]> wrote:
> > udelay is supposed to be reliable. If someone runs a new kernel and
> > has no TSC (which might happen even on modern hardware or with
> > notsc) _and_ finds that udelay is not calibrated well enough then
> > that's a kernel bug we want to fix.
>
> You miss the point entirely. The delay is in bus clocks not CPU
> clocks, not tsc clocks not PIT clocks, and it is permitted to vary by
> a factor of two. So you'll worst case halve the speed of network
> packet up/download even if your udelay is accurate.
ok, you are right. How about we go with one of your suggestions: rename
the API family to isa_*_p() in the affected ISA drivers? That makes it
perfectly clear that this is an ISA related historic quirk that we just
cannot properly emulate in an acceptable fashion. It will also make the
least amount of changes to these truly historic drivers.
The main maintenance thing we are interested in is to have no subsequent
new uses of this API and to eliminate these accesses from modern
hardware - and naming it clearly 'ISA' and making it dependent on
CONFIG_ISA would likely achieve that purpose.
oh, another thing: there are 100+ mails in this thread while there are
only 3 mails in the thread that lists 61 not-yet-fixed-in-2.6.24
regressions:
| Listed regressions statistics:
|
| Date Total Pending Unresolved
| ----------------------------------------
| Today 139 38 23
which is a sad proportion of attention :-/
Ingo
* H. Peter Anvin <[email protected]> wrote:
> Christer Weinigel wrote:
>>
>> out 80h, al is only two bytes. Any alternative that has been suggested
>> in this discussion will use more space. mov dx, alt_port; out dx, al
>> will be larger, a function call will definitely be a lot larger. People
>> have been making changes to the kernel to save a couple of hundred
>> bytes of text size.
>>
>
> If text size becomes a problem in this case, then we can use an
> alternatives-like mechanism to fix up the kernel. However,
> realistically this probably should be a function call *combined with*
> the out and in; that reduces the impact somewhat.
and that's exactly what x86.git#mm does now.
Ingo
FYI - another quirky Quanta motherboard from HP, with DMI readings
reported to me.
-------- Original Message --------
Date: Wed, 2 Jan 2008 16:23:27 +1030
From: Joel Stanley <[email protected]>
To: David P. Reed <[email protected]>
Subject: Re: [PATCH] Option to disable AMD C1E (allows dynticks to work)
On Dec 30, 2007 1:13 AM, David P. Reed <[email protected]> wrote:
> I have also attached a c program that only touches port 80. Compile it
> for 32-bit mode (see comment), run it as root, and after two or three
> runs, it will hang a system that has the port 80 bug.
Using port80.c, I could hard lock a HP Pavilion tx1000 laptop on the
first go. This was with ubuntu hardy's stock kernel (a 2.6.24-rc)
> dmidecode -s baseboard-manufacturer
> dmidecode -s baseboard-product-name
Quanta
30BF
Tonight, I will try compiling a kernel with these values added to your patch.
Some history, feel free to ignore if it's not relevant: ubuntu
feisty's 2.6.22 based kernel worked fine, irc. We were having issues
with sound, so tried fedora8's .23 based kernel, but this would
sporadically hard lock. Ubuntu hardy's 2.6.24 appeared fine, for the 2
hours or so I used it last night, until using the port80.c program,
obviously.
Cheers,
Joel
* David P. Reed <[email protected]> wrote:
> FYI - another quirky Quanta motherboard from HP, with DMI readings reported
> to me.
> Using port80.c, I could hard lock a HP Pavilion tx1000 laptop on the
> first go. This was with ubuntu hardy's stock kernel (a 2.6.24-rc)
>
>> dmidecode -s baseboard-manufacturer
>> dmidecode -s baseboard-product-name
>
> Quanta
> 30BF
thanks, i've updated the patches in x86.git with this:
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion tx1000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30BF")
+ }
Find combo patch below.
Ingo
--------------->
Index: linux-x86.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-x86.q.orig/Documentation/kernel-parameters.txt
+++ linux-x86.q/Documentation/kernel-parameters.txt
@@ -785,6 +785,16 @@ and is between 256 and 4096 characters.
for translation below 32 bit and if not available
then look in the higher range.
+ io_delay= [X86-32,X86-64] I/O delay method
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
+ udelay
+ Simple two microseconds delay
+ none
+ No delay
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
Index: linux-x86.q/arch/x86/Kconfig.debug
===================================================================
--- linux-x86.q.orig/arch/x86/Kconfig.debug
+++ linux-x86.q/arch/x86/Kconfig.debug
@@ -112,4 +112,78 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+#
+# IO delay types:
+#
+
+config IO_DELAY_TYPE_0X80
+ int
+ default "0"
+
+config IO_DELAY_TYPE_0XED
+ int
+ default "1"
+
+config IO_DELAY_TYPE_UDELAY
+ int
+ default "2"
+
+config IO_DELAY_TYPE_NONE
+ int
+ default "3"
+
+choice
+ prompt "IO delay type"
+ default IO_DELAY_UDELAY
+
+config IO_DELAY_0X80
+ bool "port 0x80 based port-IO delay [recommended]"
+ help
+ This is the traditional Linux IO delay used for in/out_p.
+ It is the most tested hence safest selection here.
+
+config IO_DELAY_0XED
+ bool "port 0xed based port-IO delay"
+ help
+ Use port 0xed as the IO delay. This frees up port 0x80 which is
+ often used as a hardware-debug port.
+
+config IO_DELAY_UDELAY
+ bool "udelay based port-IO delay"
+ help
+ Use udelay(2) as the IO delay method. This provides the delay
+ while not having any side-effect on the IO port space.
+
+config IO_DELAY_NONE
+ bool "no port-IO delay"
+ help
+ No port-IO delay. Will break on old boxes that require port-IO
+ delay for certain operations. Should work on most new machines.
+
+endchoice
+
+if IO_DELAY_0X80
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0X80
+endif
+
+if IO_DELAY_0XED
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_0XED
+endif
+
+if IO_DELAY_UDELAY
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_UDELAY
+endif
+
+if IO_DELAY_NONE
+config DEFAULT_IO_DELAY_TYPE
+ int
+ default IO_DELAY_TYPE_NONE
+endif
+
endmenu
Index: linux-x86.q/arch/x86/boot/compressed/misc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_32.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_32.c
@@ -276,10 +276,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/boot/compressed/misc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/boot/compressed/misc_64.c
+++ linux-x86.q/arch/x86/boot/compressed/misc_64.c
@@ -269,10 +269,10 @@ static void putstr(const char *s)
RM_SCREEN_INFO.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
- outb_p(14, vidport);
- outb_p(0xff & (pos >> 9), vidport+1);
- outb_p(15, vidport);
- outb_p(0xff & (pos >> 1), vidport+1);
+ outb(14, vidport);
+ outb(0xff & (pos >> 9), vidport+1);
+ outb(15, vidport);
+ outb(0xff & (pos >> 1), vidport+1);
}
static void* memset(void* s, int c, unsigned n)
Index: linux-x86.q/arch/x86/kernel/Makefile_32
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_32
+++ linux-x86.q/arch/x86/kernel/Makefile_32
@@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\
- quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/Makefile_64
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/Makefile_64
+++ linux-x86.q/arch/x86/kernel/Makefile_64
@@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- i8253.o
+ i8253.o io_delay.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
Index: linux-x86.q/arch/x86/kernel/io_delay.c
===================================================================
--- /dev/null
+++ linux-x86.q/arch/x86/kernel/io_delay.c
@@ -0,0 +1,114 @@
+/*
+ * I/O delay strategies for inb_p/outb_p
+ *
+ * Allow for a DMI based override of port 0x80, needed for certain HP laptops
+ * and possibly other systems. Also allow for the gradual elimination of
+ * outb_p/inb_p API uses.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <asm/io.h>
+
+int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+EXPORT_SYMBOL_GPL(io_delay_type);
+
+static int __initdata io_delay_override;
+
+/*
+ * Paravirt wants native_io_delay to be a constant.
+ */
+void native_io_delay(void)
+{
+ switch (io_delay_type) {
+ default:
+ case CONFIG_IO_DELAY_TYPE_0X80:
+ asm volatile ("outb %al, $0x80");
+ break;
+ case CONFIG_IO_DELAY_TYPE_0XED:
+ asm volatile ("outb %al, $0xed");
+ break;
+ case CONFIG_IO_DELAY_TYPE_UDELAY:
+ /*
+ * 2 usecs is an upper-bound for the outb delay but
+ * note that udelay doesn't have the bus-level
+ * side-effects that outb does, nor does udelay() have
+ * precise timings during very early bootup (the delays
+ * are shorter until calibrated):
+ */
+ udelay(2);
+ case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ }
+}
+EXPORT_SYMBOL(native_io_delay);
+
+static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
+{
+ if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
+ id->ident);
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ }
+
+ return 0;
+}
+
+/*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "Compaq Presario V6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B7")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion tx1000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30BF")
+ }
+ },
+ { }
+};
+
+void __init io_delay_init(void)
+{
+ if (!io_delay_override)
+ dmi_check_system(io_delay_0xed_port_dmi_table);
+}
+
+static int __init io_delay_param(char *s)
+{
+ if (!strcmp(s, "0x80"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ else if (!strcmp(s, "0xed"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ else if (!strcmp(s, "udelay"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ else if (!strcmp(s, "none"))
+ io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+ else
+ return -EINVAL;
+
+ io_delay_override = 1;
+ return 0;
+}
+
+early_param("io_delay", io_delay_param);
Index: linux-x86.q/arch/x86/kernel/setup_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_32.c
+++ linux-x86.q/arch/x86/kernel/setup_32.c
@@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
Index: linux-x86.q/arch/x86/kernel/setup_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/setup_64.c
+++ linux-x86.q/arch/x86/kernel/setup_64.c
@@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+ io_delay_init();
+
#ifdef CONFIG_SMP
/* setup to use the static apicid table during kernel startup */
x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
Index: linux-x86.q/include/asm-x86/io_32.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_32.h
+++ linux-x86.q/include/asm-x86/io_32.h
@@ -250,10 +250,10 @@ static inline void flush_write_buffers(v
#endif /* __KERNEL__ */
-static inline void native_io_delay(void)
-{
- asm volatile("outb %%al,$0x80" : : : "memory");
-}
+extern void native_io_delay(void);
+
+extern int io_delay_type;
+extern void io_delay_init(void);
#if defined(CONFIG_PARAVIRT)
#include <asm/paravirt.h>
Index: linux-x86.q/include/asm-x86/io_64.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/io_64.h
+++ linux-x86.q/include/asm-x86/io_64.h
@@ -35,13 +35,20 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+extern void native_io_delay(void);
+extern int io_delay_type;
+extern void io_delay_init(void);
+
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +57,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
Index: linux-x86.q/kernel/sysctl.c
===================================================================
--- linux-x86.q.orig/kernel/sysctl.c
+++ linux-x86.q/kernel/sysctl.c
@@ -53,6 +53,7 @@
#ifdef CONFIG_X86
#include <asm/nmi.h>
#include <asm/stacktrace.h>
+#include <asm/io.h>
#endif
static int deprecated_sysctl_warning(struct __sysctl_args *args);
@@ -683,6 +684,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "io_delay_type",
+ .data = &io_delay_type,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
#if defined(CONFIG_MMU)
{
> ok, you are right. How about we go with one of your suggestions: rename
> the API family to isa_*_p() in the affected ISA drivers? That makes it
> perfectly clear that this is an ISA related historic quirk that we just
> cannot properly emulate in an acceptable fashion. It will also make the
> least amount of changes to these truly historic drivers.
Works for me. We need to build two versions of 8390.c now but thats no
big deal and sorts PCMCIA out too.
> The main maintenance thing we are interested in is to have no subsequent
> new uses of this API and to eliminate these accesses from modern
> hardware - and naming it clearly 'ISA' and making it dependent on
> CONFIG_ISA would likely achieve that purpose.
Agreed - will see if EISA/VLB cases come up but thats trivial.
> oh, another thing: there are 100+ mails in this thread while there are
> only 3 mails in the thread that lists 61 not-yet-fixed-in-2.6.24
> regressions:
That would be because I'm trying to stop 100 new extra regressions ;)
Alan
On 02-01-08 14:47, Alan Cox wrote:
>> ok, you are right. How about we go with one of your suggestions: rename
>> the API family to isa_*_p() in the affected ISA drivers? That makes it
>> perfectly clear that this is an ISA related historic quirk that we just
>> cannot properly emulate in an acceptable fashion. It will also make the
>> least amount of changes to these truly historic drivers.
>
> Works for me. We need to build two versions of 8390.c now but thats no
> big deal and sorts PCMCIA out too.
For no binary changes at all, and if going through all those outb_p() users
anyway, might/could as well just manually split them then:
outb_p() --> outb();
slow_down_io();
and then just leave out the slow_down_io() call in the non-ISA spots.
slow_down_io() could be renamed isa_io_delay() or anything (paravirt is a
little annoying there) if someone cares but then it's a complete identity
transformation for any driver that does care.
Would IMO also make for a somewhat better API than an isa_outb_p() as
there's nothing particularly ISA about the outb method itself -- many ISA
drivers use plain outb() as well.
Rene.
On 02-01-08 16:35, Rene Herman wrote:
> On 02-01-08 14:47, Alan Cox wrote:
>
>>> ok, you are right. How about we go with one of your suggestions:
>>> rename the API family to isa_*_p() in the affected ISA drivers? That
>>> makes it perfectly clear that this is an ISA related historic quirk
>>> that we just cannot properly emulate in an acceptable fashion. It
>>> will also make the least amount of changes to these truly historic
>>> drivers.
>>
>> Works for me. We need to build two versions of 8390.c now but thats no
>> big deal and sorts PCMCIA out too.
>
> For no binary changes at all, and if going through all those outb_p()
> users anyway, might/could as well just manually split them then:
>
> outb_p() --> outb();
> slow_down_io();
>
> and then just leave out the slow_down_io() call in the non-ISA spots.
> slow_down_io() could be renamed isa_io_delay() or anything (paravirt is
> a little annoying there) if someone cares but then it's a complete
> identity transformation for any driver that does care.
>
> Would IMO also make for a somewhat better API than an isa_outb_p() as
> there's nothing particularly ISA about the outb method itself -- many ISA
> drivers use plain outb() as well.
Would just need this bit of io.h arch unification from the original patch and
that's it:
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index a037b07..97cb8c6 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -35,13 +35,20 @@
* - Arnaldo Carvalho de Melo <[email protected]>
*/
-#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+static inline void native_io_delay(void)
+{
+ asm volatile("outb %%al,$0x80" : : : "memory");
+}
+static inline void slow_down_io(void)
+{
+ native_io_delay();
#ifdef REALLY_SLOW_IO
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
__SLOW_DOWN_IO
-#else
-#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
/*
* Talk about misusing macros..
@@ -50,21 +57,21 @@
static inline void out##s(unsigned x value, unsigned short port) {
#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd"
(port))
#define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd"
(port));} \
+__OUT1(s,x) __OUT2(s,s1,"w"); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); }
#define __IN1(s) \
static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port))
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port)
,##i ); return _v; } \
+#define __IN(s,s1) \
+__IN1(s) __IN2(s,s1,"w"); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; }
#define __INS(s) \
static inline void ins##s(unsigned short port, void * addr, unsigned long
count) \
Christer Weinigel <[email protected]> wrote:
> How do you find out the speed of the ISA bus? AFAIK there is no
> standardized way to do that. On the Geode SC2200 the ISA bus speed is
> usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
> 30/4=7.5MHz, but with no external ISA devices it's possible to
> overclock the ISA bus to /3 to run it at 11MHz or so. But without
> poking at some CPU and southbridge specific registers to find out the
> PCI bus speed and the ISA bus divisor you can't really tell.
If you overclock, you are on your own. IIRC I've used 13,3 MHz for some time
and used a lower PIO mode to compensate.
> So if you do udelay based on a 6MHz clock (I think you can safely
> assume that any 386 based system runs the ISA bus at least that fast)
> you'll waste at least 30% and maybe even 100% more time for the delay
> after every _p call.
Defaulting to 8 MHz and offering an option to set another clock speed
(like idebus=) should be OK.
Bodo Eggert wrote:
> Christer Weinigel <[email protected]> wrote:
>
>> How do you find out the speed of the ISA bus? AFAIK there is no
>> standardized way to do that. On the Geode SC2200 the ISA bus speed is
>> usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
>> 30/4=7.5MHz, but with no external ISA devices it's possible to
>> overclock the ISA bus to /3 to run it at 11MHz or so. But without
>> poking at some CPU and southbridge specific registers to find out the
>> PCI bus speed and the ISA bus divisor you can't really tell.
>
> If you overclock, you are on your own. IIRC I've used 13,3 MHz for some time
> and used a lower PIO mode to compensate.
>
>> So if you do udelay based on a 6MHz clock (I think you can safely
>> assume that any 386 based system runs the ISA bus at least that fast)
>> you'll waste at least 30% and maybe even 100% more time for the delay
>> after every _p call.
>
> Defaulting to 8 MHz and offering an option to set another clock speed
> (like idebus=) should be OK.
>
The formalization of the ISA bus which was part of the EISA
specification settled on 8.33 MHz maximum nominal frequency. There
were, however, some earlier designs which used up to 12 MHz nominal; I'm
not sure if that applied to 386s though.
-hpa
On Mon, 7 Jan 2008, H. Peter Anvin wrote:
> Bodo Eggert wrote:
> > Christer Weinigel <[email protected]> wrote:
> > > How do you find out the speed of the ISA bus? AFAIK there is no
> > > standardized way to do that. On the Geode SC2200 the ISA bus speed is
> > > usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
> > > 30/4=7.5MHz, but with no external ISA devices it's possible to
> > > overclock the ISA bus to /3 to run it at 11MHz or so. But without
> > > poking at some CPU and southbridge specific registers to find out the
> > > PCI bus speed and the ISA bus divisor you can't really tell.
> >
> > If you overclock, you are on your own. IIRC I've used 13,3 MHz for some time
> > and used a lower PIO mode to compensate.
> >
> > > So if you do udelay based on a 6MHz clock (I think you can safely
> > > assume that any 386 based system runs the ISA bus at least that fast)
> > > you'll waste at least 30% and maybe even 100% more time for the delay
> > > after every _p call.
> >
> > Defaulting to 8 MHz and offering an option to set another clock speed
> > (like idebus=) should be OK.
> >
>
> The formalization of the ISA bus which was part of the EISA specification
> settled on 8.33 MHz maximum nominal frequency. There were, however, some
> earlier designs which used up to 12 MHz nominal; I'm not sure if that applied
> to 386s though.
I've used up to 13,3 MHz on my 386DX40, but it was way out of spec and
I had to use a lower PIO mode to compensate. IIRC, one of my cards forced
me to settle for 10 MHz. Wikipedia claims there were systems having
16 MHz ISA bus, and systems underclocking themselves when accessing ISA.
I remember having optional and mandatory waitstates, too, but I'm not
100 % sure it was on ISA. I think they were ...
But overclocking is not the problem for udelay, it would err to the safe
side. The problem would be a BUS having < 8 MHz, and since the days of
80286, they are hard to find. IMO having an option to set the bus speed
for those systems should be enough.
--
knghtbrd:<JHM> AIX - the Unix from the universe where Spock has a beard.
Bodo Eggert wrote:
>
> But overclocking is not the problem for udelay, it would err to the safe
> side. The problem would be a BUS having < 8 MHz, and since the days of
> 80286, they are hard to find. IMO having an option to set the bus speed
> for those systems should be enough.
>
There might have been a few 386/20's clocking the ISA bus at ÷3
(6.67 MHz) rather than ÷2 (10 MHz) or ÷2.5 (8 MHz).
-hpa
On Mon, 7 Jan 2008, H. Peter Anvin wrote:
> Bodo Eggert wrote:
> > But overclocking is not the problem for udelay, it would err to the safe
> > side. The problem would be a BUS having < 8 MHz, and since the days of
> > 80286, they are hard to find. IMO having an option to set the bus speed
> > for those systems should be enough.
> >
>
> There might have been a few 386/20's clocking the ISA bus at ÷3
> (6.67 MHz) rather than ÷2 (10 MHz) or ÷2.5 (8 MHz).
Yes, and the remaining users should set the kernel option. Both of them.
The question is: How will they be told about the new kernel option?
--
A man inserted an advertisement in the classified: "Wife Wanted."
The next day he received a hundred letters. They all said the
same thing: "You can have mine."
On 07-01-08 23:27, Bodo Eggert wrote:
> On Mon, 7 Jan 2008, H. Peter Anvin wrote:
>> There might have been a few 386/20's clocking the ISA bus at ÷3 (6.67
>> MHz) rather than ÷2 (10 MHz) or ÷2.5 (8 MHz).
>
> Yes, and the remaining users should set the kernel option. Both of them.
> The question is: How will they be told about the new kernel option?
What exactly are you guys still talking about? Alan is looking at drivers
and finds that in them outb_p is generally correct and correctly specified
in bus-clocks for at least some (8390 was quoted). In those legacy drivers,
the _p ops can simply stay and can use the 15-year old proven 0x80 outb.
(with molnar suggesting they be renamed isa_in/outb_p and me suggesting that
if someone would be doing _that_ they might as well split them manually in
outb(); slow_down_io() possibly renaming slow_down_io() to isa_io_delay() or
similar).
Is this only about the ones then left for things like legacy PIC and PIT?
Does anyone care about just sticking in a udelay(2) (or 1) there as a
replacement and call it a day?
Rene.
Rene Herman wrote:
>
> Is this only about the ones then left for things like legacy PIC and
> PIT? Does anyone care about just sticking in a udelay(2) (or 1) there as
> a replacement and call it a day?
>
PIT is problematic because the PIT may be necessary for udelay setup.
-hpa
> But overclocking is not the problem for udelay, it would err to the safe
> side. The problem would be a BUS having < 8 MHz, and since the days of
> 80286, they are hard to find. IMO having an option to set the bus speed
> for those systems should be enough.
If you get it wrong you risk data corruption. Not good, not clever, not
appropriate. Basically the use of port 0x80 is the right thing to do for
ISA devices and as 15 odd years of use has shown works reliably and
solidly for ISA systems.
Alan
On 08-01-08 00:24, H. Peter Anvin wrote:
> Rene Herman wrote:
>>
>> Is this only about the ones then left for things like legacy PIC and
>> PIT? Does anyone care about just sticking in a udelay(2) (or 1) there
>> as a replacement and call it a day?
>>
>
> PIT is problematic because the PIT may be necessary for udelay setup.
Yes, can initialise loops_per_jiffy conservatively. Just didn't quite get
why you guys are talking about an ISA bus speed parameter.
Rene.
Rene.
H. Peter Anvin wrote:
> Rene Herman wrote:
>>
>> Is this only about the ones then left for things like legacy PIC and
>> PIT? Does anyone care about just sticking in a udelay(2) (or 1) there
>> as a replacement and call it a day?
>>
>
> PIT is problematic because the PIT may be necessary for udelay setup.
>
The PIT usage for calibrating the delay loop can be moderated, if need
be, by using the PC BIOS which by definition uses the PIT correctly in
its int 15 function 83 call. Just do it before coming up in a state
where the PC BIOS int 15h calls no longer work. I gave code to do this
in a much earlier message.
This is the MOST reliable way to use the PIT early in boot, on a PC
compatible. God knows how one should do it on a Macintosh running a
386/20 :-). But the ONLY old bat-PIT machines are, thank god, PC
compatible, maybe.
On another topic. I have indeed determined what device uses port 80 on
Quanta AMD64 laptops from HP.
I had lunch with Jim Gettys of OLPC a week ago; he's an old friend since
he worked on the original X windows system. After telling him my story
about port 80, he mentioned that the OLPC XO machine had some issues
with port 80 which was by design handled by the ENE KBC device on its
motherboard. He said the ENE was a very desirable chipset for AMD
designs recommended by Quanta. Richard Smith of OLPC explained to me
how the KB3700 they use works, and that they use the KB3700 to send POST
codes out over a serial link during boot up.
This gave me a reason to take apart my laptop, to discover that it has
an ENE KB3920 B0 as its EC and KBC. The port interface for the KB3920
includes listening to port 80 which is then made available to firmware
on the EC. It is recognized and decoded on the LPC bus, only for
writes, and optionally can generate an interrupt in the 8051.
Dumping both the ENE chip, and looking at the DSDT.dsl for my machine, I
discovered that port 80 is used as an additional parameter for various
DSDT methods that communicate to the EC, when it is operating in ACPI mode.
More work is in progress as I play around with this. But the key thing
is that ACPI and perhaps SMM both use port 80 as part of the base
function of the chipset.
And actually, if I had looked at the /sys/bus/pnp definitions, rather
than /proc/ioports, I would have noticed that port 80 was part of a
PNP0C02 resource set. That means exactly one thing: ACPI says that
port 80 is NOT free to be used, for delays or anything else.
This should make no difference here: it's just one more reason to stop
using port 80 for delays on modern machines.
David P. Reed wrote:
>
> And actually, if I had looked at the /sys/bus/pnp definitions, rather
> than /proc/ioports, I would have noticed that port 80 was part of a
> PNP0C02 resource set. That means exactly one thing: ACPI says that
> port 80 is NOT free to be used, for delays or anything else.
>
> This should make no difference here: it's just one more reason to stop
> using port 80 for delays on modern machines.
>
And shoot the designer of this particular microcontroller firmware.
-hpa
H. Peter Anvin wrote:
>
> And shoot the designer of this particular microcontroller firmware.
>
>
Well, some days I want to shoot the "designer" of the entire Wintel
architecture... it's not exactly "designed" by anybody of course, and
today it's created largely by a collection of Taiwanese and Chinese ODM
firms, coupled with Microsoft WinHEC and Intel folks. At least they
follow the rules and their ACPI and BIOS code say that they are using
port 80 very clearly if you use PnP and ACPI properly. And in the old
days, you were "supposed" to use the system BIOS to talk to things like
the PIT that had timing issues, not write your own code.
Or perhaps the ACPI spec should specify a timing loop spec and precisely
specify the desired timing after accessing an I/O port till that device
has properly "acted" on that operation.
The idea that Port 80 was "unused" and appropriate for delay purposes
elicited skepticism by Linus that is recorded for posterity in the
comments of the relevant Linux include files - especially since it was
clearly "used" for non-delay purposes, by cards that could be plugged
into a PCI (fast), not just an 8-bit ISA, bus.
Perhaps we should declare the world of ACPI systems a separate "arch"
from the world of l'ancien regime where folklore about which ports were
used for what ruled. I lived through those old days, and they were not
wonderful, either.
The world sucks, and Linux is supposed to be able to adapt to that
world, suckitude and all.
> The PIT usage for calibrating the delay loop can be moderated, if need
> be, by using the PC BIOS which by definition uses the PIT correctly in
> its int 15 function 83 call. Just do it before coming up in a state
> where the PC BIOS int 15h calls no longer work. I gave code to do this
> in a much earlier message.
And as I've said before we don't know if we have a PC BIOS. If we are
running from a kexec or on a Macintoy with EFI or an Xbox we may not.
As per previous discussions for the PIT we can simply guess a safe
initial udelay value and then tune the real one.
Alan
On Mon, 07 Jan 2008 20:38:09 +0100
Bodo Eggert <[email protected]> wrote:
> Christer Weinigel <[email protected]> wrote:
>
> > How do you find out the speed of the ISA bus? AFAIK there is no
> > standardized way to do that. On the Geode SC2200 the ISA bus speed
> > is usually the PCI clock divided by 4 giving 33MHz/4=8.3MHz or
> > 30/4=7.5MHz, but with no external ISA devices it's possible to
> > overclock the ISA bus to /3 to run it at 11MHz or so. But without
> > poking at some CPU and southbridge specific registers to find out
> > the PCI bus speed and the ISA bus divisor you can't really tell.
>
> If you overclock, you are on your own. IIRC I've used 13,3 MHz for
> some time and used a lower PIO mode to compensate.
That would not be overclocking, rather that the hardware designer would
have determined that on that specific hardware design, all peripherals
are able to run at 12MHz.
Also note that on some other system the hardware designer might have
decided to have a slower ISA clock, to save power, fulfil some EMI
requirement or whatever.
> > So if you do udelay based on a 6MHz clock (I think you can safely
> > assume that any 386 based system runs the ISA bus at least that
> > fast) you'll waste at least 30% and maybe even 100% more time for
> > the delay after every _p call.
>
> Defaulting to 8 MHz and offering an option to set another clock speed
> (like idebus=) should be OK.
Sounds like a big regression to have to start using a command line
option, when the current state of affairs is "it just works".
/Christer
On Tue, 8 Jan 2008, Rene Herman wrote:
> On 08-01-08 00:24, H. Peter Anvin wrote:
> > Rene Herman wrote:
> > > Is this only about the ones then left for things like legacy PIC and PIT?
> > > Does anyone care about just sticking in a udelay(2) (or 1) there as a
> > > replacement and call it a day?
> > >
> >
> > PIT is problematic because the PIT may be necessary for udelay setup.
>
> Yes, can initialise loops_per_jiffy conservatively. Just didn't quite get why
> you guys are talking about an ISA bus speed parameter.
If the ISA bus is below 8 MHz, we might need a longer delay. If we default
to the longer delay, the delay will be too long for more than 99,99 % of
all systems, not counting i586+. Especially if the driver is fine-tuned to
give maximum throughput, this may be bad.
OTOH, the DOS drivers I heard about use delays and would break on
underclocked ISA busses if the n * ISA_HZ delay was needed. Maybe
somebody having a configurable ISA bus speed and some problematic
chips can test it ...
--
Fun things to slip into your budget
"I [Meow Cat] sliped in 'Legal fees for firing Jim (Jim's my [his] boss).'
Jim approved the budget and was fired when upper management saw the budget."
The last time I heard of a 12 MHz bus in a PC system was in the days of
the PC-AT, when some clone makers sped up their buses (pre PCI!!!) in an
attempt to allow adapter card *memory* to run at the 12 MHz speed.
This caused so many industry-wide problems with adapter cards that
couldn't be installed in certain machines and still run reliably that
the industry learned a lesson. That doesn't mean that LPCs don't run at
12 MHz, but if they do, they don't have old 8 bit punky cards plugged
into them for lots of practical reasons. (I have whole drawers full of
such old cards, trying to figure out an environmentally responsible way
to get rid of them - even third world countries would be fools to make
machines with them).
I can't believe that we are not supporting today's machines correctly
because we are still trying to be compatible with a few (at most a
hundred thousand were manufactured! Much less still functioning or
running Linux) machines.
Now I understand that PC/104 machines and other things are very non PC
compatible, but are x86 processor architectures. Do they even run x86
under 2.6.24?
Perhaps the rational solution here is to declare x86 the architecture
for "relics" and develop a merged architecture called "modern machines"
to include only those PCs that have been made to work since, say, the
release of (cough) Windows 2000?
Bodo Eggert wrote:
> On Tue, 8 Jan 2008, Rene Herman wrote:
>
>> On 08-01-08 00:24, H. Peter Anvin wrote:
>>
>>> Rene Herman wrote:
>>>
>
>
>>>> Is this only about the ones then left for things like legacy PIC and PIT?
>>>> Does anyone care about just sticking in a udelay(2) (or 1) there as a
>>>> replacement and call it a day?
>>>>
>>>>
>>> PIT is problematic because the PIT may be necessary for udelay setup.
>>>
>> Yes, can initialise loops_per_jiffy conservatively. Just didn't quite get why
>> you guys are talking about an ISA bus speed parameter.
>>
>
> If the ISA bus is below 8 MHz, we might need a longer delay. If we default
> to the longer delay, the delay will be too long for more than 99,99 % of
> all systems, not counting i586+. Especially if the driver is fine-tuned to
> give maximum throughput, this may be bad.
>
> OTOH, the DOS drivers I heard about use delays and would break on
> underclocked ISA busses if the n * ISA_HZ delay was needed. Maybe
> somebody having a configurable ISA bus speed and some problematic
> chips can test it ...
>
>
On Mon, 7 Jan 2008, Alan Cox wrote:
> > But overclocking is not the problem for udelay, it would err to the safe
> > side. The problem would be a BUS having < 8 MHz, and since the days of
> > 80286, they are hard to find. IMO having an option to set the bus speed
> > for those systems should be enough.
>
> If you get it wrong you risk data corruption. Not good, not clever, not
> appropriate. Basically the use of port 0x80 is the right thing to do for
> ISA devices and as 15 odd years of use has shown works reliably and
> solidly for ISA systems.
As long as there is no port 80 card or a similar device using it. If
there is a port 80 card, ISA access needing the delay does break, cause
the data corruption you fear and does cause this thread to be started.
Pest, Cholera ...
OTOH, maybe the 6-MHz-delay is the same as the 8-MHz-delay, and the kernel
parameter is not needed.
--
Fun things to slip into your budget
A Romulan Cloaking device:
The PHB won't know what it is but will be too chicken to ask
On 08-01-08 13:51, Bodo Eggert wrote:
> On Tue, 8 Jan 2008, Rene Herman wrote:
>>>> Is this only about the ones then left for things like legacy PIC and PIT?
>>>> Does anyone care about just sticking in a udelay(2) (or 1) there as a
>>>> replacement and call it a day?
>>>>
>>> PIT is problematic because the PIT may be necessary for udelay setup.
>> Yes, can initialise loops_per_jiffy conservatively. Just didn't quite get why
>> you guys are talking about an ISA bus speed parameter.
>
> If the ISA bus is below 8 MHz, we might need a longer delay. If we default
> to the longer delay, the delay will be too long for more than 99,99 % of
> all systems, not counting i586+. Especially if the driver is fine-tuned to
> give maximum throughput, this may be bad.
Yes, and I repeat -- old legacy ISA drivers can stay as they are. They've
been doing what they've been doing for 15 years and given that the systems
that break don't use them there is no practical upside to changing them and
a big downside particularly with respect to difficulty of testing.
A somewhat overly long delay shouldn't be particularly problematic for the
few remaining legacy hardware users _outside_ drivers/
Rene.
> OTOH, the DOS drivers I heard about use delays and would break on
> underclocked ISA busses if the n * ISA_HZ delay was needed. Maybe
> somebody having a configurable ISA bus speed and some problematic
> chips can test it ...
I've been looking at DOS reference drivers - they almost all use I/O port
based delays.
> The last time I heard of a 12 MHz bus in a PC system was in the days of
> the PC-AT, when some clone makers sped up their buses (pre PCI!!!) in an
> attempt to allow adapter card *memory* to run at the 12 MHz speed.
It wasn't about clone makers speeding up their busses. The ISA bus
originally ran at the CPU clock - 4.77/8/6/10 .. etc. Quite a few board
makers assumed 8MHz and while faster isn't a big problem at 8bit trying
to do the 8/16 bit decode with logic chips at 8MHz is quite tight and
above that generally broke. 8bit tends to work fine because you've got a
lot more timing headroom.
> I can't believe that we are not supporting today's machines correctly
> because we are still trying to be compatible with a few (at most a
> hundred thousand were manufactured! Much less still functioning or
> running Linux) machines.
It is about supporting this properly. Properly for ISA devices means
using I/O delays. Properly for chipset devices is probably using udelay.
> Now I understand that PC/104 machines and other things are very non PC
> compatible, but are x86 processor architectures. Do they even run x86
> under 2.6.24?
Linux runs on x86, it isn't limited to PC type architectures at all. We
don't need a BIOS, we don't need legacy compatible I/O devices.
> for "relics" and develop a merged architecture called "modern machines"
> to include only those PCs that have been made to work since, say, the
> release of (cough) Windows 2000?
No point. We've got the 64bit kernel for that. That is a much saner
boundary to throw out all the nutty stuff.
Alan
> As long as there is no port 80 card or a similar device using it. If
> there is a port 80 card, ISA access needing the delay does break
Such cards are very unusual on ISA machines and it hasn't been a problem
in fifteen years. All the alternatives are vastly higher risk
On Tuesday 08 January 2008 02:38:15 David P. Reed wrote:
> H. Peter Anvin wrote:
> > And shoot the designer of this particular microcontroller firmware.
>
> Well, some days I want to shoot the "designer" of the entire Wintel
> architecture... it's not exactly "designed" by anybody of course, and
> today it's created largely by a collection of Taiwanese and Chinese ODM
> firms, coupled with Microsoft WinHEC and Intel folks. At least they
> follow the rules and their ACPI and BIOS code say that they are using
> port 80 very clearly if you use PnP and ACPI properly. And in the old
> days, you were "supposed" to use the system BIOS to talk to things like
> the PIT that had timing issues, not write your own code.
Does anyone know what port Windows uses? I'm pretty sure that it isn't 80h
as I run Windows 98 often with port 80h debug card inserted. The last POST
code set by BIOS usually remains on the display and only changes when BIOS
does something like suspend/resume. IIRC, there was a program that was able
to display temperature from onboard sensors on the port 80h display that's
integrated on some mainboards.
--
Ondrej Zary
Windows these days does delays with timing loops or the scheduler. It
doesn't use a "port". Also, Windows XP only supports machines that tend
not to have timing problems that use delays. Instead, if a device takes
a while to respond, it has a "busy bit" in some port or memory slot that
can be tested.
Almost all of the issues in Linux where _p operations are used are (or
should be) historical - IMO.
Ondrej Zary wrote:
> On Tuesday 08 January 2008 02:38:15 David P. Reed wrote:
>
>> H. Peter Anvin wrote:
>>
>>> And shoot the designer of this particular microcontroller firmware.
>>>
>> Well, some days I want to shoot the "designer" of the entire Wintel
>> architecture... it's not exactly "designed" by anybody of course, and
>> today it's created largely by a collection of Taiwanese and Chinese ODM
>> firms, coupled with Microsoft WinHEC and Intel folks. At least they
>> follow the rules and their ACPI and BIOS code say that they are using
>> port 80 very clearly if you use PnP and ACPI properly. And in the old
>> days, you were "supposed" to use the system BIOS to talk to things like
>> the PIT that had timing issues, not write your own code.
>>
>
> Does anyone know what port does Windows use? I'm pretty sure that it isn't 80h
> as I run Windows 98 often with port 80h debug card inserted. The last POST
> code set by BIOS usually remains on the display and only changes when BIOS
> does something like suspend/resume. IIRC, there was a program that was able
> to display temperature from onboard sensors on the port 80h display that's
> integrated on some mainboards.
>
>
On Tuesday 08 January 2008 18:24:02 David P. Reed wrote:
> Windows these days does delays with timing loops or the scheduler. It
> doesn't use a "port". Also, Windows XP only supports machines that tend
> not to have timing problems that use delays. Instead, if a device takes
> a while to respond, it has a "busy bit" in some port or memory slot that
> can be tested.
Windows XP can run on a machine with ISA slot(s) and has built-in drivers for
some plug&play ISA cards - e.g. the famous 3Com EtherLink III. I think that
there's a driver for NE2000-compatible cards too and it probably works.
> Almost all of the issues in Linux where _p operations are used are (or
> should be) historical - IMO.
>
> Ondrej Zary wrote:
> > On Tuesday 08 January 2008 02:38:15 David P. Reed wrote:
> >> H. Peter Anvin wrote:
> >>> And shoot the designer of this particular microcontroller firmware.
> >>
> >> Well, some days I want to shoot the "designer" of the entire Wintel
> >> architecture... it's not exactly "designed" by anybody of course, and
> >> today it's created largely by a collection of Taiwanese and Chinese ODM
> >> firms, coupled with Microsoft WinHEC and Intel folks. At least they
> >> follow the rules and their ACPI and BIOS code say that they are using
> >> port 80 very clearly if you use PnP and ACPI properly. And in the old
> >> days, you were "supposed" to use the system BIOS to talk to things like
> >> the PIT that had timing issues, not write your own code.
> >
> > Does anyone know what port does Windows use? I'm pretty sure that it
> > isn't 80h as I run Windows 98 often with port 80h debug card inserted.
> > The last POST code set by BIOS usually remains on the display and only
> > changes when BIOS does something like suspend/resume. IIRC, there was a
> > program that was able to display temperature from onboard sensors on the
> > port 80h display that's integrated on some mainboards.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Ondrej Zary
Ondrej Zary wrote:
> On Tuesday 08 January 2008 18:24:02 David P. Reed wrote:
>
>> Windows these days does delays with timing loops or the scheduler. It
>> doesn't use a "port". Also, Windows XP only supports machines that tend
>> not to have timing problems that use delays. Instead, if a device takes
>> a while to respond, it has a "busy bit" in some port or memory slot that
>> can be tested.
>>
>
> Windows XP can run on a machine with ISA slot(s) and has built-in drivers for
> some plug&play ISA cards - e.g. the famous 3Com EtherLink III. I think that
> there's a driver for NE2000-compatible cards too and it probably works.
>
There is no need to use io writes to supposedly/theoretically "unused
ports" to make drivers work on any bus.
ISA included! You can, for example, wait for an ISA bus serial adapter
to put out its next character by looping reading the port that has the
output buffer full flag in a tight loop, with no delay code at all. And
if you need to time things, just call a timing loop subroutine that you
calibrate at boot time.
I wrote DOS drivers for NE2000's on the ISA bus when they were brand new
designs from Novell without such kludges as writes to I/O port 80. I
don't remember writing a driver for the 3com devices - probably didn't,
because 3Com's cards were expensive at the time.
In any case, Linux *did* adopt this port 80 strategy - I'm sure all
concerned thought it was frightfully clever at the time. Linus
expressed his skepticism in the comments in io.h. The problem is to
safely move away from it toward a proper strategy that doesn't depend on
"bus aborts" which would trigger machine checks if they were properly
enabled.
> There is no need to use io writes to supposedly/theoretically "unused
> ports" to make drivers work on any bus.
The natsemi docs here say otherwise. I trust them not you.
> don't remember writing a driver for the 3com devices - probably didn't,
> because 3Com's cards were expensive at the time.
3C503 needs delays for some setups according to the docs. I can't tell
you how the 3COM drivers did it as that was a different bit of 3com to
the bit I worked for. From the rest of 3Com I saw probably utterly
vilely ;)
Later 3Com stuff was either sane (3c509 etc) or used whacko intel chips
(3c507/27) which had their own special breed of insanity to replace
address setup delay bugs.
> In any case, Linux *did* adopt this port 80 strategy - I'm sure all
> concerned thought it was frightfully clever at the time. Linus
> expressed his skepticism in the comments in io.h. The problem is to
> safely move away from it toward a proper strategy
No. The problem is that certain people, unfortunately those who know
nothing about ISA related bus systems, keep trying to confuse ISA delay
logic with core chip logic and end up trying to solve both a problem and a
non-problem in one, creating a nasty mess in the process.
Alan
On Tue, 8 Jan 2008, Ondrej Zary wrote:
> On Tuesday 08 January 2008 18:24:02 David P. Reed wrote:
> > Windows these days does delays with timing loops or the scheduler. It
> > doesn't use a "port". Also, Windows XP only supports machines that tend
> > not to have timing problems that use delays. Instead, if a device takes
> > a while to respond, it has a "busy bit" in some port or memory slot that
> > can be tested.
>
> Windows XP can run on a machine with ISA slot(s) and has built-in drivers for
> some plug&play ISA cards - e.g. the famous 3Com EtherLink III. I think that
> there's a driver for NE2000-compatible cards too and it probably works.
The NE2K-driver went missing in W2K. BTDT.
--
Anyone can speak Troll. All you have to do is point and grunt.
-- Fred Weasley
On Tuesday 08 January 2008 19:51:41 Bodo Eggert wrote:
> On Tue, 8 Jan 2008, Ondrej Zary wrote:
> > On Tuesday 08 January 2008 18:24:02 David P. Reed wrote:
> > > Windows these days does delays with timing loops or the scheduler. It
> > > doesn't use a "port". Also, Windows XP only supports machines that
> > > tend not to have timing problems that use delays. Instead, if a device
> > > takes a while to respond, it has a "busy bit" in some port or memory
> > > slot that can be tested.
> >
> > Windows XP can run on a machine with ISA slot(s) and has built-in drivers
> > for some plug&play ISA cards - e.g. the famous 3Com EtherLink III. I
> > think that there's a driver for NE2000-compatible cards too and it
> > probably works.
>
> The NE2K-driver went missing in W2K. BTDT.
Haven't tried personally but it seems to work according to this
http://www.windowsnetworking.com/articles_tutorials/wxpne2k.html - and it can
be made to work even with non-PnP cards.
--
Ondrej Zary
Alan Cox wrote:
> The natsemi docs here say otherwise. I trust them not you.
>
As well you should. I am honestly curious (for my own satisfaction) as
to what the natsemi docs say the delay code should do (can't imagine
they say "use io port 80 because it is unused"). I don't have any
copies anymore. But mere curiosity on my part is not worth spending a
lot of time on - I know you are super busy. If there's a copy online
at a URL ...
>
> The problem is that certain people, unfortunately those who know
> nothing about ISA related bus systems, keep trying to confuse ISA delay
> logic with core chip logic and end up trying to solve both a problem and a
> non-problem in one, creating a nasty mess in the process.
>
>
I agree that the problems of chip logic and ISA delay are all tangled
up, probably more than need be. I hope that the solution turns out to
simplify matters, and hopefully to document the intention of the
resulting code sections a bit more clearly for the future.
On Tue, 08 Jan 2008 13:44:54 -0500
"David P. Reed" <[email protected]> wrote:
> Ondrej Zary wrote:
> > On Tuesday 08 January 2008 18:24:02 David P. Reed wrote:
> >
> >> Windows these days does delays with timing loops or the
> >> scheduler. It doesn't use a "port". Also, Windows XP only
> >> supports machines that tend not to have timing problems that use
> >> delays. Instead, if a device takes a while to respond, it has a
> >> "busy bit" in some port or memory slot that can be tested.
> >>
> There is no need to use io writes to supposedly/theoretically "unused
> ports" to make drivers work on any bus.
> ISA included! You can, for example, wait for an ISA bus serial
> adapter to put out its next character by looping reading the port
> that has the output buffer full flag in a tight loop, with no delay
> code at all. And if you need to time things, just call a timing loop
> subroutine that you calibrate at boot time.
Now you're totally confusing things. You're talking about looking at
bits in a register to see if a transmit register is empty.
That's easy.
The delays needed for the Intel M8259 and M8253 say that you're not
even allowed to access the registers _at_ _all_ for some time after a
register access. If you do a write to a register immediately followed
by any access, including a read of the status register, you can corrupt
the state of the chip.
And the Intel chips are not the only ones with that kind of brain
damage. But what makes the 8259 and 8253 a big problem is that every
modern PC has a descendant of those chips in them. The discrete Intel
chips or clones got aggregated into Super I/O chips, and the Super I/O
chips were put on a LPC bus (an ISA bus with another name) or
integrated into the southbridge. And the "if it ain't broken, don't fix
it" mantra probably means that some modern chipsets are still using
exactly the same internal design as the 25 year old chips and will
still be subject to some of those ancient limitations.
/Christer
> As well you should. I am honestly curious (for my own satisfaction) as
> to what the natsemi docs say the delay code should do (can't imagine
> they say "use io port 80 because it is unused"). I don't have any
They say you must allow 4 bus clocks for the address decode. They don't
deal with the ISA side as the chip itself has no ISA glue.
> copies anymore. But mere curiosity on my part is not worth spending a
> lot of time on - I know you are super busy. If there's a copy online
> at a URL ...
Not that I know of. There may be. A good general source of info is Russ
Nelson's old DOS packet driver collection.
Alan -
I dug up a DP83901A SNIC datasheet in a quick Google search, while that
wasn't the only such chip, it was one of them. I can forward the PDF
(the http://www.alldatasheet.com site dynamically creates the download URL), if
anyone wants it.
The relevant passage says, in regard to delaying between checking the
CRDA addresses to see if a dummy "remote read" has been executed, and
in regard perhaps to other card IO register loops:
TIME BETWEEN CHIP SELECTS
The SNIC requires that successive chip selects be no closer
than 4 bus clocks (BSCK) together. If the condition is violated
the SNIC may glitch ACK. CPUs that operate from pipelined
instructions (i.e. 386) or have a cache (i.e. 486) can
execute consecutive I/O cycles very quickly. The solution is
to delay the execution of consecutive I/O cycles by either
breaking the pipeline or forcing the CPU to access outside
its cache.
The NE2000 as I recall had no special logic on the board to protect the
chip from successive chip selects that were too close - which is the
reason for the problem. Clearly an out to port 80 takes more than 4 ISA
bus clocks, so that works if the NE2000 is on the ISA bus. On the
other hand, there are other ways to delay more than 4 ISA bus clocks.
And as you say, one needs a delay for this chip that relates to the
chip's card's bus's clock speed, not absolute time.
Alan Cox wrote:
>> As well you should. I am honestly curious (for my own satisfaction) as
>> to what the natsemi docs say the delay code should do (can't imagine
>> they say "use io port 80 because it is unused"). I don't have any
>>
>
> They say you must allow 4 bus clocks for the address decode. They don't
> deal with the ISA side as the chip itself has no ISA glue.
>
>
>
>> copies anymore. But mere curiosity on my part is not worth spending a
>> lot of time on - I know you are super busy. If there's a copy online
>> at a URL ...
>>
>
> Not that I know of. There may be. A good general source of info is Russ
> Nelson's old DOS packet driver collection.
>
>
>
Christer Weinigel wrote:
>> There is no need to use io writes to supposedly/theoretically "unused
>> ports" to make drivers work on any bus.
>> ISA included! You can, for example, wait for an ISA bus serial
>> adapter to put out its next character by looping reading the port
>> that has the output buffer full flag in a tight loop, with no delay
>> code at all. And if you need to time things, just call a timing loop
>> subroutine that you calibrate at boot time.
>>
>
> Now you're totally confusing things. You're talking about looking at
> bits in a register to see if a transmit register is empty.
> That's easy.
>
> The delays needed for the Intel M8259 and M8253 say that you're not
> even allowed to access the registers _at_ _all_ for some time after a
> register access. If you do a write to a register immediately followed
> by any access, including a read of the status register, you can corrupt
> the state of the chip.
>
Not true. Even on the original IBM 5150 PC, the 8259 on the motherboard
accepted back to back OUT and IN instructions, and it would NOT trash
the chip state. You can read the original IBM BIOS code if you like. I
don't remember about the 8253's timing. I doubt the chip's state would
be corrupted in any way. The data and address lines were the same data
and address lines that the microprocessor used to access memory - it
didn't "hold" the lines stable any longer than the OUT instruction.
> And the Intel chips are not the only ones with that kind of brain
> damage. But what makes the 8259 and 8253 a big problem is that every
> modern PC has a descendant of those chips in them.
Register compatible. Not the same chips or even the same masks or
timing requirements.
> The discrete Intel
> chips or clones got aggregated into Super I/O chips, and the Super I/O
> chips were put on a LPC bus (an ISA bus with another name) or
> integrated into the southbrige.
Don't try to teach your grandmother to suck eggs: I've been programming
PC compatibles since probably before you were able to do long division -
including writing code on the first prototype IBM PCs, the first
pre-manufacturing PC-ATs, and zillions of clones. (and I was also
involved in designing hardware including the so-called "Lotus Intel"
expanded memory cards and the original PC cards) The 8259 PIC is an
*interrupt controller*. It was NEVER present in a Super I/O chip, or an
LPC chip. Its functionality was absorbed into the chipsets that control
interrupt mapping, like the PIIX and the nForce.
> And the "if it ain't broken, don't fix
> it" mantra probably means that some modern chipsets are still using
> exactly the same internal design as the 25 year old chips and will
> still be subject to some of those ancient limitations.
>
Oh, come on. Give the VLSI designers some credit for brains. The CAD
tools used to design the 8259 and 8253 were so primitive you couldn't
even get a chip manufactured with designs from that era today. When
people design chips today they do it with simulators that can't even
work, and testers that run from test suites that were not available at
the time.
On Tue, 08 Jan 2008 15:28:03 -0500
"David P. Reed" <[email protected]> wrote:
> Register compatible. Not the same chips or even the same masks or
> timing requirements.
No, but somehow people keep making similar mistakes. The DEC HiNote
needed outb_p to function correctly? That was definitely a much more
modern design than the original PC and most probably did not use any
discrete Intel chips, but the same timing problems were there. I believe
that problem had to do with the keyboard controller though.
> > The discrete Intel
> > chips or clones got aggregated into Super I/O chips, and the Super
> > I/O chips were put on a LPC bus (an ISA bus with another name) or
> > integrated into the southbrige.
> Don't try to teach your grandmother to suck eggs: I've been
> programming PC compatibles since probably before you were able to do
> long division - including writing code on the first prototype IBM
> PCs, the first pre-manufacturing PC-ATs, and zillions of clones.
> (and I was also involved in designing hardware including the
> so-called "Lotus Intel" expanded memory cards and the original PC
> cards)
Argument by personal authority. That's good. I guess that's why you
don't seem to understand the difference between reading the serial port
status register and not being allowed to access a register at all
due to such things as the 4 cycle delay you quoted yourself from the 8390
data sheet, and similar issues with the I8253 that I quoted from its
data sheet a few posts ago.
> The 8259 PIC is an *interrupt controller*. It was NEVER
> present in a Super I/O chip, or an LPC chip. Its functionality was
> absorbed into the chipsets that control interrupt mapping, like the
> PIIX and the nForce.
Yup, sorry about that, it got integrated into some other chip instead.
I was thinking of another timer, the RTC which is usually a part of the
Super I/O. And which is yet another troublesome area since apparently a
lot of chipsets have problems with it. But the sequence is the same,
discrete chips get aggregated into larger chips. Sometimes the
old macrocells are reused, sometimes they are redesigned from
scratch (and new bugs are introduced).
> > And the "if it ain't broken, don't fix
> > it" mantra probably means that some modern chipsets are still using
> > exactly the same internal design as the 25 year old chips and will
> > still be subject to some of those ancient limitations.
> >
> Oh, come on. Give the VLSI designers some credit for brains. The
> CAD tools used to design the 8259 and 8253 were so primitive you
> couldn't even get a chip manufactured with designs from that era
> today. When people design chips today they do it with simulators
> that can't even work, and testers that run from test suites that were
> not available at the time.
And they still keep making the same mistakes... Registers that require
wait states before being read again, registers that assume that there
are going to be some spare cycles between each access so that some
internal logic has time to update, registers that would have needed a
one byte FIFO to avoid DMA overruns (I'd almost forgotten about that
specific bug on SPI controller of the Samsung 2410, but it bit me last
week and I only managed to chase it down properly yesterday), and so on.
I'm quite impressed with what some VLSI designers manage to do. I just
saw a company roll out a completely new ARM9 design with lots of fun
stuff and as far as I know they only made one single mistake on that
chip. On the other hand, on other designs you can see how the same old
macrocell has been reused long past the "best before" date, because
some bugs crop up over and over again.
/Christer
Christer Weinigel wrote:
> Argument by personal authority. Thats good.
There is no other kind of argument. Are you claiming supernatural
authority drives your typing fingers, or is your argument based on what
you think you know? I have piles of code that I wrote, spec sheets (now
that I'm back in my home office), code that others wrote at the time,
and documentation from vendors that come from my personal experiences.
That doesn't mean I'm always right - always happy to learn something
new. Just don't condescend to a 55 year old who has been writing
operating systems, compilers, and designing hardware for almost 40 years
professionally (yes, I got my first job at 16 writing FORTRAN code to
simulate hydrodynamic systems).
> I guess that's why you
> don't seem to understand the difference between reading the serial port
> status register and not being allowed to access a register at all
> due to such this as the 4 cycle delay you quoted yourself from the 8390
> data sheet,
If you read what I said carefully, I said that the 8390 was a very
special case. The "chip select" problem it experienced was pretty much
unique among boards of the time. Those of us who looked at its design
and had any experience designing hardware for buses like the unibus or
even the buses on PDP-8's and DG machines thought it had to be a joke.
Of course it saved money per board, so it beat the 3Com boards on price
- and you could program it after a fashion. So it involved "cheaping out".
The normal timing problem was that an out or in operation to a board or
chip required some time to elapse before the chip performed the side
effects internally so that the next operation to it would have an
effect. This is exactly the reason why most chips and boards are
designed to either have a polling of a flag indicate operation
completion. The serial "buffer empty" flag is the simplest possible
explanatory example of such handshaking that came to mind (writing a
character to a serial output device twice often leads to surprises,
unless you wait for the previous character to clock out). See my
comment on RTC below, for a more complex to explain example.
> and similar issues with the I8253 that I quoted from its
> data sheet a few posts ago.
>
>
The 8253 was a motherboard chip. I am not sure it had any timing
problems with its electrical signalling. I just don't remember. The
spec sheet doesn't say its internal state can get scrambled.
>
> I was thinking of another timer, the RTC which is usually a part of the
> Super I/O.
The RTC has very well documented timing requirements. But none of the
spec sheets, nor my experience with it, mention electrical issues that
prevented back-to-back port operations. The documented timing
requirements have to do with the state during the time it ticks over
internally once per second. But it is carefully designed to have a flag
that is "on" during 244 microseconds prior to and covering the time it
is unsafe to read the registers. That design is special because it is
designed to operate when the machine is powered off, so it has two
internal clock domains, one of which is used in "low power" mode and is
very slow to minimize power.
On Tue, 2008-01-08 at 14:15 -0500, David P. Reed wrote:
> Alan Cox wrote:
> > The natsemi docs here say otherwise. I trust them not you.
> >
> As well you should. I am honestly curious (for my own satisfaction) as
> to what the natsemi docs say the delay code should do (can't imagine
> they say "use io port 80 because it is unused"). I don't have any
What is the outcome of this thread? Are we going to use timing based
port delays, or can we finally drop these things entirely on 64-bit
architectures?
I have a doubly vested interest in this, both as the owner of an
affected HP dv9210us laptop and as a maintainer of paravirt code - and
would like 64-bit Linux code to stop using I/O to port 0x80 in both
cases (as I suspect would every other person involved with
virtualization).
BTW, it isn't ever safe to pass port 0x80 through to hardware from a
virtual machine; some OSes use port 0x80 as a hardware available scratch
register (I believe Darwin/x86 did/does this during boot). This means
simultaneous execution of two virtual machines can interleave port 0x80
values or share data with a hardware provided covert channel. This
means KVM should be trapping port 0x80 access, which is really
expensive, or alternatively, Linux should not be using port 0x80 for
timing bus access on modern (64-bit) hardware.
I've tried to follow this thread, but with all the jabs, 1-ups, and
obscure legacy hardware pageantry going on, it isn't clear what we're
really doing.
Thanks,
Zach
Zachary Amsden wrote:
>
> BTW, it isn't ever safe to pass port 0x80 through to hardware from a
> virtual machine; some OSes use port 0x80 as a hardware available scratch
> register (I believe Darwin/x86 did/does this during boot).
That's funny, because there is definitely no guarantee that you get back
what you read (well, perhaps there is on Apple.)
-hpa
On Tue, 08 Jan 2008 18:52:42 -0800
Zachary Amsden <[email protected]> wrote:
> On Tue, 2008-01-08 at 14:15 -0500, David P. Reed wrote:
> > Alan Cox wrote:
> > > The natsemi docs here say otherwise. I trust them not you.
> > >
> > As well you should. I am honestly curious (for my own satisfaction)
> > as to what the natsemi docs say the delay code should do (can't
> > imagine they say "use io port 80 because it is unused"). I don't
> > have any
>
> What is the outcome of this thread? Are we going to use timing based
> port delays, or can we finally drop these things entirely on 64-bit
> architectures?
>
> I a have a doubly vested interest in this, both as the owner of an
> affected HP dv9210us laptop and as a maintainer of paravirt code - and
> would like 64-bit Linux code to stop using I/O to port 0x80 in both
> cases (as I suspect would every other person involved with
> virtualization).
>
> I've tried to follow this thread, but with all the jabs, 1-ups, and
> obscure legacy hardware pageantry going on, it isn't clear what we're
> really doing.
I believe Alan Cox is doing a review of some drivers, to see if they
actually need the I/O port delay. A lot of drivers probably use outb_p
just because it was copy-pasted from some other driver and it can be
removed. Alan's review has also brought to light a lack of locking in
some drivers, so I think Alan has been adding proper locking to some of
the watchdog drivers.
Most old ISA only device drivers can keep using OUT 80h. They are not
used on modern machines and it's better to keep them unchanged to avoid
unnecessary incompatibilities.
As far as I know, the 8253 PIT timer code needs outb_p on some older
platform, and this is one of the most troublesome since the same PIT
controller (or a register compatible one) has been used since the
original IBM PC, and it is frequently executed code. Ingo Molnar has
done an alternate implementation of the PIT clock source which uses
udelay instead of OUT 80h to delay accesses to the ports. The kernel
could make a choice of which variant to use based on the DMI year, if
compiling for x86_64, or something similar. Maybe have a command line
option too.
The keyboard controller on some platform needs the delay, and the same
driver is used on both ancient and modern systems, I think it can be
changed to udelay since it's not so time critical code.
The 8259 interrupt controller on some platform needs the delay, I think
it can be changed to udelay since it's only some setup code that uses
outb_p. I guess there are time critical accesses to the interrupt
controller from assembly code somewhere to acknowledge interrupts, and
that code needs a review.
The floppy controller code uses outb_p. Even though there might be
floppy controllers on modern systems, I'd rather leave the floppy code
alone since it's supposed to be very fragile. If you still use
floppies you deserve what you get.
Some specific drivers, such as drivers for 8390 or 8390 clone based
network cards are also a bit troublesome, they do need outb_p (and
the delay for the original 8390 chip is specified in bus cycles), and
there can be a big performance loss if pessimistic udelays are used for
the delay. There are still a bunch of PCMCIA cards based on that chip
which means that those cards can be used with modern machines. There
are also PCI and memory mapped variants of the 8390, some of them new
designs which are only register compatible, some other designs are
using a real 8390 with a FPGA used as glue logic. I think Alan
suggested compiling two versions of that driver, one with OUT 80h, and
one with udelay. Old machines can choose the old driver, and new
machines can use the new one. Other drivers can probably do the same
thing, or if not time critical, always use a pessimistic udelay.
As for the implementation, I like the suggestion to split outb_p into
two calls, one to outb and one to isa_slow_down_io. It makes it very
obvious that it is really two function calls, and that it needs
locking. For those uses that are not ISA port accesses,
isa_slow_down_io should be changed to an appropriate udelay instead.
The goal is anyway that a modern machine should not do OUT 80h, and old
machines keep doing it since it has been working well for some 15-odd
years, both in DOS device drivers and on Linux. Using an alternate
port may be a workaround, but it's probably not a good idea since
alternate ports have received less testing and there's bound to be some
platform out there that has problems with any alternate port we
might choose. Allowing an alternate port will also add code bloat
(OUT 80h, AL becomes MOV DX, alternate_port; OUT DX, AL) for a
dubious gain.
Did I miss anything?
/Christer
Christer Weinigel wrote:
>
> Did I miss anything?
>
>
Nothing that seems *crucial* going forward for Linux. The fate of
"legacy machines" is really important to get right.
I have a small suggestion in mind that might be helpful in the future:
the "motherboard resources" discovered as PNP0C02 devices in their _CRS
settings in ACPI during ACPI PnP startup should be reserved (or
checked), and any drivers that still use port 80 implicitly should
reserve that port.
This may be too late in the boot process to make a decision not to use
port 80, and it
doesn't help decide a strategy to use an alternate port (0xED happens to
"work" on the dv9000 machines in the sense that it generates a bus
timeout on LPC, but there is no guarantee that 0xED is free on any
particular motherboard, and "unusedness" is not declared in any
BIOS/ACPI tables) or to use a udelay-based iodelay (but there is nothing
in the BIOS tables that suggest the right delays for various I/O ports
if any modern parts need them...which I question, but can't prove a
negative in general).
However, doing the reservations on such resources could generate a
warning that would help diagnose new current and future
designs including devices like the ENE KB3920 that have a port that is
defaulted to port 80 and routed to the EC for functions that the
firmware and ACPI can agree to do. Or any other ports used in new ways
and properly notified to the OS via the now-standard Wintel BIOS functions.
I don't know if /proc/ioports is being maintained, but the fact that it
doesn't contain all of those PNP0C02 resources known on my machine seems
to be a bug - which I am happy to code a patch or two for as a
contribution back to Linux, if it isn't on the way out as the /sys
hierarchy does a better job.
/sys/bus/pnp/... does get built properly and has port 80 described
properly - not as a DMA port, but as a port in use by device 05:00,
which is the motherboard resource catchall. Thus the patch would be small.
On 09-01-08 06:30, Christer Weinigel wrote:
> On Tue, 08 Jan 2008 18:52:42 -0800
> Zachary Amsden <[email protected]> wrote:
>> What is the outcome of this thread? Are we going to use timing based
>> port delays, or can we finally drop these things entirely on 64-bit
>> architectures?
>>
>> I have a doubly vested interest in this, both as the owner of an
>> affected HP dv9210us laptop and as a maintainer of paravirt code - and
>> would like 64-bit Linux code to stop using I/O to port 0x80 in both
>> cases (as I suspect would every other person involved with
>> virtualization).
>>
>> I've tried to follow this thread, but with all the jabs, 1-ups, and
>> obscure legacy hardware pageantry going on, it isn't clear what we're
>> really doing.
>
> I believe Alan Cox is doing a review of some drivers, to see if they
> actually need the I/O port delay. A lot of drivers probably use outb_p
> just because it was copy-pasted from some other driver and it can be
> removed. Alan's review has also brought to light a lack of locking in
> some drivers, so I think Alan has been adding proper locking to some of
> the watchdog drivers.
Yes, Alan should be considered to be in the driver seat here (and current
x86.git changes should be tossed).
> Most old ISA only device drivers can keep using OUT 80h. They are not
> used on modern machines and it's better to keep them unchanged to avoid
> unnecessary incompatibilities.
>
> As far as I know, the 8253 PIT timer code needs outb_p on some older
> platform, and this is one of the most troublesome since the same PIT
> controller (or a register compatible one) has been used since the
> original IBM PC, and it is frequently executed code. Ingo Molnar has
> done an alternate implementation of the PIT clock source which uses
> udelay instead of OUT 80h to delay accesses to the ports. The kernel
> could make a choice of which variant to use based on the DMI year, if
> compiling for x86_64, or something similar. Maybe have a command line
> option too.
Just udelay() should be fine after "fixing" udelay() to be somewhat usefully
defined pre-calibration.
> The keyboard controller on some platform needs the delay, and the same
> driver is used on both ancient and modern systems, I think it can be
> changed to udelay since it's not so time critical code.
>
> The 8259 interrupt controller on some platform needs the delay, I think
> it can be changed to udelay since it's only some setup code that uses
> outb_p. I guess there are time critical accesses to the interrupt
> controller from assembly code somewhere to acknowledge interrupts, and
> that code needs a review.
I'd not expect very time critical. The current outb_p use gives a delay
somewhere between .5 and 2 microseconds as per earlier survey meaning a
udelay(1) or 2 would be enough -- again, at the point that udelay() is sensible.
New machines don't use the legacy PIC anymore anyway.
> The floppy controller code uses outb_p. Even though there might be
> floppy controllers on modern systems, I'd rather leave the floppy code
> alone since it's supposed to be very fragile. If you still use
> floppies you deserve what you get.
Floppies forever. In practice, leaving it alone isn't going to matter, but
in that same practice changing it to udelay() probably doesn't either. The
ones to leave alone are the ones that are clumsy/impossible to test and the
ones such as in NIC drivers that were specifically tuned.
> Some specific drivers, such as drivers for 8390 or 8390 clone based
> network cards are also a bit troublesome, they do need outb_p (and
> the delay for the original 8390 chip is specified in bus cycles), and
> there can be a big performance loss if pessimistic udelays are used for
> the delay. There are still a bunch of PCMCIA cards based on that chip
> which means that those cards can be used with modern machines. There
> are also PCI and memory mapped variants of the 8390, some of them new
> designs which are only register compatible, some other designs are
> using a real 8390 with a FPGA used as glue logic. I think Alan
> suggested compiling two versions of that driver, one with OUT 80h, and
> one with udelay. Old machines can choose the old driver, and new
> machines can use the new one. Other drivers can probably do the same
> thing, or if not time critical, always use a pessimistic udelay.
Not sure what the final suggestion for those was either....
> As for the implementation, I like the suggestion to split outb_p into
> two calls, one to outb and one to isa_slow_down_io. It makes it very
> obvious that it is really two function calls, and that it needs
> locking. For those uses that are not ISA port accesses,
> isa_slow_down_io should be changed to an appropriate udelay instead.
... or simply deleted. The current outb_p is "outb; slow_down_io" as a macro
so that with this you also get no binary changes, making it rather easy to
prove that things do not change timing in cases where you keep the delay.
(they're not so much function calls though -- they're inlined).
> The goal is anyway that a modern machine should not do OUT 80h, and old
> machines keep doing it since it has been working well for some 15-odd
> years, both in DOS device drivers and on Linux. Using an alternate
> port may be a workaround, but it's probably not a good idea since
> alternate ports have received less testing and there's bound to be some
> platform out there that has problems with any alternate port we
> might choose.
Based on specific DMI strings this can be limited to tested machines (as in
current x86.git) but yes, that's not particularly pleasing.
> Allowing an alternate port will also add code bloat (OUT 80h, AL becomes
> MOV DX, alternate_port; OUT DX, AL) for a dubious gain.
... destroying DX while it's at it meaning this might (will) also need DX
reloads. Not an argument versus a function call, but an argument versus the
current and proposed manual "outb; slow_down_io" split.
> Did I miss anything?
Not so much it seems. Only that the only reason for the outb_p split is an
API one. Molnar wants the API cleaned up to make sure no new users of outb_p
creep in, and being explicit about what it is that you're doing is going to
take care of that.
If simple outb_p() deprecation is considered enough instead, no need to
touch anything in drivers/, only changes to "outb(); udelay()" outside drivers/.
I'd let Alan decide here.
Thanks for the roundup.
Rene.
On Tue, 1 Jan 2008, H. Peter Anvin wrote:
> It's specifically a side effect *we don't care about*, except in the
> by-now-somewhat-exotic case of 386+387 (where we indeed can't use it once user
> code has touched the FPU -- but we can fall back to 0x80 on those, a very
> small number of systems.) 486+ doesn't use this interface under Linux, since
> Linux uses the proper exception path on those processors. If Compaq had wired
> up the proper signals on the first 386 PC motherboards, we wouldn't have cared
> about it on the 386 either.
It was actually IBM who broke it with the 80286-based PC/AT because of
the BIOS compatibility -- the vector #0x10 had already been claimed by the
original PC for the video software interrupt call (apparently against
Intel's recommendation not to use low 32 interrupt vectors for such
purposes), so it could not have been reused as is for FP exception
handling without breaking existing software. I suppose a more complicated
piece of glue logic could have been used along the lines of what
eventually went into the i486, but presumably the relatively low level of
integration of the PC/AT made such additional circuits hard to justify
even if it indeed was considered.
Maciej
On Wed, 2008-01-09 at 16:27 +0100, Rene Herman wrote:
> On 09-01-08 06:30, Christer Weinigel wrote:
> I'd not expect very time critical. The current outb_p use gives a delay
> somewhere between .5 and 2 microseconds as per earlier survey meaning a
> udelay(1) or 2 would be enough -- again, at the point that udelay() is sensible.
>
> New machines don't use the legacy PIC anymore anyway.
>
> > The floppy controller code uses outb_p. Even though there might be
> > floppy controllers on modern systems, I'd rather leave the floppy code
> > alone since it's supposed to be very fragile. If you still use
> > floppies you deserve what you get.
>
> Floppies forever. In practice, leaving it alone isn't going to matter, but
> in that same practice changing it to udelay() probably doesn't either. The
> ones to leave alone are the ones that are clumsy/impossible to test and the
> ones such as in NIC drivers that were specifically tuned.
I'm speaking specifically in terms of 64-bit platforms here. Shouldn't
we unconditionally drop outb_p doing extra port I/O on 64-bit
architectures? Especially considering they don't even have an ISA bus
where the decode timing could even matter?
> If simple outb_p() deprecation is considered enough instead, no need to
> touch anything in drivers/, only changes to "outb(); udelay()" outside drivers/.
>
> I'd let Alan decide here.
Agree.
Zach
Maciej W. Rozycki wrote:
> On Tue, 1 Jan 2008, H. Peter Anvin wrote:
>
>> It's specifically a side effect *we don't care about*, except in the
>> by-now-somewhat-exotic case of 386+387 (where we indeed can't use it once user
>> code has touched the FPU -- but we can fall back to 0x80 on those, a very
>> small number of systems.) 486+ doesn't use this interface under Linux, since
>> Linux uses the proper exception path on those processors. If Compaq had wired
>> up the proper signals on the first 386 PC motherboards, we wouldn't have cared
>> about it on the 386 either.
>
> It was actually IBM who broke it with the 80286-based PC/AT because of
> the BIOS compatibility -- the vector #0x10 had already been claimed by the
> original PC for the video software interrupt call (apparently against
> Intel's recommendation not to use low 32 interrupt vectors for such
> purposes), so it could not have been reused as is for FP exception
> handling without breaking existing software. I suppose a more complicated
> piece of glue logic could have been used along the lines of what
> eventually went into the i486, but presumably the relatively low level of
> integration of the PC/AT made such additional circuits hard to justify
> even if it indeed was considered.
>
Supposedly the reason was that the DOS-less "cassette BASIC" delivered
by Microsoft used all the INT instructions except the reserved ones as a
weird bytecode interpreter. Bill Gates was fond of that kind of hacks.
-hpa
Zachary Amsden wrote:
>
> I'm speaking specifically in terms of 64-bit platforms here. Shouldn't
> we unconditionally drop outb_p doing extra port I/O on 64-bit
> architectures? Especially considering they don't even have an ISA bus
> where the decode timing could even matter?
>
Why should the bitsize of the CPU matter for this? It seems one of the
less meaningful keys for this.
Second, as I have mentioned, I don't believe this is really the case,
especially not for the PIT, which is still present -- the PIT
*semantics* has explicit timing constraints.
Third, you still have ISA devices, they're just called LPC or PC104
devices these days.
-hpa
On Wed, Jan 09, 2008 at 10:17:24AM -0800, Zachary Amsden wrote:
> On Wed, 2008-01-09 at 16:27 +0100, Rene Herman wrote:
> > On 09-01-08 06:30, Christer Weinigel wrote:
> > I'd not expect very time critical. The current outb_p use gives a delay
> > somewhere between .5 and 2 microseconds as per earlier survey meaning a
> > udelay(1) or 2 would be enough -- again, at the point that udelay() is sensible.
> >
> > New machines don't use the legacy PIC anymore anyway.
> >
> > > The floppy controller code uses outb_p. Even though there might be
> > > floppy controllers on modern systems, I'd rather leave the floppy code
> > > alone since it's supposed to be very fragile. If you still use
> > > floppies you deserve what you get.
> >
> > Floppies forever. In practice, leaving it alone isn't going to matter, but
> > in that same practice changing it to udelay() probably doesn't either. The
> > ones to leave alone are the ones that are clumsy/impossible to test and the
> > ones such as in NIC drivers that were specifically tuned.
>
> I'm speaking specifically in terms of 64-bit platforms here. Shouldn't
> we unconditionally drop outb_p doing extra port I/O on 64-bit
> architectures? Especially considering they don't even have an ISA bus
> where the decode timing could even matter?
>...
I don't think the latter statement was true - AFAIR there are Alphas
with ISA slots.
> Agree.
>
> Zach
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
Adrian Bunk wrote:
>
> I don't think the latter statement was true - AFAIR there are Alphas
> with ISA slots.
>
See subject line.
-hpa
On Wed, 09 Jan 2008 10:18:11 -0800
"H. Peter Anvin" <[email protected]> wrote:
> Zachary Amsden wrote:
> >
> > I'm speaking specifically in terms of 64-bit platforms here.
> > Shouldn't we unconditionally drop outb_p doing extra port I/O on
> > 64-bit architectures? Especially considering they don't even have
> > an ISA bus where the decode timing could even matter?
> >
>
> Why should the bitsize of the CPU matter for this? It seems one of
> the less meaningful keys for this.
Well, anything that runs x86_64 should be a fairly modern system.
> Second, as I have mentioned, I don't believe this is really the case,
> especially not for the PIT, which is still present -- the PIT
> *semantics* has explicit timing constraints.
>
> Third, you still have ISA devices, they're just called LPC or PC104
> devices these days.
Or PCMCIA. I'm still a happy user of a Zyxel ZyAIR 100B, it's one of
the most stable Wifi cards I've got running under Linux. :-)
/Christer
Hi,
David P. Reed <dpreed <at> reed.com> writes:
> And actually, if I had looked at the /sys/bus/pnp definitions, rather
> than /proc/ioports, I would have noticed that port 80 was part of a
> PNP0C02 resource set. That means exactly one thing: ACPI says that
> port 80 is NOT free to be used, for delays or anything else.
I have some computers where port 0x80 is claimed by 8237A DMA controller [1]
But in this case it seems a lazy acpi programmer that doesn't want to convert
the hole in 0x80-0x8f range...
PS : I post from gmane web interface, so I can't keep CC.
[1]
This happens with a 7-year-old siemens PIII and a new hp core2duo.
state = active
io 0x0-0xf
io 0x80-0x8f
io 0xc0-0xdf
dma 4
On Tue, 2008-01-08 at 21:19 -0800, H. Peter Anvin wrote:
> Zachary Amsden wrote:
> >
> > BTW, it isn't ever safe to pass port 0x80 through to hardware from a
> > virtual machine; some OSes use port 0x80 as a hardware available scratch
> > register (I believe Darwin/x86 did/does this during boot).
>
> That's funny, because there is definitely no guarantee that you get back
> what you read (well, perhaps there is on Apple.)
According to Phoenix Technologies book "System BIOS for IBM PCs,
Compatibles and EISA Computers, 2nd Edition", the I/O port list gives
port 0080h R/W Extra page register (temporary storage)
Despite looking, I've never seen it documented anywhere else, but I
believe it works on just about every PC platform. Except, apparently,
my laptop.
Zach
Christer Weinigel wrote:
> On Wed, 09 Jan 2008 10:18:11 -0800
> "H. Peter Anvin" <[email protected]> wrote:
>
>> Zachary Amsden wrote:
>>> I'm speaking specifically in terms of 64-bit platforms here.
>>> Shouldn't we unconditionally drop outb_p doing extra port I/O on
>>> 64-bit architectures? Especially considering they don't even have
>>> an ISA bus where the decode timing could even matter?
>>>
>> Why should the bitsize of the CPU matter for this? It seems one of
>> the less meaningful keys for this.
>
> Well, anything that runs x86_64 should be a fairly modern system.
>
Yes, but you hardly want a situation where the machine works booting a
32-bit kernel and not a 64-bit kernel, or vice versa.
Furthermore, it's not so much about "modern" versus "old", it is about
picking a certain set of bugs.
-hpa
Zachary Amsden wrote:
>
> According to Phoenix Technologies book "System BIOS for IBM PCs,
> Compatibles and EISA Computers, 2nd Edition", the I/O port list gives
>
> port 0080h R/W Extra page register (temporary storage)
>
> Despite looking, I've never seen it documented anywhere else, but I
> believe it works on just about every PC platform. Except, apparently,
> my laptop.
>
>
>
The port 80 problem was discovered by me, after months of "bisecting"
the running code around a problem with hanging when using hwclock in
64-bit mode when ACPI is on. So it kills my laptop, too, and many
current laptop motherboards designed by Quanta for HP and Compaq (dv6000,
dv9000, tx1000, apparently)
In the last couple of weeks, I was able with luck to discover that the
problem is the ENE KB3920 chip, which is the "big brother" of the KB3700
chip included in the OLPC XO "$100 laptop" made also by Quanta. I
verified this by taking my laptop apart - a fun and risky experience.
Didn't break any connectors, but I don't recommend it for those who are
not experienced disassembling laptops and cellphones, etc. The KB3920
contains an EC, an SMBus, a KBC, some watchdog timers, and a variety of
other functions that keep the laptop going, coordinating the
relationships among various peripherals. The firmware is part standard
from ENE, part OEM-specific, in this case coded by Quanta or a BIOS
subcontractor.
You can read the specsheet for the KB3700 online at laptop.org, since
the specs of the laptop are "open". The 3920's spec is confidential.
And the firmware is confidential as well for both the 3700 and 3920.
Clues as to what it does can be gleaned by reading the disassembler
output of the DSDT code in the particular laptops - though the SMM BIOS
probably also talks to it.
Modern machines have many subsystems, and the ACPI and SMBIOS coordinate
to run them; blade servers also have drawer controllers and backplane
management buses. The part that runs Linux is only part of the machine.
Your laptop isn't an aberration. It's part of the new generation of
evolved machines that take advantage of the capabilities of ACPI and
SMBIOS and DMI standards that are becoming core parts of the market.
On Wed, 2008-01-09 at 17:22 -0500, David P. Reed wrote:
> Zachary Amsden wrote:
> >
> > According to Phoenix Technologies book "System BIOS for IBM PCs,
> > Compatibles and EISA Computers, 2nd Edition", the I/O port list gives
> >
> > port 0080h R/W Extra page register (temporary storage)
> >
> > Despite looking, I've never seen it documented anywhere else, but I
> > believe it works on just about every PC platform. Except, apparently,
> > my laptop.
> >
> >
> >
> The port 80 problem was discovered by me, after months of "bisecting"
> the running code around a problem with hanging when using hwclock in
> 64-bit mode when ACPI is on. So it kills my laptop, too, and many
> current laptop motherboards designed by Quanta for HP and Compaq (dv6000,
> dv9000, tx1000, apparently)
Thanks very much for that - I was debugging this for a while too, and
eventually just shut off hwclock.
> Your laptop isn't an aberration. It's part of the new generation of
> evolved machines that take advantage of the capabilities of ACPI and
> SMBIOS and DMI standards that are becoming core parts of the market.
I beg to differ. I managed to turn the thing into a brick by upgrading
the BIOS (with the correct image, no less) in an attempt to fix it. I
just got it back from repair. I'm not sure that is positive
evolutionary development, but it certainly does make my laptop an
aberration :)
FWIW, I fixed the problem locally by recompiling, changing port 80 to
port 84 in io.h; works great, and doesn't conflict with any occupied
ports.
Zach
On 11-01-08 02:36, Zachary Amsden wrote:
> FWIW, I fixed the problem locally by recompiling, changing port 80 to
> port 84 in io.h; works great, and doesn't conflict with any occupied
> ports.
Might not give you a "proper" delay though. 0xed should be a better choice...
Rene.
Rene Herman wrote:
> On 11-01-08 02:36, Zachary Amsden wrote:
>
>> FWIW, I fixed the problem locally by recompiling, changing port 80 to
>> port 84 in io.h; works great, and doesn't conflict with any occupied
>> ports.
>
> Might not give you a "proper" delay though. 0xed should be a better
> choice...
>
I don't think there is any magic here. I modified the patch to do *no
delay at all* in the io_delay "quirk" and have been running reliably for
weeks including the very heavy I/O load that comes from using software
RAID on this nice laptop that has two separate SATA drives! This
particular laptop has no problematic devices - the only problem is
actually in the CMOS_READ and CMOS_WRITE macros that *use* the _p
operations in a way that is unnecessary on this machine. (in fact, it
would be hard to add a problematic device - there's no PCMCIA slot
either, and so every option is USB or Firewire).
Using 0xED happens to work, but it's not guaranteed to work either.
There is not a "standard" for an "unused port that is mapped to cause a
bus abort on the LPC bus". More problematic is that I would think some
people might want to turn on the AMD feature that generates machine
checks if a bus timeout happens. The whole point of machine checks is
to allow the machine to be more reliable. Using any "unused port" for
a delay means that the machine check feature is wasted and utterly unusable.
> bus abort on the LPC bus". More problematic is that I would think some
> people might want to turn on the AMD feature that generates machine
> checks if a bus timeout happens. The whole point of machine checks is
An ISA/LPC bus timeout is fulfilled by the bridge so doesn't cause an MCE.
Alan
On 11-01-08 15:35, David P. Reed wrote:
> Rene Herman wrote:
>> On 11-01-08 02:36, Zachary Amsden wrote:
>>
>>> FWIW, I fixed the problem locally by recompiling, changing port 80 to
>>> port 84 in io.h; works great, and doesn't conflict with any occupied
>>> ports.
>>
>> Might not give you a "proper" delay though. 0xed should be a better
>> choice...
>>
> I don't think there is any magic here.
Golly, you don't think so? Just commenting on his local hack. Port 0x84 is
inside the (reserved) DMA page register range and stands a better chance of
not being echoed onto ISA by various chipsets than 0xed does due to that.
Yes -- on a sane machine it's all useless anyway and with all sane machines
this discussion would've ended quite some time ago already. It's the insane,
obsolete legacy junk that's the problem.
Rene.
Alan Cox wrote:
>> bus abort on the LPC bus". More problematic is that I would think some
>> people might want to turn on the AMD feature that generates machine
>> checks if a bus timeout happens. The whole point of machine checks is
>>
>
> An ISA/LPC bus timeout is fulfilled by the bridge so doesn't cause an MCE.
>
>
>
Good possibility, but the documentation on HyperTransport suggests
otherwise, even for LPC bridges in this particular modern world of
AMD64. I might do the experiment someday to see if my LPC bridge is
implemented in a way that does or doesn't support enabling MCE's. Could
be different between Intel and AMD - I haven't had reason to pore over
the Intel chipset specs, since my poking into all this stuff has been
driven by my personal machine's issues, and it's not got any Intel
compatible parts.
David P. Reed wrote:
> Alan Cox wrote:
>>> bus abort on the LPC bus". More problematic is that I would think
>>> some people might want to turn on the AMD feature that generates
>>> machine checks if a bus timeout happens. The whole point of machine
>>> checks is
>>
>> An ISA/LPC bus timeout is fulfilled by the bridge so doesn't cause an
>> MCE.
> Good possibility, but the documentation on HyperTransport suggests
> otherwise, even for LPC bridges in this particular modern world of
> AMD64. I might do the experiment someday to see if my LPC bridge is
> implemented in a way that does or doesn't support enabling MCE's. Could
> be different between Intel and AMD - I haven't had reason to pore over
> the Intel chipset specs, since my poking into all this stuff has been
> driven by my personal machine's issues, and it's not got any Intel
> compatible parts.
If you have a subtractive decoding bridge you will have completion on HT.
-hpa
On Fri, 2008-01-11 at 09:35 -0500, David P. Reed wrote:
> Using any "unused port" for a delay means that the machine check
> feature is wasted and utterly unusable.
Not entirely unusable. You can recover silently from the machine check
if it was one of the known accesses to the 'unused port'. It certainly
achieves a delay :)
On ppc32 we recover from the machine check if it was any inb/outb --
mostly to work around crappy drivers developed on i386, I believe.
--
dwmw2
David Woodhouse wrote:
> On Fri, 2008-01-11 at 09:35 -0500, David P. Reed wrote:
>
>> Using any "unused port" for a delay means that the machine check
>> feature is wasted and utterly unusable.
>>
>
> Not entirely unusable. You can recover silently from the machine check
> if it was one of the known accesses to the 'unused port'. It certainly
> achieves a delay :)
>
I'm sure that's what the driver writers had in mind. ;-)
And I think we probably have a great shot at getting Intel, Microsoft,
HP, et al.. to add a feature for Linux to one of the ACPI table
specifications that define an "unused port for delay purposes" field in
the ACPI 4.0 spec, and retrofit it into PC/104 machine BIOSes. At least
Microsoft doesn't have a patent on using port 80 for delay purposes. :-)
David P. Reed wrote:
> I think we probably have a great shot at getting Intel, Microsoft, HP,
> et al.. to add a feature for Linux to one of the ACPI table
> specifications that define an "unused port for delay purposes" field
> in the ACPI 4.0 spec, and retrofit it into PC/104 machine BIOSes. At
> least Microsoft doesn't have a patent on using port 80 for delay
> purposes. :-)
This use of port 80 (or insert some other random number) is a croc of
hackery of the most inexperienced kind. The task to be performed is to
delay for some period, and I think it's a mix of bloody mindedness and
fear of unfamiliar code and specification that explains why a delay is
not being coded. Lest we forget, someone who should know better said
that an OUT is used because you don't know how long the delay should be
on any specific machine. What rubbish.
For what it's worth, I would oppose any attempt to amend ACPI
specifications in the way described above. It's bad enough to have that
embarrassing and unseemly hack in Linux. It would be so much worse to
enshrine the practice as industry standard practice.
I won't even mention the many instances of these delays where no delay
is what properly is needed. Performance? Who cares about performance?
On Thu, 17 Jan 2008 01:06:24 +1030
David Newall <[email protected]> wrote:
> This use of port 80 (or insert some other random number) is a croc of
> hackery of the most inexperienced kind.
Wrong. It's a carefully designed solution used by all sorts of code for
over 15 years.
> The task to be performed is to delay for some period
Wrong, it is for some number of bus clocks which is why I/O cycles are
used
> that an OUT is used because you don't know how long the delay should be
> on any specific machine. What rubbish.
Wrong again.
> I won't even mention the many instances of these delays where no delay
> is what properly is needed. Performance? Who cares about performance?
Correctness, who needs correctness ?
Alan
Alan Cox wrote:
> On Thu, 17 Jan 2008 01:06:24 +1030
> David Newall <[email protected]> wrote:
>
>
>> This use of port 80 (or insert some other random number) is a croc of
>> hackery of the most inexperienced kind.
>>
>
> Wrong. It's a carefully designed solution used by all sorts of code for
> over 15 years.
>
It's not careful: it's a croc. It's an ugly hack, an abuse of process,
and totally unnecessary. Read my comment about delays (next).
> The task to be performed is to delay for some period
>
> Wrong, it is for some number of bus clocks which is why I/O cycles are
> used
>
Wrong. It's a delay. It's a delay measured in I/O cycles, but still a
delay. Doing I/O to get a delay, even if the delay is intended to be
measured in I/O cycles, is hackery of the most inexperienced sort. It's
the sort of thing junior programmers get boxed in the ear for. There's
no satisfactory reason to do it that way.
If the hardware required an intermediate junk I/O, that would be a
reason to do one, but it doesn't, does it? It requires a delay. It's
written thus in all of the application notes.
>> that an OUT is used because you don't know how long the delay should be
>> on any specific machine. What rubbish.
>>
>
> Wrong again.
>
Wrong again. Of course one knows how long the delay should be. The bus
speed is known. The specifications of the hardware are known. Do the
math you (the programmer writing the driver, not Alan) lazy sluggard,
and use a delay. It baffles common sense to say you don't know how long
it should be.
>> I won't even mention the many instances of these delays where no delay
>> is what properly is needed. Performance? Who cares about performance?
>>
>
> Correctness, who needs correctness ?
Well, frankly, the development process could stand a little more of it.
The sooner we stop denying that this is a hack, the sooner we can fix it.
> If the hardware required an intermediate junk I/O, that would be a
> reason to do one, but it doesn't, does it? It requires a delay. It's
> written thus in all of the application notes.
And the only instruction that is synchronized to the bus in question is
an I/O instruction.
> Wrong again. Of course one knows how long the delay should be. The bus
> speed is known.
Wrong again. ISA bus speed is neither defined precisely, nor visible in a
system portable fashion.
I'm so glad you have nothing better to do than troll, if you
actually wrote code I'd be worried it might get into something people
used.
Alan
Alan Cox wrote:
>> If the hardware required an intermediate junk I/O, that would be a
>> reason to do one, but it doesn't, does it? It requires a delay. It's
>> written thus in all of the application notes.
>>
>
> And the only instruction that is synchronized to the bus in question is
> an I/O instruction.
>
This is a timing issue, isn't it? How are we synchronising, other than
by delaying for a (bus-dependent) period? The characteristics of each
bus are known so a number can be assigned for "one bus cycle", without
having to use the bus.
>> Wrong again. Of course one knows how long the delay should be. The bus
>> speed is known.
>>
>
> Wrong again. ISA bus speed is neither defined precisely, nor visible in a
> system portable fashion.
>
You say, "system portable," but I think you mean, "automatically
determined." We don't have to define this value automatically, if
that's so hard to do. We can use a tunable kernel-parameter.
> I'm so glad you have nothing better to do than troll
I'm not trolling. You know this is true because many people perceive
this to be a problem. I'm working on fixing it. Not all Linux problems
are solvable by diving into code, and there is anecdotal evidence to
believe this one has big performance considerations. I don't understand
why you are opposed to even talking about it.
> if you
> actually wrote code I'd be worried it might get into something people
> used.
Speaking of writing code: I remember working on a bluetooth Oops.
Lacking the hardware, I went to you for advice on how to get it before
someone for testing. You never replied.
> This is a timing issue, isn't it? How are we synchronising, other than
> by delaying for a (bus-dependent) period? The characteristics of each
> bus are known so a number can be assigned for "one bus cycle", without
> having to use the bus.
The characteristics of the bus are not known. It could be anything
between 6 and about 16MHz. The way you read the bus clock is system
dependent.
The underlying problem is really that over time some of the hardware has
moved from the ISA world into the chipsets. That is why I sent Ingo the
patches for inb_pit/inb_pic and to split ISA 8390 and non ISA 8390
support. Someone has to tackle the CMOS but we are then in a position to
relegate port 0x80 timing use to ISA systems where it is fine.
Alan
Alan Cox wrote:
>> This is a timing issue, isn't it? How are we synchronising, other than
>> by delaying for a (bus-dependent) period? The characteristics of each
>> bus are known so a number can be assigned for "one bus cycle", without
>> having to use the bus.
>>
>
> The characteristics of the bus are not known. It could be anything
> between 6 and about 16MHz.
In the early days of clone PCs, as you know but perhaps many on this
list might not, the bus speed could be changed, but this was
user-selectable. For such a machine, delay values can be pre-calculated
for each bus speed, and a kernel parameter set accordingly. Or are you
saying that the characteristics of the bus on a given machine vary for
reasons other than user selection?
The fact that busses run at different speeds on different machines is
not a problem because the delay value can be determined for each given
machine.
The question is, for a given machine, can we determine a delay value
instead of using a junk I/O?
On 17-01-08 14:36, David Newall wrote:
> In the early days of clone PCs, as you know but perhaps many on this
> list might not
I'm so incredibly sick of this fucking thread. We've had enough legacy farts
coming out of the woodwork advertising their own massive experience and
cluelessness by now. Both hpa and alan are in this thread and everyone else
can be ignored on the issue.
Over the course of 100 messages or so in this thread it's been determined
that the best course of action is to keep the out for ISA and replace it
with udelay() for chipset logic. Now go away.
Rene.
> In the early days of clone PCs, as you know but perhaps many on this
> list might not, the bus speed could be changed, but this was
> user-selectable. For such a machine, delay values can be pre-calculated
> for each bus speed, and a kernel parameter set accordingly. Or are you
> saying that the characteristics of the bus on a given machine vary for
> reasons other than user selection?
They vary based on the CPU clock, the dividers from PCI to ISA on PCI
based boxes, and on the ISA only ones often on the CPU speed.
Unfortunately the way you control that divider or read it is chipset
specific. Nor would it be reasonable to expect the end user to set it.
For PC/104 systems the same applies today.
> The question is, for a given machine, can we determine a delay value
> instead of using a junk I/O?
The question (for ISA peripherals) is "why bother", and with the 8390
patch there are one or two dubious PCI driver users of _p left but not
much else that isn't ISA or chipset logic. The question for chipset logic
where it has become integrated is "can we get rid of it for some devices,
if not what can we use instead"
Alan
Rene Herman wrote:
> Over the course of 100 messages or so in this thread it's been
> determined that the best course of action is to keep the out for ISA
> and replace it with udelay() for chipset logic. Now go away.
Rather than this incredible rudeness, why don't you direct your energy
towards convincing Alan of this. He's the hold-out.
On 17-01-08 22:58, David Newall wrote:
> Rene Herman wrote:
>> Over the course of 100 messages or so in this thread it's been
>> determined that the best course of action is to keep the out for ISA
>> and replace it with udelay() for chipset logic. Now go away.
>
> Rather than this incredible rudeness, why don't you direct your energy
> towards convincing Alan of this. He's the hold-out.
No he isn't and that's why I'm rude -- everything needs to be repeated over
and over and over again. Read the thread(s). You didn't limit your reply to
chipset logic and Alan even already submitted patches to isolate the delay
for the chipset logic (PIC and PIT that is) where the expectation is that a
simple udelay() will suffice.
We've already talked about ISA bus speed, and how it's not in a sane sense
portably determinable, we've already talked about kernel parameters, about
udelay and its usefulness in early boot, about how your rude "Junk I/O" is
exactly what is needed for some ISA devices and so on...
In fact, we're blue in the face from talking about it. So say something
useful or go away.
Rene
Rene,
Here is why you shouldn't leap so quickly to rudeness. Everything is
being repeated over and over and over again (as you put it) because
people like you shout down people like me without making any apparent
effort to understand the truth of the problem.
Rene Herman wrote:
> We've already talked about ISA bus speed, and how it's not in a sane
> sense portably determinable, we've already talked about kernel
> parameters, about udelay and its usefulness in early boot, about how
> your rude "Junk I/O" is exactly what is needed for some ISA devices
> and so on...
The problem is that _p is widely used for non-ISA devices. For example,
a quick grep reveals the following (and more) all use outb_p:
./i2c/busses/i2c-amd756.c
./i2c/busses/i2c-ali1535.c
./i2c/busses/i2c-ali15x3.c
./i2c/busses/i2c-i801.c
./i2c/busses/i2c-piix4.c
./i2c/busses/i2c-viapro.c
./i2c/busses/i2c-nforce2.c
./i2c/busses/i2c-ali1563.c
./telephony/ixj.c
./char/pc8736x_gpio.c
./char/epca.c
./char/dtlk.c
./char/watchdog/w83697hf_wdt.c
./char/watchdog/wafer5823wdt.c
./char/watchdog/wdt.c
./char/watchdog/sc1200wdt.c
./char/watchdog/pc87413_wdt.c
./char/watchdog/wdt_pci.c
./char/watchdog/w83977f_wdt.c
./char/watchdog/pcwd_pci.c
./char/watchdog/w83877f_wdt.c
./char/watchdog/mixcomwd.c
./char/watchdog/w83627hf_wdt.c
./char/watchdog/advantechwdt.c
./char/watchdog/ib700wdt.c
./char/watchdog/pcwd.c
./char/watchdog/wdt977.c
./char/rocket_int.h
./char/sonypi.c
Most of these go nowhere near the ISA bus. This has been said before,
but perhaps you missed that. Which is another reason to use good
manners, isn't it?
The argument that you can't know how long to delay is utter rubbish.
> In fact, we're blue in the face from talking about it. So say
> something useful or go away.
I think I'm saying something useful. I'll keep an eye out for your
humble apology. (Are you big enough to give one?) In the mean time,
perhaps you'll follow your own advice and say something useful or go
away. :-p
I hope you'll see this in the positive and constructive light that it is
intended.
On 18-01-08 14:37, David Newall wrote:
> The problem is that _p is widely used for non-ISA devices.
Yes, we know, it's being fixed. Piss off.
Rene.