Hello,
The ES7000 has been failing to boot since the first couple of revisions of 2.6. This patch fixes the boot problem.
The patch also does some maintenance and cleanup for the es7000 subarch: APIC destinations were corrected, a missing variable initialization was added, an extraneous file was removed, etc.
The patch was created against 2.6.5, compiled cleanly, and tested on the ES7000 system.
Thanks,
--Natalie
------------------------------------
diff -Naur linux6.5/arch/i386/mach-es7000/Makefile linux-2.6.5/arch/i386/mach-es7000/Makefile
--- linux6.5/arch/i386/mach-es7000/Makefile 2004-04-04 18:22:39.000000000 -0400
+++ linux-2.6.5/arch/i386/mach-es7000/Makefile 2004-04-05 00:07:13.000000000 -0400
@@ -2,4 +2,4 @@
# Makefile for the linux kernel.
#
-obj-y := setup.o topology.o es7000.o
+obj-y := setup.o es7000.o
diff -Naur linux6.5/arch/i386/mach-es7000/es7000.c linux-2.6.5/arch/i386/mach-es7000/es7000.c
--- linux6.5/arch/i386/mach-es7000/es7000.c 2004-04-04 18:22:39.000000000 -0400
+++ linux-2.6.5/arch/i386/mach-es7000/es7000.c 2004-04-05 00:07:13.000000000 -0400
@@ -82,6 +82,7 @@
host_addr = val;
host = (struct mip_reg *)val;
host_reg = __va(host);
+ mip_port = MIP_PORT(mi->mip_info);
val = MIP_RD_LO(mi->mip_reg);
mip_addr = val;
mip = (struct mip_reg *)val;
diff -Naur linux6.5/arch/i386/mach-es7000/es7000.h linux-2.6.5/arch/i386/mach-es7000/es7000.h
--- linux6.5/arch/i386/mach-es7000/es7000.h 2004-04-04 18:22:39.000000000 -0400
+++ linux-2.6.5/arch/i386/mach-es7000/es7000.h 2004-04-05 00:07:13.000000000 -0400
@@ -32,6 +32,7 @@
#define MIP_VALID 0x0100000000000000
#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
+#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
struct mip_reg_info {
unsigned long long mip_info;
diff -Naur linux6.5/arch/i386/mach-es7000/topology.c linux-2.6.5/arch/i386/mach-es7000/topology.c
--- linux6.5/arch/i386/mach-es7000/topology.c 2004-04-04 18:22:39.000000000 -0400
+++ linux-2.6.5/arch/i386/mach-es7000/topology.c 1969-12-31 19:00:00.000000000 -0500
@@ -1,64 +0,0 @@
-/*
- * arch/i386/mach-generic/topology.c - Populate driverfs with topology information
- *
- * Written by: Matthew Dobson, IBM Corporation
- * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <[email protected]>
- */
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <asm/cpu.h>
-
-struct i386_cpu cpu_devices[NR_CPUS];
-
-#ifdef CONFIG_NUMA
-#include <linux/mmzone.h>
-#include <asm/node.h>
-
-struct i386_node node_devices[MAX_NUMNODES];
-
-static int __init topology_init(void)
-{
- int i;
-
- for (i = 0; i < num_online_nodes(); i++)
- arch_register_node(i);
- for (i = 0; i < NR_CPUS; i++)
- if (cpu_possible(i)) arch_register_cpu(i);
- return 0;
-}
-
-#else /* !CONFIG_NUMA */
-
-static int __init topology_init(void)
-{
- int i;
-
- for (i = 0; i < NR_CPUS; i++)
- if (cpu_possible(i)) arch_register_cpu(i);
- return 0;
-}
-
-#endif /* CONFIG_NUMA */
-
-subsys_initcall(topology_init);
diff -Naur linux6.5/arch/i386/kernel/mpparse.c linux-2.6.5/arch/i386/kernel/mpparse.c
--- linux6.5/arch/i386/kernel/mpparse.c 2004-04-04 18:22:39.000000000 -0400
+++ linux-2.6.5/arch/i386/kernel/mpparse.c 2004-04-05 00:07:13.000000000 -0400
@@ -969,7 +969,7 @@
*/
for (i = 0; i < mp_irq_entries; i++) {
if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
+ && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
mp_irqs[i] = intsrc;
found = 1;
break;
diff -Naur linux6.5/include/asm-i386/mach-es7000/mach_apic.h linux-2.6.5/include/asm-i386/mach-es7000/mach_apic.h
--- linux6.5/include/asm-i386/mach-es7000/mach_apic.h 2004-04-04 18:22:46.000000000 -0400
+++ linux-2.6.5/include/asm-i386/mach-es7000/mach_apic.h 2004-04-05 00:11:50.000000000 -0400
@@ -39,7 +39,7 @@
#endif
#define APIC_BROADCAST_ID (0xff)
-#define NO_IOAPIC_CHECK (0)
+#define NO_IOAPIC_CHECK (1)
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
@@ -166,10 +166,12 @@
int cpu;
int apicid;
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+ return 0xFF;
+#endif
num_bits_set = cpus_weight_const(cpumask);
- /* Return id to all */
if (num_bits_set == NR_CPUS)
- return 0xFF;
+ return cpu_to_logical_apicid(0);
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
@@ -182,7 +184,7 @@
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
printk ("%s: Not a valid mask!\n",__FUNCTION__);
- return 0xFF;
+ return cpu_to_logical_apicid(0);
}
apicid = new_apicid;
cpus_found++;
----------------------------------------------------
Could you explain this bit? Looks slightly odd, and looks like something
others might be using ...
M.
> diff -Naur linux6.5/arch/i386/kernel/mpparse.c linux-2.6.5/arch/i386/kernel/mpparse.c
> --- linux6.5/arch/i386/kernel/mpparse.c 2004-04-04 18:22:39.000000000 -0400
> +++ linux-2.6.5/arch/i386/kernel/mpparse.c 2004-04-05 00:07:13.000000000 -0400
> @@ -969,7 +969,7 @@
> */
> for (i = 0; i < mp_irq_entries; i++) {
> if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> - && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> + && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
> mp_irqs[i] = intsrc;
> found = 1;
> break;
"Protasevich, Natalie" <[email protected]> wrote:
>
> ES7000 was failing to boot since first couple revisions of 2.6. The patch fixes the boot problem.
> In the patch, some maintenance and cleanup was done for es7000 subarch, such as APIC destinations were corrected, missing initialization for the variable was added, extraneous file was removed, etc.
> The patch was created against 2.6.5, compiled cleanly, and tested on the ES7000 system.
This patch appears to cause the local-apic based time interrupts to run too
fast on my old 4-way Xeon server. A `sleep 10' takes about five seconds.
Diffing the dmesg output shows the changes which your patch caused:
--- without 2004-04-05 22:18:41.061198208 -0700
+++ with 2004-04-05 22:17:15.000000000 -0700
@@ -1,4 +1,4 @@
- IO-APIC (apicid-pin) 4-0, 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
-..TIMER: vector=0x31 pin1=2 pin2=-1
+ IO-APIC (apicid-pin) 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
+..TIMER: vector=0x31 pin1=0 pin2=-1
-number of MP IRQ sources: 15.
+number of MP IRQ sources: 16.
.... IRQ redirection table:
NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:
- 00 000 00 1 0 0 0 0 0 0 00
+ 00 00F 0F 0 0 0 0 0 1 1 31
01 00F 0F 0 0 0 0 0 1 1 39
02 00F 0F 0 0 0 0 0 1 1 31
03 00F 0F 0 0 0 0 0 1 1 41
@@ -368,7 +368,7 @@
3e 000 00 1 0 0 0 0 0 0 00
3f 000 00 1 0 0 0 0 0 0 00
IRQ to pin mappings:
-IRQ0 -> 0:2
+IRQ0 -> 0:0-> 0:2
IRQ1 -> 0:1
IRQ3 -> 0:3
IRQ4 -> 0:4
Hi Andrew,
The only line that is outside the es7000 code and could cause this is:
> --- linux6.5/arch/i386/kernel/mpparse.c 2004-04-04 18:22:39.000000000 -0400
> +++ linux-2.6.5/arch/i386/kernel/mpparse.c 2004-04-05 00:07:13.000000000 -0400
> @@ -969,7 +969,7 @@
> */
> for (i = 0; i < mp_irq_entries; i++) {
> if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> - && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> + && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
> mp_irqs[i] = intsrc;
> found = 1;
(The one that Martin asked me about)
This code handles the legacy IRQ overrides, and it used to have this line (I am researching that now).
ES7000 has pretty extensive overrides:
ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 12 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 13 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 14 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 15 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 16 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 17 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 18 low edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 19 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 20 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 23 high level)
The only device that fails to set its line correctly with the code as it is now is the IDE (line 4), and it works fine with the suggested line. In the case below, the clock gets mapped to the cascade...
I will do more testing on the generic Xeon then; it should be a good solution there. Maybe this is something for which an es7000-specific hook is needed.
Thanks,
--Natalie
-----Original Message-----
From: Andrew Morton [mailto:[email protected]]
Sent: Tuesday, April 06, 2004 12:30 AM
To: Protasevich, Natalie
Cc: [email protected]; [email protected]
Subject: Re: [PATCH] 2.6.5- es7000 subarch update
"Protasevich, Natalie" <[email protected]> wrote:
>
> ES7000 was failing to boot since first couple revisions of 2.6. The patch fixes the boot problem.
> In the patch, some maintenance and cleanup was done for es7000 subarch, such as APIC destinations were corrected, missing initialization for the variable was added, extraneous file was removed, etc.
> The patch was created against 2.6.5, compiled cleanly, and tested on the ES7000 system.
This patch appears to cause the local-apic based time interrupts to run too
fast on my old 4-way Xeon server. A `sleep 10' takes about five seconds.
Diffing the dmesg output shows the changes which your patch caused:
--- without 2004-04-05 22:18:41.061198208 -0700
+++ with 2004-04-05 22:17:15.000000000 -0700
@@ -1,4 +1,4 @@
- IO-APIC (apicid-pin) 4-0, 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
-..TIMER: vector=0x31 pin1=2 pin2=-1
+ IO-APIC (apicid-pin) 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
+..TIMER: vector=0x31 pin1=0 pin2=-1
-number of MP IRQ sources: 15.
+number of MP IRQ sources: 16.
.... IRQ redirection table:
NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:
- 00 000 00 1 0 0 0 0 0 0 00
+ 00 00F 0F 0 0 0 0 0 1 1 31
01 00F 0F 0 0 0 0 0 1 1 39
02 00F 0F 0 0 0 0 0 1 1 31
03 00F 0F 0 0 0 0 0 1 1 41
@@ -368,7 +368,7 @@
3e 000 00 1 0 0 0 0 0 0 00
3f 000 00 1 0 0 0 0 0 0 00
IRQ to pin mappings:
-IRQ0 -> 0:2
+IRQ0 -> 0:0-> 0:2
IRQ1 -> 0:1
IRQ3 -> 0:3
IRQ4 -> 0:4
Hi Andrew, Martin,
I was able to reproduce the timer problem on a generic Xeon box, where my patch caused the same thing: a duplicate entry for the timer was programmed in the IO-APIC. The fact that it worked for the es7000 was unfortunately just a coincidence :( I investigated further and found that the legacy irq override code has a deficiency, which was revealed by our override case. Please consider the following problem.
mp_config_acpi_legacy_irqs() constructs the legacy irq table (irq < 16), namely the default mp_irqs[] array, which initially has a one-to-one correspondence between pin and bus irq (pin = bus irq in the general case).
ACPI: IOAPIC (id[0x81] address[0xfec00000] global_irq_base[0x0])
IOAPIC[0]: Assigned apic_id 129
IOAPIC[0]: apic_id 129, version 4, address 0xfec00000, IRQ 0-23
Int: entry 0, type 0, pol 0, trig 0, bus 0, irq 0, 129-0
Int: entry 1, type 0, pol 0, trig 0, bus 0, irq 1, 129-1
Int: entry 3, type 0, pol 0, trig 0, bus 0, irq 3, 129-3
Int: entry 4, type 0, pol 0, trig 0, bus 0, irq 4, 129-4
Int: entry 5, type 0, pol 0, trig 0, bus 0, irq 5, 129-5
Int: entry 6, type 0, pol 0, trig 0, bus 0, irq 6, 129-6
Int: entry 7, type 0, pol 0, trig 0, bus 0, irq 7, 129-7
Int: entry 8, type 0, pol 0, trig 0, bus 0, irq 8, 129-8
Int: entry 9, type 0, pol 0, trig 0, bus 0, irq 9, 129-9
Int: entry 10, type 0, pol 0, trig 0, bus 0, irq 10, 129-10
Int: entry 11, type 0, pol 0, trig 0, bus 0, irq 11, 129-11
Int: entry 12, type 0, pol 0, trig 0, bus 0, irq 12, 129-12
Int: entry 13, type 0, pol 0, trig 0, bus 0, irq 13, 129-13
Int: entry 14, type 0, pol 0, trig 0, bus 0, irq 14, 129-14
Int: entry 15, type 0, pol 0, trig 0, bus 0, irq 15, 129-15
The override function mp_override_legacy_irq() parses ACPI INT_SRC_OVR entries, and each override entry results in a change to the corresponding mp_irqs[] array element.
The problem happens when the dest_irq (pin) is numerically smaller than the bus irq and is assigned to a higher array element (we index by the srcbus_irq) than the original bus irq with the default correspondence. The mp_irqs[] element with the new bus irq gets changed all right, but there is still the original (unmodified) element with the default one-to-one assignment above it in the array:
...
Int: mp_irq 7, type 0, pol 3, trig 1, bus 0, irq 8, 129-18
Int: mp_irq 8, type 0, pol 1, trig 3, bus 0, irq 9, 129-23
Int: mp_irq 9, type 0, pol 0, trig 0, bus 0, irq 10, 129-10
Int: mp_irq 10, type 0, pol 0, trig 0, bus 0, irq 11, 129-11
Int: mp_irq 11, type 0, pol 1, trig 1, bus 0, irq 12, 129-19
Int: mp_irq 12, type 0, pol 0, trig 0, bus 0, irq 13, 129-13 <-- original entry for bus irq (and pin) 13
Int: mp_irq 13, type 0, pol 1, trig 1, bus 0, irq 14, 129-15
Int: mp_irq 14, type 0, pol 1, trig 1, bus 0, irq 15, 129-13 <-- modified entry for bus irq 15, with override for pin 13
...
Since it is a valid entry, find_irq_entry() in setup_IO_APIC_irqs(), which searches mp_irqs[] by pin number, runs into this element first. It uses that element to program the pin and never gets to the element further down that contains the modified entry with the correct override in it.
I was able to get rid of this problem on the ES7000 with the following code:
for (i = 0; i < mp_irq_entries; i++) {
if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
&& (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
mp_irqs[i] = intsrc;
+ if (intsrc.mpc_srcbusirq > pin) {
+ int j;
+ for (j = 0; j < i; j++)
+ if (mp_irqs[j].mpc_dstirq == intsrc.mpc_dstirq)
+ mp_irqs[j].mpc_irqtype = -1;
+ }
found = 1;
break;
}
}
This resulted in the following array changes:
...
Int: mp_irq 10, type 0, pol 0, trig 0, bus 0, irq 11, 129-11
Int: mp_irq 11, type 0, pol 1, trig 1, bus 0, irq 12, 129-19
Int: mp_irq 12, type 255, pol 0, trig 0, bus 0, irq 13, 129-13 <-- invalidated element
Int: mp_irq 13, type 0, pol 1, trig 1, bus 0, irq 14, 129-15
Int: mp_irq 14, type 0, pol 1, trig 1, bus 0, irq 15, 129-13 <-- valid element
...
... and find_irq_entry() skipped the first element of mp_irqs[] with pin 13 and went on searching until it found the correct one.
This code did not affect my generic Xeon system.
Please let me know if this appears to be a viable solution to you. I can think of a couple of other possible ways, but this one seems to be the easiest...
I will appreciate any feedback and suggestions.
Thanks,
--Natalie
-----Original Message-----
From: Protasevich, Natalie
Sent: Tuesday, April 06, 2004 9:08 AM
To: 'Andrew Morton'
Cc: [email protected]; [email protected]
Subject: RE: [PATCH] 2.6.5- es7000 subarch update
Hi Andrew,
The only line that is outside the es7000 code and could cause this is:
> --- linux6.5/arch/i386/kernel/mpparse.c 2004-04-04 18:22:39.000000000 -0400
> +++ linux-2.6.5/arch/i386/kernel/mpparse.c 2004-04-05 00:07:13.000000000 -0400
> @@ -969,7 +969,7 @@
> */
> for (i = 0; i < mp_irq_entries; i++) {
> if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> - && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> + && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
> mp_irqs[i] = intsrc;
> found = 1;
(The one that Martin asked me about)
The code is for the legacy/overwrites, and used to have this line (and I am researching it now).
ES7000 has pretty extensive overrides:
ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 12 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 13 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 14 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 15 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 16 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 17 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 18 low edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 19 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 20 high edge)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 23 high level)
The only device that fails to set the line correctly with the code as it is now is the IDE (line 4) and works fine with the suggested line. In the case below, the clock gets mapped to the cascade...
I will do more testing on the generic Xeon then, it should be a good solution there. Maybe, this is something that the hook might be needed for es7000.
Thanks,
--Natalie
-----Original Message-----
From: Andrew Morton [mailto:[email protected]]
Sent: Tuesday, April 06, 2004 12:30 AM
To: Protasevich, Natalie
Cc: [email protected]; [email protected]
Subject: Re: [PATCH] 2.6.5- es7000 subarch update
"Protasevich, Natalie" <[email protected]> wrote:
>
> ES7000 was failing to boot since first couple revisions of 2.6. The patch fixes the boot problem.
> In the patch, some maintenance and cleanup was done for es7000 subarch, such as APIC destinations were corrected, missing initialization for the variable was added, extraneous file was removed, etc.
> The patch was created against 2.6.5, compiled cleanly, and tested on the ES7000 system.
This patch appears to cause the local-apic based time interrupts to run too
fast on my old 4-way Xeon server. A `sleep 10' takes about five seconds.
Diffing the dmesg output shows the changes which your patch caused:
--- without 2004-04-05 22:18:41.061198208 -0700
+++ with 2004-04-05 22:17:15.000000000 -0700
@@ -1,4 +1,4 @@
- IO-APIC (apicid-pin) 4-0, 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
-..TIMER: vector=0x31 pin1=2 pin2=-1
+ IO-APIC (apicid-pin) 4-16, 4-17, 4-18, 4-19, 4-20, 4-21, 4-22, 4-23, 4-24, 4-25, 4-26, 4-27, 4-28, 4-29, 4-30, 4-31, 4-32, 4-33, 4-34, 4-35, 4-36, 4-37, 4-38, 4-39, 4-40, 4-41, 4-42, 4-43, 4-44, 4-45, 4-46, 4-47, 4-48, 4-49, 4-50, 4-51, 4-52, 4-53, 4-54, 4-55, 4-56, 4-57, 4-58, 4-59, 4-60, 4-61, 4-62, 4-63 not connected.
+..TIMER: vector=0x31 pin1=0 pin2=-1
-number of MP IRQ sources: 15.
+number of MP IRQ sources: 16.
.... IRQ redirection table:
NR Log Phy Mask Trig IRR Pol Stat Dest Deli Vect:
- 00 000 00 1 0 0 0 0 0 0 00
+ 00 00F 0F 0 0 0 0 0 1 1 31
01 00F 0F 0 0 0 0 0 1 1 39
02 00F 0F 0 0 0 0 0 1 1 31
03 00F 0F 0 0 0 0 0 1 1 41
@@ -368,7 +368,7 @@
3e 000 00 1 0 0 0 0 0 0 00
3f 000 00 1 0 0 0 0 0 0 00
IRQ to pin mappings:
-IRQ0 -> 0:2
+IRQ0 -> 0:0-> 0:2
IRQ1 -> 0:1
IRQ3 -> 0:3
IRQ4 -> 0:4
On Mon, 12 Apr 2004, Protasevich, Natalie wrote:
> Since it is a valid entry, find_irq_entry() in setup_IO_APIC_irqs() searches the mp_irqs[] by the pin number and runs into this element first. It uses it to program the pin and never gets to the element down below that contains modified entry with a correct overwrite in it.
> I was able to get rid of this problem on the ES7000 with the following code:
>
> for (i = 0; i < mp_irq_entries; i++) {
> if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> mp_irqs[i] = intsrc;
> + if (intsrc.mpc_srcbusirq > pin) {
> + int j;
> + for (j = 0; j < i; j++)
> + if (mp_irqs[j].mpc_dstirq == intsrc.mpc_dstirq)
> + mp_irqs[j].mpc_irqtype = -1;
> + }
> found = 1;
> break;
> }
> }
> I will appreciate any feedback and suggestions.
Out of interest, doesn't this have the same effect?
Index: linux-2.6.5-mc3/arch/i386/kernel/mpparse.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.5-mc3/arch/i386/kernel/mpparse.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 mpparse.c
--- linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 9 Apr 2004 17:53:27 -0000 1.1.1.1
+++ linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 12 Apr 2004 17:49:18 -0000
@@ -968,8 +968,7 @@ void __init mp_override_legacy_irq (
* Otherwise create a new entry (e.g. gsi == 2).
*/
for (i = 0; i < mp_irq_entries; i++) {
- if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
+ if (mp_irqs[i].mpc_dstirq == pin) {
mp_irqs[i] = intsrc;
found = 1;
break;
On Mon, 12 Apr 2004, Zwane Mwaikambo wrote:
> On Mon, 12 Apr 2004, Protasevich, Natalie wrote:
>
> > Since it is a valid entry, find_irq_entry() in setup_IO_APIC_irqs() searches the mp_irqs[] by the pin number and runs into this element first. It uses it to program the pin and never gets to the element down below that contains modified entry with a correct overwrite in it.
> > I was able to get rid of this problem on the ES7000 with the following code:
> >
> > for (i = 0; i < mp_irq_entries; i++) {
> > if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> > && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> > mp_irqs[i] = intsrc;
> > + if (intsrc.mpc_srcbusirq > pin) {
> > + int j;
> > + for (j = 0; j < i; j++)
> > + if (mp_irqs[j].mpc_dstirq == intsrc.mpc_dstirq)
> > + mp_irqs[j].mpc_irqtype = -1;
> > + }
> > found = 1;
> > break;
> > }
> > }
> > I will appreciate any feedback and suggestions.
>
> Out of interest, doesn't this have the same effect?
Forgot the bus check;
Index: linux-2.6.5-mc3/arch/i386/kernel/mpparse.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.5-mc3/arch/i386/kernel/mpparse.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 mpparse.c
--- linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 9 Apr 2004 17:53:27 -0000 1.1.1.1
+++ linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 12 Apr 2004 18:31:22 -0000
@@ -968,8 +968,9 @@ void __init mp_override_legacy_irq (
* Otherwise create a new entry (e.g. gsi == 2).
*/
for (i = 0; i < mp_irq_entries; i++) {
- if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
+ if ((mp_irqs[i].srcbus == MP_ISA_BUS) &&
+ mp_irqs[i].mpc_dstirq == pin) {
+
mp_irqs[i] = intsrc;
found = 1;
break;
> > Since it is a valid entry, find_irq_entry() in setup_IO_APIC_irqs() searches the mp_irqs[] by the pin number and runs into this element first. It uses it to program the pin and never gets to the element down below that contains modified entry with a correct overwrite in it.
> > I was able to get rid of this problem on the ES7000 with the following code:
> >
> > for (i = 0; i < mp_irq_entries; i++) {
> > if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
> > && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> > mp_irqs[i] = intsrc;
> > + if (intsrc.mpc_srcbusirq > pin) {
> > + int j;
> > + for (j = 0; j < i; j++)
> > + if (mp_irqs[j].mpc_dstirq == intsrc.mpc_dstirq)
> > + mp_irqs[j].mpc_irqtype = -1;
> > + }
> > found = 1;
> > break;
> > }
> > }
> > I will appreciate any feedback and suggestions.
>
> Out of interest, doesn't this have the same effect?
Hi Zwane,
It is actually close to what I had in my initial patch when I posted it (and got in trouble with the timer IRQ0):
...
intsrc.mpc_dstirq = pin;
...
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
+ && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
This code indexes mp_irqs[] by the pin, but has a similar "miscounting" problem and adds a second element with the same bus irq. I think this code could work either way, being indexed by the pin or by the bus irq, but it has to be fixed in either case. (As I understand it, with srcbusirq the code as is fails for far fewer people than with dstirq :)
Regards,
--Natalie
>Forgot the bus check;
>Index: linux-2.6.5-mc3/arch/i386/kernel/mpparse.c
>===================================================================
>RCS file: /home/cvsroot/linux-2.6.5-mc3/arch/i386/kernel/mpparse.c,v
>retrieving revision 1.1.1.1
>diff -u -p -B -r1.1.1.1 mpparse.c
>--- linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 9 Apr 2004 17:53:27 -0000 1.1.1.1
>+++ linux-2.6.5-mc3/arch/i386/kernel/mpparse.c 12 Apr 2004 18:31:22 -0000
>@@ -968,8 +968,9 @@ void __init mp_override_legacy_irq (
> * Otherwise create a new entry (e.g. gsi == 2).
> */
> for (i = 0; i < mp_irq_entries; i++) {
>- if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
>- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
>+ if ((mp_irqs[i].srcbus == MP_ISA_BUS) &&
>+ mp_irqs[i].mpc_dstirq == pin) {
>+
> mp_irqs[i] = intsrc;
> found = 1;
> break;
On Mon, 12 Apr 2004, Protasevich, Natalie wrote:
> It is actually close to what I had in my initial patch when I posted it
> (and got in trouble with the timer IRQ0):
>
> ...
> intsrc.mpc_dstirq = pin;
> ...
> - && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
> + && (mp_irqs[i].mpc_dstirq == intsrc.mpc_dstirq)) {
>
> This code indexes mp_irqs[] by the pin, but has a similar "miscounting"
> problem and adds a second element with the same bus irq. I think this
> code could work either way being indexed by the pin or by the bus irq, but it
> has to be fixed in either case. (As I understand, with srcbusirq it
> doesn't work as is for much fewer people than with dstirq :)
Indeed it does, i'm beginning to wonder if the problem is in the mpparse
code for boxes such as Andrew's which get broken. Because really if we
overwrite the previous entry then this is acting very much like what the
function says it does. The only thing which i think makes things look
awkward with your patch is;
for (i = 0; i < mp_irq_entries; i++) {
if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
&& (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
mp_irqs[i] = intsrc;
+ if (intsrc.mpc_srcbusirq > pin) { <=======
+ int j;
+ for (j = 0; j < i; j++)
+ if (mp_irqs[j].mpc_dstirq == intsrc.mpc_dstirq)
+ mp_irqs[j].mpc_irqtype = -1;
+ }
found = 1;
break;
}
}
That just happens to get boxes like Andrew's out of the path because his
only gets broken with the irq0 override.
Natalie,
> ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 13 high edge)
I agree with your description of why the 15->13 override fails.
> ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 14 high edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 15 high edge)
These two also create an actual and potential duplicate mp_irqs[] entry.
> ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 16 high edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 17 high edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 18 low edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 19 high edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 20 high edge)
> ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 23 high level)
The ES7000 sure is a great tester of the over-ride code!
But I don't like the proposal to selectively invalidate
existing mp_irqs[] entries.
I think the proper fix is to parse the over-ride entries before
filling in the (remaining) identity mappings. This also gets
rid of the special case for IRQ2, which would be handled exactly
like the mappings to < 16 on the ES7000 above.
Perhaps I should send you a patch you can test on the ES7000,
since I don't have one of those?
In any case, I'd prefer that proposed patches to this code come
through me, since it is ACPI specific.
thanks,
-Len
Hi Len,
>The ES7000 sure is a great tester of the over-ride code!
Yes, indeed...
>But I don't like the proposal to selectively invalidate
>existing mp_irqs[] entries.
>I think the proper fix is to parse the over-ride entries before
>filling in the (remaining) identity mappings. This also gets
>rid of the special case for IRQ2, which would be handled exactly
>like the mappings to < 16 on the ES7000 above.
This is a great idea! I know that the patch I suggested was "patching the symptom", not really addressing the cause. I didn't really expect it to be used; it was mostly meant to illustrate the problem.
>Perhaps I should send you a patch you can test on the ES7000,
>since I don't have one of those?
Yes, please send me the patch and I will test it immediately.
>In any case, I'd prefer that proposed patches to this code come
>through me, since it is ACPI specific.
Actually, I was just looking for you, and this is my email to one of my friends, who I think won't get it till tomorrow: "The alternative patch I came up with I think is correct, but looks really ugly. There are like 3 or 4 places where it can be done, maybe someone could give me a hand to decide where to put the fix. Who is actually in charge of the mpparse and things like that I could cc to?"
:)
Thanks,
--Natalie
On Mon, 2004-04-12 at 23:44, Protasevich, Natalie wrote:
> >Perhaps I should send you a patch you can test on the ES7000,
> >since I don't have one of those?
>
> Yes, please send me the patch and I will test it immediately.
Natalie,
This 2.6.5 patch adds the interrupt source overrides
to mp_irqs[] before adding identity mappings for
the remaining legacy IRQS. So it handles both
the classic timer-override and the ES7000 scenario(s)
in a consistent manner.
It works on my vanilla Xeon boxes,
please test it on your ES7000. I'd love to see
the full dmesg from an ES7000 if you can grow your
CONFIG_LOG_BUF_SHIFT to capture it all.
A note about (ISA) interrupt-source-overrides and Linux.
As Maciej explained to me, the Linux convention is that
IRQs < 16 retain the IRQ# they had when in PIC mode.
This means that the classic timer override from IRQ0
to apic pin 2, is called IRQ0, not IRQ2. And it
prevents both a subsequent identity mapping on
IRQ0 or on IRQ2.
ES7000 mappings below 16 will work the same way.
IRQ15 override to pin 13 is still called IRQ15,
and it prevents a subsequent identity mapping on
either IRQ15 or IRQ13. (though conceivably,
another override could still map IRQ13 to
yet a different pin.)
This also applies to ES7000 mappings to pins > 15.
IRQ6 mapped to apic pin 16 will still be IRQ6 --
not IRQ16. Note that a request_irq(16) will not
work because IRQ6 has absconded with the pin 16
and so there is no pin associated with IRQ16.
This is all just a Linux convention,
and for the ACPI SCI I do something different.
Say the ACPI FADT tells us the SCI_INT is IRQ 9.
We use this to recognize an override from IRQ9
to say, apic pin 22. We install this as IRQ22,
not as IRQ9. This allows a subsequent identity
mapping at IRQ9 to still be used, and also allows
another device to request_irq(22) and share the
interrupt with ACPI. Both of these happen in
practice -- simultaneously.
I expect that if the Linux IRQ convention was working
on the ES7000 before, this patch will maintain
compatibility with that. However, if things were
not working as expected before, I just wanted to
point out that another convention is possible --
particularly with a sub-architecture.
cheers,
-Len
ps. note that this patch enables some debug printk's
and that it will need an x86_64 update too.
===== arch/i386/kernel/mpparse.c 1.69 vs edited =====
--- 1.69/arch/i386/kernel/mpparse.c Mon Mar 22 16:00:03 2004
+++ edited/arch/i386/kernel/mpparse.c Tue Apr 13 23:30:27 2004
@@ -929,8 +929,6 @@
u32 gsi)
{
struct mpc_config_intsrc intsrc;
- int i = 0;
- int found = 0;
int ioapic = -1;
int pin = -1;
@@ -958,28 +956,14 @@
intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
intsrc.mpc_dstirq = pin; /* INTIN# */
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+ printk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
- /*
- * If an existing [IOAPIC.PIN -> IRQ] routing entry exists we override
it.
- * Otherwise create a new entry (e.g. gsi == 2).
- */
- for (i = 0; i < mp_irq_entries; i++) {
- if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
- mp_irqs[i] = intsrc;
- found = 1;
- break;
- }
- }
- if (!found) {
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
return;
}
@@ -1010,19 +994,26 @@
intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
/*
- * Use the default configuration for the IRQs 0-15. These may be
+ * Use the default configuration for the IRQs 0-15. Unless
* overriden by (MADT) interrupt source override entries.
*/
for (i = 0; i < 16; i++) {
+ int idx;
+
+ for (idx = 0; idx < mp_irq_entries; idx++)
+ if (mp_irqs[idx].mpc_srcbus == MP_ISA_BUS &&
+ (mp_irqs[idx].mpc_srcbusirq == i ||
+ mp_irqs[idx].mpc_dstirq == i))
+ break;
- if (i == 2)
- continue; /* Don't connect IRQ2 */
+ if (idx != mp_irq_entries)
+ continue; /* IRQ already used */
intsrc.mpc_irqtype = mp_INT;
intsrc.mpc_srcbusirq = i; /* Identity mapped */
intsrc.mpc_dstirq = i;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+ printk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
===== arch/i386/kernel/acpi/boot.c 1.57 vs edited =====
--- 1.57/arch/i386/kernel/acpi/boot.c Tue Mar 30 17:05:19 2004
+++ edited/arch/i386/kernel/acpi/boot.c Tue Apr 13 22:40:47 2004
@@ -653,9 +653,6 @@
return count;
}
- /* Build a default routing table for legacy (ISA) interrupts. */
- mp_config_acpi_legacy_irqs();
-
count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR,
acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing interrupt source overrides
entry\n");
@@ -669,6 +666,9 @@
*/
if (!acpi_sci_override_gsi)
acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+ /* Fill in identity legacy mapings where no override */
+ mp_config_acpi_legacy_irqs();
count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
NR_IRQ_VECTORS);
if (count < 0) {
Hi Len,
>please test it on your ES7000. I'd love to see
>the full dmesg from an ES7000 if you can grow your
>CONFIG_LOG_BUF_SHIFT to capture it all.
The patch worked great, my system came up beautifully. Len, it was so well done, and it was fast, too - thanks! I forgot to increase this parameter, but still collected a full trace with some extra IO-APIC snapshots. For now, I am attaching a serial console trace, and later will provide you full dmesg.
>I expect that if the Linux IRQ convention was working
>on the ES7000 before, this patch will maintain
>compatibility with that. However, if things were
>not working as expected before, I just wanted to
>point out that another convention is possible --
>particularly with a sub-architecture.
Yes, the IRQ schema you described was used on the ES7000 previously on 2.6. For example, the way interrupts looked with the old interrupt code (with irq_balance off, and my tweak for the IDE):
0: 1566205 0 0 0 IO-APIC-edge timer
1: 12 0 0 0 IO-APIC-edge i8042
2: 0 0 0 0 XT-PIC cascade
4: 20 0 0 0 IO-APIC-edge serial
8: 3 0 0 0 IO-APIC-edge rtc
12: 399 0 0 0 IO-APIC-edge i8042
15: 37 0 0 0 IO-APIC-edge ide1
17: 8083 0 0 0 IO-APIC-level megaraid
20: 20141 0 0 0 IO-APIC-level eth0
23: 0 0 0 0 IO-APIC-level acpi
NMI: 0 0 0 0
LOC: 1562738 1562605 1562608 1562607
ERR: 0
MIS: 0
With your patch, it looks like this:
0: 21266 79032 0 0 IO-APIC-edge timer
1: 14 0 0 0 IO-APIC-edge i8042
2: 0 0 0 0 XT-PIC cascade
4: 19 0 0 0 IO-APIC-edge serial
8: 2 0 0 0 IO-APIC-edge rtc
12: 503 0 0 0 IO-APIC-edge i8042
15: 39 0 0 1 IO-APIC-edge ide1
17: 1762 0 0 0 IO-APIC-level megaraid
20: 868 0 0 0 IO-APIC-level eth0
23: 0 0 0 0 IO-APIC-level acpi
NMI: 0 0 0 0
LOC: 94472 94484 94483 94482
ERR: 0
MIS: 0
There were other strange schemas that I've used before in 2.4, mostly due to ongoing inconsistencies in the BIOS and ACPI. Those were eventually cleaned up and now I guess a general convention can be applied, even though the BIOS-clean IRQ schema still looks pretty exotic, as you noticed... With this patch, it looks like everything's taken care of.
Let me know if you need any particular debugging done with this patch. I'll send you the dmesg you requested with increased CONFIG_LOG_BUF_SHIFT shortly (as soon as the system becomes available today).
Thanks,
--Natalie