2009-12-10 09:11:05

by Jens Axboe

[permalink] [raw]
Subject: current -git fails to boot on nehalem-ex

Hi,

As written in the subject, it just hard hangs before writing anything on
the console. With earlyprintk, I captured the failure, see below.

I'll try and bisect this, but it takes some time (since the bios and
post process takes forever). I just updated the firmware on the box as
well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course that
doesn't rule out a BIOS bug.

kernel /vmlinuz-2.6 root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200
[Linux-bzImage, setup=0x3200, size=0x298f90]


[ 0.000000] Linux version 2.6.32 (axboe@nehalem) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #35 SMP Thu Dec 10 09:47:51 CET 2009 *
[ 0.000000] Command line: root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200 *
[ 0.000000] BIOS-provided physical RAM map: *
[ 0.000000] BIOS-e820: 0000000000000000 - 000000000009a400 (usable) *
[ 0.000000] BIOS-e820: 000000000009a400 - 00000000000a0000 (reserved) *
[ 0.000000] BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)***
[ 0.000000] BIOS-e820: 0000000000100000 - 0000000078c75000 (usable)
[ 0.000000] BIOS-e820: 0000000078c75000 - 0000000078e89000 (ACPI NVS)
[ 0.000000] BIOS-e820: 0000000078e89000 - 000000007924e000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007924e000 - 00000000792c2000 (reserved)
[ 0.000000] BIOS-e820: 00000000792c2000 - 00000000792d2000 (ACPI data)
[ 0.000000] BIOS-e820: 00000000792d2000 - 00000000792e7000 (reserved)
[ 0.000000] BIOS-e820: 00000000792e7000 - 0000000079301000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079301000 - 0000000079303000 (reserved)
[ 0.000000] BIOS-e820: 0000000079303000 - 0000000079305000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079305000 - 0000000079310000 (reserved)
[ 0.000000] BIOS-e820: 0000000079310000 - 0000000079314000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079314000 - 0000000079319000 (reserved)
[ 0.000000] BIOS-e820: 0000000079319000 - 0000000079336000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079336000 - 0000000079358000 (reserved)
[ 0.000000] BIOS-e820: 0000000079358000 - 0000000079388000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079388000 - 00000000793c9000 (reserved)
[ 0.000000] BIOS-e820: 00000000793c9000 - 000000007968f000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007968f000 - 00000000796bb000 (reserved)
[ 0.000000] BIOS-e820: 00000000796bb000 - 00000000799d8000 (ACPI data)
[ 0.000000] BIOS-e820: 00000000799d8000 - 0000000079bd8000 (ACPI NVS)
[ 0.000000] BIOS-e820: 0000000079bd8000 - 0000000079dc7000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079dc7000 - 0000000079dcb000 (reserved)
[ 0.000000] BIOS-e820: 0000000079dcb000 - 0000000079e1c000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079e1c000 - 0000000079e87000 (reserved)
[ 0.000000] BIOS-e820: 0000000079e87000 - 000000007bd5f000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bd5f000 - 000000007be4f000 (reserved)
[ 0.000000] BIOS-e820: 000000007be4f000 - 000000007bf87000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bf87000 - 000000007bfcf000 (ACPI NVS)
[ 0.000000] BIOS-e820: 000000007bfcf000 - 000000007bfff000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bfff000 - 0000000090000000 (reserved)
[ 0.000000] BIOS-e820: 00000000fc000000 - 00000000fd000000 (reserved)
[ 0.000000] BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
[ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
[ 0.000000] BIOS-e820: 0000000100000000 - 0000001080000000 (usable)
[ 0.000000] bootconsole [earlyser0] enabled
[ 0.000000] NX (Execute Disable) protection: active
[ 0.000000] DMI 2.5 present.
[ 0.000000] last_pfn = 0x1080000 max_arch_pfn = 0x400000000
[ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[ 0.000000] last_pfn = 0x78c75 max_arch_pfn = 0x400000000
[ 0.000000] init_memory_mapping: 0000000000000000-0000000078c75000
[ 0.000000] init_memory_mapping: 0000000100000000-0000001080000000
[ 0.000000] ACPI: RSDP 00000000000f0410 00024 (v02 QUANTA)
[ 0.000000] ACPI: XSDT 000000007bffe120 000BC (v01 QUANTA QSSC-S4R 00000000 01000013)
[ 0.000000] ACPI: FACP 000000007bffd000 000F4 (v04 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: DSDT 000000007bfe3000 19BAD (v02 QUANTA QSSC-S4R 00000003 MSFT 0100000D)
[ 0.000000] ACPI: FACS 000000007bf87000 00040
[ 0.000000] ACPI: APIC 000000007bfe2000 003E4 (v02 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: MSCT 000000007bfe1000 00090 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: MCFG 000000007bfe0000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: HPET 000000007bfdf000 00038 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SLIT 000000007bfde000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SRAT 000000007bfdd000 00930 (v02 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SPCR 000000007bfdc000 00050 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: WDDT 000000007bfdb000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: SSDT 000000007bf4a000 3CFA4 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
[ 0.000000] ACPI: SSDT 000000007bfda000 00174 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
[ 0.000000] ACPI: PMCT 000000007bfd9000 00064 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: MIGT 000000007bfd8000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: TCPA 000000007bfd5000 00032 (v00 QUANTA QSSC-S4R 00000000 00000000)
[ 0.000000] ACPI: HEST 000000007bfd4000 005D0 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: BERT 000000007bfd3000 00030 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: ERST 000000007bfd2000 00230 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: EINJ 000000007bfd1000 00130 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: DMAR 000000007bfd0000 00350 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #35
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffff813c9eeb>] panic+0xa0/0x16a
[ 0.000000] [<ffffffff8167d280>] ? reserve_early_overlap_ok+0x2e/0x39
[ 0.000000] [<ffffffff8167d234>] ? drop_overlaps_that_are_ok+0x101/0x11f
[ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
[ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
[ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
[ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
[ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
[ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
[ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
[ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2

--
Jens Axboe


2009-12-10 10:39:46

by Jens Axboe

[permalink] [raw]
Subject: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Jens Axboe wrote:
> Hi,
>
> As written in the subject, it just hard hangs before writing anything on
> the console. With earlyprintk, I captured the failure, see below.
>
> I'll try and bisect this, but it takes some time (since the bios and
> post process takes forever). I just updated the firmware on the box as
> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course that
> doesn't rule out a BIOS bug.

Results are persistent, git bisect points to:

commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
Author: Yinghai Lu <[email protected]>
Date: Tue Nov 24 02:48:18 2009 -0800

x86: Move find_smp_config() earlier and avoid bootmem usage

which appears consistent with the panic(). Reverting that does indeed
make current -git boot properly.

--
Jens Axboe

2009-12-10 16:07:00

by Ingo Molnar

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)


* Jens Axboe <[email protected]> wrote:

> On Thu, Dec 10 2009, Jens Axboe wrote:
> > Hi,
> >
> > As written in the subject, it just hard hangs before writing anything on
> > the console. With earlyprintk, I captured the failure, see below.
> >
> > I'll try and bisect this, but it takes some time (since the bios and
> > post process takes forever). I just updated the firmware on the box as
> > well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course that
> > doesn't rule out a BIOS bug.
>
> Results are persistent, git bisect points to:
>
> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
> Author: Yinghai Lu <[email protected]>
> Date: Tue Nov 24 02:48:18 2009 -0800
>
> x86: Move find_smp_config() earlier and avoid bootmem usage
>
> which appears consistent with the panic(). Reverting that does indeed
> make current -git boot properly.

Thanks. Yinghai, do you have any ideas, or should we revert it?

Ingo

2009-12-10 17:27:23

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)





On Dec 10, 2009, at 8:06 AM, Ingo Molnar <[email protected]> wrote:

>
> * Jens Axboe <[email protected]> wrote:
>
>> On Thu, Dec 10 2009, Jens Axboe wrote:
>>> Hi,
>>>
>>> As written in the subject, it just hard hangs before writing
>>> anything on
>>> the console. With earlyprintk, I captured the failure, see below.
>>>
>>> I'll try and bisect this, but it takes some time (since the bios and
>>> post process takes forever). I just updated the firmware on the
>>> box as
>>> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course
>>> that
>>> doesn't rule out a BIOS bug.
>>
>> Results are persistent, git bisect points to:
>>
>> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
>> Author: Yinghai Lu <[email protected]>
>> Date: Tue Nov 24 02:48:18 2009 -0800
>>
>> x86: Move find_smp_config() earlier and avoid bootmem usage
>>
>> which appears consistent with the panic(). Reverting that does indeed
>> make current -git boot properly.
>
> Thanks. Yinghai, do you have any ideas, or should we revert it?
>
Let's find the root cause.

Jen

Can you boot with earlyconsole or earlyprintk and debug?

YH

2009-12-10 18:07:23

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Yinghai wrote:
>
>
>
>
> On Dec 10, 2009, at 8:06 AM, Ingo Molnar <[email protected]> wrote:
>
>>
>> * Jens Axboe <[email protected]> wrote:
>>
>>> On Thu, Dec 10 2009, Jens Axboe wrote:
>>>> Hi,
>>>>
>>>> As written in the subject, it just hard hangs before writing
>>>> anything on
>>>> the console. With earlyprintk, I captured the failure, see below.
>>>>
>>>> I'll try and bisect this, but it takes some time (since the bios and
>>>> post process takes forever). I just updated the firmware on the
>>>> box as
>>>> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course
>>>> that
>>>> doesn't rule out a BIOS bug.
>>>
>>> Results are persistent, git bisect points to:
>>>
>>> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
>>> Author: Yinghai Lu <[email protected]>
>>> Date: Tue Nov 24 02:48:18 2009 -0800
>>>
>>> x86: Move find_smp_config() earlier and avoid bootmem usage
>>>
>>> which appears consistent with the panic(). Reverting that does indeed
>>> make current -git boot properly.
>>
>> Thanks. Yinghai, do you have any ideas, or should we revert it?
>>
> Let find the root cause
>
> Jen

Jens :-)

> Can you boot with earlyconsole or earlyprintk and debug?

The original report already included oops output from earlyprintk, it's
below as well.

kernel /vmlinuz-2.6 root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200
[Linux-bzImage, setup=0x3200, size=0x298f90]


[ 0.000000] Linux version 2.6.32 (axboe@nehalem) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #35 SMP Thu Dec 10 09:47:51 CET 2009 *
[ 0.000000] Command line: root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200 *
[ 0.000000] BIOS-provided physical RAM map: *
[ 0.000000] BIOS-e820: 0000000000000000 - 000000000009a400 (usable) *
[ 0.000000] BIOS-e820: 000000000009a400 - 00000000000a0000 (reserved) *
[ 0.000000] BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)***
[ 0.000000] BIOS-e820: 0000000000100000 - 0000000078c75000 (usable)
[ 0.000000] BIOS-e820: 0000000078c75000 - 0000000078e89000 (ACPI NVS)
[ 0.000000] BIOS-e820: 0000000078e89000 - 000000007924e000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007924e000 - 00000000792c2000 (reserved)
[ 0.000000] BIOS-e820: 00000000792c2000 - 00000000792d2000 (ACPI data)
[ 0.000000] BIOS-e820: 00000000792d2000 - 00000000792e7000 (reserved)
[ 0.000000] BIOS-e820: 00000000792e7000 - 0000000079301000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079301000 - 0000000079303000 (reserved)
[ 0.000000] BIOS-e820: 0000000079303000 - 0000000079305000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079305000 - 0000000079310000 (reserved)
[ 0.000000] BIOS-e820: 0000000079310000 - 0000000079314000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079314000 - 0000000079319000 (reserved)
[ 0.000000] BIOS-e820: 0000000079319000 - 0000000079336000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079336000 - 0000000079358000 (reserved)
[ 0.000000] BIOS-e820: 0000000079358000 - 0000000079388000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079388000 - 00000000793c9000 (reserved)
[ 0.000000] BIOS-e820: 00000000793c9000 - 000000007968f000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007968f000 - 00000000796bb000 (reserved)
[ 0.000000] BIOS-e820: 00000000796bb000 - 00000000799d8000 (ACPI data)
[ 0.000000] BIOS-e820: 00000000799d8000 - 0000000079bd8000 (ACPI NVS)
[ 0.000000] BIOS-e820: 0000000079bd8000 - 0000000079dc7000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079dc7000 - 0000000079dcb000 (reserved)
[ 0.000000] BIOS-e820: 0000000079dcb000 - 0000000079e1c000 (ACPI data)
[ 0.000000] BIOS-e820: 0000000079e1c000 - 0000000079e87000 (reserved)
[ 0.000000] BIOS-e820: 0000000079e87000 - 000000007bd5f000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bd5f000 - 000000007be4f000 (reserved)
[ 0.000000] BIOS-e820: 000000007be4f000 - 000000007bf87000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bf87000 - 000000007bfcf000 (ACPI NVS)
[ 0.000000] BIOS-e820: 000000007bfcf000 - 000000007bfff000 (ACPI data)
[ 0.000000] BIOS-e820: 000000007bfff000 - 0000000090000000 (reserved)
[ 0.000000] BIOS-e820: 00000000fc000000 - 00000000fd000000 (reserved)
[ 0.000000] BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
[ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
[ 0.000000] BIOS-e820: 0000000100000000 - 0000001080000000 (usable)
[ 0.000000] bootconsole [earlyser0] enabled
[ 0.000000] NX (Execute Disable) protection: active
[ 0.000000] DMI 2.5 present.
[ 0.000000] last_pfn = 0x1080000 max_arch_pfn = 0x400000000
[ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[ 0.000000] last_pfn = 0x78c75 max_arch_pfn = 0x400000000
[ 0.000000] init_memory_mapping: 0000000000000000-0000000078c75000
[ 0.000000] init_memory_mapping: 0000000100000000-0000001080000000
[ 0.000000] ACPI: RSDP 00000000000f0410 00024 (v02 QUANTA)
[ 0.000000] ACPI: XSDT 000000007bffe120 000BC (v01 QUANTA QSSC-S4R 00000000 01000013)
[ 0.000000] ACPI: FACP 000000007bffd000 000F4 (v04 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: DSDT 000000007bfe3000 19BAD (v02 QUANTA QSSC-S4R 00000003 MSFT 0100000D)
[ 0.000000] ACPI: FACS 000000007bf87000 00040
[ 0.000000] ACPI: APIC 000000007bfe2000 003E4 (v02 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: MSCT 000000007bfe1000 00090 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: MCFG 000000007bfe0000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: HPET 000000007bfdf000 00038 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SLIT 000000007bfde000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SRAT 000000007bfdd000 00930 (v02 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] ACPI: SPCR 000000007bfdc000 00050 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: WDDT 000000007bfdb000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: SSDT 000000007bf4a000 3CFA4 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
[ 0.000000] ACPI: SSDT 000000007bfda000 00174 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
[ 0.000000] ACPI: PMCT 000000007bfd9000 00064 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: MIGT 000000007bfd8000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
[ 0.000000] ACPI: TCPA 000000007bfd5000 00032 (v00 QUANTA QSSC-S4R 00000000 00000000)
[ 0.000000] ACPI: HEST 000000007bfd4000 005D0 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: BERT 000000007bfd3000 00030 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: ERST 000000007bfd2000 00230 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: EINJ 000000007bfd1000 00130 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
[ 0.000000] ACPI: DMAR 000000007bfd0000 00350 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
[ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #35
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffff813c9eeb>] panic+0xa0/0x16a
[ 0.000000] [<ffffffff8167d280>] ? reserve_early_overlap_ok+0x2e/0x39
[ 0.000000] [<ffffffff8167d234>] ? drop_overlaps_that_are_ok+0x101/0x11f
[ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
[ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
[ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
[ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
[ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
[ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
[ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
[ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2

--
Jens Axboe

2009-12-10 18:35:16

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

Jens Axboe wrote:
> On Thu, Dec 10 2009, Yinghai wrote:
>>
>>
>>
>> On Dec 10, 2009, at 8:06 AM, Ingo Molnar <[email protected]> wrote:
>>
>>> * Jens Axboe <[email protected]> wrote:
>>>
>>>> On Thu, Dec 10 2009, Jens Axboe wrote:
>>>>> Hi,
>>>>>
>>>>> As written in the subject, it just hard hangs before writing
>>>>> anything on
>>>>> the console. With earlyprintk, I captured the failure, see below.
>>>>>
>>>>> I'll try and bisect this, but it takes some time (since the bios and
>>>>> post process takes forever). I just updated the firmware on the
>>>>> box as
>>>>> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course
>>>>> that
>>>>> doesn't rule out a BIOS bug.
>>>> Results are persistent, git bisect points to:
>>>>
>>>> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
>>>> Author: Yinghai Lu <[email protected]>
>>>> Date: Tue Nov 24 02:48:18 2009 -0800
>>>>
>>>> x86: Move find_smp_config() earlier and avoid bootmem usage
>>>>
>>>> which appears consistent with the panic(). Reverting that does indeed
>>>> make current -git boot properly.
>>> Thanks. Yinghai, do you have any ideas, or should we revert it?
>>>
>> Let find the root cause
>>
>> Jen
>
> Jens :-)
>
>> Can you boot with earlyconsole or earlyprintk and debug?
>
> The original report already included oops output from earlyprintk, it's
> below as well.
>
> kernel /vmlinuz-2.6 root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200
> [Linux-bzImage, setup=0x3200, size=0x298f90]
>
>
> [ 0.000000] Linux version 2.6.32 (axboe@nehalem) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #35 SMP Thu Dec 10 09:47:51 CET 2009 *
> [ 0.000000] Command line: root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200 *
> [ 0.000000] BIOS-provided physical RAM map: *
> [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009a400 (usable) *
> [ 0.000000] BIOS-e820: 000000000009a400 - 00000000000a0000 (reserved) *
> [ 0.000000] BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)***
> [ 0.000000] BIOS-e820: 0000000000100000 - 0000000078c75000 (usable)
> [ 0.000000] BIOS-e820: 0000000078c75000 - 0000000078e89000 (ACPI NVS)
> [ 0.000000] BIOS-e820: 0000000078e89000 - 000000007924e000 (ACPI data)
> [ 0.000000] BIOS-e820: 000000007924e000 - 00000000792c2000 (reserved)
> [ 0.000000] BIOS-e820: 00000000792c2000 - 00000000792d2000 (ACPI data)
> [ 0.000000] BIOS-e820: 00000000792d2000 - 00000000792e7000 (reserved)
> [ 0.000000] BIOS-e820: 00000000792e7000 - 0000000079301000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079301000 - 0000000079303000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079303000 - 0000000079305000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079305000 - 0000000079310000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079310000 - 0000000079314000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079314000 - 0000000079319000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079319000 - 0000000079336000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079336000 - 0000000079358000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079358000 - 0000000079388000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079388000 - 00000000793c9000 (reserved)
> [ 0.000000] BIOS-e820: 00000000793c9000 - 000000007968f000 (ACPI data)
> [ 0.000000] BIOS-e820: 000000007968f000 - 00000000796bb000 (reserved)
> [ 0.000000] BIOS-e820: 00000000796bb000 - 00000000799d8000 (ACPI data)
> [ 0.000000] BIOS-e820: 00000000799d8000 - 0000000079bd8000 (ACPI NVS)
> [ 0.000000] BIOS-e820: 0000000079bd8000 - 0000000079dc7000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079dc7000 - 0000000079dcb000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079dcb000 - 0000000079e1c000 (ACPI data)
> [ 0.000000] BIOS-e820: 0000000079e1c000 - 0000000079e87000 (reserved)
> [ 0.000000] BIOS-e820: 0000000079e87000 - 000000007bd5f000 (ACPI data)
> [ 0.000000] BIOS-e820: 000000007bd5f000 - 000000007be4f000 (reserved)
> [ 0.000000] BIOS-e820: 000000007be4f000 - 000000007bf87000 (ACPI data)
> [ 0.000000] BIOS-e820: 000000007bf87000 - 000000007bfcf000 (ACPI NVS)
> [ 0.000000] BIOS-e820: 000000007bfcf000 - 000000007bfff000 (ACPI data)
> [ 0.000000] BIOS-e820: 000000007bfff000 - 0000000090000000 (reserved)
> [ 0.000000] BIOS-e820: 00000000fc000000 - 00000000fd000000 (reserved)
> [ 0.000000] BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
> [ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
> [ 0.000000] BIOS-e820: 0000000100000000 - 0000001080000000 (usable)
> [ 0.000000] bootconsole [earlyser0] enabled
> [ 0.000000] NX (Execute Disable) protection: active
> [ 0.000000] DMI 2.5 present.
> [ 0.000000] last_pfn = 0x1080000 max_arch_pfn = 0x400000000
> [ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
> [ 0.000000] last_pfn = 0x78c75 max_arch_pfn = 0x400000000
> [ 0.000000] init_memory_mapping: 0000000000000000-0000000078c75000
> [ 0.000000] init_memory_mapping: 0000000100000000-0000001080000000
> [ 0.000000] ACPI: RSDP 00000000000f0410 00024 (v02 QUANTA)
> [ 0.000000] ACPI: XSDT 000000007bffe120 000BC (v01 QUANTA QSSC-S4R 00000000 01000013)
> [ 0.000000] ACPI: FACP 000000007bffd000 000F4 (v04 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: DSDT 000000007bfe3000 19BAD (v02 QUANTA QSSC-S4R 00000003 MSFT 0100000D)
> [ 0.000000] ACPI: FACS 000000007bf87000 00040
> [ 0.000000] ACPI: APIC 000000007bfe2000 003E4 (v02 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: MSCT 000000007bfe1000 00090 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] ACPI: MCFG 000000007bfe0000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] ACPI: HPET 000000007bfdf000 00038 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] ACPI: SLIT 000000007bfde000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] ACPI: SRAT 000000007bfdd000 00930 (v02 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] ACPI: SPCR 000000007bfdc000 00050 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: WDDT 000000007bfdb000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: SSDT 000000007bf4a000 3CFA4 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
> [ 0.000000] ACPI: SSDT 000000007bfda000 00174 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
> [ 0.000000] ACPI: PMCT 000000007bfd9000 00064 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: MIGT 000000007bfd8000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> [ 0.000000] ACPI: TCPA 000000007bfd5000 00032 (v00 QUANTA QSSC-S4R 00000000 00000000)
> [ 0.000000] ACPI: HEST 000000007bfd4000 005D0 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> [ 0.000000] ACPI: BERT 000000007bfd3000 00030 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> [ 0.000000] ACPI: ERST 000000007bfd2000 00230 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> [ 0.000000] ACPI: EINJ 000000007bfd1000 00130 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> [ 0.000000] ACPI: DMAR 000000007bfd0000 00350 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> [ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page

mptable mpc is [12 - f011]

what a BIOS !

can you check if it works with 2.6.32 + "acpi=off"?

Thanks

YH

2009-12-10 18:41:38

by Roland Dreier

[permalink] [raw]
Subject: Re: Bisected regression


> mptable mpc is [12 - f011]

> what a BIOS !

Can you be more explicit about what the BIOS is doing wrong?

Jens, can you say what BIOS version (18, 19, 20, ...?) you're using? I
think I know exactly what box you have.

I have a pretty good pipeline to the BIOS guys and I can try to get this
cleaned up if I know the details.

Thanks,
Roland

2009-12-10 18:43:17

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

Yinghai Lu wrote:
> Jens Axboe wrote:
>> On Thu, Dec 10 2009, Yinghai wrote:
>>>
>>>
>>> On Dec 10, 2009, at 8:06 AM, Ingo Molnar <[email protected]> wrote:
>>>
>>>> * Jens Axboe <[email protected]> wrote:
>>>>
>>>>> On Thu, Dec 10 2009, Jens Axboe wrote:
>>>>>> Hi,
>>>>>>
>>>>>> As written in the subject, it just hard hangs before writing
>>>>>> anything on
>>>>>> the console. With earlyprintk, I captured the failure, see below.
>>>>>>
>>>>>> I'll try and bisect this, but it takes some time (since the bios and
>>>>>> post process takes forever). I just updated the firmware on the
>>>>>> box as
>>>>>> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course
>>>>>> that
>>>>>> doesn't rule out a BIOS bug.
>>>>> Results are persistent, git bisect points to:
>>>>>
>>>>> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
>>>>> Author: Yinghai Lu <[email protected]>
>>>>> Date: Tue Nov 24 02:48:18 2009 -0800
>>>>>
>>>>> x86: Move find_smp_config() earlier and avoid bootmem usage
>>>>>
>>>>> which appears consistent with the panic(). Reverting that does indeed
>>>>> make current -git boot properly.
>>>> Thanks. Yinghai, do you have any ideas, or should we revert it?
>>>>
>>> Let find the root cause
>>>
>>> Jen
>> Jens :-)
>>
>>> Can you boot with earlyconsole or earlyprintk and debug?
>> The original report already included oops output from earlyprintk, it's
>> below as well.
>>
>> kernel /vmlinuz-2.6 root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200
>> [Linux-bzImage, setup=0x3200, size=0x298f90]
>>
>>
>> [ 0.000000] Linux version 2.6.32 (axboe@nehalem) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #35 SMP Thu Dec 10 09:47:51 CET 2009 *
>> [ 0.000000] Command line: root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200 *
>> [ 0.000000] BIOS-provided physical RAM map: *
>> [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009a400 (usable) *
>> [ 0.000000] BIOS-e820: 000000000009a400 - 00000000000a0000 (reserved) *
>> [ 0.000000] BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)***
>> [ 0.000000] BIOS-e820: 0000000000100000 - 0000000078c75000 (usable)
>> [ 0.000000] BIOS-e820: 0000000078c75000 - 0000000078e89000 (ACPI NVS)
>> [ 0.000000] BIOS-e820: 0000000078e89000 - 000000007924e000 (ACPI data)
>> [ 0.000000] BIOS-e820: 000000007924e000 - 00000000792c2000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000792c2000 - 00000000792d2000 (ACPI data)
>> [ 0.000000] BIOS-e820: 00000000792d2000 - 00000000792e7000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000792e7000 - 0000000079301000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079301000 - 0000000079303000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079303000 - 0000000079305000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079305000 - 0000000079310000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079310000 - 0000000079314000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079314000 - 0000000079319000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079319000 - 0000000079336000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079336000 - 0000000079358000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079358000 - 0000000079388000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079388000 - 00000000793c9000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000793c9000 - 000000007968f000 (ACPI data)
>> [ 0.000000] BIOS-e820: 000000007968f000 - 00000000796bb000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000796bb000 - 00000000799d8000 (ACPI data)
>> [ 0.000000] BIOS-e820: 00000000799d8000 - 0000000079bd8000 (ACPI NVS)
>> [ 0.000000] BIOS-e820: 0000000079bd8000 - 0000000079dc7000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079dc7000 - 0000000079dcb000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079dcb000 - 0000000079e1c000 (ACPI data)
>> [ 0.000000] BIOS-e820: 0000000079e1c000 - 0000000079e87000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000079e87000 - 000000007bd5f000 (ACPI data)
>> [ 0.000000] BIOS-e820: 000000007bd5f000 - 000000007be4f000 (reserved)
>> [ 0.000000] BIOS-e820: 000000007be4f000 - 000000007bf87000 (ACPI data)
>> [ 0.000000] BIOS-e820: 000000007bf87000 - 000000007bfcf000 (ACPI NVS)
>> [ 0.000000] BIOS-e820: 000000007bfcf000 - 000000007bfff000 (ACPI data)
>> [ 0.000000] BIOS-e820: 000000007bfff000 - 0000000090000000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000fc000000 - 00000000fd000000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
>> [ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
>> [ 0.000000] BIOS-e820: 0000000100000000 - 0000001080000000 (usable)
>> [ 0.000000] bootconsole [earlyser0] enabled
>> [ 0.000000] NX (Execute Disable) protection: active
>> [ 0.000000] DMI 2.5 present.
>> [ 0.000000] last_pfn = 0x1080000 max_arch_pfn = 0x400000000
>> [ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
>> [ 0.000000] last_pfn = 0x78c75 max_arch_pfn = 0x400000000
>> [ 0.000000] init_memory_mapping: 0000000000000000-0000000078c75000
>> [ 0.000000] init_memory_mapping: 0000000100000000-0000001080000000
>> [ 0.000000] ACPI: RSDP 00000000000f0410 00024 (v02 QUANTA)
>> [ 0.000000] ACPI: XSDT 000000007bffe120 000BC (v01 QUANTA QSSC-S4R 00000000 01000013)
>> [ 0.000000] ACPI: FACP 000000007bffd000 000F4 (v04 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: DSDT 000000007bfe3000 19BAD (v02 QUANTA QSSC-S4R 00000003 MSFT 0100000D)
>> [ 0.000000] ACPI: FACS 000000007bf87000 00040
>> [ 0.000000] ACPI: APIC 000000007bfe2000 003E4 (v02 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: MSCT 000000007bfe1000 00090 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] ACPI: MCFG 000000007bfe0000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] ACPI: HPET 000000007bfdf000 00038 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] ACPI: SLIT 000000007bfde000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] ACPI: SRAT 000000007bfdd000 00930 (v02 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] ACPI: SPCR 000000007bfdc000 00050 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: WDDT 000000007bfdb000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: SSDT 000000007bf4a000 3CFA4 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
>> [ 0.000000] ACPI: SSDT 000000007bfda000 00174 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
>> [ 0.000000] ACPI: PMCT 000000007bfd9000 00064 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: MIGT 000000007bfd8000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
>> [ 0.000000] ACPI: TCPA 000000007bfd5000 00032 (v00 QUANTA QSSC-S4R 00000000 00000000)
>> [ 0.000000] ACPI: HEST 000000007bfd4000 005D0 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
>> [ 0.000000] ACPI: BERT 000000007bfd3000 00030 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
>> [ 0.000000] ACPI: ERST 000000007bfd2000 00230 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
>> [ 0.000000] ACPI: EINJ 000000007bfd1000 00130 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
>> [ 0.000000] ACPI: DMAR 000000007bfd0000 00350 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
>> [ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
>> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
>
> mptable mpc is [12 - f011]
>
> what a BIOS !
>
> can you check if it works with 2.6.32 + "acpi=off"?
>

Please check whether this one works around it.

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d17d482..f60acec 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -732,7 +732,7 @@ struct early_res {
char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
{}
};

2009-12-10 18:44:24

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression

On Thu, Dec 10 2009, Roland Dreier wrote:
>
> > mptable mpc is [12 - f011]
>
> > what a BIOS !
>
> Can you be more explicit about what the BIOS is doing wrong?

I was going to ask the same, so it could be reported and fixed!

> Jens, can you say what BIOS version (18, 19, 20, ...?) you're using? I
> think I know exactly what box you have.

This is R20.

> I have a pretty good pipeline to the BIOS guys and I can try to get this
> cleaned up if I know the details.

Cool, that'd be nice!

--
Jens Axboe

2009-12-10 18:45:40

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression

Roland Dreier wrote:
> > mptable mpc is [12 - f011]
>
> > what a BIOS !
>
> Can you be more explicit about what the BIOS is doing wrong?
>

The BIOS should not put the mpc (MP configuration table) there.

YH

2009-12-10 18:45:07

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Yinghai Lu wrote:
> Yinghai Lu wrote:
> > Jens Axboe wrote:
> >> On Thu, Dec 10 2009, Yinghai wrote:
> >>>
> >>>
> >>> On Dec 10, 2009, at 8:06 AM, Ingo Molnar <[email protected]> wrote:
> >>>
> >>>> * Jens Axboe <[email protected]> wrote:
> >>>>
> >>>>> On Thu, Dec 10 2009, Jens Axboe wrote:
> >>>>>> Hi,
> >>>>>>
> >>>>>> As written in the subject, it just hard hangs before writing
> >>>>>> anything on
> >>>>>> the console. With earlyprintk, I captured the failure, see below.
> >>>>>>
> >>>>>> I'll try and bisect this, but it takes some time (since the bios and
> >>>>>> post process takes forever). I just updated the firmware on the
> >>>>>> box as
> >>>>>> well, but it did boot 2.6.32 (and RHEL5 boots fine too). Of course
> >>>>>> that
> >>>>>> doesn't rule out a BIOS bug.
> >>>>> Results are persistent, git bisect points to:
> >>>>>
> >>>>> commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
> >>>>> Author: Yinghai Lu <[email protected]>
> >>>>> Date: Tue Nov 24 02:48:18 2009 -0800
> >>>>>
> >>>>> x86: Move find_smp_config() earlier and avoid bootmem usage
> >>>>>
> >>>>> which appears consistent with the panic(). Reverting that does indeed
> >>>>> make current -git boot properly.
> >>>> Thanks. Yinghai, do you have any ideas, or should we revert it?
> >>>>
> >>> Let find the root cause
> >>>
> >>> Jen
> >> Jens :-)
> >>
> >>> Can you boot with earlyconsole or earlyprintk and debug?
> >> The original report already included oops output from earlyprintk, it's
> >> below as well.
> >>
> >> kernel /vmlinuz-2.6 root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyp
> >> rintk=serial,ttyS0,115200
> >> [Linux-bzImage, setup=0x3200, size=0x298f90]
> >>
> >>
> >> [ 0.000000] Linux version 2.6.32 (axboe@nehalem) (gcc version 4.3.2 (Debian 4.3.2-1.1) ) #35 SMP Thu Dec 10 09:47:51 CET 2009 *
> >> [ 0.000000] Command line: root=/dev/sda3 ro console=ttyS0,115200 console=tty0 earlyprintk=serial,ttyS0,115200 *
> >> [ 0.000000] BIOS-provided physical RAM map: *
> >> [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009a400 (usable) *
> >> [ 0.000000] BIOS-e820: 000000000009a400 - 00000000000a0000 (reserved) *
> >> [ 0.000000] BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)***
> >> [ 0.000000] BIOS-e820: 0000000000100000 - 0000000078c75000 (usable)
> >> [ 0.000000] BIOS-e820: 0000000078c75000 - 0000000078e89000 (ACPI NVS)
> >> [ 0.000000] BIOS-e820: 0000000078e89000 - 000000007924e000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 000000007924e000 - 00000000792c2000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000792c2000 - 00000000792d2000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 00000000792d2000 - 00000000792e7000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000792e7000 - 0000000079301000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079301000 - 0000000079303000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079303000 - 0000000079305000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079305000 - 0000000079310000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079310000 - 0000000079314000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079314000 - 0000000079319000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079319000 - 0000000079336000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079336000 - 0000000079358000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079358000 - 0000000079388000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079388000 - 00000000793c9000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000793c9000 - 000000007968f000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 000000007968f000 - 00000000796bb000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000796bb000 - 00000000799d8000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 00000000799d8000 - 0000000079bd8000 (ACPI NVS)
> >> [ 0.000000] BIOS-e820: 0000000079bd8000 - 0000000079dc7000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079dc7000 - 0000000079dcb000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079dcb000 - 0000000079e1c000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 0000000079e1c000 - 0000000079e87000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000079e87000 - 000000007bd5f000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 000000007bd5f000 - 000000007be4f000 (reserved)
> >> [ 0.000000] BIOS-e820: 000000007be4f000 - 000000007bf87000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 000000007bf87000 - 000000007bfcf000 (ACPI NVS)
> >> [ 0.000000] BIOS-e820: 000000007bfcf000 - 000000007bfff000 (ACPI data)
> >> [ 0.000000] BIOS-e820: 000000007bfff000 - 0000000090000000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000fc000000 - 00000000fd000000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000fed1c000 - 00000000fed20000 (reserved)
> >> [ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved)
> >> [ 0.000000] BIOS-e820: 0000000100000000 - 0000001080000000 (usable)
> >> [ 0.000000] bootconsole [earlyser0] enabled
> >> [ 0.000000] NX (Execute Disable) protection: active
> >> [ 0.000000] DMI 2.5 present.
> >> [ 0.000000] last_pfn = 0x1080000 max_arch_pfn = 0x400000000
> >> [ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
> >> [ 0.000000] last_pfn = 0x78c75 max_arch_pfn = 0x400000000
> >> [ 0.000000] init_memory_mapping: 0000000000000000-0000000078c75000
> >> [ 0.000000] init_memory_mapping: 0000000100000000-0000001080000000
> >> [ 0.000000] ACPI: RSDP 00000000000f0410 00024 (v02 QUANTA)
> >> [ 0.000000] ACPI: XSDT 000000007bffe120 000BC (v01 QUANTA QSSC-S4R 00000000 01000013)
> >> [ 0.000000] ACPI: FACP 000000007bffd000 000F4 (v04 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: DSDT 000000007bfe3000 19BAD (v02 QUANTA QSSC-S4R 00000003 MSFT 0100000D)
> >> [ 0.000000] ACPI: FACS 000000007bf87000 00040
> >> [ 0.000000] ACPI: APIC 000000007bfe2000 003E4 (v02 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: MSCT 000000007bfe1000 00090 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] ACPI: MCFG 000000007bfe0000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] ACPI: HPET 000000007bfdf000 00038 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] ACPI: SLIT 000000007bfde000 0003C (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] ACPI: SRAT 000000007bfdd000 00930 (v02 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] ACPI: SPCR 000000007bfdc000 00050 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: WDDT 000000007bfdb000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: SSDT 000000007bf4a000 3CFA4 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
> >> [ 0.000000] ACPI: SSDT 000000007bfda000 00174 (v02 QUANTA QSSC-S4R 00004000 INTL 20061109)
> >> [ 0.000000] ACPI: PMCT 000000007bfd9000 00064 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: MIGT 000000007bfd8000 00040 (v01 QUANTA QSSC-S4R 00000000 MSFT 0100000D)
> >> [ 0.000000] ACPI: TCPA 000000007bfd5000 00032 (v00 QUANTA QSSC-S4R 00000000 00000000)
> >> [ 0.000000] ACPI: HEST 000000007bfd4000 005D0 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> >> [ 0.000000] ACPI: BERT 000000007bfd3000 00030 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> >> [ 0.000000] ACPI: ERST 000000007bfd2000 00230 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> >> [ 0.000000] ACPI: EINJ 000000007bfd1000 00130 (v01 QUANTA QSSC-S4R 00000001 INTL 00000001)
> >> [ 0.000000] ACPI: DMAR 000000007bfd0000 00350 (v01 QUANTA QSSC-S4R 00000001 MSFT 0100000D)
> >> [ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
> >> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
> >
> > mptable mpc is [12 - f011]
> >
> > what a BIOS !
> >
> > can you check if it works with 2.6.32 + "acpi=off"?

I'll try that.

> please check if this one could workaround it.
>
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index d17d482..f60acec 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -732,7 +732,7 @@ struct early_res {
> char overlap_ok;
> };
> static struct early_res early_res[MAX_EARLY_RES] __initdata = {
> - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
> + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
> {}
> };

And then this. Results in 10-15min.

--
Jens Axboe

2009-12-10 19:07:38

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Jens Axboe wrote:
> > >> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
> > >
> > > mptable mpc is [12 - f011]
> > >
> > > what a BIOS !
> > >
> > > can you check if it works with 2.6.32 + "acpi=off"?
>
> I'll try that.

Yes, 2.6.32 boots fine with acpi=off passed.

> > please check if this one could workaround it.
> >
> > diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> > index d17d482..f60acec 100644
> > --- a/arch/x86/kernel/e820.c
> > +++ b/arch/x86/kernel/e820.c
> > @@ -732,7 +732,7 @@ struct early_res {
> > char overlap_ok;
> > };
> > static struct early_res early_res[MAX_EARLY_RES] __initdata = {
> > - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
> > + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
> > {}
> > };
>
> And then this. Results in 10-15min.

Doesn't work, it just complains about a different overlap:

[ 0.000000] Kernel panic - not syncing: Overlapping early
reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #51
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffff813c709b>] panic+0xa0/0x16a
[ 0.000000] [<ffffffff8167d280>] ?
reserve_early_overlap_ok+0x2e/0x39
[ 0.000000] [<ffffffff8167d24a>] ?
drop_overlaps_that_are_ok+0x117/0x11f
[ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
[ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
[ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
[ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
[ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
[ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
[ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
[ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2


--
Jens Axboe

2009-12-10 19:16:44

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression

Jens Axboe wrote:
> On Thu, Dec 10 2009, Roland Dreier wrote:
>> > mptable mpc is [12 - f011]
>>
>> > what a BIOS !
>>
>> Can you be more explicit about what the BIOS is doing wrong?

Especially when the mpc is about 64k, the BIOS should put it in [xxxx, 0x100000) and reserve it in e820.

YH

2009-12-10 19:20:31

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

Jens Axboe wrote:
> On Thu, Dec 10 2009, Jens Axboe wrote:
>>>>> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
>>>> mptable mpc is [12 - f011]
>>>>
>>>> what a BIOS !
>>>>
>>>> can you check if it works with 2.6.32 + "acpi=off"?
>> I'll try that.
>
> Yes, 2.6.32 boots fine with acpi=off passed.
>
>>> please check if this one could workaround it.
>>>
>>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>>> index d17d482..f60acec 100644
>>> --- a/arch/x86/kernel/e820.c
>>> +++ b/arch/x86/kernel/e820.c
>>> @@ -732,7 +732,7 @@ struct early_res {
>>> char overlap_ok;
>>> };
>>> static struct early_res early_res[MAX_EARLY_RES] __initdata = {
>>> - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
>>> + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
>>> {}
>>> };
>> And then this. Results in 10-15min.
>
> Doesn't work, it just complains about a different overlap:
>
> [ 0.000000] Kernel panic - not syncing: Overlapping early
> reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE
> [ 0.000000]
> [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #51
> [ 0.000000] Call Trace:
> [ 0.000000] [<ffffffff813c709b>] panic+0xa0/0x16a
> [ 0.000000] [<ffffffff8167d280>] ?
> reserve_early_overlap_ok+0x2e/0x39
> [ 0.000000] [<ffffffff8167d24a>] ?
> drop_overlaps_that_are_ok+0x117/0x11f
> [ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
> [ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
> [ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
> [ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
> [ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
> [ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
> [ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
> [ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2
>
>

that is funny. we may need to use find early for our TRAMPOLINE...?

otherwise your mptable is corrupted....

YH

2009-12-10 19:26:59

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Yinghai Lu wrote:
> Jens Axboe wrote:
> > On Thu, Dec 10 2009, Jens Axboe wrote:
> >>>>> [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
> >>>> mptable mpc is [12 - f011]
> >>>>
> >>>> what a BIOS !
> >>>>
> >>>> can you check if it works with 2.6.32 + "acpi=off"?
> >> I'll try that.
> >
> > Yes, 2.6.32 boots fine with acpi=off passed.
> >
> >>> please check if this one could workaround it.
> >>>
> >>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> >>> index d17d482..f60acec 100644
> >>> --- a/arch/x86/kernel/e820.c
> >>> +++ b/arch/x86/kernel/e820.c
> >>> @@ -732,7 +732,7 @@ struct early_res {
> >>> char overlap_ok;
> >>> };
> >>> static struct early_res early_res[MAX_EARLY_RES] __initdata = {
> >>> - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
> >>> + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
> >>> {}
> >>> };
> >> And then this. Results in 10-15min.
> >
> > Doesn't work, it just complains about a different overlap:
> >
> > [ 0.000000] Kernel panic - not syncing: Overlapping early
> > reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE
> > [ 0.000000]
> > [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #51
> > [ 0.000000] Call Trace:
> > [ 0.000000] [<ffffffff813c709b>] panic+0xa0/0x16a
> > [ 0.000000] [<ffffffff8167d280>] ?
> > reserve_early_overlap_ok+0x2e/0x39
> > [ 0.000000] [<ffffffff8167d24a>] ?
> > drop_overlaps_that_are_ok+0x117/0x11f
> > [ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
> > [ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
> > [ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
> > [ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
> > [ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
> > [ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
> > [ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
> > [ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2
> >
> >
>
> that is funny. we may need to use find early for our TRAMPOLINE...?
>
> otherwise your mptable is corrupted....

Roland, are you filing a report for this?

I can test other patches if you have good ideas, otherwise I suggest we
revert the commit.

--
Jens Axboe

2009-12-10 19:38:53

by Roland Dreier

[permalink] [raw]
Subject: Re: Bisected regression


> that is funny. we may need to use find early for our TRAMPOLINE...?

> otherwise your mptable is corrupted....

Wouldn't surprise me if mptable is messed up... most likely BIOS people
take more care to get ACPI right.

I'm not an expert on this stuff, but do we need the mptable if ACPI info
is there?

- R.

2009-12-10 19:34:27

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression

Roland Dreier wrote:
> > that is funny. we may need to use find early for our TRAMPOLINE...?
>
> > otherwise your mptable is corrupted....
>
> Wouldn't surprise me if mptable is messed up... most likely BIOS people
> take more care to get ACPI right.
>
> I'm not an expert on this stuff, but do we need the mptable if ACPI info
> is there?

That BIOS is using the first 64k for the mptable, and after booting, that mptable is overwritten by the trampoline.

Before that happens, the kernel has already parsed the table and gotten enough info.

So when you boot with acpi=off and then try to kexec a second kernel, still with acpi=off, the second kernel will not boot.

YH

2009-12-10 19:34:43

by Roland Dreier

[permalink] [raw]
Subject: Re: Bisected regression


> Roland, are you filing a report for this?

Yes, I'll try to track this down.

> I can test other patches if you have good ideas, otherwise I suggest we
> revert the commit.

I'll definitely try to get the BIOS fixed but I do also think that the
kernel shouldn't panic on bad info from the BIOS -- we used to be able
to run fine with what the BIOS gave us. BIOSes are always going to be
crap, and we can print nasty messages in those cases, but the kernel
shouldn't panic unless things are really hopeless.

The benefit of the commit in question seemed to be code cleanup and
reducing use of bootmem -- which I don't think is enough benefit to
justify increasing fragility at runtime.

- R.

2009-12-10 19:36:28

by Roland Dreier

[permalink] [raw]
Subject: Re: Bisected regression


> that BIOS is using first 64k for mptable. and after booting those
> mptable is overwritten by trampoline.

So the fix the BIOS people should make is to move the mptable above 64K and
leave the low 64K empty?

- R.

2009-12-10 19:51:51

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression

Roland Dreier wrote:
> > that BIOS is using first 64k for mptable. and after booting those
> > mptable is overwritten by trampoline.
>
> So fix that BIOS people should do is to move the mptable above 64K and
> leave low 64K empty?

Yes. It should be placed just below 1M, and the BIOS should use e820 to reserve it.

YH

2009-12-10 21:08:26

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

Jens Axboe wrote:

> I can test other patches if you have good ideas, otherwise I suggest we
> revert the commit.

we should go further.

Please check this patch; in particular, it should fix kexec from a kernel booted with acpi=off into a second kernel with acpi=off on your system.

[PATCH] x86: use find_e820 instead of hard code trampoline addr

Jens found

after
|commit b24c2a925a9837cccf54d50aeac22ba0cbc15455
| Author: Yinghai Lu <[email protected]>
| Date: Tue Nov 24 02:48:18 2009 -0800
|
| x86: Move find_smp_config() earlier and avoid bootmem usage

[ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #35
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffff813c9eeb>] panic+0xa0/0x16a
[ 0.000000] [<ffffffff8167d280>] ? reserve_early_overlap_ok+0x2e/0x39
[ 0.000000] [<ffffffff8167d234>] ? drop_overlaps_that_are_ok+0x101/0x11f
[ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
[ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
[ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
[ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
[ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
[ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
[ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
[ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2

and

[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.32 #51
[ 0.000000] Call Trace:
[ 0.000000] [<ffffffff813c709b>] panic+0xa0/0x16a
[ 0.000000] [<ffffffff8167d280>] ? reserve_early_overlap_ok+0x2e/0x39
[ 0.000000] [<ffffffff8167d24a>] ? drop_overlaps_that_are_ok+0x117/0x11f
[ 0.000000] [<ffffffff8167d133>] drop_overlaps_that_are_ok+0x0/0x11f
[ 0.000000] [<ffffffff8167d2c6>] reserve_early+0x3b/0x3d
[ 0.000000] [<ffffffff81683248>] smp_scan_config+0xe7/0x10e
[ 0.000000] [<ffffffff816832a9>] default_find_smp_config+0x3a/0x60
[ 0.000000] [<ffffffff8167b0be>] setup_arch+0x6ae/0xa88
[ 0.000000] [<ffffffff816789ff>] start_kernel+0x82/0x412
[ 0.000000] [<ffffffff81678289>] x86_64_start_reservations+0x99/0xb9
[ 0.000000] [<ffffffff81678389>] x86_64_start_kernel+0xe0/0xf2

It turns out the BIOS is using the first 64k for the mptable without reserving it.

So try to find a good range for the real-mode trampoline instead of hard-coding it,
in case some BIOS tries to use that range for something.

Reported-by: Jens Axboe <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>

---
arch/x86/include/asm/trampoline.h | 1 -
arch/x86/kernel/e820.c | 11 ++++++++++-
arch/x86/kernel/head32.c | 2 --
arch/x86/kernel/head64.c | 2 --
arch/x86/kernel/mpparse.c | 3 ---
arch/x86/kernel/setup.c | 13 ++++++++-----
arch/x86/kernel/trampoline.c | 20 +++++++++-----------
7 files changed, 27 insertions(+), 25 deletions(-)

Index: linux-2.6/arch/x86/include/asm/trampoline.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/trampoline.h
+++ linux-2.6/arch/x86/include/asm/trampoline.h
@@ -16,7 +16,6 @@ extern unsigned long initial_code;
extern unsigned long initial_gs;

#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
-#define TRAMPOLINE_BASE 0x6000

extern unsigned long setup_trampoline(void);
extern void __init reserve_trampoline_memory(void);
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -732,7 +732,16 @@ struct early_res {
char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
+#ifdef CONFIG_X86_32
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
+
{}
};

Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_se

void __init i386_start_kernel(void)
{
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
Index: linux-2.6/arch/x86/kernel/head64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head64.c
+++ linux-2.6/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(ch
{
copy_bootdata(__va(real_mode_data));

- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -945,9 +945,6 @@ void __init early_reserve_e820_mpc_new(v
{
if (enable_update_mptable && alloc_mptable) {
u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
- startt = TRAMPOLINE_BASE;
-#endif
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
}
}
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -73,6 +73,7 @@

#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/trampoline.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -875,6 +876,13 @@ void __init setup_arch(char **cmdline_p)

reserve_brk();

+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+
+ reserve_trampoline_memory();
+
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -921,11 +929,6 @@ void __init setup_arch(char **cmdline_p)

early_acpi_boot_init();

- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
Index: linux-2.6/arch/x86/kernel/trampoline.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/trampoline.c
+++ linux-2.6/arch/x86/kernel/trampoline.c
@@ -12,21 +12,19 @@
#endif

/* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base;

void __init reserve_trampoline_memory(void)
{
-#ifdef CONFIG_X86_32
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
+ unsigned long mem;
+
/* Has to be in very low memory so we can execute real-mode AP code. */
- reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
- "TRAMPOLINE");
+ mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+ if (mem == -1L)
+ panic("Cannot allocate trampoline\n");
+
+ trampoline_base = __va(mem);
+ reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
}

/*

2009-12-10 21:15:38

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Yinghai Lu wrote:
> Jens Axboe wrote:
>
> > I can test other patches if you have good ideas, otherwise I suggest we
> > revert the commit.
>
> we should go further.
>
> please check, esp should fix that from kernel acpi=off to kexec
> second kernel with acpi=off for your system.

OK, I'll try that. The acpi=off I did was kexec booting into that
kernel, but the current kernel was not kexec'ed. It'll be tomorrow
though, system is off for today.

> [PATCH] x86: use find_e820 instead of hard code trampoline addr

I'll give it a go, thanks!

--
Jens Axboe

2009-12-10 22:40:28

by H. Peter Anvin

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On 12/10/2009 01:07 PM, Yinghai Lu wrote:
>
> it turns out BIOS is using first 64k for mptable without reserve it.
>
> so try to find good range for it instead of hard code it.
> in case some bios try to use that range for sth.
>

Is this an EFI BIOS? Otherwise, the first 64K would be clobbered anyway
by the boot loader, so it is invalid anyway... If it is EFI, it is
theoretically possible.

-hpa

2009-12-10 22:45:53

by H. Peter Anvin

[permalink] [raw]
Subject: Re: Bisected regression

On 12/10/2009 11:50 AM, Yinghai Lu wrote:
> Roland Dreier wrote:
>> > that BIOS is using first 64k for mptable. and after booting those
>> > mptable is overwritten by trampoline.
>>
>> So fix that BIOS people should do is to move the mptable above 64K and
>> leave low 64K empty?
>
> Yes. should be below and near 1M, and use e820 to reserve it.
>

Keep in mind BIOS loads and runs the OS bootloader at 0x7c00. If this
is a conventional BIOS, there is absolutely no way such a table is
intact by the time the OS runs.

The BIOS should, indeed put it either in the UMA (0xc800..0xffff) or at
the top of DOS memory (just below 640K) and reserve it in e820.
Anything else is irreparably broken.

-hpa

2009-12-10 22:46:20

by Yinghai Lu

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

H. Peter Anvin wrote:
> On 12/10/2009 01:07 PM, Yinghai Lu wrote:
>> it turns out BIOS is using first 64k for mptable without reserve it.
>>
>> so try to find good range for it instead of hard code it.
>> in case some bios try to use that range for sth.
>>
>
> Is this an EFI BIOS? Otherwise, the first 64K would be clobbered anyway
> by the boot loader, so it is invalid anyway... If it is EFI, it is
> theoretically possible.

Jens said acpi=off works with 2.6.32.

Can you specify the exact byte positions that the boot loader could touch?

YH

2009-12-10 22:47:30

by Roland Dreier

[permalink] [raw]
Subject: Re: Bisected regression


> Is this an EFI BIOS? Otherwise, the first 64K would be clobbered anyway
> by the boot loader, so it is invalid anyway... If it is EFI, it is
> theoretically possible.

Yes, EFI BIOS on a pre-release NHM EX box.

- R.

2009-12-10 22:51:07

by H. Peter Anvin

[permalink] [raw]
Subject: Re: Bisected regression

On 12/10/2009 02:47 PM, Roland Dreier wrote:
>
> > Is this an EFI BIOS? Otherwise, the first 64K would be clobbered anyway
> > by the boot loader, so it is invalid anyway... If it is EFI, it is
> > theoretically possible.
>
> Yes, EFI BIOS on an pre-release NHM EX box.
>

OK, for EFI (with no legacy BIOS support) then it is legal -- still a
bad choice.

-hpa

2009-12-10 22:55:21

by H. Peter Anvin

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On 12/10/2009 02:45 PM, Yinghai Lu wrote:
> H. Peter Anvin wrote:
>> On 12/10/2009 01:07 PM, Yinghai Lu wrote:
>>> it turns out BIOS is using first 64k for mptable without reserve it.
>>>
>>> so try to find good range for it instead of hard code it.
>>> in case some bios try to use that range for sth.
>>>
>>
>> Is this an EFI BIOS? Otherwise, the first 64K would be clobbered anyway
>> by the boot loader, so it is invalid anyway... If it is EFI, it is
>> theoretically possible.
>
> Jens said acpi=off works with 2.6.32.
>
> can you specify the exact byte position that boot loader could touched?
>

A legacy-BIOS boot loader will typically clobber all memory from 0x600
(in theory 0x501) up to the FBM point; a pointer to the FBM mark is
given by (*(uint16_t *)0x413) << 10.

-hpa

2009-12-11 08:19:03

by Jens Axboe

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)

On Thu, Dec 10 2009, Yinghai Lu wrote:
> Jens Axboe wrote:
>
> > I can test other patches if you have good ideas, otherwise I suggest we
> > revert the commit.
>
> we should go further.
>
> please check, esp should fix that from kernel acpi=off to kexec
> second kernel with acpi=off for your system.
>
> [PATCH] x86: use find_e820 instead of hard code trampoline addr

-git with this patch boots properly. kexec'ing into a kernel with
acpi=off passed works. Then kexec'ing into the same kernel again with
acpi=off works too.

So as far as I'm concerned, this patch fixes it for me. You can add my
Tested-by, too. Thanks!

--
Jens Axboe

2009-12-11 08:32:22

by Ingo Molnar

[permalink] [raw]
Subject: Re: Bisected regression (Was Re: current -git fails to boot on nehalem-ex)


* Jens Axboe <[email protected]> wrote:

> On Thu, Dec 10 2009, Yinghai Lu wrote:
> > Jens Axboe wrote:
> >
> > > I can test other patches if you have good ideas, otherwise I suggest we
> > > revert the commit.
> >
> > we should go further.
> >
> > please check, esp should fix that from kernel acpi=off to kexec
> > second kernel with acpi=off for your system.
> >
> > [PATCH] x86: use find_e820 instead of hard code trampoline addr
>
> -git with this patch boots properly. kexec'ing into a kernel with
> acpi=off passed works. Then kexec'ing into the same kernel again with
> acpi-off works too.
>
> So as far as I'm concerned, this patch fixes it for me. You can add my
> Tested-by, too. Thanks!

Thanks Jens and Yinghai, i've queued up the fix.

Ingo

2009-12-11 08:56:48

by Yinghai Lu

[permalink] [raw]
Subject: [tip:x86/urgent] x86: Use find_e820() instead of hard coded trampoline address

Commit-ID: 893f38d144a4d96d2483cd7c3801d26e1b2c23e9
Gitweb: http://git.kernel.org/tip/893f38d144a4d96d2483cd7c3801d26e1b2c23e9
Author: Yinghai Lu <[email protected]>
AuthorDate: Thu, 10 Dec 2009 13:07:22 -0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 11 Dec 2009 09:28:22 +0100

x86: Use find_e820() instead of hard coded trampoline address

Jens found the following crash/regression:

[ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80
[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page

and

[ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE

and bisected it to b24c2a9 ("x86: Move find_smp_config()
earlier and avoid bootmem usage").

It turns out the BIOS is using the first 64k for mptable,
without reserving it.

So try to find a good range for the real-mode trampoline instead of
hard coding it, in case some BIOS tries to use that range for something.

Reported-by: Jens Axboe <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Tested-by: Jens Axboe <[email protected]>
Cc: Randy Dunlap <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/trampoline.h | 1 -
arch/x86/kernel/e820.c | 11 ++++++++++-
arch/x86/kernel/head32.c | 2 --
arch/x86/kernel/head64.c | 2 --
arch/x86/kernel/mpparse.c | 3 ---
arch/x86/kernel/setup.c | 13 ++++++++-----
arch/x86/kernel/trampoline.c | 20 +++++++++-----------
7 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 90f06c2..cb507bb 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -16,7 +16,6 @@ extern unsigned long initial_code;
extern unsigned long initial_gs;

#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
-#define TRAMPOLINE_BASE 0x6000

extern unsigned long setup_trampoline(void);
extern void __init reserve_trampoline_memory(void);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d17d482..f50447d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -732,7 +732,16 @@ struct early_res {
char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
+#ifdef CONFIG_X86_32
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
+
{}
};

diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 4f8e250..5051b94 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void)

void __init i386_start_kernel(void)
{
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0b06cd7..b5a9896 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));

- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 35a57c9..40b54ce 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -945,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void)
{
if (enable_update_mptable && alloc_mptable) {
u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
- startt = TRAMPOLINE_BASE;
-#endif
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
}
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 946a311..f7b8b98 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -73,6 +73,7 @@

#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/trampoline.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -875,6 +876,13 @@ void __init setup_arch(char **cmdline_p)

reserve_brk();

+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+
+ reserve_trampoline_memory();
+
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -921,11 +929,6 @@ void __init setup_arch(char **cmdline_p)

early_acpi_boot_init();

- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-
#ifdef CONFIG_ACPI_NUMA
/*
* Parse SRAT to discover nodes.
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index cd02212..c652ef6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -12,21 +12,19 @@
#endif

/* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base;

void __init reserve_trampoline_memory(void)
{
-#ifdef CONFIG_X86_32
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
+ unsigned long mem;
+
/* Has to be in very low memory so we can execute real-mode AP code. */
- reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
- "TRAMPOLINE");
+ mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+ if (mem == -1L)
+ panic("Cannot allocate trampoline\n");
+
+ trampoline_base = __va(mem);
+ reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
}

/*