Hi,
this is something we're getting during testing on one of our boxes here,
a dual socket Magny-Cours machine. It is oopsing on the addr variable in
__alloc_memory_core_early() after converting it to a virtual address in
order to clear the mem region at it. I've removed the "#if 0.. #endif"
around the printk which dumps the ranges in the early node map for more
info.
It's latest -git, 32bit build, config is attached.
Here's the whole console output:
[ 0.000000] Initializing cgroup subsys cpuset
[ 0.000000] Initializing cgroup subsys cpu
[ 0.000000] Linux version 2.6.35-rc4+ (root@hernando) (gcc version 4.1.2 20070115 (SUSE Linux)) #2 SMP Fri Jul 9 16:20:13 CEST 2010
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: 0000000000000000 - 0000000000088c00 (usable)
[ 0.000000] BIOS-e820: 0000000000088c00 - 00000000000a0000 (reserved)
[ 0.000000] BIOS-e820: 00000000000cc000 - 0000000000100000 (reserved)
[ 0.000000] BIOS-e820: 0000000000100000 - 00000000c7e60000 (usable)
[ 0.000000] BIOS-e820: 00000000c7e60000 - 00000000c7e6b000 (ACPI data)
[ 0.000000] BIOS-e820: 00000000c7e6b000 - 00000000c7e6d000 (ACPI NVS)
[ 0.000000] BIOS-e820: 00000000c7e6d000 - 00000000c7eff000 (reserved)
[ 0.000000] BIOS-e820: 00000000c7f00000 - 00000000c8000000 (reserved)
[ 0.000000] BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
[ 0.000000] BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved)
[ 0.000000] BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[ 0.000000] BIOS-e820: 00000000fff00000 - 0000000100000000 (reserved)
[ 0.000000] BIOS-e820: 0000000100000000 - 0000000238000000 (usable)
[ 0.000000] bootconsole [earlyser0] enabled
[ 0.000000] NX (Execute Disable) protection: active
[ 0.000000] DMI present.
[ 0.000000] Phoenix BIOS detected: BIOS may corrupt low RAM, working around it.
[ 0.000000] last_pfn = 0x238000 max_arch_pfn = 0x1000000
[ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[ 0.000000] Scanning 0 areas for low memory corruption
[ 0.000000] modified physical RAM map:
[ 0.000000] modified: 0000000000000000 - 0000000000010000 (reserved)
[ 0.000000] modified: 0000000000010000 - 0000000000088c00 (usable)
[ 0.000000] modified: 0000000000088c00 - 00000000000a0000 (reserved)
[ 0.000000] modified: 00000000000cc000 - 0000000000100000 (reserved)
[ 0.000000] modified: 0000000000100000 - 00000000c7e60000 (usable)
[ 0.000000] modified: 00000000c7e60000 - 00000000c7e6b000 (ACPI data)
[ 0.000000] modified: 00000000c7e6b000 - 00000000c7e6d000 (ACPI NVS)
[ 0.000000] modified: 00000000c7e6d000 - 00000000c7eff000 (reserved)
[ 0.000000] modified: 00000000c7f00000 - 00000000c8000000 (reserved)
[ 0.000000] modified: 00000000e0000000 - 00000000f0000000 (reserved)
[ 0.000000] modified: 00000000fec00000 - 00000000fec10000 (reserved)
[ 0.000000] modified: 00000000fee00000 - 00000000fee01000 (reserved)
[ 0.000000] modified: 00000000fff00000 - 0000000100000000 (reserved)
[ 0.000000] modified: 0000000100000000 - 0000000238000000 (usable)
[ 0.000000] found SMP MP-table at [c00f72a0] f72a0
[ 0.000000] init_memory_mapping: 0000000000000000-00000000375fe000
[ 0.000000] ACPI: RSDP 000f7220 00024 (v02 PTLTD )
[ 0.000000] ACPI: XSDT c7e60915 0009C (v01 PTLTD ? XSDT 06040000 LTP 00000000)
[ 0.000000] ACPI: FACP c7e66d6a 000F4 (v03 AMD Dinar 06040000 AMD 000F4240)
[ 0.000000] ACPI: DSDT c7e609b1 063B9 (v02 AMD SB700 06040000 MSFT 03000000)
[ 0.000000] ACPI: FACS c7e6cfc0 00040
[ 0.000000] ACPI: TCPA c7e66ed2 00032 (v02 AMD 06040000 PTEC 00000000)
[ 0.000000] ACPI: IVRS c7e66f04 000A8 (v01 AMD RD890S 06040000 AMD 00000000)
[ 0.000000] ACPI: EINJ c7e66fac 001B0 (v01 PTL WHEAPTL 06040000 PTL 00000001)
[ 0.000000] ACPI: HEST c7e6715c 002AC (v01 PTL WHEAPTL 06040000 PTL 00000001)
[ 0.000000] ACPI: BERT c7e67408 00030 (v01 PTL WHEAPTL 06040000 PTL 00000001)
[ 0.000000] ACPI: SSDT c7e67438 000E1 (v01 wheaos wheaosc 06040000 INTL 20050624)
[ 0.000000] ACPI: ERST c7e67519 00270 (v01 PTL WHEAPTL 06040000 PTL 00000001)
[ 0.000000] ACPI: SLIT c7e67789 0003C (v01 AMD F10 06040000 AMD 00000001)
[ 0.000000] ACPI: SRAT c7e677c5 002A0 (v02 AMD F10 06040000 AMD 00000001)
[ 0.000000] ACPI: SSDT c7e67a65 03294 (v01 AMD POWERNOW 06040000 AMD 00000001)
[ 0.000000] ACPI: SSDT c7e6acf9 000F5 (v01 AMD-K8 AMD-ACPI 06040000 AMD 00000001)
[ 0.000000] ACPI: APIC c7e6adee 0019E (v01 PTLTD ? APIC 06040000 LTP 00000000)
[ 0.000000] ACPI: MCFG c7e6af8c 0003C (v01 PTLTD MCFG 06040000 LTP 00000000)
[ 0.000000] ACPI: HPET c7e6afc8 00038 (v01 PTLTD HPETTBL 06040000 LTP 00000001)
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
[ 0.000000] Oops: 0002 [#1] SMP
[ 0.000000] last sysfs file:
[ 0.000000] Modules linked in:
[ 0.000000]
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.35-rc4+ #2 Dinar/Dinar
[ 0.000000] EIP: 0060:[<c2c8cff1>] EFLAGS: 00010046 CPU: 0
[ 0.000000] EIP is at __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] EAX: 00000000 EBX: 80000000 ECX: 00000240 EDX: 00000000
[ 0.000000] ESI: 00000000 EDI: 40000000 EBP: c2837e7c ESP: c2837e10
[ 0.000000] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
[ 0.000000] Process swapper (pid: 0, ti=c2836000 task=c2845840 task.ti=c2836000)
[ 0.000000] Stack:
[ 0.000000] c272e3ce 00000001 80000000 00000000 c7e60000 00000000 01000000 00000000
[ 0.000000] <0> ffffffff ffffffff 00000240 00000000 00000040 00000000 80000000 00000000
[ 0.000000] <0> 00000240 00000000 00000001 40000000 80000000 00000000 c7e60000 00000000
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
[ 0.000000] [<c2c55b0b>] ? setup_arch+0x1826/0x1a38
[ 0.000000] [<c2c822be>] ? __reserve_early+0x17e/0x19a
[ 0.000000] [<c112cea4>] ? init_cgroup_css+0xd5/0x110
[ 0.000000] [<c2c4c50e>] ? start_kernel+0x1d1/0xc42
[ 0.000000] [<c2c4b1d9>] ? i386_start_kernel+0x1d9/0x1ef
[ 0.000000] Code: 15 c4 52 ec c2 00 83 05 08 7e ec c2 01 83 15 0c 7e ec c2 00 fc 89 4d e0 8b 4d d4 83 05 c8 52 ec c2 01 83 15 cc 52 ec c2 00 31 c0 <f3> aa 89 d8 83 05 d0 52 ec c2 01 83 15 d4 52 ec c2 00 83 c4 40
[ 0.000000] EIP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6 SS:ESP 0068:c2837e10
[ 0.000000] CR2: 0000000040000000
[ 0.000000] ---[ end trace 4eaa2a86a8e2da22 ]---
[ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
[ 0.000000] Pid: 0, comm: swapper Tainted: G D 2.6.35-rc4+ #2
[ 0.000000] Call Trace:
[ 0.000000] [<c109e39b>] panic+0xf0/0x2a4
[ 0.000000] [<c10a7182>] do_exit+0x1c8/0x168b
[ 0.000000] [<c109ebce>] ? spin_unlock_irqrestore+0x1b/0x2b
[ 0.000000] [<c10a232d>] ? kmsg_dump+0x380/0x396
[ 0.000000] [<c224ee14>] oops_end+0x1fb/0x211
[ 0.000000] [<c105d1f1>] no_context+0x48a/0x4a2
[ 0.000000] [<c109f655>] ? release_console_sem+0x543/0x56e
[ 0.000000] [<c105d6c4>] __bad_area_nosemaphore+0x2b7/0x2cd
[ 0.000000] [<c105d7c2>] bad_area_nosemaphore+0x20/0x31
[ 0.000000] [<c22540d1>] do_page_fault+0x60d/0xe1b
[ 0.000000] [<c2253ac4>] ? do_page_fault+0x0/0xe1b
[ 0.000000] [<c224d38b>] error_code+0x6b/0x70
[ 0.000000] [<c2253ac4>] ? do_page_fault+0x0/0xe1b
[ 0.000000] [<c2c8cff1>] ? __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] [<c2c8b4f8>] __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
[ 0.000000] [<c2c55b0b>] setup_arch+0x1826/0x1a38
[ 0.000000] [<c2c822be>] ? __reserve_early+0x17e/0x19a
[ 0.000000] [<c112cea4>] ? init_cgroup_css+0xd5/0x110
[ 0.000000] [<c2c4c50e>] start_kernel+0x1d1/0xc42
[ 0.000000] [<c2c4b1d9>] i386_start_kernel+0x1d9/0x1ef
--
Regards/Gruss,
Boris.
Operating Systems Research Center
Advanced Micro Devices, Inc.
On 07/09/2010 07:54 AM, Borislav Petkov wrote:
> Hi,
>
> this is something we're getting during testing on one of our boxes here,
> a dual socket Magny-Cours machine. It is oopsing on the addr variable in
> __alloc_memory_core_early() after converting it to a virtual address in
> order to clear the mem region at it. I've removed the "#if 0.. #endif"
> around the printk which dumps the ranges in the early node map for more
> info.
>
> It's latest -git, 32bit build, config is attached.
>
I cannot reproduce this on any of my setups.
But looking at the code, there is a limit problem on 32-bit...
Please check whether this patch fixes the problem.
We will also need to rebase early_res onto the memblock patchset.
Thanks
Yinghai Lu
[PATCH] x86,mm: fix 32bit numa sparse vmemmap
Borislav Petkov <[email protected]> reported that his 32bit numa system has a problem:
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
It looks like bootmem is being allocated at far too high an address.
Try to cap the limit with get_max_mapped().
Reported-by: Borislav Petkov <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
---
mm/page_alloc.c | 3 +++
mm/sparse.c | 5 ++++-
2 files changed, 7 insertions(+), 1 deletion(-)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -363,7 +363,10 @@ static void __init sparse_early_usemaps_
return;
}
- usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
+ usemap = __alloc_bootmem_node_nopanic(NODE_DATA(nodeid),
+ size * usemap_count,
+ SMP_CACHE_BYTES,
+ __pa(MAX_DMA_ADDRESS));
if (usemap) {
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
if (!present_section_nr(pnum))
Hi,
On Fri, 9 Jul 2010 15:08:52 -0400
Yinghai Lu <[email protected]> wrote:
>On 07/09/2010 07:54 AM, Borislav Petkov wrote:
>> Hi,
>>
>> this is something we're getting during testing on one of our boxes
>> here, a dual socket Magny-Cours machine. It is oopsing on the addr
>> variable in __alloc_memory_core_early() after converting it to a
>> virtual address in order to clear the mem region at it. I've removed
>> the "#if 0.. #endif" around the printk which dumps the ranges in the
>> early node map for more info.
>>
>> It's latest -git, 32bit build, config is attached.
>>
>
>can not duplicate on all my setup.
>
>but look at the code there is limit problem with 32 bit...
>
>please check if it will fix the problem.
>
>and we will need to rebase early_res to memblock patchset.
>
>Thanks
>
>Yinghai Lu
>
I'm sorry but the patch isn't working.
Now we are seeing this:
[ 0.000000] BUG: Int 6: CR2 (null)
[ 0.000000] EDI 00000005 ESI f3e00800 EBP c21afed4 ESP c21afe90
[ 0.000000] EBX 01000000 EDX 00000006 ECX (null) EAX fffffff4
[ 0.000000] err (null) EIP c2359558 CS 00000060 flg 00010046
[ 0.000000] Stack: fffffff4 (null) 00000001 f3e00000 00000005 00080000 00000001 00000002
[ 0.000000] 000b6800 00000002 f3e00000 00000001 00000002 c21aff0c c2359dd2 (null)
[ 0.000000] ffe00000 00000004 (null) 00000004 001fafd8 c2943000 (null) 00004e00
[ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.35-rc4+ #1
[ 0.000000] Call Trace:
[ 0.000000] [<c1dd39d9>] hlt_loop+0x0/0x3
[ 0.000000] [<c2359558>] ? free_area_init_node+0x5b1/0x647
[ 0.000000] [<c2359dd2>] free_area_init_nodes+0x7b5/0x89a
[ 0.000000] [<c234a2c9>] paging_init+0x1a9/0x1cc
[ 0.000000] [<c2330325>] setup_arch+0x1414/0x15f7
[ 0.000000] [<c23510c1>] ? __reserve_early+0x119/0x135
[ 0.000000] [<c1078c8a>] ? printk+0x36/0x54
[ 0.000000] [<c232917b>] start_kernel+0x1d0/0xb6f
[ 0.000000] [<c2328163>] i386_start_kernel+0x163/0x179
##
##################################################################
# Email : [email protected] GnuPG-Key : 0xA6AB055D #
# Fingerprint: 17C4 5DB2 7C4C C1C7 1452 8148 F139 7C09 A6AB 055D #
##################################################################
# Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach #
# General Managers: Alberto Bozzo, Andrew Bowd #
# Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen #
# HRB Nr. 43632 #
##################################################################
On 07/09/2010 12:52 PM, Conny Seidel wrote:
> Hi,
>
> On Fri, 9 Jul 2010 15:08:52 -0400
> Yinghai Lu <[email protected]> wrote:
>
>> On 07/09/2010 07:54 AM, Borislav Petkov wrote:
>>> Hi,
>>>
>>> this is something we're getting during testing on one of our boxes
>>> here, a dual socket Magny-Cours machine. It is oopsing on the addr
>>> variable in __alloc_memory_core_early() after converting it to a
>>> virtual address in order to clear the mem region at it. I've removed
>>> the "#if 0.. #endif" around the printk which dumps the ranges in the
>>> early node map for more info.
>>>
>>> It's latest -git, 32bit build, config is attached.
>>>
>>
>> can not duplicate on all my setup.
>>
>> but look at the code there is limit problem with 32 bit...
>>
>> please check if it will fix the problem.
>>
>> and we will need to rebase early_res to memblock patchset.
>>
>> Thanks
>>
>> Yinghai Lu
>>
>
> I'm sorry but the patch isn't working.
>
> Now we are seeing this:
>
> [ 0.000000] BUG: Int 6: CR2 (null)
> [ 0.000000] EDI 00000005 ESI f3e00800 EBP c21afed4 ESP c21afe90
> [ 0.000000] EBX 01000000 EDX 00000006 ECX (null) EAX fffffff4
> [ 0.000000] err (null) EIP c2359558 CS 00000060 flg 00010046
> [ 0.000000] Stack: fffffff4 (null) 00000001 f3e00000 00000005 00080000 00000001 00000002
> [ 0.000000] 000b6800 00000002 f3e00000 00000001 00000002 c21aff0c c2359dd2 (null)
> [ 0.000000] ffe00000 00000004 (null) 00000004 001fafd8 c2943000 (null) 00004e00
> [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.35-rc4+ #1
> [ 0.000000] Call Trace:
> [ 0.000000] [<c1dd39d9>] hlt_loop+0x0/0x3
> [ 0.000000] [<c2359558>] ? free_area_init_node+0x5b1/0x647
> [ 0.000000] [<c2359dd2>] free_area_init_nodes+0x7b5/0x89a
> [ 0.000000] [<c234a2c9>] paging_init+0x1a9/0x1cc
> [ 0.000000] [<c2330325>] setup_arch+0x1414/0x15f7
> [ 0.000000] [<c23510c1>] ? __reserve_early+0x119/0x135
> [ 0.000000] [<c1078c8a>] ? printk+0x36/0x54
> [ 0.000000] [<c232917b>] start_kernel+0x1d0/0xb6f
> [ 0.000000] [<c2328163>] i386_start_kernel+0x163/0x179
Can you send the boot log from the start?
YH
On Fri, 9 Jul 2010 16:18:39 -0400
Yinghai Lu <[email protected]> wrote:
>On 07/09/2010 12:52 PM, Conny Seidel wrote:
>> Hi,
>>
>> On Fri, 9 Jul 2010 15:08:52 -0400
>> Yinghai Lu <[email protected]> wrote:
>>
>>> On 07/09/2010 07:54 AM, Borislav Petkov wrote:
>>>> Hi,
>>>>
>>>> this is something we're getting during testing on one of our boxes
>>>> here, a dual socket Magny-Cours machine. It is oopsing on the addr
>>>> variable in __alloc_memory_core_early() after converting it to a
>>>> virtual address in order to clear the mem region at it. I've
>>>> removed the "#if 0.. #endif" around the printk which dumps the
>>>> ranges in the early node map for more info.
>>>>
>>>> It's latest -git, 32bit build, config is attached.
>>>>
>>>
>>> can not duplicate on all my setup.
>>>
>>> but look at the code there is limit problem with 32 bit...
>>>
>>> please check if it will fix the problem.
>>>
>>> and we will need to rebase early_res to memblock patchset.
>>>
>>> Thanks
>>>
>>> Yinghai Lu
>>>
>>
>> I'm sorry but the patch isn't working.
<snip>
>can you send out boot log from start?
>
>YH
>
Sure. see the complete log in the attached file.
Conny
##
##################################################################
# Email : [email protected] GnuPG-Key : 0xA6AB055D #
# Fingerprint: 17C4 5DB2 7C4C C1C7 1452 8148 F139 7C09 A6AB 055D #
##################################################################
# Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach #
# General Managers: Alberto Bozzo, Andrew Bowd #
# Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen #
# HRB Nr. 43632 #
##################################################################
On 07/09/2010 01:42 PM, Conny Seidel wrote:
> sparse_early_mem_map_alloc: sparsemem memory map backing failed some memory will not be available.
please check
[PATCH -v2] x86,mm: fix 32bit numa sparsemem
Borislav Petkov <[email protected]> reported that his 32bit numa system has a problem:
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
It looks like bootmem is being allocated at far too high an address.
Try to cap the limit with get_max_mapped().
-v2: add a fallback in sparse_mem_map_populate() to handle
"sparse_early_mem_map_alloc: sparsemem memory map backing failed some memory will not be available."
Reported-by: Borislav Petkov <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
---
mm/page_alloc.c | 3 +++
mm/sparse.c | 10 +++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/sparse.c
===================================================================
--- linux-2.6.orig/mm/sparse.c
+++ linux-2.6/mm/sparse.c
@@ -363,7 +363,10 @@ static void __init sparse_early_usemaps_
return;
}
- usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
+ usemap = __alloc_bootmem_node_nopanic(NODE_DATA(nodeid),
+ size * usemap_count,
+ SMP_CACHE_BYTES,
+ __pa(MAX_DMA_ADDRESS));
if (usemap) {
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
if (!present_section_nr(pnum))
@@ -391,6 +394,11 @@ struct page __init *sparse_mem_map_popul
size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+
+ if (!map)
+ map = __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+ PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+
return map;
}
void __init sparse_mem_maps_populate_node(struct page **map_map,
On Fri, 9 Jul 2010 18:07:50 -0400
Yinghai Lu <[email protected]> wrote:
>On 07/09/2010 01:42 PM, Conny Seidel wrote:
>> sparse_early_mem_map_alloc: sparsemem memory map backing failed some
>> memory will not be available.
>
>please check
>
This still panics, see logfile for complete trace.
Conny
##
##################################################################
# Email : [email protected] GnuPG-Key : 0xA6AB055D #
# Fingerprint: 17C4 5DB2 7C4C C1C7 1452 8148 F139 7C09 A6AB 055D #
##################################################################
# Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach #
# General Managers: Alberto Bozzo, Andrew Bowd #
# Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen #
# HRB Nr. 43632 #
##################################################################
On 07/09/2010 04:51 PM, Conny Seidel wrote:
> On Fri, 9 Jul 2010 18:07:50 -0400
> Yinghai Lu <[email protected]> wrote:
>
>> On 07/09/2010 01:42 PM, Conny Seidel wrote:
>>> sparse_early_mem_map_alloc: sparsemem memory map backing failed some
>>> memory will not be available.
>>
>> please check
>>
> This still panics, see logfile for complete trace.
>
this one should fix the problem.
Thanks
Yinghai
[PATCH -v3] x86,nobootmem: make alloc_bootmem_node fall back to other nodes when 32bit numa is used
Borislav Petkov <[email protected]> reported that his 32bit numa system has a problem:
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
It looks like bootmem is being allocated at far too high an address.
Try to cap the limit with get_max_mapped().
-v3: make alloc_bootmem_node fall back to other nodes,
just like the old alloc_bootmem_node did.
This patch is needed for 2.6.34 and 2.6.35.
Reported-by: Borislav Petkov <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: [email protected]
---
mm/bootmem.c | 24 ++++++++++++++++++++----
mm/page_alloc.c | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_no
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsign
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}
On Fri, 9 Jul 2010 21:03:42 -0400
Yinghai Lu <[email protected]> wrote:
>On 07/09/2010 04:51 PM, Conny Seidel wrote:
>> On Fri, 9 Jul 2010 18:07:50 -0400
>> Yinghai Lu <[email protected]> wrote:
>>
>>> On 07/09/2010 01:42 PM, Conny Seidel wrote:
>>>> sparse_early_mem_map_alloc: sparsemem memory map backing failed some
>>>> memory will not be available.
>>>
>>> please check
>>>
>> This still panics, see logfile for complete trace.
>>
>
>this one should fix the problem.
>
>Thanks
>
>Yinghai
Yes, Patch -v3 fixes the issue, thanks.
>[PATCH -v3] x86,nobootmem: make alloc_bootmem_node fall back to other node when 32bit numa are used
>
>Borislav Petkov <[email protected]> reported his 32bit numa has problem:
>
>[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
>[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
>[ 0.000000] max_pfn = 238000
>[ 0.000000] 8202MB HIGHMEM available.
>[ 0.000000] 885MB LOWMEM available.
>[ 0.000000] mapped low ram: 0 - 375fe000
>[ 0.000000] low ram: 0 - 375fe000
>[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
>[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
>[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
>[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
>[ 0.000000] BUG: unable to handle kernel paging request at 40000000
>[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
>[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
>...
>[ 0.000000] Call Trace:
>[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
>[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
>[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
>[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
>[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
>
>looks like it allocate much high address for bootmem.
>
>try to cut limit with get_max_mapped()
>
>-v3: make alloc_bootmem_node could fallback to other node.
> just like old alloc_bootmem_node did
>
>need this patch for 2.6.34 and 2.6.35
>
>Reported-by: Borislav Petkov <[email protected]>
>Signed-off-by: Yinghai Lu <[email protected]>
>Cc: [email protected]
Tested-by: Conny Seidel <[email protected]>
##
##################################################################
# Email : [email protected] GnuPG-Key : 0xA6AB055D #
# Fingerprint: 17C4 5DB2 7C4C C1C7 1452 8148 F139 7C09 A6AB 055D #
##################################################################
# Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach #
# General Managers: Alberto Bozzo, Andrew Bowd #
# Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen #
# HRB Nr. 43632 #
##################################################################
Borislav Petkov reported that his 32bit numa system has a problem:
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
It looks like bootmem is being allocated at far too high an address.
Try to cap the limit with get_max_mapped().
-v3: make alloc_bootmem_node fall back to other nodes,
just like the old alloc_bootmem_node did.
This patch is needed for 2.6.34 and 2.6.35.
Reported-by: Borislav Petkov <[email protected]>
Tested-by: Conny Seidel <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: [email protected]
---
mm/bootmem.c | 24 ++++++++++++++++++++----
mm/page_alloc.c | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_no
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsign
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}
Borislav Petkov reported that his 32-bit NUMA system has a problem:
[ 0.000000] Reserving total of 4c00 pages for numa KVA remap
[ 0.000000] kva_start_pfn ~ 32800 max_low_pfn ~ 375fe
[ 0.000000] max_pfn = 238000
[ 0.000000] 8202MB HIGHMEM available.
[ 0.000000] 885MB LOWMEM available.
[ 0.000000] mapped low ram: 0 - 375fe000
[ 0.000000] low ram: 0 - 375fe000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 1000 1000 => 34e7000
[ 0.000000] alloc (nid=8 100000 - 7ee00000) (1000000 - ffffffff) 200 40 => 34c9d80
[ 0.000000] alloc (nid=0 100000 - 7ee00000) (1000000 - ffffffffffffffff) 180 40 => 34e6140
[ 0.000000] alloc (nid=1 80000000 - c7e60000) (1000000 - ffffffffffffffff) 240 40 => 80000000
[ 0.000000] BUG: unable to handle kernel paging request at 40000000
[ 0.000000] IP: [<c2c8cff1>] __alloc_memory_core_early+0x147/0x1d6
[ 0.000000] *pdpt = 0000000000000000 *pde = f000ff53f000ff00
...
[ 0.000000] Call Trace:
[ 0.000000] [<c2c8b4f8>] ? __alloc_bootmem_node+0x216/0x22f
[ 0.000000] [<c2c90c9b>] ? sparse_early_usemaps_alloc_node+0x5a/0x10b
[ 0.000000] [<c2c9149e>] ? sparse_init+0x1dc/0x499
[ 0.000000] [<c2c79118>] ? paging_init+0x168/0x1df
[ 0.000000] [<c2c780ff>] ? native_pagetable_setup_start+0xef/0x1bb
It looks like a much too high address gets allocated for bootmem.
Try to fix that by capping the limit at get_max_mapped().
-v3: allow alloc_bootmem_node to fall back to other nodes,
just like the old alloc_bootmem_node did.
This fix is needed for 2.6.34 and 2.6.35.
Reported-by: Borislav Petkov <[email protected]>
Tested-by: Conny Seidel <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: [email protected]
---
mm/bootmem.c | 24 ++++++++++++++++++++----
mm/page_alloc.c | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(
int i;
void *ptr;
+ if (limit > get_max_mapped())
+ limit = get_max_mapped();
+
/* need to go over early_node_map to find out good range for node */
for_each_active_range_index_in_nid(i, nid) {
u64 addr;
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_no
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, -1ULL);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif
+
+ return ptr;
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsign
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
+ void *ptr;
+
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
#ifdef CONFIG_NO_BOOTMEM
- return __alloc_memory_core_early(pgdat->node_id, size, align,
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#else
- return ___alloc_bootmem_node(pgdat->bdata, size, align,
+ ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
+ return ptr;
}