2009-11-17 19:17:57

by Mike Travis

[permalink] [raw]
Subject: [PATCH 1/5] x86: Limit the number of processor bootup messages

When there are a large number of processors in a system, there
is an excessive amount of messages sent to the system console.
It's estimated that with 4096 processors in a system, and the
console baudrate set to 56K, the startup messages will take
about 84 minutes to clear the serial port.

This set of patches limits the number of repetitious messages
which contain no additional information. Much of this information
is obtainable from the /proc and /sysfs. Some of the messages
are also sent to the kernel log buffer as KERN_DEBUG messages so
dmesg can be used to examine more closely any details specific to
a problem.

The list of message transformations....

For system_state == SYSTEM_BOOTING:

Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.
Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
..
Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
Brought up 64 CPUs

The following lines have been removed:

CPU: Physical Processor ID:
CPU: Processor Core ID:
CPU %d/0x%x -> Node %d

The following lines will only be printed if unusual (state):

CPU %d is now offline (system_state == RUNNING)

The following lines will only be printed in debug mode:

Initializing CPU#%d

The following lines are only printed for the first (boot) cpu:

CPU0: Hyper-Threading is disabled
CPU0: Thermal monitoring enabled

Signed-off-by: Mike Travis <[email protected]>
---
arch/x86/kernel/cpu/addon_cpuid_features.c | 6 ---
arch/x86/kernel/cpu/amd.c | 2 -
arch/x86/kernel/cpu/common.c | 20 +++---------
arch/x86/kernel/cpu/intel.c | 2 -
arch/x86/kernel/cpu/mcheck/therm_throt.c | 8 +++--
arch/x86/kernel/smpboot.c | 46 ++++++++++++++++++++---------
6 files changed, 44 insertions(+), 40 deletions(-)

--- linux.orig/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ linux/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -127,12 +127,6 @@

c->x86_max_cores = (core_level_siblings / smp_num_siblings);

-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
return;
#endif
}
--- linux.orig/arch/x86/kernel/cpu/amd.c
+++ linux/arch/x86/kernel/cpu/amd.c
@@ -375,8 +375,6 @@
node = nearby_node(apicid);
}
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}

--- linux.orig/arch/x86/kernel/cpu/common.c
+++ linux/arch/x86/kernel/cpu/common.c
@@ -437,7 +437,7 @@
return;

if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- goto out;
+ return;

if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
return;
@@ -446,13 +446,13 @@

smp_num_siblings = (ebx & 0xff0000) >> 16;

- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- goto out;
+ if (smp_num_siblings == 1 && c->cpu_index == 0) {
+ pr_info("CPU0: Hyper-Threading is disabled\n");
+ return;
}

if (smp_num_siblings <= 1)
- goto out;
+ return;

if (smp_num_siblings > nr_cpu_ids) {
pr_warning("CPU: Unsupported number of siblings %d",
@@ -472,14 +472,6 @@

c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
-
-out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
- }
#endif
}

@@ -1115,7 +1107,7 @@
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
panic("CPU#%d already initialized!\n", cpu);

- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_debug("Initializing CPU#%d\n", cpu);

clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

--- linux.orig/arch/x86/kernel/cpu/intel.c
+++ linux/arch/x86/kernel/cpu/intel.c
@@ -266,8 +266,6 @@
if (node == NUMA_NO_NODE || !node_online(node))
node = first_node(node_online_map);
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}

--- linux.orig/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ linux/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -259,6 +259,7 @@
unsigned int cpu = smp_processor_id();
int tm2 = 0;
u32 l, h;
+ static bool printed;

/* Thermal monitoring depends on ACPI and clock modulation*/
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
@@ -312,8 +313,11 @@
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
+ if (!printed) {
+ printk(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
+ printed = true;
+ }

/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
--- linux.orig/arch/x86/kernel/smpboot.c
+++ linux/arch/x86/kernel/smpboot.c
@@ -671,6 +671,27 @@
complete(&c_idle->done);
}

+/* reduce the number of lines printed when booting a large cpu count system */
+static void __cpuinit announce_cpu(int cpu, int apicid)
+{
+ if (system_state == SYSTEM_BOOTING) {
+#ifdef CONFIG_NUMA
+ static int current_node = -1;
+ int node = cpu_to_node(cpu);
+
+ if (node != current_node) {
+ if (current_node > (-1))
+ pr_cont(" Ok.\n");
+ current_node = node;
+ pr_info("Booting Node %3d, Processors ", node);
+ }
+ pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+ return;
+#endif
+ }
+ pr_info("Booting Processor %d APIC 0x%x\n", cpu, apicid);
+}
+
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -736,9 +757,8 @@
/* start_ip had better be page-aligned! */
start_ip = setup_trampoline();

- /* So we see what's up */
- printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
- cpu, apicid, start_ip);
+ /* So we see what's up */
+ announce_cpu(cpu, apicid);

/*
* This grunge runs the startup process for
@@ -787,21 +807,17 @@
udelay(100);
}

- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- pr_debug("OK.\n");
- printk(KERN_INFO "CPU%d: ", cpu);
- print_cpu_info(&cpu_data(cpu));
- pr_debug("CPU has booted.\n");
- } else {
+ if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ pr_debug("CPU%d: has booted.\n", cpu);
+ else {
boot_error = 1;
if (*((volatile unsigned char *)trampoline_base)
== 0xA5)
/* trampoline started but...? */
- printk(KERN_ERR "Stuck ??\n");
+ pr_err("CPU%d: Stuck ??\n", cpu);
else
/* trampoline code not run */
- printk(KERN_ERR "Not responding.\n");
+ pr_err("CPU%d: Not responding.\n", cpu);
if (apic->inquire_remote_apic)
apic->inquire_remote_apic(apicid);
}
@@ -1300,14 +1316,16 @@
for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk(KERN_INFO "CPU %d is now offline\n", cpu);
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ pr_err("CPU %u didn't die...\n", cpu);
}

void play_dead_common(void)


2009-11-17 20:10:30

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

On Tue, Nov 17, 2009 at 11:17 AM, Mike Travis <[email protected]> wrote:
> When there are a large number of processors in a system, there
> is an excessive amount of messages sent to the system console.
> It's estimated that with 4096 processors in a system, and the
> console baudrate set to 56K, the startup messages will take
> about 84 minutes to clear the serial port.
>
> This set of patches limits the number of repetitious messages
> which contain no additional information. Much of this information
> is obtainable from the /proc and /sysfs. Some of the messages
> are also sent to the kernel log buffer as KERN_DEBUG messages so
> dmesg can be used to examine more closely any details specific to
> a problem.
>
> The list of message transformations....
>
> For system_state == SYSTEM_BOOTING:
>
> Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.
> Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
> ..
> Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
> Brought up 64 CPUs
>
> The following lines have been removed:
>
> CPU: Physical Processor ID:
> CPU: Processor Core ID:
> CPU %d/0x%x -> Node %d

please don't.

YH

2009-11-17 20:37:20

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

On 11/17/2009 12:10 PM, Yinghai Lu wrote:
>>
>> The following lines have been removed:
>>
>> CPU: Physical Processor ID:
>> CPU: Processor Core ID:
>> CPU %d/0x%x -> Node %d
>
> please don't.
>

Why not?

Or, more formally: please state the rationale for keeping them.

-hpa

2009-11-17 21:05:22

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages



Yinghai Lu wrote:
> On Tue, Nov 17, 2009 at 11:17 AM, Mike Travis <[email protected]> wrote:
>> When there are a large number of processors in a system, there
>> is an excessive amount of messages sent to the system console.
>> It's estimated that with 4096 processors in a system, and the
>> console baudrate set to 56K, the startup messages will take
>> about 84 minutes to clear the serial port.
>>
>> This set of patches limits the number of repetitious messages
>> which contain no additional information. Much of this information
>> is obtainable from the /proc and /sysfs. Some of the messages
>> are also sent to the kernel log buffer as KERN_DEBUG messages so
>> dmesg can be used to examine more closely any details specific to
>> a problem.
>>
>> The list of message transformations....
>>
>> For system_state == SYSTEM_BOOTING:
>>
>> Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.
>> Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
>> ..
>> Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
>> Brought up 64 CPUs
>>
>> The following lines have been removed:
>>
>> CPU: Physical Processor ID:
>> CPU: Processor Core ID:
>> CPU %d/0x%x -> Node %d
>
> please don't.
>
> YH

The current output format is:

[ 1.752861] Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.
[ 2.271831] Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
[ 2.858473] Booting Node 2, Processors #16 #17 #18 #19 #20 #21 #22 #23 Ok.
[ 3.445168] Booting Node 3, Processors #24 #25 #26 #27 #28 #29 #30 #31 Ok.
[ 4.031750] Booting Node 0, Processors #32 #33 #34 #35 #36 #37 #38 #39 Ok.
[ 4.618461] Booting Node 1, Processors #40 #41 #42 #43 #44 #45 #46 #47 Ok.
[ 5.206036] Booting Node 2, Processors #48 #49 #50 #51 #52 #53 #54 #55 Ok.
[ 5.795760] Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
[ 6.382678] Skipped synchronization checks as TSC is reliable.
[ 6.389254] Brought up 64 CPUs
[ 6.392705] Total of 64 processors activated (294277.71 BogoMIPS).

So cpu/node is retained. How would you propose interjecting the core and cpu ids?
A summary after the above? (These are obtainable from /proc/cpuinfo. Any reason
why the information is required at boot time?)

I had proposed to send them to the kernel debug log buffer, but was told they
were not needed so I removed them.

Here is the same info:

53> cat simple.awk
#!/bin/bash

cat $1 | awk '
{
if ($1 == "processor")
cpu = $3;

if ($1 == "physical" && $2 == "id")
phyid = $4;

if ($1 == "core" && $2 == "id") {
coreid = $4;
printf "CPU%d: Physical Processor ID: %d\n", cpu, phyid;
printf "CPU%d: Physical Core ID: %d\n", cpu, coreid;
}
}
'
54> ./simple.awk /proc/cpuinfo
CPU0: Physical Processor ID: 0
CPU0: Physical Core ID: 0
CPU1: Physical Processor ID: 0
CPU1: Physical Core ID: 1
CPU2: Physical Processor ID: 0
CPU2: Physical Core ID: 3
CPU3: Physical Processor ID: 0
CPU3: Physical Core ID: 8
CPU4: Physical Processor ID: 0
CPU4: Physical Core ID: 10
CPU5: Physical Processor ID: 0
CPU5: Physical Core ID: 11
CPU6: Physical Processor ID: 1
CPU6: Physical Core ID: 0
CPU7: Physical Processor ID: 1
CPU7: Physical Core ID: 1
CPU8: Physical Processor ID: 1

<and so on>

CPU45: Physical Processor ID: 3
CPU45: Physical Core ID: 9
CPU46: Physical Processor ID: 3
CPU46: Physical Core ID: 10
CPU47: Physical Processor ID: 3
CPU47: Physical Core ID: 11

2009-11-17 21:11:41

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

On Tue, Nov 17, 2009 at 12:29 PM, H. Peter Anvin <[email protected]> wrote:
> On 11/17/2009 12:10 PM, Yinghai Lu wrote:
>>>
>>> The following lines have been removed:
>>>
>>> CPU: Physical Processor ID:
>>> CPU: Processor Core ID:
>>> CPU %d/0x%x -> Node %d
>>
>> please don't.
>>
>
> Why not?
>
> Or, more formally: please state the rationale for keeping them.
>
at least one distribution: SLES 11 mess it up when BSP is from socket
1 instead of socket0

and above message does show kernel think BSP still from socket0, and
other cores in that package are from socket1.

YH

2009-11-18 02:38:40

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

On Tue, Nov 17, 2009 at 1:11 PM, Yinghai Lu <[email protected]> wrote:
> On Tue, Nov 17, 2009 at 12:29 PM, H. Peter Anvin <[email protected]> wrote:
>> On 11/17/2009 12:10 PM, Yinghai Lu wrote:
>>>>
>>>> The following lines have been removed:
>>>>
>>>> CPU: Physical Processor ID:
>>>> CPU: Processor Core ID:
>>>> CPU %d/0x%x -> Node %d
>>>
>>> please don't.
>>>
>>
>> Why not?
>>
>> Or, more formally: please state the rationale for keeping them.
>>
> at least one distribution: SLES 11 mess it up when BSP is from socket
> 1 instead of socket0
>
> and above message does show kernel think BSP still from socket0, and
> other cores in that package are from socket1.

CPU: Physical Processor ID: 0
CPU: Processor Core ID: 0
CPU: L1 I cache: 32K, L1 D cache: 32K
CPU: L2 cache: 256K
CPU: L3 cache: 24576K
BUG: unable to handle kernel paging request at ffffffffc07129b0
IP: [<ffffffff8049494b>] init_intel+0xea/0x14b
PGD 203067 PUD 204067 PMD 0
Oops: 0000 [1] SMP
last sysfs file:
CPU 0
Modules linked in:
Supported: Yes
Pid: 0, comm: swapper Not tainted 2.6.27.19-5-default #1
RIP: 0010:[<ffffffff8049494b>] [<ffffffff8049494b>] init_intel+0xea/0x14b
RSP: 0018:ffffffff80965f48 EFLAGS: 00010202
RAX: 0000000020000000 RBX: 0000000000000044 RCX: 00000000000001a0
RDX: ffffffff808cbd14 RSI: 0000000000000046 RDI: 0000000020000000
RBP: 0000000020000000 R08: 0000000000000000 R09: 0000000000000000
R10: 000000000000000a R11: ffffffff802223f1 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffffffff80a40080(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: ffffffffc07129b0 CR3: 0000000000201000 CR4: 00000000000006a0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffffffff80964000, task ffffffff806da380)
Stack: 0000000100000003 0000000100000000 ffffffff808cbd00 ffffe20000000000
0000007800000000 ffffffff804943ef 0000000000000000 ffffffff80974fda
0000000000000000 ffffffff8096de10 0000000000000000 ffffffff809a1510
Call Trace:
[<ffffffff804943ef>] identify_cpu+0x3c/0xa3
[<ffffffff80974fda>] check_bugs+0x9/0x2e
[<ffffffff8096de10>] start_kernel+0x313/0x324
[<ffffffff8096d38f>] x86_64_start_kernel+0xde/0xe4


Code: 0f a2 a8 1f 74 07 c1 e8 1a ff c0 eb 05 b8 01 00 00 00 66 89 85 d8 00 00 00
65 44 8b 24 25 24 00 00 00 e8 c3 66 d8 ff 89 c5 48 98 <0f> bf 9c 00 b0 29 71 80
83 fb ff 74 0d 0f a3 1d 91 a1 4c 00 19
RIP [<ffffffff8049494b>] init_intel+0xea/0x14b
RSP <ffffffff80965f48>
CR2: ffffffffc07129b0
---[ end trace 4eaa2a86a8e2da22 ]---


2.6.32 kernel corresponding part:

[ 0.128855] CPU: Physical Processor ID: 1
[ 0.129856] CPU: Processor Core ID: 0
[ 0.130845] CPU: L1 I cache: 32K, L1 D cache: 32K
[ 0.151454] CPU: L2 cache: 256K
[ 0.152463] CPU: L3 cache: 24576K
[ 0.153471] CPU 0/0x20 -> Node 0
[ 0.168552] CPU 0 microcode level: 0xffff0008
[ 0.169901] mce: CPU supports 22 MCE banks

Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

Hi,

On Tue, Nov 17, 2009 at 01:17:53PM -0600, Mike Travis wrote:
> When there are a large number of processors in a system, there
> is an excessive amount of messages sent to the system console.
> It's estimated that with 4096 processors in a system, and the
> console baudrate set to 56K, the startup messages will take
> about 84 minutes to clear the serial port.
>
> This set of patches limits the number of repetitious messages
> which contain no additional information. Much of this information
> is obtainable from the /proc and /sysfs. Some of the messages
> are also sent to the kernel log buffer as KERN_DEBUG messages so
> dmesg can be used to examine more closely any details specific to
> a problem.
>
> The list of message transformations....
>
> For system_state == SYSTEM_BOOTING:
>
> Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.

Aren't we missing core 0 here?

> Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
> ..
> Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
> Brought up 64 CPUs

Also, I'm getting

Booting Node 0, Processors #1
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
#2
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
#3
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
#4
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
#5
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
Ok.
Booting Node 1, Processors #6
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
#7

...

and clearly CPU cache info is too verbose. We might want to
kill it since we have it replicated in /sysfs. In that case,
arch/x86/kernel/cpu/common.c:display_cacheinfo() could become obsolete
and we could remove it... Or is there some reason for dumping that
particular information during boot?

--
Regards/Gruss,
Boris.

Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632

2009-11-18 17:18:15

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages



Borislav Petkov wrote:
> Hi,
>
> On Tue, Nov 17, 2009 at 01:17:53PM -0600, Mike Travis wrote:
>> When there are a large number of processors in a system, there
>> is an excessive amount of messages sent to the system console.
>> It's estimated that with 4096 processors in a system, and the
>> console baudrate set to 56K, the startup messages will take
>> about 84 minutes to clear the serial port.
>>
>> This set of patches limits the number of repetitious messages
>> which contain no additional information. Much of this information
>> is obtainable from the /proc and /sysfs. Some of the messages
>> are also sent to the kernel log buffer as KERN_DEBUG messages so
>> dmesg can be used to examine more closely any details specific to
>> a problem.
>>
>> The list of message transformations....
>>
>> For system_state == SYSTEM_BOOTING:
>>
>> Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.
>
> Aren't we missing core 0 here?

Core 0 already booted as it's the Boot CPU. The info is earlier
in the log.

>
>> Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok.
>> ..
>> Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok.
>> Brought up 64 CPUs
>
> Also, I'm getting
>
> Booting Node 0, Processors #1
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)

There are other patches that deal with these:

http://git.kernel.org/tip/15cd8812ab2ce62a2f779e93a8398bdad752291a
http://git.kernel.org/tip/b01c845f0f2e3f9e54e6a78d5d56895f5b95e818

> #2
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)
> #3
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)
> #4
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)
> #5
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)
> Ok.
> Booting Node 1, Processors #6
> CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
> CPU: L2 Cache: 512K (64 bytes/line)
> #7
>
> ...
>
> and clearly CPU cache info is too verbose. We might want to
> kill it since we have it replicated in /sysfs. In that case,
> arch/x86/kernel/cpu/common.c:display_cacheinfo() could become obsolete
> and we could remove it... Or is there some reason for dumping that
> particular information during boot?
>

Yes, the above patches remove them entirely.

Thanks,
Mike

2009-11-18 17:43:14

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages



Yinghai Lu wrote:
> On Tue, Nov 17, 2009 at 12:29 PM, H. Peter Anvin <[email protected]> wrote:
>> On 11/17/2009 12:10 PM, Yinghai Lu wrote:
>>>> The following lines have been removed:
>>>>
>>>> CPU: Physical Processor ID:
>>>> CPU: Processor Core ID:
>>>> CPU %d/0x%x -> Node %d
>>> please don't.
>>>
>> Why not?
>>
>> Or, more formally: please state the rationale for keeping them.
>>
> at least one distribution: SLES 11 mess it up when BSP is from socket
> 1 instead of socket0
>
> and above message does show kernel think BSP still from socket0, and
> other cores in that package are from socket1.

[ 1.601924] Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok.

How does one imply Socket 1 from Node 0?

I added the socket and core id for BSP. I think cluttering up output
with useless additional information that is easily available is not
worthwhile.

2009-11-18 17:44:57

by Mike Travis

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

Is this a bug report? Can you supply the system configuration, config
file and the kernel release/patched status?

I've tested the patches on a few different (Intel) architectures
and haven't encountered any panics yet.

Thanks,
Mike

Yinghai Lu wrote:
> On Tue, Nov 17, 2009 at 1:11 PM, Yinghai Lu <[email protected]> wrote:
>> On Tue, Nov 17, 2009 at 12:29 PM, H. Peter Anvin <[email protected]> wrote:
>>> On 11/17/2009 12:10 PM, Yinghai Lu wrote:
>>>>> The following lines have been removed:
>>>>>
>>>>> CPU: Physical Processor ID:
>>>>> CPU: Processor Core ID:
>>>>> CPU %d/0x%x -> Node %d
>>>> please don't.
>>>>
>>> Why not?
>>>
>>> Or, more formally: please state the rationale for keeping them.
>>>
>> at least one distribution: SLES 11 mess it up when BSP is from socket
>> 1 instead of socket0
>>
>> and above message does show kernel think BSP still from socket0, and
>> other cores in that package are from socket1.
>
> CPU: Physical Processor ID: 0
> CPU: Processor Core ID: 0
> CPU: L1 I cache: 32K, L1 D cache: 32K
> CPU: L2 cache: 256K
> CPU: L3 cache: 24576K
> BUG: unable to handle kernel paging request at ffffffffc07129b0
> IP: [<ffffffff8049494b>] init_intel+0xea/0x14b
> PGD 203067 PUD 204067 PMD 0
> Oops: 0000 [1] SMP
> last sysfs file:
> CPU 0
> Modules linked in:
> Supported: Yes
> Pid: 0, comm: swapper Not tainted 2.6.27.19-5-default #1
> RIP: 0010:[<ffffffff8049494b>] [<ffffffff8049494b>] init_intel+0xea/0x14b
> RSP: 0018:ffffffff80965f48 EFLAGS: 00010202
> RAX: 0000000020000000 RBX: 0000000000000044 RCX: 00000000000001a0
> RDX: ffffffff808cbd14 RSI: 0000000000000046 RDI: 0000000020000000
> RBP: 0000000020000000 R08: 0000000000000000 R09: 0000000000000000
> R10: 000000000000000a R11: ffffffff802223f1 R12: 0000000000000000
> R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
> FS: 0000000000000000(0000) GS:ffffffff80a40080(0000) knlGS:0000000000000000
> CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
> CR2: ffffffffc07129b0 CR3: 0000000000201000 CR4: 00000000000006a0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process swapper (pid: 0, threadinfo ffffffff80964000, task ffffffff806da380)
> Stack: 0000000100000003 0000000100000000 ffffffff808cbd00 ffffe20000000000
> 0000007800000000 ffffffff804943ef 0000000000000000 ffffffff80974fda
> 0000000000000000 ffffffff8096de10 0000000000000000 ffffffff809a1510
> Call Trace:
> [<ffffffff804943ef>] identify_cpu+0x3c/0xa3
> [<ffffffff80974fda>] check_bugs+0x9/0x2e
> [<ffffffff8096de10>] start_kernel+0x313/0x324
> [<ffffffff8096d38f>] x86_64_start_kernel+0xde/0xe4
>
>
> Code: 0f a2 a8 1f 74 07 c1 e8 1a ff c0 eb 05 b8 01 00 00 00 66 89 85 d8 00 00 00
> 65 44 8b 24 25 24 00 00 00 e8 c3 66 d8 ff 89 c5 48 98 <0f> bf 9c 00 b0 29 71 80
> 83 fb ff 74 0d 0f a3 1d 91 a1 4c 00 19
> RIP [<ffffffff8049494b>] init_intel+0xea/0x14b
> RSP <ffffffff80965f48>
> CR2: ffffffffc07129b0
> ---[ end trace 4eaa2a86a8e2da22 ]---
>
>
> 2.6.32 kernel corresponding part:
>
> [ 0.128855] CPU: Physical Processor ID: 1
> [ 0.129856] CPU: Processor Core ID: 0
> [ 0.130845] CPU: L1 I cache: 32K, L1 D cache: 32K
> [ 0.151454] CPU: L2 cache: 256K
> [ 0.152463] CPU: L3 cache: 24576K
> [ 0.153471] CPU 0/0x20 -> Node 0
> [ 0.168552] CPU 0 microcode level: 0xffff0008
> [ 0.169901] mce: CPU supports 22 MCE banks

2009-11-18 18:09:04

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86: Limit the number of processor bootup messages

On Wed, Nov 18, 2009 at 09:18:09AM -0800, Mike Travis wrote:
> There are other patches that deal with these:
>
> http://git.kernel.org/tip/15cd8812ab2ce62a2f779e93a8398bdad752291a
> http://git.kernel.org/tip/b01c845f0f2e3f9e54e6a78d5d56895f5b95e818

yep, it looks better now, thanks:

CPU: Physical Processor ID: 0
CPU: Processor Core ID: 0
mce: CPU supports 6 MCE banks
using C1E aware idle routine
...
Booting Node 0, Processors #1 #2 #3 #4 #5 Ok.
Booting Node 1, Processors #6 #7 #8 #9 #10 #11 Ok.
Booting Node 3, Processors #12 #13 #14 #15 #16 #17 Ok.
Booting Node 2, Processors #18 #19 #20 #21 #22 #23 Ok.
Brought up 24 CPUs
Total of 24 processors activated (81398.64 BogoMIPS).

> >and clearly CPU cache info is too verbose. We might want to
> >kill it since we have it replicated in /sysfs. In that case,
> >arch/x86/kernel/cpu/common.c:display_cacheinfo() could become obsolete
> >and we could remove it... Or is there some reason for dumping that
> >particular information during boot?
> >
>
> Yes, the above patches remove them entirely.

In that case, maybe a small fixlet like the following is in order:

--
From: Borislav Petkov <[email protected]>
Date: Wed, 18 Nov 2009 19:01:28 +0100
Subject: [PATCH] x86, cpu: mv display_cacheinfo -> cpu_detect_cache_sizes

display_cacheinfo() doesn't display anything anymore and it is solely
used to detect CPU cache sizes for /proc/cpuinfo. Rename it accordingly.

Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/amd.c | 2 +-
arch/x86/kernel/cpu/centaur.c | 2 +-
arch/x86/kernel/cpu/common.c | 4 ++--
arch/x86/kernel/cpu/cpu.h | 2 +-
arch/x86/kernel/cpu/cyrix.c | 2 +-
arch/x86/kernel/cpu/transmeta.c | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c910a71..7128b37 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}

- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);

/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c95e831..e58d978 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}

- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
}

enum {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 61242a5..9bf845d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
}
}

-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;

diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 6de9a90..3624e8a 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,6 @@ struct cpu_dev {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];

-extern void display_cacheinfo(struct cpuinfo_x86 *c);
+extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);

#endif
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 19807b8..4fbd384 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
/* Handle the GX (Formally known as the GX2) */

if (c->x86 == 5 && c->x86_model == 5)
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
else
init_cyrix(c);
}
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index bb62b3e..2800074 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)

early_init_transmeta(c);

- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);

/* Print CMS and CPU revision */
max = cpuid_eax(0x80860000);
--
1.6.5



--
Regards/Gruss,
Boris.