2007-08-13 14:32:48

by Jan Beulich

Subject: [PATCH] x86: optionally show last exception from/to register contents

.. when dumping register state. This is particularly useful when gcc has
managed to tail-call optimize an indirect call which happens to go through
a NULL (or otherwise invalid) pointer.
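
As an illustration of the scenario (not from the patch, purely
illustrative): with optimization enabled, gcc may compile the indirect
call below into a plain indirect jmp rather than a call, so a NULL
handler faults with EIP/RIP 0 and no return address on the stack pointing
back at the caller; the LER MSR pair still records where the branch came
from.

	struct ops {
		void (*handler)(void);
	};

	void dispatch(struct ops *ops)
	{
		ops->handler();		/* may become "jmp *", not "call" */
	}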

Signed-off-by: Jan Beulich <[email protected]>

arch/i386/kernel/cpu/amd.c | 4 ++++
arch/i386/kernel/cpu/common.c | 2 ++
arch/i386/kernel/cpu/intel.c | 20 ++++++++++++++------
arch/i386/kernel/traps.c | 27 +++++++++++++++++++++++++++
arch/x86_64/kernel/setup.c | 23 ++++++++++++++++++-----
arch/x86_64/kernel/traps.c | 27 +++++++++++++++++++++++++++
include/asm-i386/msr-index.h | 3 +++
include/asm-i386/processor.h | 4 ++++
include/asm-x86_64/processor.h | 3 +++
9 files changed, 102 insertions(+), 11 deletions(-)

--- linux-2.6.23-rc3/arch/i386/kernel/cpu/amd.c 2007-08-13 08:59:44.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/i386/kernel/cpu/amd.c 2007-08-07 10:42:55.000000000 +0200
@@ -238,9 +238,13 @@ static void __cpuinit init_amd(struct cp
case 0x10:
case 0x11:
set_bit(X86_FEATURE_K8, c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_IA32_LASTINTFROMIP;
break;
case 6:
set_bit(X86_FEATURE_K7, c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_IA32_LASTINTFROMIP;
break;
}
if (c->x86 >= 6)
--- linux-2.6.23-rc3/arch/i386/kernel/cpu/common.c 2007-08-13 08:59:44.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/i386/kernel/cpu/common.c 2007-08-07 10:42:55.000000000 +0200
@@ -503,6 +503,8 @@ static void __cpuinit identify_cpu(struc

/* Init Machine Check Exception if available. */
mcheck_init(c);
+
+ ler_enable();
}

void __init identify_boot_cpu(void)
--- linux-2.6.23-rc3/arch/i386/kernel/cpu/intel.c 2007-07-09 01:32:17.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/i386/kernel/cpu/intel.c 2007-08-07 10:42:55.000000000 +0200
@@ -188,15 +188,23 @@ static void __cpuinit init_intel(struct
}
#endif

- if (c->x86 == 15) {
+ switch (c->x86) {
+ case 15:
set_bit(X86_FEATURE_P4, c->x86_capability);
set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
- }
- if (c->x86 == 6)
+ if (c->x86_model >= 0x03)
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_P4_LER_FROM_LIP;
+ break;
+ case 6:
set_bit(X86_FEATURE_P3, c->x86_capability);
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ if (c->x86_model >= 0x0e)
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_IA32_LASTINTFROMIP;
+ break;
+ }

if (cpu_has_ds) {
unsigned int l1;
--- linux-2.6.23-rc3/arch/i386/kernel/traps.c 2007-08-13 08:59:45.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/i386/kernel/traps.c 2007-08-07 10:42:55.000000000 +0200
@@ -321,6 +321,13 @@ void show_registers(struct pt_regs *regs
unsigned int code_len = code_bytes;
unsigned char c;

+ if (__get_cpu_var(ler_msr)) {
+ u32 from, to, hi;
+
+ rdmsr(__get_cpu_var(ler_msr), from, hi);
+ rdmsr(__get_cpu_var(ler_msr) + 1, to, hi);
+ printk("LER: %08x -> %08x\n", from, to);
+ }
printk("\n" KERN_EMERG "Stack: ");
show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);

@@ -360,6 +367,18 @@ int is_valid_bugaddr(unsigned long eip)
return ud2 == 0x0b0f;
}

+DEFINE_PER_CPU(u32, ler_msr);
+int ler_enabled = 0;
+
+void ler_enable(void) {
+ if (__get_cpu_var(ler_msr)) {
+ u64 debugctl;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1);
+ }
+}
+
/*
* This is gone through when something in the kernel has done something bad and
* is about to be terminated.
@@ -846,6 +865,7 @@ fastcall void __kprobes do_debug(struct
struct task_struct *tsk = current;

get_debugreg(condition, 6);
+ ler_enable();

if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
@@ -1239,3 +1259,10 @@ static int __init code_bytes_setup(char
return 1;
}
__setup("code_bytes=", code_bytes_setup);
+
+static int __init ler_setup(char *s)
+{
+ ler_enabled = 1;
+ return 1;
+}
+__setup("ler", ler_setup);
--- linux-2.6.23-rc3/arch/x86_64/kernel/setup.c 2007-08-13 08:59:56.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/x86_64/kernel/setup.c 2007-08-07 10:42:55.000000000 +0200
@@ -617,6 +617,9 @@ static void __cpuinit init_amd(struct cp
/* Family 10 doesn't support C states in MWAIT so don't use it */
if (c->x86 == 0x10 && !force_mwait)
clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
+
+ if (ler_enabled && c->x86 <= 16)
+ __get_cpu_var(ler_msr) = MSR_IA32_LASTINTFROMIP;
}

static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -736,13 +739,22 @@ static void __cpuinit init_intel(struct
c->x86_phys_bits = 36;
}

- if (c->x86 == 15)
+ switch (c->x86) {
+ case 15:
c->x86_cache_alignment = c->x86_clflush_size * 2;
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- if (c->x86 == 6)
+ if (c->x86_model >= 0x03)
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_P4_LER_FROM_LIP;
+ break;
+ case 6:
+ if (c->x86_model >= 0x0e)
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ if (ler_enabled)
+ __get_cpu_var(ler_msr) = MSR_IA32_LASTINTFROMIP;
+ break;
+ }
if (c->x86 == 15)
set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
else
@@ -906,6 +918,7 @@ void __cpuinit identify_cpu(struct cpuin
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
+ ler_enable();
}


--- linux-2.6.23-rc3/arch/x86_64/kernel/traps.c 2007-08-13 08:59:56.000000000 +0200
+++ 2.6.23-rc3-x86-ler/arch/x86_64/kernel/traps.c 2007-08-07 10:42:55.000000000 +0200
@@ -426,6 +426,13 @@ void show_registers(struct pt_regs *regs
* time of the fault..
*/
if (in_kernel) {
+ if (__get_cpu_var(ler_msr)) {
+ u64 from, to;
+
+ rdmsrl(__get_cpu_var(ler_msr), from);
+ rdmsrl(__get_cpu_var(ler_msr) + 1, to);
+ printk("LER: %016Lx -> %016Lx\n", from, to);
+ }
printk("Stack: ");
_show_stack(NULL, regs, (unsigned long*)rsp);

@@ -464,6 +471,18 @@ void out_of_line_bug(void)
EXPORT_SYMBOL(out_of_line_bug);
#endif

+DEFINE_PER_CPU(u32, ler_msr);
+int ler_enabled = 0;
+
+void ler_enable(void) {
+ if (__get_cpu_var(ler_msr)) {
+ u64 debugctl;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1);
+ }
+}
+
static DEFINE_SPINLOCK(die_lock);
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -849,6 +868,7 @@ asmlinkage void __kprobes do_debug(struc
siginfo_t info;

get_debugreg(condition, 6);
+ ler_enable();

if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
SIGTRAP) == NOTIFY_STOP)
@@ -1136,3 +1156,10 @@ static int __init kstack_setup(char *s)
return 0;
}
early_param("kstack", kstack_setup);
+
+static int __init ler_setup(char *s)
+{
+ ler_enabled = 1;
+ return 1;
+}
+__setup("ler", ler_setup);
--- linux-2.6.23-rc3/include/asm-i386/msr-index.h 2007-07-09 01:32:17.000000000 +0200
+++ 2.6.23-rc3-x86-ler/include/asm-i386/msr-index.h 2007-08-07 10:42:55.000000000 +0200
@@ -63,6 +63,9 @@
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de

+#define MSR_P4_LER_FROM_LIP 0x000001d7
+#define MSR_P4_LER_TO_LIP 0x000001d8
+
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
--- linux-2.6.23-rc3/include/asm-i386/processor.h 2007-08-13 09:01:02.000000000 +0200
+++ 2.6.23-rc3-x86-ler/include/asm-i386/processor.h 2007-08-07 10:42:55.000000000 +0200
@@ -643,6 +643,10 @@ static inline unsigned int cpuid_edx(uns
return edx;
}

+DECLARE_PER_CPU(u32, ler_msr);
+extern int ler_enabled;
+void ler_enable(void);
+
/* generic versions from gas */
#define GENERIC_NOP1 ".byte 0x90\n"
#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
--- linux-2.6.23-rc3/include/asm-x86_64/processor.h 2007-08-13 09:01:07.000000000 +0200
+++ 2.6.23-rc3-x86-ler/include/asm-x86_64/processor.h 2007-08-07 10:42:55.000000000 +0200
@@ -333,6 +333,9 @@ struct extended_sigtable {
struct extended_signature sigs[0];
};

+DECLARE_PER_CPU(u32, ler_msr);
+extern int ler_enabled;
+void ler_enable(void);

#define ASM_NOP1 K8_NOP1
#define ASM_NOP2 K8_NOP2



2007-08-13 14:53:17

by Andi Kleen

Subject: Re: [PATCH] x86: optionally show last exception from/to register contents

On Mon, Aug 13, 2007 at 12:33:05PM +0100, Jan Beulich wrote:


>
> if (cpu_has_ds) {
> unsigned int l1;
> --- linux-2.6.23-rc3/arch/i386/kernel/traps.c 2007-08-13 08:59:45.000000000 +0200
> +++ 2.6.23-rc3-x86-ler/arch/i386/kernel/traps.c 2007-08-07 10:42:55.000000000 +0200
> @@ -321,6 +321,13 @@ void show_registers(struct pt_regs *regs
> unsigned int code_len = code_bytes;
> unsigned char c;
>
> + if (__get_cpu_var(ler_msr)) {
> + u32 from, to, hi;
> +
> + rdmsr(__get_cpu_var(ler_msr), from, hi);
> + rdmsr(__get_cpu_var(ler_msr) + 1, to, hi);
> + printk("LER: %08x -> %08x\n", from, to);
> + }

This seems racy -- AFAIK the MSR will record the last branch
before an interrupt too, and the trap handlers enable interrupts
before coming here.

Can't think of a good way to avoid that for page fault at least
without impacting interrupt latency or reading the MSR always.

The other thing is that this should use print_symbol()
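
For illustration only (not in the posted patch), the LER line could then
look roughly like this (print_symbol() takes a format containing %s):

	printk("LER: %08x", from);
	print_symbol(" (%s)", from);
	printk(" -> %08x", to);
	print_symbol(" (%s)", to);
	printk("\n");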

> printk("\n" KERN_EMERG "Stack: ");
> show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
>
> @@ -360,6 +367,18 @@ int is_valid_bugaddr(unsigned long eip)
> return ud2 == 0x0b0f;
> }
>
> +DEFINE_PER_CPU(u32, ler_msr);
> +int ler_enabled = 0;
> +
> +void ler_enable(void) {
> + if (__get_cpu_var(ler_msr)) {
> + u64 debugctl;
> +
> + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1);

Better use checking_rd/wrmsrl and disable on fail -- often emulators
fake specific CPUs, but don't implement all debugging features.
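
Roughly like this, as a sketch only (the *_safe() accessor names are
assumed from that era's <asm/msr.h>, not taken from the patch):

	void ler_enable(void)
	{
		u32 lo, hi;

		if (!__get_cpu_var(ler_msr))
			return;

		/* Emulated CPU without DEBUGCTL support: turn LER off. */
		if (rdmsr_safe(MSR_IA32_DEBUGCTLMSR, &lo, &hi) ||
		    wrmsr_safe(MSR_IA32_DEBUGCTLMSR, lo | 1, hi))
			__get_cpu_var(ler_msr) = 0;
	}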

> +static int __init ler_setup(char *s)
> +{
> + ler_enabled = 1;
> + return 1;
> +}
> +__setup("ler", ler_setup);

I don't think the option is very useful.

BTW I have a patch somewhere to save/report last branch on exception
too, but it never seemed suitable for mainline because of its
high overhead. If someone submitted a full BTS driver I would
be tempted though :)

-Andi

2007-08-13 15:23:45

by Jan Beulich

Subject: Re: [PATCH] x86: optionally show last exception from/to register contents

>>> Andi Kleen <[email protected]> 13.08.07 15:08 >>>
>On Mon, Aug 13, 2007 at 12:33:05PM +0100, Jan Beulich wrote:
>> + if (__get_cpu_var(ler_msr)) {
>> + u32 from, to, hi;
>> +
>> + rdmsr(__get_cpu_var(ler_msr), from, hi);
>> + rdmsr(__get_cpu_var(ler_msr) + 1, to, hi);
>> + printk("LER: %08x -> %08x\n", from, to);
>> + }
>
>This seems racy -- AFAIK the MSR will record the last branch
>before an interrupt too, and the trap handlers enable interrupts
>before coming here.
>
>Can't think of a good way to avoid that for page fault at least
>without impacting interrupt latency or reading the MSR always.

That's the problem. Other than going through interrupt gates *and* keeping
interrupts disabled long enough, there's nothing you can do, except turning off
tracing before re-enabling interrupts, which in turn you would likely reject
because of the performance overhead of the necessary rdmsr/wrmsr pair.
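
For illustration, that alternative would amount to something like the
sketch below (not part of the patch), called at the top of the fault
handler before interrupts are re-enabled, with ler_enable() re-arming
tracing once the dump is done:

	static inline void ler_freeze(void)
	{
		u64 debugctl;

		if (!__get_cpu_var(ler_msr))
			return;
		/*
		 * Clear DEBUGCTL bit 0 (LBR) so an interrupt taken after
		 * this point cannot overwrite the recorded from/to pair.
		 */
		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~1ULL);
	}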

>The other thing is that this should use print_symbol()

I considered this, but decided against it due to the address pair being displayed.
But I could certainly change that.

>> @@ -360,6 +367,18 @@ int is_valid_bugaddr(unsigned long eip)
>> return ud2 == 0x0b0f;
>> }
>>
>> +DEFINE_PER_CPU(u32, ler_msr);
>> +int ler_enabled = 0;
>> +
>> +void ler_enable(void) {
>> + if (__get_cpu_var(ler_msr)) {
>> + u64 debugctl;
>> +
>> + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
>> + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1);
>
>Better use checking_rd/wrmsrl and disable on fail -- often emulators
>fake specific CPUs, but don't implement all debugging features.

Okay, if you consider emulators important here (this is one of the reasons for
having to turn on the feature via a command line option).

>> +static int __init ler_setup(char *s)
>> +{
>> + ler_enabled = 1;
>> + return 1;
>> +}
>> +__setup("ler", ler_setup);
>
>I don't think the option is very useful.

Apart from the above, I assume it's also useful initially to avoid boot problems
in case the model->MSR mapping isn't fully cooked yet (obviously I don't have
machines with all the different CPU families/models around to test, so the parts
I wasn't able to test are coded purely from the manuals).

>BTW I have a patch somewhere to save/report last branch on exception
>too, but it never seemed suitable for mainline because of its

I thought you did, but since I never saw it appear in any release, I posted
this one...

So, especially with your first concern in mind - is it worth revising this
patch at all to address the other change requests you voiced?

>high overhead. If someone submitted a full BTS driver I would
>be tempted though :)

That shouldn't be too difficult.

Jan

2007-08-13 15:29:49

by Andi Kleen

Subject: Re: [PATCH] x86: optionally show last exception from/to register contents

> I thought you did, but since I never saw it appear in any release, I posted
> this one...

It's a little different (and somewhat hackish, I admit):
ftp://firstfloor.org/pub/ak/x86_64/debug/last-branch
It has far more overhead than yours, of course.

The only way to merge something like that would be dynamic
patching and I was never quite motivated for that.

> So, especially with your first concern in mind - is it worth revising this
> patch at all to address the other change requests you voiced?

The patch is useful even with the race; just document somewhere
that it is unreliable (perhaps in the printk itself) and make the
other changes.

I would also suggest to enable it by default.

-Andi

2007-08-13 16:20:31

by Keith Owens

Subject: Re: [PATCH] x86: optionally show last exception from/to register contents

Andi Kleen (on Mon, 13 Aug 2007 15:08:45 +0200) wrote:
>On Mon, Aug 13, 2007 at 12:33:05PM +0100, Jan Beulich wrote:
>
>
>>
>> if (cpu_has_ds) {
>> unsigned int l1;
>> --- linux-2.6.23-rc3/arch/i386/kernel/traps.c 2007-08-13 08:59:45.000000000 +0200
>> +++ 2.6.23-rc3-x86-ler/arch/i386/kernel/traps.c 2007-08-07 10:42:55.000000000 +0200
>> @@ -321,6 +321,13 @@ void show_registers(struct pt_regs *regs
>> unsigned int code_len = code_bytes;
>> unsigned char c;
>>
>> + if (__get_cpu_var(ler_msr)) {
>> + u32 from, to, hi;
>> +
>> + rdmsr(__get_cpu_var(ler_msr), from, hi);
>> + rdmsr(__get_cpu_var(ler_msr) + 1, to, hi);
>> + printk("LER: %08x -> %08x\n", from, to);
>> + }
>
>This seems racy -- AFAIK the MSR will record the last branch
>before an interrupt too, and the trap handlers enable interrupts
>before coming here.
>
>Can't think of a good way to avoid that for page fault at least
>without impacting interrupt latency or reading the MSR always.

KDB used to have a "last branch recording" (lbr) feature. The page
fault handler was modified to disable lbr before entering
do_page_fault(). Nobody seemed to care about the slight slowdown, but
also nobody seemed to be using that feature for debugging; we rarely
get wild branches into the middle of nowhere. lbr was messy to
maintain for very little gain, so I removed it from KDB as of
2.6.17-i386-2.

2007-08-13 17:04:22

by Randy Dunlap

Subject: Re: [PATCH] x86: optionally show last exception from/to register contents

On Mon, 13 Aug 2007 16:21:10 +0200 Andi Kleen wrote:

> > I thought you did, but since I never saw it appear in any release, I posted
> > this one...
>
> It's a little different (and somewhat hackish I admit)
> ftp://firstfloor.org/pub/ak/x86_64/debug/last-branch
> It has far more overhead than yours of course.
>
> The only way to merge something like that would be dynamic
> patching and I was never quite motivated for that.
>
> > So, especially with your first concern in mind - is it worth revising this
> > patch at all to address the other change requests you voiced?
>
> The patch is useful even with the race, just document somewhere
> that it is unreliable (perhaps in the printk itself) and do the
> other changes.

and add "ler" entry to Documentation/kernel-parameters.txt, please,
with what it means.
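
Something along these lines, perhaps (wording illustrative only):

	ler		[IA-32,X86-64]
			Report the CPU's last exception record MSR pair
			(branch from/to addresses) when dumping register
			state, on processors where the MSR numbers are
			known. Note that the recorded branch may already
			have been overwritten by an interrupt.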

> I would also suggest to enable it by default.


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***