I was working on this for the mjb tree but I thought I'd throw it out
here in case anyone else finds it interesting. This simple change to
the stack trace code uses the frame pointer to do the trace instead of
assuming any kernel address on the stack is a return address. It makes
for much more readable stack traces.
diff -purN linux-2.6.0-test9-virgin/arch/i386/kernel/traps.c linux-2.6.0-test9-stack/arch/i386/kernel/traps.c
--- linux-2.6.0-test9-virgin/arch/i386/kernel/traps.c 2003-11-03 10:34:18.000000000 -0800
+++ linux-2.6.0-test9-stack/arch/i386/kernel/traps.c 2003-11-04 13:51:20.000000000 -0800
@@ -93,17 +93,36 @@ asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
-void show_trace(struct task_struct *task, unsigned long * stack)
+#ifdef CONFIG_FRAME_POINTER
+void show_trace_fp(struct task_struct *task)
+{
+ unsigned long addr, ebp;
+
+ if (task == current) {
+ /* Grab ebp right from our regs */
+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
+ } else {
+ /* ebp is the last reg pushed by switch_to */
+ ebp = *(unsigned long *) task->thread.esp;
+ }
+
+ while (ebp) {
+ addr = *(unsigned long *) (ebp + 4);
+ if (!kernel_text_address(addr))
+ return;
+ printk(" [<%08lx>] ", addr);
+ print_symbol("%s\n", addr);
+ ebp = *(unsigned long *) ebp;
+ }
+}
+#else
+void show_trace_guess(unsigned long * stack)
{
unsigned long addr;
if (!stack)
stack = (unsigned long*)&stack;
- printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
- printk("\n");
-#endif
while (!kstack_end(stack)) {
addr = *stack++;
if (kernel_text_address(addr)) {
@@ -111,6 +130,20 @@ void show_trace(struct task_struct *task
print_symbol("%s\n", addr);
}
}
+}
+#endif
+
+void show_trace(struct task_struct *task, unsigned long * stack)
+{
+ printk("Call Trace:");
+#ifdef CONFIG_KALLSYMS
+ printk("\n");
+#endif
+#ifdef CONFIG_FRAME_POINTER
+ show_trace_fp(task);
+#else
+ show_trace_guess(stack);
+#endif
printk("\n");
}
--
Adam Litke (agl at us.ibm.com)
IBM Linux Technology Center
(503) 578 - 3283 t/l 775 - 3283
On 04 Nov 2003 14:13:49 -0800 Adam Litke <[email protected]> wrote:
>
> I was working on this for the mjb tree but I thought I'd throw it out
> here in case anyone else finds it interesting. This simple change to
> the stack trace code uses the frame pointer to do the trace instead of
> assuming any kernel address on the stack is a return address. It makes
> for much more readable stack traces.
I was asked by a colleague to do the same thing and came up with this
alternative version which, I think, gets it mostly right - I am not
sure about the stack trace from an OOPS.
Don't bother to tell me this is a bit of a hack - I know :-)
Patch relative to 2.6.0-test9.
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
diff -ruN 2.6.0-test9/arch/i386/kernel/process.c 2.6.0-test9-sfr.1/arch/i386/kernel/process.c
--- 2.6.0-test9/arch/i386/kernel/process.c 2003-10-09 10:22:41.000000000 +1000
+++ 2.6.0-test9-sfr.1/arch/i386/kernel/process.c 2003-10-28 17:00:40.000000000 +1100
@@ -243,7 +243,7 @@
".previous \n"
: "=r" (cr4): "0" (0));
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, ®s->esp);
+ show_trace(®s->esp, (unsigned long *)regs->ebp);
}
/*
diff -ruN 2.6.0-test9/arch/i386/kernel/traps.c 2.6.0-test9-sfr.1/arch/i386/kernel/traps.c
--- 2.6.0-test9/arch/i386/kernel/traps.c 2003-10-20 11:35:54.000000000 +1000
+++ 2.6.0-test9-sfr.1/arch/i386/kernel/traps.c 2003-10-29 15:45:50.000000000 +1100
@@ -91,9 +91,11 @@
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
+#define DECLARE_EBP register unsigned long *ebp asm("ebp")
+
static int kstack_depth_to_print = 24;
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(unsigned long *stack, unsigned long *frame)
{
unsigned long addr;
@@ -106,10 +108,24 @@
#endif
while (!kstack_end(stack)) {
addr = *stack++;
- if (kernel_text_address(addr)) {
+ if (
+#ifdef CONFIG_FRAME_POINTER
+ /*
+ * The plus 2 is because the return address
+ * is immediately above where the frame pointer
+ * points and we incremented the stack pointer
+ * above ...
+ */
+ (!frame || (stack == (frame + 2))) &&
+#endif
+ kernel_text_address(addr)) {
printk(" [<%08lx>] ", addr);
print_symbol("%s\n", addr);
}
+#ifdef CONFIG_FRAME_POINTER
+ if (frame && (stack == (frame + 2)))
+ frame = (unsigned long *)*frame;
+#endif
}
printk("\n");
}
@@ -121,19 +137,45 @@
/* User space on another CPU? */
if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1))
return;
- show_trace(tsk, (unsigned long *)esp);
+ /*
+ * switch_to pushes the frame pointer just before saving
+ * the stack pointer.
+ */
+ show_trace((unsigned long *)esp, (unsigned long *)*(unsigned long *)esp);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
unsigned long *stack;
int i;
+ DECLARE_EBP;
+ unsigned long *frame;
if (esp == NULL) {
- if (task)
+ if (task && (task != current)) {
esp = (unsigned long*)task->thread.esp;
- else
+ /*
+ * switch_to pushes the frame pointer just before
+ * saving the stack pointer.
+ */
+ frame = (unsigned long *)*esp;
+ } else {
esp = (unsigned long *)&esp;
+ frame = ebp;
+#ifdef CONFIG_FRAME_POINTER
+ /* we want the enclosing frame ... */
+ frame = (unsigned long *)*ebp;
+#endif
+ }
+ } else {
+ /*
+ * The only place that calls show_stack with task == NULL
+ * and esp != NULL is show_registers below, where esp
+ * points at the esp field of a pt_regs structure on the
+ * stack. So this gets us the saved ebp value.
+ */
+ frame = ((struct pt_regs *)((unsigned char *)esp -
+ offsetof(struct pt_regs, esp)))->ebp;
}
stack = esp;
@@ -145,7 +187,7 @@
printk("%08lx ", *stack++);
}
printk("\n");
- show_trace(task, esp);
+ show_trace(esp, frame);
}
/*
@@ -154,8 +196,9 @@
void dump_stack(void)
{
unsigned long stack;
+ DECLARE_EBP;
- show_trace(current, &stack);
+ show_trace(&stack, ebp);
}
EXPORT_SYMBOL(dump_stack);
diff -ruN 2.6.0-test9/include/asm-i386/processor.h 2.6.0-test9-sfr.1/include/asm-i386/processor.h
--- 2.6.0-test9/include/asm-i386/processor.h 2003-10-20 11:36:07.000000000 +1000
+++ 2.6.0-test9-sfr.1/include/asm-i386/processor.h 2003-10-28 16:57:42.000000000 +1100
@@ -492,7 +492,7 @@
extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
extern unsigned long thread_saved_pc(struct task_struct *tsk);
-void show_trace(struct task_struct *task, unsigned long *stack);
+void show_trace(unsigned long *stack, unsigned long *frame);
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)->thread_info))[1019])
>> I was working on this for the mjb tree but I thought I'd throw it out
>> here in case anyone else finds it interesting. This simple change to
>> the stack trace code uses the frame pointer to do the trace instead of
>> assuming any kernel address on the stack is a return address. It makes
>> for much more readable stack traces.
>
> I was asked by a colleague to do the same thing and came up with this
> alternative version which, I think, gets it mostly right - I am not
> sure about the stack trace from an OOPS.
>
> Don't bother to tell me this is a bit of a hack - I know :-)
>
> Patch relative to 2.6.0-test9.
What's the difference between the two patches, apart from the size?
Better error handling / functionality somehow?
M.
On Wed, 05 Nov 2003 11:56:34 -0800 "Martin J. Bligh" <[email protected]> wrote:
>
> What's the difference between the two patches, apart from the size?
> Better error handling / functionality somehow?
I think mine handles more cases, but is much more of a hack ...
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
>> What's the difference between the two patches, apart from the size?
>> Better error handling / functionality somehow?
>
> I think mine handles more cases, but is much more of a hack ...
I don't care if it's a hack, as long as it works ;-) Could you elaborate
on the other cases it handles?
Thanks,
M.
For what it's worth, on ppc I've made stack traces
slightly smarter, too. I think the ppc stack dump
code always used the frame
pointer, but it still output a bunch of garbage at the
end of the stack dump. As a byproduct of porting the
module tracking patch to ppc, I had to check each
address printed out by the stack dumper, and fairly
often they're not legal text addresses. I print a ? after
the questionable ones now. No big deal, but it is
*slightly* smarter. Here's the bit in question:
--- patch-backup/arch/ppc/kernel/process.c Mon Oct 13 17:32:04 2003
+++ arch/ppc/kernel/process.c Tue Oct 14 00:05:29 2003
@@ -514,7 +564,11 @@
break;
if (cnt++ % 7 == 0)
printk("\n");
- printk("%08lX ", i);
+ if (kernel_text_address(i)) { /* mostly just want to mark modules */
+ printk("%08lX ", i);
+ } else { /* FIXME: should we ignore fishy adrs like x86? */
+ printk("%08lX? ", i); /* Or append a '?' (ksymoops doesn't mind) */
+ }
if (cnt > 32) break;
if (__get_user(sp, (unsigned long **)sp))
break;
And here's the whole module tracking patch, including my port of it to ppc
(the paths have the first component removed, sorry):
# Purpose: add module info to oops log
# Source http://www.kernel.org/pub/linux/kernel/people/andrea/kernels/v2.4/2.4.23pre6aa3/90_module-oops-tracking-3
# See also http://marc.theaimsgroup.com/?l=linux-kernel&m=102772338115172&w=2
# ppc port added by [email protected] Oct 2003
diff -urNp z/arch/i386/kernel/traps.c 2.4.19rc3aa2/arch/i386/kernel/traps.c
--- arch/i386/kernel/traps.c Sat Jul 27 03:42:39 2002
+++ arch/i386/kernel/traps.c Sat Jul 27 03:41:56 2002
@@ -113,6 +113,7 @@ static inline int kernel_text_address(un
* module area. Of course it'd be better to test only
* for the .text subset... */
if (mod_bound(addr, 0, mod)) {
+ module_oops_tracking_mark(mod);
retval = 1;
break;
}
@@ -208,6 +209,8 @@ void show_registers(struct pt_regs *regs
esp = regs->esp;
ss = regs->xss & 0xffff;
}
+ module_oops_tracking_init();
+ kernel_text_address(regs->eip);
printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n",
smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags);
printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
@@ -226,8 +229,9 @@ void show_registers(struct pt_regs *regs
printk("\nStack: ");
show_stack((unsigned long*)esp);
+ module_oops_tracking_print();
- printk("\nCode: ");
+ printk("Code: ");
if(regs->eip < PAGE_OFFSET)
goto bad;
@@ -288,7 +292,8 @@ void die(const char * str, struct pt_reg
spin_lock_irq(&die_lock);
bust_spinlocks(1);
handle_BUG(regs);
- printk("%s: %04lx\n", str, err & 0xffff);
+ printk("%s: %04lx ", str, err & 0xffff);
+ printk(oops_id);
show_registers(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
diff -urNp z/include/linux/kernel.h 2.4.19rc3aa2/include/linux/kernel.h
--- include/linux/kernel.h Sat Jul 27 03:42:39 2002
+++ include/linux/kernel.h Sat Jul 27 04:57:04 2002
@@ -109,6 +109,8 @@ extern const char *print_tainted(void);
extern void dump_stack(void);
+extern char *oops_id;
+
#if DEBUG
#define pr_debug(fmt,arg...) \
printk(KERN_DEBUG fmt,##arg)
diff -urNp z/include/linux/module.h 2.4.19rc3aa2/include/linux/module.h
--- include/linux/module.h Sat Jul 27 03:42:39 2002
+++ include/linux/module.h Sat Jul 27 04:57:40 2002
@@ -110,6 +110,7 @@ struct module_info
#define MOD_USED_ONCE 16
#define MOD_JUST_FREED 32
#define MOD_INITIALIZING 64
+#define MOD_OOPS_PRINT 128UL
/* Values for query_module's which. */
@@ -411,4 +412,14 @@ __attribute__((section("__ksymtab"))) =
#define SET_MODULE_OWNER(some_struct) do { } while (0)
#endif
+#ifdef CONFIG_MODULES
+extern void module_oops_tracking_init(void);
+extern void module_oops_tracking_mark(struct module *);
+extern void module_oops_tracking_print(void);
+#else
+#define module_oops_tracking_init() do { } while (0)
+#define module_oops_tracking_mark(x) do { } while (0)
+#define module_oops_tracking_print() do { } while (0)
+#endif
+
#endif /* _LINUX_MODULE_H */
diff -urNp z/init/version.c 2.4.19rc3aa2/init/version.c
--- init/version.c Sat Jul 27 03:42:39 2002
+++ init/version.c Sat Jul 27 05:03:14 2002
@@ -24,3 +24,5 @@ struct new_utsname system_utsname = {
const char *linux_banner =
"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
+
+const char *oops_id = UTS_RELEASE " " UTS_VERSION "\n";
diff -urNp z/kernel/module.c 2.4.19rc3aa2/kernel/module.c
--- kernel/module.c Sat Jul 27 03:42:39 2002
+++ kernel/module.c Sat Jul 27 02:38:24 2002
@@ -1242,6 +1242,50 @@ struct seq_operations ksyms_op = {
show: s_show
};
+/*
+ * The module tracking logic assumes the module list doesn't
+ * change under the oops. In case of a race we could get not
+ * the exact information about the affected modules, but it's
+ * almost impossible to trigger and it's a not interesting case.
+ * This code runs protected by the die_lock spinlock, the arch/
+ * caller takes care of it.
+ */
+void module_oops_tracking_init(void)
+{
+ struct module * mod;
+
+ for (mod = module_list; mod != &kernel_module; mod = mod->next)
+ mod->flags &= ~MOD_OOPS_PRINT;
+}
+
+void module_oops_tracking_mark(struct module * mod)
+{
+ mod->flags |= MOD_OOPS_PRINT;
+}
+
+void module_oops_tracking_print(void)
+{
+ struct module *mod;
+ int i = 0;
+
+ for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+ if (!(mod->flags & MOD_OOPS_PRINT))
+ continue;
+ if (!i)
+ printk("Modules:");
+ if (i && !(i % 2))
+ printk("\n ");
+ i++;
+
+ printk(" [(%s:<%p>:<%p>)]",
+ mod->name,
+ (char *) mod + mod->size_of_struct,
+ (char *) mod + mod->size);
+ }
+ if (i)
+ printk("\n");
+}
+
#else /* CONFIG_MODULES */
/* Dummy syscalls for people who don't want modules */
--- patch-backup/arch/ppc/kernel/process.c Mon Oct 13 17:32:04 2003
+++ arch/ppc/kernel/process.c Tue Oct 14 00:05:29 2003
@@ -60,6 +60,53 @@
#undef SHOW_TASK_SWITCHES
#undef CHECK_STACK
+/*
+ * If the address is either in the .text section of the
+ * kernel, or in the vmalloc'ed module regions, it *may*
+ * be the address of a calling routine
+ */
+
+extern char _etext[], _stext[];
+
+#ifdef CONFIG_MODULES
+#include <linux/module.h>
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+ int retval = 0;
+ struct module *mod;
+
+ if (addr >= (unsigned long) &_stext &&
+ addr <= (unsigned long) &_etext)
+ return 1;
+
+ for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+ /* mod_bound tests for addr being inside the vmalloc'ed
+ * module area. Of course it'd be better to test only
+ * for the .text subset... */
+ if (mod_bound(addr, 0, mod)) {
+ module_oops_tracking_mark(mod);
+ retval = 1;
+ break;
+ }
+ }
+
+ return retval;
+}
+
+#else
+
+static inline int kernel_text_address(unsigned long addr)
+{
+ return (addr >= (unsigned long) &_stext &&
+ addr <= (unsigned long) &_etext);
+}
+
+#endif
+
#if defined(CHECK_STACK)
unsigned long
kernel_stack_top(struct task_struct *tsk)
@@ -243,6 +290,8 @@
{
int i;
+ module_oops_tracking_init();
+ kernel_text_address(regs->nip);
printk("NIP: %08lX XER: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx %s\n",
regs->nip, regs->xer, regs->link, regs->gpr[1], regs,regs->trap, print_tainted());
printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
@@ -303,6 +352,7 @@
}
out:
print_backtrace((unsigned long *)regs->gpr[1]);
+ module_oops_tracking_print();
}
void exit_thread(void)
@@ -514,7 +564,11 @@
break;
if (cnt++ % 7 == 0)
printk("\n");
- printk("%08lX ", i);
+ if (kernel_text_address(i)) { /* mostly just want to mark modules */
+ printk("%08lX ", i);
+ } else { /* FIXME: should we ignore fishy adrs like x86? */
+ printk("%08lX? ", i); /* Or append a '?' (ksymoops doesn't mind) */
+ }
if (cnt > 32) break;
if (__get_user(sp, (unsigned long **)sp))
break;
--- arch/ppc/kernel/traps.c.old Mon Oct 13 17:23:40 2003
+++ arch/ppc/kernel/traps.c Mon Oct 13 17:25:27 2003
@@ -97,7 +97,8 @@
set_backlight_enable(1);
set_backlight_level(BACKLIGHT_MAX);
#endif
- printk("Oops: %s, sig: %ld\n", str, err);
+ printk("Oops: %s, sig: %ld ", str, err);
+ printk(oops_id);
show_regs(fp);
#ifdef CONFIG_BOOTX_TEXT
force_printk_to_btext = 0;
On Tue, 2003-11-04 at 14:13, Adam Litke wrote:
> I was working on this for the mjb tree but I thought I'd throw it out
> here in case anyone else finds it interesting. This simple change to
> the stack trace code uses the frame pointer to do the trace instead of
> assuming any kernel address on the stack is a return address. It makes
> for much more readable stack traces.
Here is an updated patch that fixes a very stupid NULL pointer
dereference. Also, it now avoids contradictory stack data when thread
== current.
diff -purN linux-2.6.0-test9-virgin/arch/i386/kernel/traps.c linux-2.6.0-test9-stack/arch/i386/kernel/traps.c
--- linux-2.6.0-test9-virgin/arch/i386/kernel/traps.c 2003-11-03 10:34:18.000000000 -0800
+++ linux-2.6.0-test9-stack/arch/i386/kernel/traps.c 2003-11-06 09:47:19.000000000 -0800
@@ -91,19 +91,58 @@ asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
-static int kstack_depth_to_print = 24;
+static int kstack_depth_to_print = 128;
-void show_trace(struct task_struct *task, unsigned long * stack)
+#ifdef CONFIG_FRAME_POINTER
+void show_stack_frame(unsigned long start, unsigned long end)
+{
+ int i;
+
+ printk(" ");
+ for (i = start; i < end; i += 4) {
+ if ((i - start) && ((i - start)%24 == 0))
+ printk("\n ");
+ printk("%08lx ", *(unsigned long *) i);
+ }
+ printk("\n");
+}
+
+void show_trace_fp(struct task_struct *task, unsigned long * stack)
+{
+ unsigned long addr, ebp;
+
+ if (!task)
+ task = current;
+
+ if (task == current) {
+ /* Grab ebp right from our regs */
+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
+ } else {
+ /* ebp is the last reg pushed by switch_to */
+ ebp = *(unsigned long *) task->thread.esp;
+ }
+
+ show_stack_frame((unsigned long) stack, ebp+4);
+ while (ebp) {
+ addr = *(unsigned long *) (ebp + 4);
+ if (!kernel_text_address(addr))
+ return;
+ printk(" [<%08lx>] ", addr);
+ print_symbol("%s\n", addr);
+
+ /* Show the stack frame starting with args */
+ show_stack_frame(ebp + 8, (*(unsigned long *) ebp) + 4);
+ ebp = *(unsigned long *) ebp;
+ }
+}
+#else
+void show_trace_guess(unsigned long * stack)
{
unsigned long addr;
if (!stack)
stack = (unsigned long*)&stack;
- printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
- printk("\n");
-#endif
while (!kstack_end(stack)) {
addr = *stack++;
if (kernel_text_address(addr)) {
@@ -111,6 +150,20 @@ void show_trace(struct task_struct *task
print_symbol("%s\n", addr);
}
}
+}
+#endif
+
+void show_trace(struct task_struct *task, unsigned long * stack)
+{
+ printk("Call Trace:");
+#ifdef CONFIG_KALLSYMS
+ printk("\n");
+#endif
+#ifdef CONFIG_FRAME_POINTER
+ show_trace_fp(task, stack);
+#else
+ show_trace_guess(stack);
+#endif
printk("\n");
}
@@ -136,6 +189,7 @@ void show_stack(struct task_struct *task
esp = (unsigned long *)&esp;
}
+#ifndef CONFIG_FRAME_POINTER
stack = esp;
for(i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
@@ -145,6 +199,7 @@ void show_stack(struct task_struct *task
printk("%08lx ", *stack++);
}
printk("\n");
+#endif
show_trace(task, esp);
}
--
Adam Litke (agl at us.ibm.com)
IBM Linux Technology Center
(503) 578 - 3283 t/l 775 - 3283
Adam Litke <[email protected]> writes:
> -static int kstack_depth_to_print = 24;
> +static int kstack_depth_to_print = 128;
I would not do that. It makes too much of the oops scroll away.
> + show_stack_frame((unsigned long) stack, ebp+4);
I think this needs much more sanity checking, otherwise the risk
of recursive oops etc is too big.
I would always check first if the stack value read from ebp is inside the
stack page of the current process. If you're paranoid you could even use
__get_user() to catch exceptions.
-Andi