Hi Ingo,
Please consider for inclusion.
This series of patches adds very basic latency trace support for powerpc.
Compile, boot and trace tested (a little) on Celleb.
Thanks in advance.
-- owa
TOSHIBA, Corporate Software Engineering Center.
Add mcount() and _mcount() for latency trace support.
Signed-off-by: Tsutomu OWA <[email protected]>
-- owa
diff -rup linux-2.6.21-rt1/arch/powerpc/kernel/entry_64.S rt/arch/powerpc/kernel/entry_64.S
--- linux-2.6.21-rt1/arch/powerpc/kernel/entry_64.S 2007-05-07 14:08:12.000000000 +0900
+++ rt/arch/powerpc/kernel/entry_64.S 2007-05-08 18:54:07.000000000 +0900
@@ -832,3 +832,63 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
+
+#ifdef CONFIG_MCOUNT
+/*
+ * Code mostly taken from entry_32.S.
+ */
+#define MCOUNT_FRAME_SIZE 32 /* NOTE(review): smaller than the STACK_FRAME_OVERHEAD frame _mcount uses before calling .__trace -- confirm 32 bytes is enough */
+_GLOBAL(mcount) /* calls .__trace with lr in r3 and the parent lr in r4 when mcount_enabled is non-zero */
+ stdu r1,-MCOUNT_FRAME_SIZE(r1) /* push a new stack frame */
+ mflr r3 /* r3 = our lr */
+
+ LOAD_REG_ADDR(r5,mcount_enabled)
+ lwz r5,0(r5) /* r5 = current value of mcount_enabled */
+ std r3,MCOUNT_FRAME_SIZE+16(r1) /* save lr; it is reloaded at 1: below */
+ cmpwi r5,0
+ beq 1f /* mcount_enabled == 0: skip the call */
+
+ /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */
+ ld r4,MCOUNT_FRAME_SIZE(r1) /* r4 = back chain pointer stored at the old r1 */
+ ld r4,16(r4) /* r4 = lr saved in that frame */
+ bl .__trace
+ nop
+1:
+ ld r0,MCOUNT_FRAME_SIZE+16(r1) /* restore saved lr */
+ mtlr r0
+ addi r1,r1,MCOUNT_FRAME_SIZE /* pop the frame pushed above */
+ blr
+
+/*
+ * Based on glibc-2.4/sysdeps/powerpc/powerpc64/ppc-mcount.S
+ *
+ * We don't need to save the parameter-passing registers as gcc takes
+ * care of that for us. Thus this function looks fairly normal.
+ * In fact, the generic code would work for us.
+ */
+_GLOBAL(_mcount) /* calls .__trace with lr in r3 and the parent lr in r4, unless in real mode or mcount_enabled == 0 */
+ /* return if we're in real mode. */
+ mfmsr r3
+ andi. r0,r3,MSR_IR|MSR_DR /* see if relocation is on? */
+ beqlr /* if not, do nothing. */
+ /* we're in translation mode. keep going. */
+ mflr r3
+ ld r11,0(r1) /* load back chain ptr */
+ stdu r1,-STACK_FRAME_OVERHEAD(r1) /* push a new stack frame */
+ std r3,STACK_FRAME_OVERHEAD+16(r1) /* save our lr; it is reloaded at 1: below */
+ ld r4,16(r11) /* LR in back chain */
+ LOAD_REG_ADDR(r5,mcount_enabled)
+ lwz r5,0(r5)
+ cmpwi r5,0 /* see if mcount_enabled? */
+ beq 1f /* if disabled, then skip */
+
+ /* r3 contains lr (eip); r4 already holds parent lr (parent_eip), loaded above */
+ bl .__trace
+ nop
+1:
+ ld r0,STACK_FRAME_OVERHEAD+16(r1) /* restore saved LR */
+ mtlr r0
+ addi r1,r1,STACK_FRAME_OVERHEAD /* pop the frame pushed above */
+ blr
+
+#endif /* CONFIG_MCOUNT */
Add dummy implementations of save_stack_trace() and early_printk() for
now, and export _mcount so that the build succeeds.
Signed-off-by: Tsutomu OWA <[email protected]>
-- owa
diff -rup linux-2.6.21-rt1/arch/powerpc/kernel/setup_64.c rt/arch/powerpc/kernel/setup_64.c
--- linux-2.6.21-rt1/arch/powerpc/kernel/setup_64.c 2007-04-26 12:08:32.000000000 +0900
+++ rt/arch/powerpc/kernel/setup_64.c 2007-05-08 18:50:46.000000000 +0900
@@ -606,3 +606,22 @@ struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif /* CONFIG_PPC_INDIRECT_IO */
+#ifdef CONFIG_STACKTRACE
+#include <linux/stacktrace.h>
+void notrace save_stack_trace(struct stack_trace *trace, /* dummy for now: both arguments are unused */
+ struct task_struct *task)
+{ /* intentionally empty: nothing is written to *trace */
+}
+#endif /* CONFIG_STACKTRACE */
+
+#ifdef CONFIG_EARLY_PRINTK
+void notrace early_printk(const char *fmt, ...) /* dummy for now: fmt and the variadic arguments are ignored */
+{
+ BUG(); /* any call hits BUG() instead of printing */
+}
+#endif /* CONFIG_EARLY_PRINTK */
+
+#ifdef CONFIG_MCOUNT
+extern void _mcount(void); /* NOTE(review): presumably the _GLOBAL(_mcount) assembly routine this series adds to entry_64.S -- confirm */
+EXPORT_SYMBOL(_mcount); /* exported "to compile", per the commit message */
+#endif /* CONFIG_MCOUNT */
Mark raw_local_irq_restore() and timebase_read() as notrace, since
they are called from __trace().
Signed-off-by: Tsutomu OWA <[email protected]>
-- owa
diff -rup linux-2.6.21-rt1/arch/powerpc/kernel/irq.c rt/arch/powerpc/kernel/irq.c
--- linux-2.6.21-rt1/arch/powerpc/kernel/irq.c 2007-05-07 14:08:12.000000000 +0900
+++ rt/arch/powerpc/kernel/irq.c 2007-05-07 14:05:30.000000000 +0900
@@ -111,7 +111,7 @@ static inline void set_soft_enabled(unsi
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
-void raw_local_irq_restore(unsigned long en)
+void notrace raw_local_irq_restore(unsigned long en)
{
/*
* get_paca()->soft_enabled = en;
--- linux-2.6.21-rt1/arch/powerpc/kernel/time.c 2007-05-09 17:52:05.000000000 +0900
+++ rt/arch/powerpc/kernel/time.c 2007-05-09 18:03:39.000000000 +0900
@@ -923,7 +923,7 @@ void div128_by_32(u64 dividend_high, u64
#include <linux/clocksource.h>
-static cycle_t timebase_read(void)
+static cycle_t notrace timebase_read(void)
{
return (cycle_t)get_tb();
}
Rename the variable "mcount" in xmon to "xmon_mcount", since it conflicts
with the mcount() function used by the latency tracer.
Signed-off-by: Tsutomu OWA <[email protected]>
-- owa
diff -rup linux-2.6.21-rt1/arch/powerpc/xmon/xmon.c rt/arch/powerpc/xmon/xmon.c
--- linux-2.6.21-rt1/arch/powerpc/xmon/xmon.c 2007-05-07 14:08:12.000000000 +0900
+++ rt/arch/powerpc/xmon/xmon.c 2007-05-07 14:05:30.000000000 +0900
@@ -2129,7 +2129,7 @@ print_address(unsigned long addr)
static unsigned long mdest; /* destination address */
static unsigned long msrc; /* source address */
static unsigned long mval; /* byte value to set memory to */
-static unsigned long mcount; /* # bytes to affect */
+static unsigned long xmon_mcount; /* # bytes to affect */
static unsigned long mdiffs; /* max # differences to print */
void
@@ -2141,19 +2141,20 @@ memops(int cmd)
scanhex((void *)(cmd == 's'? &mval: &msrc));
if( termch != '\n' )
termch = 0;
- scanhex((void *)&mcount);
+ scanhex((void *)&xmon_mcount);
switch( cmd ){
case 'm':
- memmove((void *)mdest, (void *)msrc, mcount);
+ memmove((void *)mdest, (void *)msrc, xmon_mcount);
break;
case 's':
- memset((void *)mdest, mval, mcount);
+ memset((void *)mdest, mval, xmon_mcount);
break;
case 'd':
if( termch != '\n' )
termch = 0;
scanhex((void *)&mdiffs);
- memdiffs((unsigned char *)mdest, (unsigned char *)msrc, mcount, mdiffs);
+ memdiffs((unsigned char *)mdest, (unsigned char *)msrc,
+ xmon_mcount, mdiffs);
break;
}
}
Calculate clocksource_timebase.shift from tb_ticks_per_jiffy to get an
accurate translation, though I don't understand why the current value of
clocksource_timebase.shift can be a constant...
Signed-off-by: Tsutomu OWA <[email protected]>
-- owa
--- linux-2.6.21-rt1/arch/powerpc/kernel/time.c 2007-05-09 18:05:57.000000000 +0900
+++ rt/arch/powerpc/kernel/time.c 2007-05-09 18:06:24.000000000 +0900
@@ -945,6 +945,9 @@ static int __init init_timebase_clocksou
if (__USE_RTC())
return -ENODEV;
+#ifdef CONFIG_PPC64
+ clocksource_timebase.shift = tb_ticks_per_jiffy / 1000000;
+#endif
clocksource_timebase.mult = clocksource_hz2mult(tb_ticks_per_sec,
clocksource_timebase.shift);
return clocksource_register(&clocksource_timebase);