trace_clock_local() is based on the arch-specific sched_clock(), in X86,
it is tsc(64bit) based, which can give very high precision(about 1ns
with 1GHz). But in MIPS, the sched_clock() is jiffies based, which can
give only 10ms precision with 1000 HZ, which is not enough for tracing,
especially for Real Time system.
so, we need to implement a MIPS specific sched_clock() to get higher
precision. There is a tsc like clock counter register in MIPS, whose
frequency is half of the processor, so, if the cpu frequency is 800MHz,
the time precision reaches 2.5ns, which is very good for tracing, even
for Real Time system.
But because it is only 32 bits long, it will roll over quickly, so such
a sched_clock() would bring extra load, which is not good for the
whole system. So, we only need to implement an arch-specific
trace_clock_local() for tracing. As a preparation, we convert it into a
weak function.
The MIPS specific trace_clock_local() is coming in the next two patches.
Signed-off-by: Wu Zhangjin <[email protected]>
---
kernel/trace/trace_clock.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92..06b8cd2 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -26,7 +26,7 @@
* Useful for tracing that does not cross to other CPUs nor
* does it go through idle events.
*/
-u64 notrace trace_clock_local(void)
+u64 __weak notrace trace_clock_local(void)
{
unsigned long flags;
u64 clock;
--
1.6.2.1
This patch implement a mips_timecounter_read(), which can be used to get
high precision timestamp without extra lock.
It is based on the clock counter register and the
timecounter/cyclecounter abstraction layer of kernel.
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/include/asm/time.h | 19 +++++++++++++++++++
arch/mips/kernel/csrc-r4k.c | 41 +++++++++++++++++++++++++++++++++++++++++
arch/mips/kernel/time.c | 2 ++
3 files changed, 62 insertions(+), 0 deletions(-)
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index df6a430..b8af7fa 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -73,8 +73,18 @@ static inline int mips_clockevent_init(void)
*/
#ifdef CONFIG_CSRC_R4K_LIB
extern int init_r4k_clocksource(void);
+extern int init_r4k_timecounter(void);
+extern u64 r4k_timecounter_read(void);
#endif
+static inline u64 mips_timecounter_read(void)
+{
+#ifdef CONFIG_CSRC_R4K
+ return r4k_timecounter_read();
+#else
+ return sched_clock();
+#endif
+}
static inline int init_mips_clocksource(void)
{
#ifdef CONFIG_CSRC_R4K
@@ -84,6 +94,15 @@ static inline int init_mips_clocksource(void)
#endif
}
+static inline int init_mips_timecounter(void)
+{
+#ifdef CONFIG_CSRC_R4K
+ return init_r4k_timecounter();
+#else
+ return 0;
+#endif
+}
+
extern void clocksource_set_clock(struct clocksource *cs, unsigned int clock);
extern void clockevent_set_clock(struct clock_event_device *cd,
unsigned int clock);
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index e95a3cd..4e7705f 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -7,6 +7,7 @@
*/
#include <linux/clocksource.h>
#include <linux/init.h>
+#include <linux/sched.h>
#include <asm/time.h>
@@ -36,3 +37,43 @@ int __init init_r4k_clocksource(void)
return 0;
}
+
+static struct timecounter r4k_tc = {
+ .cc = NULL,
+};
+
+static cycle_t r4k_cc_read(const struct cyclecounter *cc)
+{
+ return read_c0_count();
+}
+
+static struct cyclecounter r4k_cc = {
+ .read = r4k_cc_read,
+ .mask = CLOCKSOURCE_MASK(32),
+ .shift = 32,
+};
+
+int __init init_r4k_timecounter(void)
+{
+ if (!cpu_has_counter || !mips_hpt_frequency)
+ return -ENXIO;
+
+ r4k_cc.mult = div_sc((unsigned long)mips_hpt_frequency, NSEC_PER_SEC,
+ 32);
+
+ timecounter_init(&r4k_tc, &r4k_cc, sched_clock());
+
+ return 0;
+}
+
+u64 r4k_timecounter_read(void)
+{
+ u64 clock;
+
+ if (r4k_tc.cc != NULL)
+ clock = timecounter_read(&r4k_tc);
+ else
+ clock = sched_clock();
+
+ return clock;
+}
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 1f467d5..e38cca1 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -156,4 +156,6 @@ void __init time_init(void)
if (!mips_clockevent_init() || !cpu_has_mfc0_count_bug())
init_mips_clocksource();
+ if (!cpu_has_mfc0_count_bug())
+ init_mips_timecounter();
}
--
1.6.2.1
This patch add the mips_timecounter_read() based high precision version
of trace_clock_local().
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/kernel/Makefile | 6 ++++++
arch/mips/kernel/trace_clock.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 0 deletions(-)
create mode 100644 arch/mips/kernel/trace_clock.c
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 8e26e9c..f228868 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -8,6 +8,10 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
ptrace.o reset.o setup.o signal.o syscall.o \
time.o topology.o traps.o unaligned.o watch.o
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_trace_clock.o = -pg
+endif
+
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
obj-$(CONFIG_CEVT_R4K_LIB) += cevt-r4k.o
obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o
@@ -24,6 +28,8 @@ obj-$(CONFIG_SYNC_R4K) += sync-r4k.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_MODULES) += mips_ksyms.o module.o
+obj-$(CONFIG_FTRACE) += trace_clock.o
+
obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o
obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o
obj-$(CONFIG_CPU_MIPS64) += r4k_fpu.o r4k_switch.o
diff --git a/arch/mips/kernel/trace_clock.c b/arch/mips/kernel/trace_clock.c
new file mode 100644
index 0000000..2e3475f
--- /dev/null
+++ b/arch/mips/kernel/trace_clock.c
@@ -0,0 +1,37 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive for
+ * more details.
+ *
+ * Copyright (C) 2009 Lemote Inc. & DSLab, Lanzhou University, China
+ * Author: Wu Zhangjin <[email protected]>
+ */
+
+#include <linux/clocksource.h>
+#include <linux/sched.h>
+
+#include <asm/time.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 trace_clock_local(void)
+{
+ unsigned long flags;
+ u64 clock;
+
+ raw_local_irq_save(flags);
+
+ preempt_disable_notrace();
+
+ clock = mips_timecounter_read();
+
+ preempt_enable_no_resched_notrace();
+
+ raw_local_irq_restore(flags);
+
+ return clock;
+}
--
1.6.2.1
if -pg of gcc is enabled. a calling to _mcount will be inserted to each
kernel function. so, there is a possibility to trace the functions in
_mcount.
here is the implementation of mips specific _mcount for static function
tracer.
The -ffunction-sections option does not work with -pg, so disable it if
FUNCTION_TRACER is enabled.
NOTE: This implementation does not support module tracing yet, but you can
try to enable it by adding -mlong-calls to KBUILD_CFLAGS; however, this will
make the following dynamic function tracer fail, so I did not enable
-mlong-calls by default. In the next version, I plan to enable module
support by making the dynamic function tracer work with -mlong-calls,
enabling long calls just for modules.
Reviewed-by: Steven Rostedt <[email protected]>
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/Kconfig | 1 +
arch/mips/Makefile | 2 +
arch/mips/include/asm/ftrace.h | 25 ++++++++++-
arch/mips/kernel/Makefile | 2 +
arch/mips/kernel/mcount.S | 94 ++++++++++++++++++++++++++++++++++++++++
arch/mips/kernel/mips_ksyms.c | 5 ++
6 files changed, 128 insertions(+), 1 deletions(-)
create mode 100644 arch/mips/kernel/mcount.S
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 03bd56a..6b33e88 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_ARCH_KGDB
+ select HAVE_FUNCTION_TRACER
# Horrible source of confusion. Die, die, die ...
select EMBEDDED
select RTC_LIB if !LEMOTE_FULOONG2E
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 77f5021..0a2073e 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -48,7 +48,9 @@ ifneq ($(SUBARCH),$(ARCH))
endif
endif
+ifndef CONFIG_FUNCTION_TRACER
cflags-y := -ffunction-sections
+endif
cflags-y += $(call cc-option, -mno-check-zero-division)
ifdef CONFIG_32BIT
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index 40a8c17..5f8ebcf 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -1 +1,24 @@
-/* empty */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive for
+ * more details.
+ *
+ * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Author: Wu Zhangjin <[email protected]>
+ */
+
+#ifndef _ASM_MIPS_FTRACE_H
+#define _ASM_MIPS_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(_mcount))
+#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+#define mcount _mcount
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_TRACER */
+#endif /* _ASM_MIPS_FTRACE_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index f228868..38e704a 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_trace_clock.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
endif
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
@@ -29,6 +30,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_MODULES) += mips_ksyms.o module.o
obj-$(CONFIG_FTRACE) += trace_clock.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o
obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
new file mode 100644
index 0000000..5bb8055
--- /dev/null
+++ b/arch/mips/kernel/mcount.S
@@ -0,0 +1,94 @@
+/*
+ * the mips-specific _mcount implementation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive for
+ * more details.
+ *
+ * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Author: Wu Zhangjin <[email protected]>
+ */
+
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+#include <asm/ftrace.h>
+
+ .text
+ .set noreorder
+ .set noat
+
+ /* since there is a "addiu sp,sp,-8" before "jal _mcount" in 32bit */
+ .macro RESTORE_SP_FOR_32BIT
+#ifdef CONFIG_32BIT
+ PTR_ADDIU sp, 8
+#endif
+ .endm
+
+ .macro MCOUNT_SAVE_REGS
+ PTR_SUBU sp, PT_SIZE
+ PTR_S ra, PT_R31(sp)
+ PTR_S AT, PT_R1(sp)
+ PTR_S a0, PT_R4(sp)
+ PTR_S a1, PT_R5(sp)
+ PTR_S a2, PT_R6(sp)
+ PTR_S a3, PT_R7(sp)
+#ifdef CONFIG_64BIT
+ PTR_S a4, PT_R8(sp)
+ PTR_S a5, PT_R9(sp)
+ PTR_S a6, PT_R10(sp)
+ PTR_S a7, PT_R11(sp)
+#endif
+ .endm
+
+ .macro MCOUNT_RESTORE_REGS
+ PTR_L ra, PT_R31(sp)
+ PTR_L AT, PT_R1(sp)
+ PTR_L a0, PT_R4(sp)
+ PTR_L a1, PT_R5(sp)
+ PTR_L a2, PT_R6(sp)
+ PTR_L a3, PT_R7(sp)
+#ifdef CONFIG_64BIT
+ PTR_L a4, PT_R8(sp)
+ PTR_L a5, PT_R9(sp)
+ PTR_L a6, PT_R10(sp)
+ PTR_L a7, PT_R11(sp)
+#endif
+ PTR_ADDIU sp, PT_SIZE
+.endm
+
+ .macro MCOUNT_SET_ARGS
+ move a0, ra /* arg1: next ip, selfaddr */
+ move a1, AT /* arg2: the caller's next ip, parent */
+ PTR_SUBU a0, MCOUNT_INSN_SIZE
+ .endm
+
+ .macro RETURN_BACK
+ jr ra
+ move ra, AT
+ .endm
+
+NESTED(_mcount, PT_SIZE, ra)
+ RESTORE_SP_FOR_32BIT
+ PTR_LA t0, ftrace_stub
+ PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
+ bne t0, t1, static_trace
+ nop
+
+ j ftrace_stub
+ nop
+
+static_trace:
+ MCOUNT_SAVE_REGS
+
+ MCOUNT_SET_ARGS /* call *ftrace_trace_function */
+ jalr t1
+ nop
+
+ MCOUNT_RESTORE_REGS
+ .globl ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+ END(_mcount)
+
+ .set at
+ .set reorder
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 225755d..1d04807 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -13,6 +13,7 @@
#include <asm/checksum.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
+#include <asm/ftrace.h>
extern void *__bzero(void *__s, size_t __count);
extern long __strncpy_from_user_nocheck_asm(char *__to,
@@ -51,3 +52,7 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(__csum_partial_copy_user);
EXPORT_SYMBOL(invalid_pte_table);
+#ifdef CONFIG_FUNCTION_TRACER
+/* _mcount is defined in arch/mips/kernel/mcount.S */
+EXPORT_SYMBOL(_mcount);
+#endif
--
1.6.2.1
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/Kconfig | 1 +
arch/mips/kernel/mcount.S | 4 ++++
2 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6b33e88..a9bd992 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -5,6 +5,7 @@ config MIPS
select HAVE_OPROFILE
select HAVE_ARCH_KGDB
select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
# Horrible source of confusion. Die, die, die ...
select EMBEDDED
select RTC_LIB if !LEMOTE_FULOONG2E
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 5bb8055..82e54ab 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -69,6 +69,10 @@
NESTED(_mcount, PT_SIZE, ra)
RESTORE_SP_FOR_32BIT
+ lw t0, function_trace_stop
+ bnez t0, ftrace_stub
+ nop
+
PTR_LA t0, ftrace_stub
PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
bne t0, t1, static_trace
--
1.6.2.1
MIPS architecture need this argument to handle big/little endian
respectively.
Signed-off-by: Wu Zhangjin <[email protected]>
---
scripts/Makefile.build | 1 +
scripts/recordmcount.pl | 6 +++---
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 341b589..0b94d2f 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -207,6 +207,7 @@ endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
"$(if $(CONFIG_64BIT),64,32)" \
"$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
"$(if $(part-of-module),1,0)" "$(@)";
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 090d300..daee038 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -99,13 +99,13 @@ $P =~ s@.*/@@g;
my $V = '0.1';
-if ($#ARGV < 7) {
- print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
+if ($#ARGV < 8) {
+ print "usage: $P arch endian bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
print "version: $V\n";
exit(1);
}
-my ($arch, $bits, $objdump, $objcopy, $cc,
+my ($arch, $endian, $bits, $objdump, $objcopy, $cc,
$ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
# This file refers to mcount and shouldn't be ftraced, so lets' ignore it
--
1.6.2.1
The dynamic function tracer needs to replace "nop" with "jump & link"
instructions and vice versa.
in linux-mips64, there will be some local symbols, whose name are
prefixed by $L, which need to be filtered. thanks goes to Steven for
writing the mips64-specific function_regex.
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/Kconfig | 2 +
arch/mips/include/asm/ftrace.h | 10 ++++
arch/mips/kernel/Makefile | 3 +-
arch/mips/kernel/ftrace.c | 107 ++++++++++++++++++++++++++++++++++++++++
arch/mips/kernel/mcount.S | 31 ++++++++++++
scripts/recordmcount.pl | 22 ++++++++
6 files changed, 174 insertions(+), 1 deletions(-)
create mode 100644 arch/mips/kernel/ftrace.c
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a9bd992..147fbbc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -6,6 +6,8 @@ config MIPS
select HAVE_ARCH_KGDB
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
# Horrible source of confusion. Die, die, die ...
select EMBEDDED
select RTC_LIB if !LEMOTE_FULOONG2E
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index 5f8ebcf..b4970c9 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -19,6 +19,16 @@
extern void _mcount(void);
#define mcount _mcount
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* relocation of mcount call site is the same as the address */
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* _ASM_MIPS_FTRACE_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 38e704a..3cda3ab 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_trace_clock.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
@@ -30,7 +31,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_MODULES) += mips_ksyms.o module.o
obj-$(CONFIG_FTRACE) += trace_clock.o
-obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o
obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
new file mode 100644
index 0000000..1e44865
--- /dev/null
+++ b/arch/mips/kernel/ftrace.c
@@ -0,0 +1,107 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <[email protected]>
+ * Copyright (C) 2009 DSLab, Lanzhou University, China
+ * Author: Wu Zhangjin <[email protected]>
+ *
+ * Thanks goes to Steven Rostedt for writing the original x86 version.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/init.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
+#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
+
+#ifdef CONFIG_CPU_LOONGSON2
+static unsigned int ftrace_nop = 0x00020021;
+#else
+static unsigned int ftrace_nop = 0x00000000;
+#endif
+
+static unsigned char *ftrace_call_replace(unsigned long op_code,
+ unsigned long addr)
+{
+ static unsigned int op;
+
+ op = op_code | ((addr >> 2) & ADDR_MASK);
+
+ return (unsigned char *)&op;
+}
+
+static unsigned char *ftrace_nop_replace(void)
+{
+ return (unsigned char *)&ftrace_nop;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+
+ /* read the text we want to modify */
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+
+ /* replace the text with the new text */
+ if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ flush_icache_range(ip, ip + 8);
+
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+
+ old = ftrace_call_replace(JAL, addr);
+ new = ftrace_nop_replace();
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(JAL, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(JAL, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* The return code is returned via data */
+ *(unsigned long *)data = 0;
+
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 82e54ab..30637e6 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -67,6 +67,35 @@
move ra, AT
.endm
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+LEAF(_mcount)
+ RESTORE_SP_FOR_32BIT
+ RETURN_BACK
+ END(_mcount)
+
+NESTED(ftrace_caller, PT_SIZE, ra)
+ RESTORE_SP_FOR_32BIT
+ lw t0, function_trace_stop
+ bnez t0, ftrace_stub
+ nop
+
+ MCOUNT_SAVE_REGS
+
+ MCOUNT_SET_ARGS
+ .globl ftrace_call
+ftrace_call:
+ jal ftrace_stub
+ nop
+
+ MCOUNT_RESTORE_REGS
+ .globl ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+ END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
NESTED(_mcount, PT_SIZE, ra)
RESTORE_SP_FOR_32BIT
lw t0, function_trace_stop
@@ -94,5 +123,7 @@ ftrace_stub:
RETURN_BACK
END(_mcount)
+#endif /* ! CONFIG_DYNAMIC_FTRACE */
+
.set at
.set reorder
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index daee038..511c3a2 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -245,6 +245,28 @@ if ($arch eq "x86_64") {
$ld .= " -m elf64_sparc";
$cc .= " -m64";
$objcopy .= " -O elf64-sparc";
+
+} elsif ($arch eq "mips") {
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+ $objdump .= " -Melf-trad".$endian."mips ";
+
+ if ($endian eq "big") {
+ $endian = " -EB ";
+ $ld .= " -melf".$bits."btsmip";
+ } else {
+ $endian = " -EL ";
+ $ld .= " -melf".$bits."ltsmip";
+ }
+
+ $cc .= " -mno-abicalls -fno-pic -mabi=" . $bits . $endian;
+ $ld .= $endian;
+
+ if ($bits == 64) {
+ $function_regex =
+ "^([0-9a-fA-F]+)\\s+<(.|[^\$]L.*?|\$[^L].*?|[^\$][^L].*?)>:";
+ $type = ".dword";
+ }
+
} else {
die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
}
--
1.6.2.1
We use mips_timecounter_read() to get the timestamp in MIPS, so it's
better not to trace it; otherwise, it will go into recursion (hang) when
using the function graph tracer.
but this patch have touched the common part in kernel/time/clocksource.c
and include/linux/clocksource.h, so it is not good, perhaps the better
solution is moving the whole mips_timecounter_init() implementation into
arch/mips, but that will introduce source code duplication. any other
solution?
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/include/asm/time.h | 2 +-
arch/mips/kernel/csrc-r4k.c | 4 ++--
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 4 ++--
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index b8af7fa..e1c660f 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -77,7 +77,7 @@ extern int init_r4k_timecounter(void);
extern u64 r4k_timecounter_read(void);
#endif
-static inline u64 mips_timecounter_read(void)
+static inline u64 notrace mips_timecounter_read(void)
{
#ifdef CONFIG_CSRC_R4K
return r4k_timecounter_read();
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index 4e7705f..0690bea 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -42,7 +42,7 @@ static struct timecounter r4k_tc = {
.cc = NULL,
};
-static cycle_t r4k_cc_read(const struct cyclecounter *cc)
+static cycle_t notrace r4k_cc_read(const struct cyclecounter *cc)
{
return read_c0_count();
}
@@ -66,7 +66,7 @@ int __init init_r4k_timecounter(void)
return 0;
}
-u64 r4k_timecounter_read(void)
+u64 notrace r4k_timecounter_read(void)
{
u64 clock;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 83d2fbd..2a02992 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -73,7 +73,7 @@ struct timecounter {
* XXX - This could use some mult_lxl_ll() asm optimization. Same code
* as in cyc2ns, but with unsigned result.
*/
-static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
+static inline u64 notrace cyclecounter_cyc2ns(const struct cyclecounter *cc,
cycle_t cycles)
{
u64 ret = (u64)cycles;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5e18c6a..9ce9d02 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(timecounter_init);
* The first call to this function for a new time counter initializes
* the time tracking and returns an undefined result.
*/
-static u64 timecounter_read_delta(struct timecounter *tc)
+static u64 notrace timecounter_read_delta(struct timecounter *tc)
{
cycle_t cycle_now, cycle_delta;
u64 ns_offset;
@@ -72,7 +72,7 @@ static u64 timecounter_read_delta(struct timecounter *tc)
return ns_offset;
}
-u64 timecounter_read(struct timecounter *tc)
+u64 notrace timecounter_read(struct timecounter *tc)
{
u64 nsec;
--
1.6.2.1
The implementation of function graph tracer for MIPS is a little
different from X86.
In MIPS, gcc (with -pg) only transfers the caller's return address (at) and
_mcount's return address (ra) to us.
move at, ra
jal _mcount
If the function is a leaf, it will not save the return address (ra):
ffffffff80101298 <au1k_wait>:
ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
ffffffff8010129c: ffbe0008 sd s8,8(sp)
ffffffff801012a0: 03a0f02d move s8,sp
ffffffff801012a4: 03e0082d move at,ra
ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
ffffffff801012ac: 00020021 nop
so, we can hijack it directly in _mcount, but if the function is non-leaf, the
return address is saved in the stack.
ffffffff80133030 <copy_process>:
ffffffff80133030: 67bdff50 daddiu sp,sp,-176
ffffffff80133034: ffbe00a0 sd s8,160(sp)
ffffffff80133038: 03a0f02d move s8,sp
ffffffff8013303c: ffbf00a8 sd ra,168(sp)
ffffffff80133040: ffb70098 sd s7,152(sp)
ffffffff80133044: ffb60090 sd s6,144(sp)
ffffffff80133048: ffb50088 sd s5,136(sp)
ffffffff8013304c: ffb40080 sd s4,128(sp)
ffffffff80133050: ffb30078 sd s3,120(sp)
ffffffff80133054: ffb20070 sd s2,112(sp)
ffffffff80133058: ffb10068 sd s1,104(sp)
ffffffff8013305c: ffb00060 sd s0,96(sp)
ffffffff80133060: 03e0082d move at,ra
ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
ffffffff80133068: 00020021 nop
But we can not get the exact stack address (which saved ra) directly in
_mcount; we need to search for the content of the at register in the stack
space or search for the "s{d,w} ra, offset(sp)" instruction in the text.
Because we can not prove there is only one match in the stack space, we
search the text instead.
as we can see, if the first instruction above "move at, ra" is "move s8,
sp"(move fp, sp), it is a leaf function, so we hijack the at register
directly via put &return_to_handler into it, and otherwise, we search
the "s{d,w} ra, offset(sp)" instruction to get the stack offset, and
then the stack address. we use the above copy_process() as an example,
we at last find "ffbf00a8", 0xa8 is the stack offset, we plus it with
s8(fp), that is the stack address, we hijack the content via writing the
&return_to_handler in.
Signed-off-by: Wu Zhangjin <[email protected]>
---
arch/mips/Kconfig | 1 +
arch/mips/kernel/ftrace.c | 192 ++++++++++++++++++++++++++++++++++++++++
arch/mips/kernel/mcount.S | 55 +++++++++++-
arch/mips/kernel/vmlinux.lds.S | 1 +
4 files changed, 248 insertions(+), 1 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 147fbbc..de690fd 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -8,6 +8,7 @@ config MIPS
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_GRAPH_TRACER
# Horrible source of confusion. Die, die, die ...
select EMBEDDED
select RTC_LIB if !LEMOTE_FULOONG2E
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 1e44865..fddee5b 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -13,6 +13,8 @@
#include <linux/ftrace.h>
#include <asm/cacheflush.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -105,3 +107,193 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define JMP 0x08000000 /* jump to target directly */
+extern void ftrace_graph_call(void);
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ /* j ftrace_stub */
+ memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(JMP, (unsigned long)ftrace_graph_caller);
+
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ /* j ftrace_graph_caller */
+ memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(JMP, (unsigned long)ftrace_stub);
+
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+#define S_RA (0x2fbf << 16) /* 32bit: afbf, 64bit: ffbf */
+/* This is only available when -fno-omit-frame-pointer is enabled with CONFIG_FRAME_POINTER=y */
+#define MOV_FP_SP 0x03a0f021 /* 32bit: 0x03a0f021, 64bit: 0x03a0f02d */
+#define STACK_OFFSET_MASK 0xfff /* stack offset range: 0 ~ PT_SIZE(304) */
+
+unsigned long ftrace_get_parent_addr(unsigned long self_addr,
+ unsigned long parent,
+ unsigned long parent_addr,
+ unsigned long fp)
+{
+ unsigned long sp, ip, ra;
+ unsigned int code;
+
+ /* move to the instruction "move ra, at" */
+ ip = self_addr - 8;
+
+ /* search the text until finding the "move s8, sp" instruction or
+ * "s{d,w} ra, offset(sp)" instruction */
+ do {
+ ip -= 4;
+ /* read the text we want to match */
+ if (probe_kernel_read(&code, (void *)ip, 4)) {
+ WARN_ON(1);
+ panic("read the text failure\n");
+ }
+
+ /* if the first instruction above "move at, ra" is "move
+ * s8(fp), sp", means the function is a leaf */
+ if ((code & MOV_FP_SP) == MOV_FP_SP)
+ return parent_addr;
+ } while (((code & S_RA) != S_RA));
+
+ sp = fp + (code & STACK_OFFSET_MASK);
+ ra = *(unsigned long *)sp;
+
+ if (ra == parent)
+ return sp;
+ else
+ panic
+ ("failed on getting stack address of ra\n: addr: 0x%lx, code: 0x%x\n",
+ ip, code);
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long fp)
+{
+ unsigned long old;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ *
+ * old = *parent;
+ */
+ asm volatile (
+ "1: " STR(PTR_L) " %[old], 0(%[parent])\n"
+ " li %[faulted], 0\n"
+ "2:\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: li %[faulted], 1\n"
+ " j 2b\n"
+ ".previous\n"
+
+ ".section\t__ex_table,\"a\"\n\t"
+ STR(PTR) "\t1b, 3b\n\t"
+ ".previous\n"
+
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
+ : [parent] "r" (parent)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
+ }
+
+ /* The argument *parent only work for leaf function, we need to check
+ * whether the function is leaf, if not, we need to get the real stack
+ * address which stored it.
+ *
+ * and here, we must stop tracing before calling probe_kernel_read().
+ * after calling it, restart tracing. otherwise, it will hang there.*/
+ tracing_stop();
+ parent =
+ (unsigned long *)ftrace_get_parent_addr(self_addr, old,
+ (unsigned long)parent, fp);
+ tracing_start();
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ *
+ * *parent = return_hooker;
+ */
+ asm volatile (
+ "1: " STR(PTR_S) " %[return_hooker], 0(%[parent])\n"
+ " li %[faulted], 0\n"
+ "2:\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: li %[faulted], 1\n"
+ " j 2b\n"
+ ".previous\n"
+
+ ".section\t__ex_table,\"a\"\n\t"
+ STR(PTR) "\t1b, 3b\n\t"
+ ".previous\n"
+
+ : [faulted] "=r" (faulted)
+ : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
+ -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index 30637e6..356e81c 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -89,6 +89,14 @@ ftrace_call:
nop
MCOUNT_RESTORE_REGS
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .globl ftrace_graph_call
+ftrace_graph_call:
+ j ftrace_stub
+ nop
+#endif
+
.globl ftrace_stub
ftrace_stub:
RETURN_BACK
@@ -106,7 +114,15 @@ NESTED(_mcount, PT_SIZE, ra)
PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
bne t0, t1, static_trace
nop
-
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ PTR_L t2, ftrace_graph_return
+ bne t0, t2, ftrace_graph_caller
+ nop
+ PTR_LA t0, ftrace_graph_entry_stub
+ PTR_L t2, ftrace_graph_entry
+ bne t0, t2, ftrace_graph_caller
+ nop
+#endif
j ftrace_stub
nop
@@ -125,5 +141,42 @@ ftrace_stub:
#endif /* ! CONFIG_DYNAMIC_FTRACE */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+NESTED(ftrace_graph_caller, PT_SIZE, ra)
+ MCOUNT_SAVE_REGS
+
+ PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
+ move a1, ra /* arg2: next ip, selfaddr */
+ PTR_SUBU a1, MCOUNT_INSN_SIZE
+ move a2, fp /* arg3: frame pointer */
+ jal prepare_ftrace_return
+ nop
+
+ MCOUNT_RESTORE_REGS
+ RETURN_BACK
+ END(ftrace_graph_caller)
+
+ .align 2
+ .globl return_to_handler
+return_to_handler:
+ PTR_SUBU sp, PT_SIZE
+ PTR_S v0, PT_R2(sp)
+ PTR_S v1, PT_R3(sp)
+
+ jal ftrace_return_to_handler
+ nop
+
+ /* restore the real parent address: v0 -> ra */
+ move ra, v0
+
+ PTR_L v0, PT_R2(sp)
+ PTR_L v1, PT_R3(sp)
+ PTR_ADDIU sp, PT_SIZE
+
+ jr ra
+ nop
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
.set at
.set reorder
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 162b299..f25df73 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
+ IRQENTRY_TEXT
*(.text.*)
*(.fixup)
*(.gnu.warning)
--
1.6.2.1
On Wed, 2009-10-21 at 22:34 +0800, Wu Zhangjin wrote:
> This patch add the mips_timecounter_read() based high precision version
> of trace_clock_local().
>
> Signed-off-by: Wu Zhangjin <[email protected]>
> ---
> arch/mips/kernel/Makefile | 6 ++++++
> arch/mips/kernel/trace_clock.c | 37 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 43 insertions(+), 0 deletions(-)
> create mode 100644 arch/mips/kernel/trace_clock.c
>
> diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> index 8e26e9c..f228868 100644
> --- a/arch/mips/kernel/Makefile
> +++ b/arch/mips/kernel/Makefile
> @@ -8,6 +8,10 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
> ptrace.o reset.o setup.o signal.o syscall.o \
> time.o topology.o traps.o unaligned.o watch.o
>
> +ifdef CONFIG_FUNCTION_TRACER
> +CFLAGS_REMOVE_trace_clock.o = -pg
> +endif
> +
> obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
> obj-$(CONFIG_CEVT_R4K_LIB) += cevt-r4k.o
> obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o
> @@ -24,6 +28,8 @@ obj-$(CONFIG_SYNC_R4K) += sync-r4k.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> obj-$(CONFIG_MODULES) += mips_ksyms.o module.o
>
> +obj-$(CONFIG_FTRACE) += trace_clock.o
> +
> obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o
> obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o
> obj-$(CONFIG_CPU_MIPS64) += r4k_fpu.o r4k_switch.o
> diff --git a/arch/mips/kernel/trace_clock.c b/arch/mips/kernel/trace_clock.c
> new file mode 100644
> index 0000000..2e3475f
> --- /dev/null
> +++ b/arch/mips/kernel/trace_clock.c
> @@ -0,0 +1,37 @@
> +/*
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License. See the file "COPYING" in the main directory of this archive for
> + * more details.
> + *
> + * Copyright (C) 2009 Lemote Inc. & DSLab, Lanzhou University, China
> + * Author: Wu Zhangjin <[email protected]>
> + */
> +
> +#include <linux/clocksource.h>
> +#include <linux/sched.h>
> +
> +#include <asm/time.h>
> +
> +/*
> + * trace_clock_local(): the simplest and least coherent tracing clock.
> + *
> + * Useful for tracing that does not cross to other CPUs nor
> + * does it go through idle events.
> + */
> +u64 trace_clock_local(void)
> +{
> + unsigned long flags;
> + u64 clock;
> +
> + raw_local_irq_save(flags);
> +
> + preempt_disable_notrace();
Why have the preempt_disable? You disable interrupts already, that will
prevent any preemption at that point.
-- Steve
> +
> + clock = mips_timecounter_read();
> +
> + preempt_enable_no_resched_notrace();
> +
> + raw_local_irq_restore(flags);
> +
> + return clock;
> +}
On Wed, 2009-10-21 at 10:46 -0400, Steven Rostedt wrote:
> On Wed, 2009-10-21 at 22:34 +0800, Wu Zhangjin wrote:
> > This patch add the mips_timecounter_read() based high precision version
> > of trace_clock_local().
> >
> > Signed-off-by: Wu Zhangjin <[email protected]>
> > ---
> > arch/mips/kernel/Makefile | 6 ++++++
> > arch/mips/kernel/trace_clock.c | 37 +++++++++++++++++++++++++++++++++++++
> > 2 files changed, 43 insertions(+), 0 deletions(-)
> > create mode 100644 arch/mips/kernel/trace_clock.c
> >
> > diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
> > index 8e26e9c..f228868 100644
> > --- a/arch/mips/kernel/Makefile
> > +++ b/arch/mips/kernel/Makefile
> > @@ -8,6 +8,10 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
> > ptrace.o reset.o setup.o signal.o syscall.o \
> > time.o topology.o traps.o unaligned.o watch.o
> >
> > +ifdef CONFIG_FUNCTION_TRACER
> > +CFLAGS_REMOVE_trace_clock.o = -pg
> > +endif
> > +
> > obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
> > obj-$(CONFIG_CEVT_R4K_LIB) += cevt-r4k.o
> > obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o
> > @@ -24,6 +28,8 @@ obj-$(CONFIG_SYNC_R4K) += sync-r4k.o
> > obj-$(CONFIG_STACKTRACE) += stacktrace.o
> > obj-$(CONFIG_MODULES) += mips_ksyms.o module.o
> >
> > +obj-$(CONFIG_FTRACE) += trace_clock.o
> > +
> > obj-$(CONFIG_CPU_LOONGSON2) += r4k_fpu.o r4k_switch.o
> > obj-$(CONFIG_CPU_MIPS32) += r4k_fpu.o r4k_switch.o
> > obj-$(CONFIG_CPU_MIPS64) += r4k_fpu.o r4k_switch.o
> > diff --git a/arch/mips/kernel/trace_clock.c b/arch/mips/kernel/trace_clock.c
> > new file mode 100644
> > index 0000000..2e3475f
> > --- /dev/null
> > +++ b/arch/mips/kernel/trace_clock.c
> > @@ -0,0 +1,37 @@
> > +/*
> > + * This file is subject to the terms and conditions of the GNU General Public
> > + * License. See the file "COPYING" in the main directory of this archive for
> > + * more details.
> > + *
> > + * Copyright (C) 2009 Lemote Inc. & DSLab, Lanzhou University, China
> > + * Author: Wu Zhangjin <[email protected]>
> > + */
> > +
> > +#include <linux/clocksource.h>
> > +#include <linux/sched.h>
> > +
> > +#include <asm/time.h>
> > +
> > +/*
> > + * trace_clock_local(): the simplest and least coherent tracing clock.
> > + *
> > + * Useful for tracing that does not cross to other CPUs nor
> > + * does it go through idle events.
> > + */
> > +u64 trace_clock_local(void)
> > +{
> > + unsigned long flags;
> > + u64 clock;
> > +
> > + raw_local_irq_save(flags);
> > +
> > + preempt_disable_notrace();
>
> Why have the preempt_disable? You disable interrupts already, that will
> prevent any preemption at that point.
>
just removed it.
Thanks!
Regards,
Wu Zhangjin
On Wed, 2009-10-21 at 22:35 +0800, Wu Zhangjin wrote:
> The implementation of function graph tracer for MIPS is a little
> different from X86.
>
> in MIPS, gcc(with -pg) only transfer the caller's return address(at) and
> the _mcount's return address(ra) to us.
>
> move at, ra
> jal _mcount
>
> if the function is a leaf, it will not save the return address (ra):
>
> ffffffff80101298 <au1k_wait>:
> ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
> ffffffff8010129c: ffbe0008 sd s8,8(sp)
> ffffffff801012a0: 03a0f02d move s8,sp
> ffffffff801012a4: 03e0082d move at,ra
> ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff801012ac: 00020021 nop
>
> so, we can hijack it directly in _mcount, but if the function is non-leaf, the
> return address is saved in the stack.
>
> ffffffff80133030 <copy_process>:
> ffffffff80133030: 67bdff50 daddiu sp,sp,-176
> ffffffff80133034: ffbe00a0 sd s8,160(sp)
> ffffffff80133038: 03a0f02d move s8,sp
> ffffffff8013303c: ffbf00a8 sd ra,168(sp)
> ffffffff80133040: ffb70098 sd s7,152(sp)
> ffffffff80133044: ffb60090 sd s6,144(sp)
> ffffffff80133048: ffb50088 sd s5,136(sp)
> ffffffff8013304c: ffb40080 sd s4,128(sp)
> ffffffff80133050: ffb30078 sd s3,120(sp)
> ffffffff80133054: ffb20070 sd s2,112(sp)
> ffffffff80133058: ffb10068 sd s1,104(sp)
> ffffffff8013305c: ffb00060 sd s0,96(sp)
> ffffffff80133060: 03e0082d move at,ra
> ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff80133068: 00020021 nop
>
> but we can not get the exact stack address(which saved ra) directly in
> _mcount, we need to search the content of at register in the stack space
> or search the "s{d,w} ra, offset(sp)" instruction in the text. 'Cause we
> can not prove there is only a match in the stack space, so, we search
> the text instead.
>
> as we can see, if the first instruction above "move at, ra" is "move s8,
> sp"(move fp, sp), it is a leaf function, so we hijack the at register
> directly via put &return_to_handler into it, and otherwise, we search
> the "s{d,w} ra, offset(sp)" instruction to get the stack offset, and
> then the stack address. we use the above copy_process() as an example,
> we at last find "ffbf00a8", 0xa8 is the stack offset, we plus it with
> s8(fp), that is the stack address, we hijack the content via writing the
> &return_to_handler in.
>
> Signed-off-by: Wu Zhangjin <[email protected]>
> ---
> arch/mips/Kconfig | 1 +
> arch/mips/kernel/ftrace.c | 192 ++++++++++++++++++++++++++++++++++++++++
> arch/mips/kernel/mcount.S | 55 +++++++++++-
> arch/mips/kernel/vmlinux.lds.S | 1 +
> 4 files changed, 248 insertions(+), 1 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 147fbbc..de690fd 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -8,6 +8,7 @@ config MIPS
> select HAVE_FUNCTION_TRACE_MCOUNT_TEST
> select HAVE_DYNAMIC_FTRACE
> select HAVE_FTRACE_MCOUNT_RECORD
> + select HAVE_FUNCTION_GRAPH_TRACER
> # Horrible source of confusion. Die, die, die ...
> select EMBEDDED
> select RTC_LIB if !LEMOTE_FULOONG2E
> diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
> index 1e44865..fddee5b 100644
> --- a/arch/mips/kernel/ftrace.c
> +++ b/arch/mips/kernel/ftrace.c
> @@ -13,6 +13,8 @@
> #include <linux/ftrace.h>
>
> #include <asm/cacheflush.h>
> +#include <asm/asm.h>
> +#include <asm/asm-offsets.h>
>
> #ifdef CONFIG_DYNAMIC_FTRACE
>
> @@ -105,3 +107,193 @@ int __init ftrace_dyn_arch_init(void *data)
> return 0;
> }
> #endif /* CONFIG_DYNAMIC_FTRACE */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +
> +#define JMP 0x08000000 /* jump to target directly */
> +extern void ftrace_graph_call(void);
> +
> +int ftrace_enable_ftrace_graph_caller(void)
> +{
> + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> + unsigned char old[MCOUNT_INSN_SIZE], *new;
> + int ret;
> +
> + /* j ftrace_stub */
> + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> + new = ftrace_call_replace(JMP, (unsigned long)ftrace_graph_caller);
> +
> + ret = ftrace_modify_code(ip, old, new);
> +
> + return ret;
> +}
> +
> +int ftrace_disable_ftrace_graph_caller(void)
> +{
> + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> + unsigned char old[MCOUNT_INSN_SIZE], *new;
> + int ret;
> +
> + /* j ftrace_graph_caller */
> + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> + new = ftrace_call_replace(JMP, (unsigned long)ftrace_stub);
> +
> + ret = ftrace_modify_code(ip, old, new);
> +
> + return ret;
> +}
> +
> +#endif /* !CONFIG_DYNAMIC_FTRACE */
> +
> +#define S_RA (0x2fbf << 16) /* 32bit: afbf, 64bit: ffbf */
> +/* This is only available when enabled -fno-omit-frame-pointer with CONFIG_FRAME_POINTER=y */
> +#define MOV_FP_SP 0x03a0f021 /* 32bit: 0x03a0f021, 64bit: 0x03a0f02d */
> +#define STACK_OFFSET_MASK 0xfff /* stack offset range: 0 ~ PT_SIZE(304) */
> +
> +unsigned long ftrace_get_parent_addr(unsigned long self_addr,
> + unsigned long parent,
> + unsigned long parent_addr,
> + unsigned long fp)
> +{
> + unsigned long sp, ip, ra;
> + unsigned int code;
> +
> + /* move to the instruction "move ra, at" */
> + ip = self_addr - 8;
> +
> + /* search the text until finding the "move s8, sp" instruction or
> + * "s{d,w} ra, offset(sp)" instruction */
> + do {
> + ip -= 4;
> + /* read the text we want to match */
> + if (probe_kernel_read(&code, (void *)ip, 4)) {
> + WARN_ON(1);
> + panic("read the text failure\n");
> + }
> +
> + /* if the first instruction above "move at, ra" is "move
> + * s8(fp), sp", means the function is a leaf */
> + if ((code & MOV_FP_SP) == MOV_FP_SP)
> + return parent_addr;
> + } while (((code & S_RA) != S_RA));
> +
> + sp = fp + (code & STACK_OFFSET_MASK);
> + ra = *(unsigned long *)sp;
> +
It seems the fault protection is missing here — is it needed? I have
never met a fault in this place or in the following two places, so are
we safe to remove all of the fault protection?
Regards
Wu Zhangjin
> + if (ra == parent)
> + return sp;
> + else
> + panic
> + ("failed on getting stack address of ra\n: addr: 0x%lx, code: 0x%x\n",
> + ip, code);
> +}
> +
> +/*
> + * Hook the return address and push it in the stack of return addrs
> + * in current thread info.
> + */
> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
> + unsigned long fp)
> +{
> + unsigned long old;
> + int faulted;
> + struct ftrace_graph_ent trace;
> + unsigned long return_hooker = (unsigned long)
> + &return_to_handler;
> +
> + /*
> + * Protect against fault, even if it shouldn't
> + * happen. This tool is too much intrusive to
> + * ignore such a protection.
> + *
> + * old = *parent;
> + */
> + asm volatile (
> + "1: " STR(PTR_L) " %[old], 0(%[parent])\n"
> + " li %[faulted], 0\n"
> + "2:\n"
> +
> + ".section .fixup, \"ax\"\n"
> + "3: li %[faulted], 1\n"
> + " j 2b\n"
> + ".previous\n"
> +
> + ".section\t__ex_table,\"a\"\n\t"
> + STR(PTR) "\t1b, 3b\n\t"
> + ".previous\n"
> +
> + : [old] "=&r" (old), [faulted] "=r" (faulted)
> + : [parent] "r" (parent)
> + : "memory"
> + );
> +
> + if (unlikely(faulted)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + return;
> + }
> +
> + /* The argument *parent only work for leaf function, we need to check
> + * whether the function is leaf, if not, we need to get the real stack
> + * address which stored it.
> + *
> + * and here, we must stop tracing before calling probe_kernel_read().
> + * after calling it, restart tracing. otherwise, it will hang there.*/
> + tracing_stop();
> + parent =
> + (unsigned long *)ftrace_get_parent_addr(self_addr, old,
> + (unsigned long)parent, fp);
> + tracing_start();
> +
> +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> + return;
> +
> + /*
> + * Protect against fault, even if it shouldn't
> + * happen. This tool is too much intrusive to
> + * ignore such a protection.
> + *
> + * *parent = return_hooker;
> + */
> + asm volatile (
> + "1: " STR(PTR_S) " %[return_hooker], 0(%[parent])\n"
> + " li %[faulted], 0\n"
> + "2:\n"
> +
> + ".section .fixup, \"ax\"\n"
> + "3: li %[faulted], 1\n"
> + " j 2b\n"
> + ".previous\n"
> +
> + ".section\t__ex_table,\"a\"\n\t"
> + STR(PTR) "\t1b, 3b\n\t"
> + ".previous\n"
> +
> + : [faulted] "=r" (faulted)
> + : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
> + : "memory"
> + );
> +
> + if (unlikely(faulted)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + return;
> + }
> +
> + if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
> + -EBUSY) {
> + *parent = old;
> + return;
> + }
> +
> + trace.func = self_addr;
> +
> + /* Only trace if the calling function expects to */
> + if (!ftrace_graph_entry(&trace)) {
> + current->curr_ret_stack--;
> + *parent = old;
> + }
> +}
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
> index 30637e6..356e81c 100644
> --- a/arch/mips/kernel/mcount.S
> +++ b/arch/mips/kernel/mcount.S
> @@ -89,6 +89,14 @@ ftrace_call:
> nop
>
> MCOUNT_RESTORE_REGS
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + .globl ftrace_graph_call
> +ftrace_graph_call:
> + j ftrace_stub
> + nop
> +#endif
> +
> .globl ftrace_stub
> ftrace_stub:
> RETURN_BACK
> @@ -106,7 +114,15 @@ NESTED(_mcount, PT_SIZE, ra)
> PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> bne t0, t1, static_trace
> nop
> -
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + PTR_L t2, ftrace_graph_return
> + bne t0, t2, ftrace_graph_caller
> + nop
> + PTR_LA t0, ftrace_graph_entry_stub
> + PTR_L t2, ftrace_graph_entry
> + bne t0, t2, ftrace_graph_caller
> + nop
> +#endif
> j ftrace_stub
> nop
>
> @@ -125,5 +141,42 @@ ftrace_stub:
>
> #endif /* ! CONFIG_DYNAMIC_FTRACE */
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +NESTED(ftrace_graph_caller, PT_SIZE, ra)
> + MCOUNT_SAVE_REGS
> +
> + PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
> + move a1, ra /* arg2: next ip, selfaddr */
> + PTR_SUBU a1, MCOUNT_INSN_SIZE
> + move a2, fp /* arg3: frame pointer */
> + jal prepare_ftrace_return
> + nop
> +
> + MCOUNT_RESTORE_REGS
> + RETURN_BACK
> + END(ftrace_graph_caller)
> +
> + .align 2
> + .globl return_to_handler
> +return_to_handler:
> + PTR_SUBU sp, PT_SIZE
> + PTR_S v0, PT_R2(sp)
> + PTR_S v1, PT_R3(sp)
> +
> + jal ftrace_return_to_handler
> + nop
> +
> + /* restore the real parent address: v0 -> ra */
> + move ra, v0
> +
> + PTR_L v0, PT_R2(sp)
> + PTR_L v1, PT_R3(sp)
> + PTR_ADDIU sp, PT_SIZE
> +
> + jr ra
> + nop
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> .set at
> .set reorder
> diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
> index 162b299..f25df73 100644
> --- a/arch/mips/kernel/vmlinux.lds.S
> +++ b/arch/mips/kernel/vmlinux.lds.S
> @@ -46,6 +46,7 @@ SECTIONS
> SCHED_TEXT
> LOCK_TEXT
> KPROBES_TEXT
> + IRQENTRY_TEXT
> *(.text.*)
> *(.fixup)
> *(.gnu.warning)
On Wed, 2009-10-21 at 22:34 +0800, Wu Zhangjin wrote:
> +++ b/arch/mips/kernel/mcount.S
> @@ -0,0 +1,94 @@
> +/*
> + * the mips-specific _mcount implementation
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License. See the file "COPYING" in the main directory of this archive for
> + * more details.
> + *
> + * Copyright (C) 2009 DSLab, Lanzhou University, China
> + * Author: Wu Zhangjin <[email protected]>
> + */
> +
> +#include <asm/regdef.h>
> +#include <asm/stackframe.h>
> +#include <asm/ftrace.h>
> +
> + .text
> + .set noreorder
> + .set noat
> +
> + /* since there is a "addiu sp,sp,-8" before "jal _mcount" in 32bit */
> + .macro RESTORE_SP_FOR_32BIT
> +#ifdef CONFIG_32BIT
> + PTR_ADDIU sp, 8
> +#endif
> + .endm
> +
> + .macro MCOUNT_SAVE_REGS
> + PTR_SUBU sp, PT_SIZE
> + PTR_S ra, PT_R31(sp)
> + PTR_S AT, PT_R1(sp)
> + PTR_S a0, PT_R4(sp)
> + PTR_S a1, PT_R5(sp)
> + PTR_S a2, PT_R6(sp)
> + PTR_S a3, PT_R7(sp)
> +#ifdef CONFIG_64BIT
> + PTR_S a4, PT_R8(sp)
> + PTR_S a5, PT_R9(sp)
> + PTR_S a6, PT_R10(sp)
> + PTR_S a7, PT_R11(sp)
> +#endif
> + .endm
> +
> + .macro MCOUNT_RESTORE_REGS
> + PTR_L ra, PT_R31(sp)
> + PTR_L AT, PT_R1(sp)
> + PTR_L a0, PT_R4(sp)
> + PTR_L a1, PT_R5(sp)
> + PTR_L a2, PT_R6(sp)
> + PTR_L a3, PT_R7(sp)
> +#ifdef CONFIG_64BIT
> + PTR_L a4, PT_R8(sp)
> + PTR_L a5, PT_R9(sp)
> + PTR_L a6, PT_R10(sp)
> + PTR_L a7, PT_R11(sp)
> +#endif
> + PTR_ADDIU sp, PT_SIZE
> +.endm
> +
> + .macro MCOUNT_SET_ARGS
> + move a0, ra /* arg1: next ip, selfaddr */
> + move a1, AT /* arg2: the caller's next ip, parent */
> + PTR_SUBU a0, MCOUNT_INSN_SIZE
> + .endm
> +
> + .macro RETURN_BACK
> + jr ra
> + move ra, AT
> + .endm
> +
> +NESTED(_mcount, PT_SIZE, ra)
> + RESTORE_SP_FOR_32BIT
> + PTR_LA t0, ftrace_stub
> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
the dynamics of C function ABI.
-- Steve
> + bne t0, t1, static_trace
> + nop
> +
> + j ftrace_stub
> + nop
> +
> +static_trace:
> + MCOUNT_SAVE_REGS
> +
> + MCOUNT_SET_ARGS /* call *ftrace_trace_function */
> + jalr t1
> + nop
> +
> + MCOUNT_RESTORE_REGS
> + .globl ftrace_stub
> +ftrace_stub:
> + RETURN_BACK
> + END(_mcount)
> +
On Wed, 2009-10-21 at 22:35 +0800, Wu Zhangjin wrote:
> MIPS architecture need this argument to handle big/little endian
> respectively.
>
> Signed-off-by: Wu Zhangjin <[email protected]>
> ---
> scripts/Makefile.build | 1 +
> scripts/recordmcount.pl | 6 +++---
> 2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/scripts/Makefile.build b/scripts/Makefile.build
> index 341b589..0b94d2f 100644
> --- a/scripts/Makefile.build
> +++ b/scripts/Makefile.build
> @@ -207,6 +207,7 @@ endif
>
> ifdef CONFIG_FTRACE_MCOUNT_RECORD
> cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
> + "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
I thought I added this already??
/me goes and looks
Hmm, not there. Did a patch get lost?
Thanks,
-- Steve
> "$(if $(CONFIG_64BIT),64,32)" \
> "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
> "$(if $(part-of-module),1,0)" "$(@)";
> diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
> index 090d300..daee038 100755
> --- a/scripts/recordmcount.pl
> +++ b/scripts/recordmcount.pl
> @@ -99,13 +99,13 @@ $P =~ s@.*/@@g;
>
> my $V = '0.1';
>
> -if ($#ARGV < 7) {
> - print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
> +if ($#ARGV < 8) {
> + print "usage: $P arch endian bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
> print "version: $V\n";
> exit(1);
> }
>
> -my ($arch, $bits, $objdump, $objcopy, $cc,
> +my ($arch, $endian, $bits, $objdump, $objcopy, $cc,
> $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
>
> # This file refers to mcount and shouldn't be ftraced, so lets' ignore it
On Wed, 2009-10-21 at 22:35 +0800, Wu Zhangjin wrote:
> The implementation of function graph tracer for MIPS is a little
> different from X86.
>
> in MIPS, gcc(with -pg) only transfer the caller's return address(at) and
> the _mcount's return address(ra) to us.
>
> move at, ra
> jal _mcount
>
> if the function is a leaf, it will not save the return address (ra):
>
> ffffffff80101298 <au1k_wait>:
> ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
> ffffffff8010129c: ffbe0008 sd s8,8(sp)
> ffffffff801012a0: 03a0f02d move s8,sp
> ffffffff801012a4: 03e0082d move at,ra
> ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff801012ac: 00020021 nop
>
> so, we can hijack it directly in _mcount, but if the function is non-leaf, the
> return address is saved in the stack.
>
> ffffffff80133030 <copy_process>:
> ffffffff80133030: 67bdff50 daddiu sp,sp,-176
> ffffffff80133034: ffbe00a0 sd s8,160(sp)
> ffffffff80133038: 03a0f02d move s8,sp
> ffffffff8013303c: ffbf00a8 sd ra,168(sp)
> ffffffff80133040: ffb70098 sd s7,152(sp)
> ffffffff80133044: ffb60090 sd s6,144(sp)
> ffffffff80133048: ffb50088 sd s5,136(sp)
> ffffffff8013304c: ffb40080 sd s4,128(sp)
> ffffffff80133050: ffb30078 sd s3,120(sp)
> ffffffff80133054: ffb20070 sd s2,112(sp)
> ffffffff80133058: ffb10068 sd s1,104(sp)
> ffffffff8013305c: ffb00060 sd s0,96(sp)
> ffffffff80133060: 03e0082d move at,ra
> ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff80133068: 00020021 nop
>
> but we can not get the exact stack address(which saved ra) directly in
> _mcount, we need to search the content of at register in the stack space
> or search the "s{d,w} ra, offset(sp)" instruction in the text. 'Cause we
> can not prove there is only a match in the stack space, so, we search
> the text instead.
>
> as we can see, if the first instruction above "move at, ra" is "move s8,
> sp"(move fp, sp), it is a leaf function, so we hijack the at register
Are you sure it will always be the first instruction for leaf functions?
You may want to search for that instruction and stop on it. If you have
not yet found the storage of ra in the stack, then you know it is a leaf
function.
> directly via put &return_to_handler into it, and otherwise, we search
> the "s{d,w} ra, offset(sp)" instruction to get the stack offset, and
> then the stack address. we use the above copy_process() as an example,
> we at last find "ffbf00a8", 0xa8 is the stack offset, we plus it with
> s8(fp), that is the stack address, we hijack the content via writing the
> &return_to_handler in.
>
> Signed-off-by: Wu Zhangjin <[email protected]>
> ---
> arch/mips/Kconfig | 1 +
> arch/mips/kernel/ftrace.c | 192 ++++++++++++++++++++++++++++++++++++++++
> arch/mips/kernel/mcount.S | 55 +++++++++++-
> arch/mips/kernel/vmlinux.lds.S | 1 +
> 4 files changed, 248 insertions(+), 1 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 147fbbc..de690fd 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -8,6 +8,7 @@ config MIPS
> select HAVE_FUNCTION_TRACE_MCOUNT_TEST
> select HAVE_DYNAMIC_FTRACE
> select HAVE_FTRACE_MCOUNT_RECORD
> + select HAVE_FUNCTION_GRAPH_TRACER
> # Horrible source of confusion. Die, die, die ...
> select EMBEDDED
> select RTC_LIB if !LEMOTE_FULOONG2E
> diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
> index 1e44865..fddee5b 100644
> --- a/arch/mips/kernel/ftrace.c
> +++ b/arch/mips/kernel/ftrace.c
> @@ -13,6 +13,8 @@
> #include <linux/ftrace.h>
>
> #include <asm/cacheflush.h>
> +#include <asm/asm.h>
> +#include <asm/asm-offsets.h>
>
> #ifdef CONFIG_DYNAMIC_FTRACE
>
> @@ -105,3 +107,193 @@ int __init ftrace_dyn_arch_init(void *data)
> return 0;
> }
> #endif /* CONFIG_DYNAMIC_FTRACE */
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +
> +#define JMP 0x08000000 /* jump to target directly */
> +extern void ftrace_graph_call(void);
> +
> +int ftrace_enable_ftrace_graph_caller(void)
> +{
> + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> + unsigned char old[MCOUNT_INSN_SIZE], *new;
> + int ret;
> +
> + /* j ftrace_stub */
> + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> + new = ftrace_call_replace(JMP, (unsigned long)ftrace_graph_caller);
> +
> + ret = ftrace_modify_code(ip, old, new);
> +
> + return ret;
> +}
> +
> +int ftrace_disable_ftrace_graph_caller(void)
> +{
> + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> + unsigned char old[MCOUNT_INSN_SIZE], *new;
> + int ret;
> +
> + /* j ftrace_graph_caller */
> + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> + new = ftrace_call_replace(JMP, (unsigned long)ftrace_stub);
> +
> + ret = ftrace_modify_code(ip, old, new);
> +
> + return ret;
> +}
> +
> +#endif /* !CONFIG_DYNAMIC_FTRACE */
> +
> +#define S_RA (0x2fbf << 16) /* 32bit: afbf, 64bit: ffbf */
> +/* This is only available when enabled -fno-omit-frame-pointer with CONFIG_FRAME_POINTER=y */
> +#define MOV_FP_SP 0x03a0f021 /* 32bit: 0x03a0f021, 64bit: 0x03a0f02d */
> +#define STACK_OFFSET_MASK 0xfff /* stack offset range: 0 ~ PT_SIZE(304) */
> +
> +unsigned long ftrace_get_parent_addr(unsigned long self_addr,
> + unsigned long parent,
> + unsigned long parent_addr,
> + unsigned long fp)
> +{
> + unsigned long sp, ip, ra;
> + unsigned int code;
> +
> + /* move to the instruction "move ra, at" */
> + ip = self_addr - 8;
> +
> + /* search the text until finding the "move s8, sp" instruction or
> + * "s{d,w} ra, offset(sp)" instruction */
> + do {
> + ip -= 4;
> + /* read the text we want to match */
> + if (probe_kernel_read(&code, (void *)ip, 4)) {
I thought you had issues with using probe_kernel_read here?
> + WARN_ON(1);
> + panic("read the text failure\n");
> + }
> +
> + /* if the first instruction above "move at, ra" is "move
> + * s8(fp), sp", means the function is a leaf */
Ah, the comment is incorrect. We do search for the move and stop there.
So it is not the first instruction. The comment should read...
If we hit the "move s8(fp), sp" instruction before finding where the
ra is stored, then this is a leaf function and it does not store the
ra on the stack.
> + if ((code & MOV_FP_SP) == MOV_FP_SP)
> + return parent_addr;
> + } while (((code & S_RA) != S_RA));
> +
> + sp = fp + (code & STACK_OFFSET_MASK);
> + ra = *(unsigned long *)sp;
> +
> + if (ra == parent)
> + return sp;
> + else
> + panic
> + ("failed on getting stack address of ra\n: addr: 0x%lx, code: 0x%x\n",
> + ip, code);
I would not panic. I would disable the tracing, and not do the change.
The code should work regardless.
> +}
> +
> +/*
> + * Hook the return address and push it in the stack of return addrs
> + * in current thread info.
> + */
> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
> + unsigned long fp)
> +{
> + unsigned long old;
> + int faulted;
> + struct ftrace_graph_ent trace;
> + unsigned long return_hooker = (unsigned long)
> + &return_to_handler;
> +
> + /*
> + * Protect against fault, even if it shouldn't
> + * happen. This tool is too much intrusive to
> + * ignore such a protection.
> + *
> + * old = *parent;
> + */
> + asm volatile (
> + "1: " STR(PTR_L) " %[old], 0(%[parent])\n"
> + " li %[faulted], 0\n"
> + "2:\n"
> +
> + ".section .fixup, \"ax\"\n"
> + "3: li %[faulted], 1\n"
> + " j 2b\n"
> + ".previous\n"
> +
> + ".section\t__ex_table,\"a\"\n\t"
> + STR(PTR) "\t1b, 3b\n\t"
> + ".previous\n"
> +
> + : [old] "=&r" (old), [faulted] "=r" (faulted)
> + : [parent] "r" (parent)
> + : "memory"
> + );
> +
> + if (unlikely(faulted)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
See, this is how we kill the tracing.
> + return;
> + }
> +
> + /* The argument *parent only work for leaf function, we need to check
> + * whether the function is leaf, if not, we need to get the real stack
> + * address which stored it.
> + *
> + * and here, we must stop tracing before calling probe_kernel_read().
> + * after calling it, restart tracing. otherwise, it will hang there.*/
> + tracing_stop();
Ah, here you stop tracing. I still prefer to use a direct asm. This has
a little overhead.
Also, this completely negates the above asm, which basically gets the
parent the "x86" way.
> + parent =
> + (unsigned long *)ftrace_get_parent_addr(self_addr, old,
> + (unsigned long)parent, fp);
> + tracing_start();
> +
> +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> + return;
> +
> + /*
> + * Protect against fault, even if it shouldn't
> + * happen. This tool is too much intrusive to
> + * ignore such a protection.
> + *
> + * *parent = return_hooker;
> + */
> + asm volatile (
> + "1: " STR(PTR_S) " %[return_hooker], 0(%[parent])\n"
> + " li %[faulted], 0\n"
> + "2:\n"
> +
> + ".section .fixup, \"ax\"\n"
> + "3: li %[faulted], 1\n"
> + " j 2b\n"
> + ".previous\n"
> +
> + ".section\t__ex_table,\"a\"\n\t"
> + STR(PTR) "\t1b, 3b\n\t"
> + ".previous\n"
> +
> + : [faulted] "=r" (faulted)
> + : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
> + : "memory"
> + );
Hmm, the above should only be done for non leaf functions.
> +
> + if (unlikely(faulted)) {
> + ftrace_graph_stop();
> + WARN_ON(1);
> + return;
> + }
> +
> + if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
> + -EBUSY) {
> + *parent = old;
And this fails for leaf functions too.
> + return;
> + }
> +
> + trace.func = self_addr;
> +
> + /* Only trace if the calling function expects to */
> + if (!ftrace_graph_entry(&trace)) {
> + current->curr_ret_stack--;
> + *parent = old;
ditto
> + }
> +}
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
> index 30637e6..356e81c 100644
> --- a/arch/mips/kernel/mcount.S
> +++ b/arch/mips/kernel/mcount.S
> @@ -89,6 +89,14 @@ ftrace_call:
> nop
>
> MCOUNT_RESTORE_REGS
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + .globl ftrace_graph_call
> +ftrace_graph_call:
> + j ftrace_stub
> + nop
> +#endif
> +
> .globl ftrace_stub
> ftrace_stub:
> RETURN_BACK
> @@ -106,7 +114,15 @@ NESTED(_mcount, PT_SIZE, ra)
> PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> bne t0, t1, static_trace
> nop
> -
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> + PTR_L t2, ftrace_graph_return
> + bne t0, t2, ftrace_graph_caller
> + nop
> + PTR_LA t0, ftrace_graph_entry_stub
> + PTR_L t2, ftrace_graph_entry
> + bne t0, t2, ftrace_graph_caller
> + nop
> +#endif
> j ftrace_stub
> nop
>
> @@ -125,5 +141,42 @@ ftrace_stub:
>
> #endif /* ! CONFIG_DYNAMIC_FTRACE */
>
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +
> +NESTED(ftrace_graph_caller, PT_SIZE, ra)
> + MCOUNT_SAVE_REGS
> +
> + PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
Ah! You load the address of the stack you stored the ra with!
Good idea. I guess the leaf function update will work there too!
> + move a1, ra /* arg2: next ip, selfaddr */
> + PTR_SUBU a1, MCOUNT_INSN_SIZE
> + move a2, fp /* arg3: frame pointer */
> + jal prepare_ftrace_return
> + nop
> +
> + MCOUNT_RESTORE_REGS
> + RETURN_BACK
> + END(ftrace_graph_caller)
> +
> + .align 2
> + .globl return_to_handler
> +return_to_handler:
> + PTR_SUBU sp, PT_SIZE
> + PTR_S v0, PT_R2(sp)
> + PTR_S v1, PT_R3(sp)
> +
> + jal ftrace_return_to_handler
> + nop
> +
> + /* restore the real parent address: v0 -> ra */
> + move ra, v0
> +
> + PTR_L v0, PT_R2(sp)
> + PTR_L v1, PT_R3(sp)
> + PTR_ADDIU sp, PT_SIZE
> +
> + jr ra
> + nop
> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> +
> .set at
> .set reorder
> diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
> index 162b299..f25df73 100644
> --- a/arch/mips/kernel/vmlinux.lds.S
> +++ b/arch/mips/kernel/vmlinux.lds.S
> @@ -46,6 +46,7 @@ SECTIONS
> SCHED_TEXT
> LOCK_TEXT
> KPROBES_TEXT
> + IRQENTRY_TEXT
This looks like it should be in a separate patch. I don't see where you
explain this?
-- Steve
> *(.text.*)
> *(.fixup)
> *(.gnu.warning)
On Wed, 2009-10-21 at 23:21 +0800, Wu Zhangjin wrote:
> > +unsigned long ftrace_get_parent_addr(unsigned long self_addr,
> > + unsigned long parent,
> > + unsigned long parent_addr,
> > + unsigned long fp)
> > +{
> > + unsigned long sp, ip, ra;
> > + unsigned int code;
> > +
> > + /* move to the instruction "move ra, at" */
> > + ip = self_addr - 8;
> > +
> > + /* search the text until finding the "move s8, sp" instruction or
> > + * "s{d,w} ra, offset(sp)" instruction */
> > + do {
> > + ip -= 4;
> > + /* read the text we want to match */
> > + if (probe_kernel_read(&code, (void *)ip, 4)) {
> > + WARN_ON(1);
> > + panic("read the text failure\n");
> > + }
> > +
> > + /* if the first instruction above "move at, ra" is "move
> > + * s8(fp), sp", means the function is a leaf */
> > + if ((code & MOV_FP_SP) == MOV_FP_SP)
> > + return parent_addr;
> > + } while (((code & S_RA) != S_RA));
> > +
> > + sp = fp + (code & STACK_OFFSET_MASK);
> > + ra = *(unsigned long *)sp;
> > +
>
> Seems missed the fault protection here? is there a need? never met fault
> in this place and also the following two places, so, are we safe to
> remove all of the fault protection?
Is that "sp" basically already been check by the above
probe_kernel_read? If so, then it should be fine not to do the check
again.
-- Steve
>
> Regards
> Wu Zhangjin
>
> > + if (ra == parent)
> > + return sp;
> > + else
> > + panic
> > + ("failed on getting stack address of ra\n: addr: 0x%lx, code: 0x%x\n",
> > + ip, code);
> > +}
> > +
Steven Rostedt wrote:
> On Wed, 2009-10-21 at 22:35 +0800, Wu Zhangjin wrote:
>> The implementation of function graph tracer for MIPS is a little
>> different from X86.
>>
>> in MIPS, gcc(with -pg) only transfer the caller's return address(at) and
>> the _mcount's return address(ra) to us.
>>
>> move at, ra
>> jal _mcount
>>
>> if the function is a leaf, it will not save the return address(ra):
>>
>> ffffffff80101298 <au1k_wait>:
>> ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
>> ffffffff8010129c: ffbe0008 sd s8,8(sp)
>> ffffffff801012a0: 03a0f02d move s8,sp
>> ffffffff801012a4: 03e0082d move at,ra
>> ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
>> ffffffff801012ac: 00020021 nop
>>
>> so, we can hijack it directly in _mcount, but if the function is non-leaf, the
>> return address is saved in the stack.
>>
>> ffffffff80133030 <copy_process>:
>> ffffffff80133030: 67bdff50 daddiu sp,sp,-176
>> ffffffff80133034: ffbe00a0 sd s8,160(sp)
>> ffffffff80133038: 03a0f02d move s8,sp
>> ffffffff8013303c: ffbf00a8 sd ra,168(sp)
>> ffffffff80133040: ffb70098 sd s7,152(sp)
>> ffffffff80133044: ffb60090 sd s6,144(sp)
>> ffffffff80133048: ffb50088 sd s5,136(sp)
>> ffffffff8013304c: ffb40080 sd s4,128(sp)
>> ffffffff80133050: ffb30078 sd s3,120(sp)
>> ffffffff80133054: ffb20070 sd s2,112(sp)
>> ffffffff80133058: ffb10068 sd s1,104(sp)
>> ffffffff8013305c: ffb00060 sd s0,96(sp)
>> ffffffff80133060: 03e0082d move at,ra
>> ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
>> ffffffff80133068: 00020021 nop
>>
>> but we can not get the exact stack address(which saved ra) directly in
>> _mcount, we need to search the content of at register in the stack space
>> or search the "s{d,w} ra, offset(sp)" instruction in the text. 'Cause we
>> can not prove there is only a match in the stack space, so, we search
>> the text instead.
>>
>> as we can see, if the first instruction above "move at, ra" is "move s8,
>> sp"(move fp, sp), it is a leaf function, so we hijack the at register
>
> Are you sure it will always be the first instruction for leaf functions.
> You may want to search for that instruction and stop on it. If you have
> not yet found the storage of ra in the stack, then you know it is a leaf
> function.
>
There is no deterministic way to identify MIPS function prologs. This
is especially true for leaf functions, but also for functions with
multiple return sites.
For certain GCC versions there may be a set of command line options that
would give good results, but in general it is not possible. Attempts at
fast backtrace generation using code inspection are not reliable and
will invariably result in faults and panics when they fail.
David Daney
On Wed, 2009-10-21 at 09:37 -0700, David Daney wrote:
> There is no deterministic way to identify MIPS function prologs. This
> is especially true for leaf functions, but also for functions with
> multiple return sites.
>
> For certain GCC versions there may be a set of command line options that
> would give good results, but in general it is not possible. Attempts at
> fast backtrace generation using code inspection are not reliable and
> will invariably result in faults and panics when they fail.
Thanks for the update.
We can easily protect against panics, since we do fault protection
within the code (although currently it will panic on fault, but we can
fix that ;-). We can limit the search to a couple of 100 instructions,
as well as fail on first panic.
But are you sure that when compiled with -pg, that GCC does not give a
reliable prologue. Things are different when GCC is compiled with -pg,
it may indeed always have something that we can flag.
We could also add other tests, like the subtraction of the stack too.
-- Steve
Steven Rostedt wrote:
> On Wed, 2009-10-21 at 09:37 -0700, David Daney wrote:
>
>> There is no deterministic way to identify MIPS function prologs. This
>> is especially true for leaf functions, but also for functions with
>> multiple return sites.
>>
>> For certain GCC versions there may be a set of command line options that
>> would give good results, but in general it is not possible. Attempts at
>> fast backtrace generation using code inspection are not reliable and
>> will invariably result in faults and panics when they fail.
>
> Thanks for the update.
>
> We can easily protect against panics, since we do fault protection
> within the code (although currently it will panic on fault, but we can
> fix that ;-). We can limit the search to a couple of 100 instructions,
> as well as fail on first panic.
>
> But are you sure that when compiled with -pg, that GCC does not give a
> reliable prologue. Things are different when GCC is compiled with -pg,
> it may indeed always have something that we can flag.
>
> We could also add other tests, like the subtraction of the stack too.
>
I have not used -pg, so I don't know for sure, I think all it does is
add the calls to _mcount. Someone could investigate
-fno-omit-frame-pointer, with that you may be able to use:
move s8,sp
To identify function prologs, but it would still be ad hoc, as modern
versions of GCC will reorder instructions in the prolog for better
scheduling.
David Daney
On Wed, 2009-10-21 at 10:07 -0700, David Daney wrote:
> I have not used -pg, so I don't know for sure, I think all it does is
> add the calls to _mcount. Someone could investigate
> -fno-omit-frame-pointer, with that you may be able to use:
Note, -pg assumes -fno-omit-frame-pointer, since -fomit-frame-pointer
and -pg are incompatible.
>
> move s8,sp
>
> To identify function prologs, but it would still be ad hoc, as modern
> versions of GCC will reorder instructions in the prolog for better
> scheduling.
I'll have to search the ABI documentation about calling _mcount in MIPS.
There are assumptions with _mcount that are made. It may very well be
safe to assume that the move s8,sp will always be there before an mcount
call.
-- Steve
Steven Rostedt wrote:
> On Wed, 2009-10-21 at 10:07 -0700, David Daney wrote:
>
>> I have not used -pg, so I don't know for sure, I think all it does is
>> add the calls to _mcount. Someone could investigate
>> -fno-omit-frame-pointer, with that you may be able to use:
>
> Note, -pg assumes -fno-omit-frame-pointer, since -fomit-frame-pointer
> and -pg are incompatible.
>
>> move s8,sp
>>
>> To identify function prologs, but it would still be ad hoc, as modern
>> versions of GCC will reorder instructions in the prolog for better
>> scheduling.
>
> I'll have to search the ABI documentation about calling _mcount in MIPS.
> There are assumptions with _mcount that are made. It may very well be
> safe to assume that the move s8,sp will always be there before an mcount
> call.
>
Although I am quite interested in ftrace for MIPS, I haven't spent the
effort to try to figure out how many levels of backtrace we need to be
generating here.
If you only need the caller's address, that is passed in register 'at'.
If you need more than that, then things get tricky. I would do one of
two things:
1) Use the mechanism used by the OOPS dump.
2) Implement back traces based on DWARF2 unwinding data that is
generated by the compiler.
David Daney
On Wed, 2009-10-21 at 10:48 -0700, David Daney wrote:
> Although I am quite interested in ftrace for MIPS, I haven't spent the
> effort to try to figure out how many levels of backtrace we need to be
> generating here.
>
> If you only need the caller's address, that is passed in register 'at'.
> If you need more than that, then things get tricky. I would do one of
> two things:
Ftrace function tracer only needs the address passed in by AT. That
output looks like this (run on x86):
# tracer: function
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
<idle>-0 [000] 140973.465592: mwait_idle <-cpu_idle
<idle>-0 [000] 140973.465592: need_resched <-mwait_idle
<idle>-0 [000] 140973.465593: test_ti_thread_flag <-need_resched
<idle>-0 [000] 140973.465593: T.389 <-mwait_idle
<idle>-0 [000] 140973.465594: need_resched <-mwait_idle
<idle>-0 [000] 140973.465594: test_ti_thread_flag <-need_resched
<idle>-0 [000] 140973.465594: trace_power_end <-mwait_idle
<idle>-0 [000] 140973.465595: test_ti_thread_flag <-cpu_idle
<idle>-0 [000] 140973.465595: enter_idle <-cpu_idle
<idle>-0 [000] 140973.465596: mwait_idle <-cpu_idle
<idle>-0 [000] 140973.465596: need_resched <-mwait_idle
<idle>-0 [000] 140973.465596: test_ti_thread_flag <-need_resched
<idle>-0 [000] 140973.465597: T.389 <-mwait_idle
Which is very useful to have. But what we are trying to get is the
function graph tracer working. This is based off of ftrace's function
tracer. But it does a trick with the return address. Instead of just
reading it, it modifies it to call a hook (note kprobes does the same
thing). We replace the return address with a function that will also
trace the exit of the function. With this, we get a much nicer looking
trace, and also can record the time it took to execute the function
(note, graph tracing has overhead that skews this time).
Here's an example (again on x86):
# tracer: function_graph
#
# CPU DURATION FUNCTION CALLS
# | | | | | | |
0) 0.535 us | find_vma();
0) | handle_mm_fault() {
0) | count_vm_event() {
0) 0.333 us | test_ti_thread_flag();
0) 1.029 us | }
0) 0.336 us | pud_alloc();
0) 0.333 us | pmd_alloc();
0) 0.389 us | _spin_lock();
0) | do_wp_page() {
0) 0.322 us | vm_normal_page();
0) 0.329 us | reuse_swap_page();
0) | unlock_page() {
0) | wake_up_page() {
0) 0.352 us | page_waitqueue();
0) 0.442 us | __wake_up_bit();
0) 1.790 us | }
0) 2.444 us | }
0) | _spin_unlock() {
0) 0.371 us | T.272();
0) 1.089 us | }
>
> 1) Use the mechanism used by the OOPS dump.
>
> 2) Implement back traces based on DWARF2 unwinding data that is
> generated by the compiler.
We're not doing back traces. We need to modify the return of the
function being called. Note, the above functions that end with ";" are
leaf functions. Non leaf functions show "{" and end with "}".
The trick here is to find a reliable way to modify the return address.
Thanks,
-- Steve
>
>
> We're not doing back traces. We need to modify the return of the
> function being called. Note, the above functions that end with ";" are
> leaf functions. Non leaf functions show "{" and end with "}".
>
> The trick here is to find a reliable way to modify the return address.
>
would it not more or less be the same thing if you used -finstrument-functions
then and provide a stub __cyg_profile_func_enter/exit initialized to an empty
function until you replace it during tracing. This does give you an overhead
when you are not tracing - but it would make the tracer implementation quite
generic.
hofrat
Steven Rostedt wrote:
[...]
> We're not doing back traces. We need to modify the return of the
> function being called. Note, the above functions that end with ";" are
> leaf functions. Non leaf functions show "{" and end with "}".
>
> The trick here is to find a reliable way to modify the return address.
>
Well that's what I get for chiming in without fully understanding the
context.
I think it is fine to search backwards from the ($ra - 8) of the call to
_mcount until you encounter the adjustment of $sp. If you encounter a
store of $ra, you can assume that the function will use the stored value
when returning, otherwise modify the value restored to $ra (passed in
$at). That is probably what you were doing to begin with.
David Daney
On Wed, 2009-10-21 at 20:17 +0200, Nicholas Mc Guire wrote:
> >
> >
> > We're not doing back traces. We need to modify the return of the
> > function being called. Note, the above functions that end with ";" are
> > leaf functions. Non leaf functions show "{" and end with "}".
> >
> > The trick here is to find a reliable way to modify the return address.
> >
> would it not more or less be the same thing if you used -finstrument-functions
> then and provide a stub __cyg_profile_func_enter/exit initialized to an empty
> function until you replace it during tracing. This does give you an overhead
> when you are not tracing - but it would make the tracer implementation quite
> generic.
-finstrument-functions adds a substantial overhead when not tracing, and
there's no easy way to remove it. The beauty with this approach is that
-pg only adds a couple of instructions (one on x86). When tracing is
disabled, that one line is converted to a nop.
On x86, hackbench reports no difference between running with dynamic
ftrace configured but disabled (includes function graph configured) and
without it configured.
This allows function tracing to be configured in on production
environments.
-- Steve
Wu Zhangjin wrote:
>
>
> in MIPS, gcc(with -pg) only transfer the caller's return address(at) and
> the _mcount's return address(ra) to us.
>
> move at, ra
> jal _mcount
>
> if the function is a leaf, it will not save the return address(ra):
>
> ffffffff80101298 <au1k_wait>:
> ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
> ffffffff8010129c: ffbe0008 sd s8,8(sp)
> ffffffff801012a0: 03a0f02d move s8,sp
> ffffffff801012a4: 03e0082d move at,ra
> ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff801012ac: 00020021 nop
>
> so, we can hijack it directly in _mcount, but if the function is non-leaf,
> the
> return address is saved in the stack.
>
> ffffffff80133030 <copy_process>:
> ffffffff80133030: 67bdff50 daddiu sp,sp,-176
> ffffffff80133034: ffbe00a0 sd s8,160(sp)
> ffffffff80133038: 03a0f02d move s8,sp
> ffffffff8013303c: ffbf00a8 sd ra,168(sp)
> ffffffff80133040: ffb70098 sd s7,152(sp)
> ffffffff80133044: ffb60090 sd s6,144(sp)
> ffffffff80133048: ffb50088 sd s5,136(sp)
> ffffffff8013304c: ffb40080 sd s4,128(sp)
> ffffffff80133050: ffb30078 sd s3,120(sp)
> ffffffff80133054: ffb20070 sd s2,112(sp)
> ffffffff80133058: ffb10068 sd s1,104(sp)
> ffffffff8013305c: ffb00060 sd s0,96(sp)
> ffffffff80133060: 03e0082d move at,ra
> ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
> ffffffff80133068: 00020021 nop
>
> but we can not get the exact stack address(which saved ra) directly in
> _mcount, we need to search the content of at register in the stack space
> or search the "s{d,w} ra, offset(sp)" instruction in the text. 'Cause we
> can not prove there is only a match in the stack space, so, we search
> the text instead.
>
>
All this stuff can be avoided having PROFILE_BEFORE_PROLOGUE enabled in GCC
(gcc/config/mips/mips.h), like it is done one several other architectures.
Some of them even require it to be set for a working _mcount.
Putting the call of _mcount before the function prologue should make no harm
(it's working for me), and this way RA can be hooked for function graph
tracing
before it is saved to stack in the function prologue. Thus there will be no
difference between leaf and non-leaf functions.
This change is no big deal as GCC has to be patched anyway to get dynamic
ftracing (and with 4.2.1 I had to patch it to get ftracing in modules
working
- using -mlong-calls -, but it's possible that this is fixed in the newer
releases).
Regards,
Patrik
--
View this message in context: http://www.nabble.com/-PATCH--v4-1-9--tracing%3A-convert-trace_clock_local%28%29-as-weak-function-tp25993719p26008418.html
Sent from the linux-kernel mailing list archive at Nabble.com.
Hi,
On Wed, 2009-10-21 at 13:23 -0400, Steven Rostedt wrote:
> On Wed, 2009-10-21 at 10:07 -0700, David Daney wrote:
>
> > I have not used -pg, so I don't know for sure, I think all it does is
> > add the calls to _mcount. Someone could investigate
> > -fno-omit-frame-pointer, with that you may be able to use:
>
> Note, -pg assumes -fno-omit-frame-pointer, since -fomit-frame-pointer
> and -pg are incompatible.
Ralf has told me -pg really works with -fomit-frame-pointer, although
the gcc tool tells us they are incompatible when we use both of them
together, but when I remove -fno-omit-frame-pointer in
KBUILD_FLAGS(enabled by CONFIG_FRAME_POINTER), it definitely remove the
s8(fp) relative source code(Seems -fomit-frame-pointer is used by
default by gcc), the leaf function becomes this:
function:
80101144 <au1k_wait>:
80101144: 03e00821 move at,ra
80101148: 0c04271c jal 80109c70 <_mcount>
No more instruction,
and the non-leaf function becomes,
80126590 <copy_process>:
80126590: 27bdffa0 addiu sp,sp,-96
80126594: afbf005c sw ra,92(sp)
80126598: afbe0058 sw s8,88(sp)
8012659c: afb70054 sw s7,84(sp)
801265a0: afb60050 sw s6,80(sp)
801265a4: afb5004c sw s5,76(sp)
801265a8: afb40048 sw s4,72(sp)
801265ac: afb30044 sw s3,68(sp)
801265b0: afb20040 sw s2,64(sp)
801265b4: afb1003c sw s1,60(sp)
801265b8: afb00038 sw s0,56(sp)
801265bc: 03e00821 move at,ra
801265c0: 0c04271c jal 80109c70 <_mcount>
It may save about two instructions for us.
sw s8, offset(sp)
move s8, fp
and also, I have tried to just search "Save" instruction, if I find one,
that should be a non-leaf function, otherwise, it's leaf function, but I
can not prove no "Save" instruction before the leaf function's "move at,
ra", for example:
8010113c: 03e00008 jr ra
80101140: 00020021 nop
80101144 <au1k_wait>:
80101144: 03e00821 move at,ra
80101148: 0c04271c jal 80109c70 <_mcount>
if there is "save" instruction at address 80101140, it will fail.
Although I met no failure in several tries, there is no proof of it! any
ABI protection for this? if YES, this should be a better solution, for
it may works without -fno-omit-frame-pointer and save several
instructions for us.
and BTW, -fomit-frame-pointer does help "a lot" for us to get the stack
address without any big load:
sp = fp + (code & STACK_OFFSET_MASK);
ra = *(unsigned long *)sp;
although we can calculate it via the parent_addr, I have got this in:
NESTED(ftrace_graph_caller, PT_SIZE, ra)
MCOUNT_SAVE_REGS
PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
move a1, ra /* arg2: next ip, selfaddr */
PTR_SUBU a1, MCOUNT_INSN_SIZE
move a2, fp /* arg3: frame pointer */
jal prepare_ftrace_return
nop
MCOUNT_RESTORE_REGS
RETURN_BACK
END(ftrace_graph_caller)
so, parent_addr is the current sp + PT_R1:
sp = sp - PT_SIZE (did in MCOUNT_SAVE_REGS)
parent_addr = sp + PT_R1
so the fp can be calculated like this:
fp = parent_addr - PT_R1 + PT_SIZE;
this will take use a little time to calculate it.
ooh, so, both -fomit-frame-pointer and -fno-omit-frame-pointer work for
us, the left job is that: we need to prove them are really SAFE here.
> >
> > move s8,sp
> >
> > To identify function prologs, but it would still be ad hoc, as modern
> > versions of GCC will reorder instructions in the prolog for better
> > scheduling.
>
> I'll have to search the ABI documentation about calling _mcount in MIPS.
> There are assumptions with _mcount that are made. It may very well be
> safe to assume that the move s8,sp will always be there before an mcount
> call.
>
I have read the source code about _mcount in MIPS, no assumption for
using move s8,sp(which should be assumed in -fno-omit-frame-pointer),
that souce code in gcc for MIPS looks like this:
gcc/config/mips/mips.h:
#define FUNCTION_PROFILER(FILE, LABELNO)
{
if (TARGET_MIPS16)
sorry ("mips16 function profiling");
if (TARGET_LONG_CALLS)
{
/* For TARGET_LONG_CALLS use $3 for the address of _mcount. */
if (Pmode == DImode)
fprintf (FILE, "\tdla\t%s,_mcount\n", reg_names[GP_REG_FIRST +
3]);
else
fprintf (FILE, "\tla\t%s,_mcount\n", reg_names[GP_REG_FIRST +
3]);
}
fprintf (FILE, "\t.set\tnoat\n");
fprintf (FILE, "\tmove\t%s,%s\t\t# save current return address\n",
reg_names[GP_REG_FIRST + 1], reg_names[GP_REG_FIRST + 31]);
/* _mcount treats $2 as the static chain register. */
if (cfun->static_chain_decl != NULL)
fprintf (FILE, "\tmove\t%s,%s\n", reg_names[2],
reg_names[STATIC_CHAIN_REGNUM]);
if (!TARGET_NEWABI)
{
fprintf (FILE,
"\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack
\n",
TARGET_64BIT ? "dsubu" : "subu",
reg_names[STACK_POINTER_REGNUM],
reg_names[STACK_POINTER_REGNUM],
Pmode == DImode ? 16 : 8);
}
if (TARGET_LONG_CALLS)
fprintf (FILE, "\tjalr\t%s\n", reg_names[GP_REG_FIRST + 3]);
else
fprintf (FILE, "\tjal\t_mcount\n");
fprintf (FILE, "\t.set\tat\n");
/* _mcount treats $2 as the static chain register. */
if (cfun->static_chain_decl != NULL)
fprintf (FILE, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM],
reg_names[2]);
}
The above is a macro, I just removed the "\" for readable, BTW: I got
the -mloong-calls option there!
I have tried to hack the gcc source code, and made the ra is pushed into
the smallest address of stack, but it not always be 0(sp), which should
be defined in the relative ABIs, so, I giving up this way. and also, I
tried to save the stack address to ra(or at), but this will make the
leaf function not work, 'Cause it will not save the return address to
the stack, although we can try a trick to distinguish them(stack offset
ranges from 0 to PT_SIZE, but the return address is very big :-)). both
of them will make the implementation deviate from "original" and will
make things "awful".
ooh, I can imaging touching gcc is really not a good idea(Thanks to
Nicholas), so, go back to kernel space as Steven suggested, at last,
made probe_kernel_read() work with tracing_stop()/tracing_start()[Seems
these two functions are needed for probe_kernel_read(), will explain it in
another reply to an existing E-mail].
(Here is the gcc part what I have touched, only for FUN, if you hate it,
ignore it.
commit 4c2860a48914ecaa3b69b6eca4721dcf944ce342
Author: Wu Zhangjin <[email protected]>
Date: Tue Oct 20 00:49:37 2009 +0800
profile: fix function graph tracer of kernel for MIPS
It's very hard to get the stack address of the return address(ra) in
MIPS with the old profiling support(only the value of the return
address
available), so, we need to try to transfer the address to it!
TODO: a new option should be added for kernel own to activate this
patch.
Signed-off-by: Wu Zhangjin <[email protected]>
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index f153d13..239308d 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -407,6 +407,9 @@ int num_source_filenames;
written anything yet. */
const char *current_function_file = "";
+/* the offset of the ra register's stack address */
+int ra_offset_from_sp = -1;
+
/* A label counter used by PUT_SDB_BLOCK_START and PUT_SDB_BLOCK_END.
*/
int sdb_label_count;
@@ -8898,10 +8901,12 @@ mips_for_each_saved_reg (HOST_WIDE_INT
sp_offset, mips_save_restore_fn fn)
need a nop in the epilogue if at least one register is reloaded in
addition to return address. */
offset = cfun->machine->frame.gp_sp_offset - sp_offset;
- for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
mips_save_restore_reg (word_mode, regno, offset, fn);
+ if (crtl->profile)
+ ra_offset_from_sp = (regno == GP_REG_LAST) ? (int)offset : -1;
offset -= UNITS_PER_WORD;
}
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index c2533c4..f73bcf2 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2315,6 +2315,9 @@ typedef struct mips_args {
fprintf (FILE, "\tla\t%s,_mcount\n", reg_names[GP_REG_FIRST +
3]); \
}
\
fprintf (FILE, "\t.set\tnoat\n");
\
+ if (ra_offset_from_sp != -1)
\
+ fprintf (FILE, "\tlui\t%s,%d\t\t\n",
\
+ reg_names[GP_REG_FIRST + 31], ra_offset_from_sp);
\
fprintf (FILE, "\tmove\t%s,%s\t\t# save current return address\n",
\
reg_names[GP_REG_FIRST + 1], reg_names[GP_REG_FIRST + 31]);
\
/* _mcount treats $2 as the static chain register. */
\
@@ -3437,6 +3440,7 @@ extern const struct mips_cpu_info *mips_tune_info;
extern const struct mips_rtx_cost_data *mips_cost;
extern bool mips_base_mips16;
extern enum mips_code_readable_setting mips_code_readable;
+extern int ra_offset_from_sp;
#endif
/* Enable querying of DFA units. */
)
Regards,
Wu Zhangjin
Wu Zhangjin wrote:
>
> This patch implement a mips_timecounter_read(), which can be used to get
> high precision timestamp without extra lock.
>
> It is based on the clock counter register and the
> timecounter/cyclecounter abstraction layer of kernel.
>
>
I don't like this. I think MIPS should have a custom sched_clock()
- which ftrace would use without any extra magic -, instead of the
default jiffies-based one. It's true that the 32-bit C0 counter is too
small, but I've successfully extended it in software by having a word
in memory for the upper part of a 64-bit counter and
incrementing it every time C0 rolls over. My code is ugly and maybe
is racy, but works on our system.
Yesterday during getting that in shape I've found about that the
kernel even has a mechanism for that what I wanted to implement.
See here:
http://lxr.linux.no/#linux+v2.6.31/include/linux/cnt32_to_63.h#L34
And for a correct application of it:
http://lxr.linux.no/#linux+v2.6.31/arch/arm/plat-orion/time.c#L45
Next week I'll knock up something based on that and submit it for
review.
In my custom code (full 64-bit counter, not a 63-bit one like above)
I've set the scale factor to 15 for our system (Au1250 clocked at 756 MHz),
so
sched_clock() wraps around about every 6.2 days, and the upper word has
to be warmed up about every 5 sec. Numbers will be different for the code
using cnt32_to_63.h.
Regards,
Patrik
--
View this message in context: http://www.nabble.com/-PATCH--v4-1-9--tracing%3A-convert-trace_clock_local%28%29-as-weak-function-tp25993719p26009516.html
Sent from the linux-kernel mailing list archive at Nabble.com.
On Thu, 2009-10-22 at 19:38 +0800, Wu Zhangjin wrote:
> Hi,
>
> On Wed, 2009-10-21 at 13:23 -0400, Steven Rostedt wrote:
> Ralf has told me -pg really works with -fomit-frame-pointer, although
> the gcc tool tells us they are incompatible when we use both of them
> together. But when I remove -fno-omit-frame-pointer in
> KBUILD_FLAGS (enabled by CONFIG_FRAME_POINTER), it definitely removes the
> s8(fp)-relative source code (it seems -fomit-frame-pointer is used by
> default by gcc), and the leaf function becomes this:
>
> function:
>
> 80101144 <au1k_wait>:
> 80101144: 03e00821 move at,ra
> 80101148: 0c04271c jal 80109c70 <_mcount>
>
> No more instruction,
>
> and the non-leaf function becomes,
>
> 80126590 <copy_process>:
> 80126590: 27bdffa0 addiu sp,sp,-96
> 80126594: afbf005c sw ra,92(sp)
> 80126598: afbe0058 sw s8,88(sp)
> 8012659c: afb70054 sw s7,84(sp)
> 801265a0: afb60050 sw s6,80(sp)
> 801265a4: afb5004c sw s5,76(sp)
> 801265a8: afb40048 sw s4,72(sp)
> 801265ac: afb30044 sw s3,68(sp)
> 801265b0: afb20040 sw s2,64(sp)
> 801265b4: afb1003c sw s1,60(sp)
> 801265b8: afb00038 sw s0,56(sp)
> 801265bc: 03e00821 move at,ra
> 801265c0: 0c04271c jal 80109c70 <_mcount>
>
> It may save about two instructions for us.
>
> sw s8, offset(sp)
> move s8, fp
>
> and also, I have tried to just search "Save" instruction, if I find one,
So you look for "sw ..."?
> that should be a non-leaf function, otherwise, it's leaf function, but I
> can not prove no "Save" instruction before the leaf function's "move at,
> ra", for example:
Yes but it should never be saving the ra. You can search all
instructions before the move at,ra until you find the save of the ra, or
you find something that is not a save. If you find the saving of ra, it
is not a leaf, but if you don't find the saving of ra, then it is a
leaf.
>
> 8010113c: 03e00008 jr ra
> 80101140: 00020021 nop
>
> 80101144 <au1k_wait>:
> 80101144: 03e00821 move at,ra
> 80101148: 0c04271c jal 80109c70 <_mcount>
>
> if there is "save" instruction at address 80101140, it will fail.
> Although I met no failure in several tries, I have no proof of it! Is there any
> ABI protection for this? If YES, this should be a better solution, for
> it may work without -fno-omit-frame-pointer and save several
> instructions for us.
If we don't stop at just one save, but look for the saving of ra, it
should not fail.
>
> and BTW, -fomit-frame-pointer does help "a lot" for us to get the stack
> address without any big load:
>
> sp = fp + (code & STACK_OFFSET_MASK);
> ra = *(unsigned long *)sp;
>
> although we can calculate it via the parent_addr, I have got this in:
>
> NESTED(ftrace_graph_caller, PT_SIZE, ra)
> MCOUNT_SAVE_REGS
>
> PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
> move a1, ra /* arg2: next ip, selfaddr */
> PTR_SUBU a1, MCOUNT_INSN_SIZE
> move a2, fp /* arg3: frame pointer */
> jal prepare_ftrace_return
> nop
>
> MCOUNT_RESTORE_REGS
> RETURN_BACK
> END(ftrace_graph_caller)
>
> so, parent_addr is the current sp + PT_R1:
>
> sp = sp - PT_SIZE (did in MCOUNT_SAVE_REGS)
> parent_addr = sp + PT_R1
>
> so the fp can be calculated like this:
>
> fp = parent_addr - PT_R1 + PT_SIZE;
>
> this will take us a little time to calculate it.
>
> ooh, so, both -fomit-frame-pointer and -fno-omit-frame-pointer work for
> us, the left job is that: we need to prove them are really SAFE here.
>
Great!
-- Steve
> > >
> > > move s8,sp
> > >
> > > To identify function prologs, but it would still be ad hoc, as modern
> > > versions of GCC will reorder instructions in the prolog for better
> > > scheduling.
> >
On Thu, 2009-10-22 at 09:17 -0400, Steven Rostedt wrote:
> On Thu, 2009-10-22 at 19:38 +0800, Wu Zhangjin wrote:
> > Hi,
> >
> > On Wed, 2009-10-21 at 13:23 -0400, Steven Rostedt wrote:
>
> > Ralf have told me -pg really works with -fomit-frame-pointer, although
> > the gcc tool tell us they are not incompatible when we use both of them
> > together, but when I remove -fno-omit-frame-pointer in
> > KBUILD_FLAGS(enabled by CONFIG_FRAME_POINTER), it definitely remove the
> > s8(fp) relative source code(Seems -fomit-frame-pionter is used by
> > default by gcc), the leaf function becomes this:
> >
> > function:
> >
> > 80101144 <au1k_wait>:
> > 80101144: 03e00821 move at,ra
> > 80101148: 0c04271c jal 80109c70 <_mcount>
> >
> > No more instruction,
> >
> > and the non-leaf function becomes,
> >
> > 80126590 <copy_process>:
> > 80126590: 27bdffa0 addiu sp,sp,-96
> > 80126594: afbf005c sw ra,92(sp)
> > 80126598: afbe0058 sw s8,88(sp)
> > 8012659c: afb70054 sw s7,84(sp)
> > 801265a0: afb60050 sw s6,80(sp)
> > 801265a4: afb5004c sw s5,76(sp)
> > 801265a8: afb40048 sw s4,72(sp)
> > 801265ac: afb30044 sw s3,68(sp)
> > 801265b0: afb20040 sw s2,64(sp)
> > 801265b4: afb1003c sw s1,60(sp)
> > 801265b8: afb00038 sw s0,56(sp)
> > 801265bc: 03e00821 move at,ra
> > 801265c0: 0c04271c jal 80109c70 <_mcount>
> >
> > It may save about two instructions for us.
> >
> > sw s8, offset(sp)
> > move s8, fp
> >
> > and also, I have tried to just search "Save" instruction, if I find one,
>
> So you look for "sw ..."?
>
> > that should be a non-leaf function, otherwise, it's leaf function, but I
> > can not prove no "Save" instruction before the leaf function's "move at,
> > ra", for example:
>
> Yes but it should never be saving the ra. You can search all
> instructions before the move at,ra until you find the save of the ra, or
> you find something that is not a save. If you find the saving of ra, it
> is not a leaf, but if you don't find the saving of ra, then it is a
> leaf.
>
> >
> > 8010113c: 03e00008 jr ra
> > 80101140: 00020021 nop
> >
> > 80101144 <au1k_wait>:
> > 80101144: 03e00821 move at,ra
> > 80101148: 0c04271c jal 80109c70 <_mcount>
> >
> > if there is "save" instruction at address 80101140, it will fail.
> > Although, I met not failure with several tries, but no prove on it! any
> > ABI protection for this? if YES, this should be a better solution, for
> > it may works without -fno-omit-frame-pointer and save several
> > instructions for us.
>
> If we don't stop at just one save, but look for the saving of ra, it
> should not fail.
>
We cannot look for the saving of ra continuously (when should we stop?
With -fno-omit-frame-pointer, we have "move s8,sp" or "addiu sp, sp,
-offset", but without it, we have no "guidepost" to know where the
beginning of the function is!), because we may find the saving of ra of
another function, which will fail at that time.
BTW: Just replace probe_kernel_read() and tracing_stop/tracing_start by
asm, it works in 32bit, but fails in 64bit, I'm trying to find why!(TLB
miss on load or ifetch, will fix it asap! and resend the patchset out!)
Regards,
Wu Zhangjin
On Thu, 2009-10-22 at 21:31 +0800, Wu Zhangjin wrote:
> > If we don't stop at just one save, but look for the saving of ra, it
> > should not fail.
> >
>
> We can not look for the saving of ra continuously(when should we stop?
When we hit something other than sw .... I'm sure we will get to
something other than a store. ;-)
> if with -fno-omit-fram-pointer, we have "move s8,sp" or "addiu sp, sp,
> -offset", but without it, we have no "guideboard" to know that is the
> beginning of the function!), 'Cause we may find the saving of ra of
> another function, which will fail at that time.
But that other function should have a jump to mcount before it, or some
other kind of return. A function that has _mcount attached, can not be
inlined. So something must have jumped to it. There should be no cases
where code from above just "falls" into the leaf function.
>
> BTW: Just replace probe_kernel_read() and tracing_stop/tracing_start by
> asm, it works in 32bit, but fails in 64bit, I'm trying to find why!(TLB
> miss on load or ifetch, will fix it asap! and resend the patchset out!)
Thanks!
-- Steve
Note, I'm going to try booting a vanilla kernel on the notebook. If it
works, I'll start applying your patches and playing with it too. But I
also have some other work to do first.
Wu Zhangjin wrote:
> Hi,
>
> On Wed, 2009-10-21 at 13:23 -0400, Steven Rostedt wrote:
>> On Wed, 2009-10-21 at 10:07 -0700, David Daney wrote:
>>
>>> I have not used -pg, so I don't know for sure, I think all it does is
>>> add the calls to _mcount. Someone could investigate
>>> -fno-omit-frame-pointer, with that you may be able to use:
>> Note, -pg assumes -fno-omit-frame-pointer, since -fomit-frame-pointer
>> and -pg are incompatible.
>
> Ralf have told me -pg really works with -fomit-frame-pointer, although
> the gcc tool tell us they are not incompatible when we use both of them
> together, but when I remove -fno-omit-frame-pointer in
> KBUILD_FLAGS(enabled by CONFIG_FRAME_POINTER), it definitely remove the
> s8(fp) relative source code(Seems -fomit-frame-pionter is used by
> default by gcc), the leaf function becomes this:
>
> function:
>
> 80101144 <au1k_wait>:
> 80101144: 03e00821 move at,ra
> 80101148: 0c04271c jal 80109c70 <_mcount>
>
> No more instruction,
>
> and the non-leaf function becomes,
>
> 80126590 <copy_process>:
> 80126590: 27bdffa0 addiu sp,sp,-96
> 80126594: afbf005c sw ra,92(sp)
> 80126598: afbe0058 sw s8,88(sp)
> 8012659c: afb70054 sw s7,84(sp)
> 801265a0: afb60050 sw s6,80(sp)
> 801265a4: afb5004c sw s5,76(sp)
> 801265a8: afb40048 sw s4,72(sp)
> 801265ac: afb30044 sw s3,68(sp)
> 801265b0: afb20040 sw s2,64(sp)
> 801265b4: afb1003c sw s1,60(sp)
> 801265b8: afb00038 sw s0,56(sp)
> 801265bc: 03e00821 move at,ra
> 801265c0: 0c04271c jal 80109c70 <_mcount>
>
> It may save about two instructions for us.
>
> sw s8, offset(sp)
> move s8, fp
>
> and also, I have tried to just search "Save" instruction, if I find one,
> that should be a non-leaf function, otherwise, it's leaf function, but I
> can not prove no "Save" instruction before the leaf function's "move at,
> ra", for example:
>
> 8010113c: 03e00008 jr ra
> 80101140: 00020021 nop
>
> 80101144 <au1k_wait>:
> 80101144: 03e00821 move at,ra
> 80101148: 0c04271c jal 80109c70 <_mcount>
>
> if there is "save" instruction at address 80101140, it will fail.
> Although, I met not failure with several tries, but no prove on it! any
> ABI protection for this? if YES, this should be a better solution, for
> it may works without -fno-omit-frame-pointer and save several
> instructions for us.
This is what I was talking about up-thread. Leaf functions may have no
function prolog. If you do code scanning you will fail. While scanning
backwards, there is no way to know when you have entered a new function.
Looking for function return sequences 'jr ra' doesn't work as there
may be functions with multiple return sites, functions that never
return, or arbitrary data before the function. I think you have to
force a frame pointer to be established if you want this to work.
David Daney
On Thu, 2009-10-22 at 08:59 -0700, David Daney wrote:
> This is what I was talking about up-thread. Leaf functions may have no
> function prolog. If you do code scanning you will fail. While scanning
> backwards, there is no way to know when you have entered a new function.
> Looking for function return sequences 'jr ra' doesn't work as there
> may be functions with multiple return sites, functions that never
> return, or arbitrary data before the function. I think you have to
> force a frame pointer to be established if you want this to work.
Functions that run off into another function?? I guess the compiler
could do that, but with -pg enabled, I would think it is broken.
-- Steve
Steven Rostedt wrote:
> On Thu, 2009-10-22 at 08:59 -0700, David Daney wrote:
>
>> This is what I was talking about up-thread. Leaf functions may have no
>> function prolog. If you do code scanning you will fail. While scanning
>> backwards, there is no way to know when you have entered a new function.
>> Looking for function return sequences 'jr ra' doesn't work as there
>> may be functions with multiple return sites, functions that never
>> return, or arbitrary data before the function. I think you have to
>> force a frame pointer to be established if you want this to work.
>
> Functions that run off into another function?? I guess the compiler
> could do that, but with -pg enable, I would think is broken.
>
Use of GCC-4.5's __builtin_unreachable() can lead to this, as well as
functions that call noreturn functions.
Note to self: Time to resend the __builtin_unreachable() patch set again...
David Daney
On Wed, 2009-10-21 at 12:12 -0400, Steven Rostedt wrote:
> On Wed, 2009-10-21 at 22:35 +0800, Wu Zhangjin wrote:
> > The implementation of function graph tracer for MIPS is a little
> > different from X86.
> >
> > in MIPS, gcc(with -pg) only transfer the caller's return address(at) and
> > the _mcount's return address(ra) to us.
> >
> > move at, ra
> > jal _mcount
> >
> > if the function is a leaf, it will not save the return address (ra):
> >
> > ffffffff80101298 <au1k_wait>:
> > ffffffff80101298: 67bdfff0 daddiu sp,sp,-16
> > ffffffff8010129c: ffbe0008 sd s8,8(sp)
> > ffffffff801012a0: 03a0f02d move s8,sp
> > ffffffff801012a4: 03e0082d move at,ra
> > ffffffff801012a8: 0c042930 jal ffffffff8010a4c0 <_mcount>
> > ffffffff801012ac: 00020021 nop
> >
> > so, we can hijack it directly in _mcount, but if the function is non-leaf, the
> > return address is saved in the stack.
> >
> > ffffffff80133030 <copy_process>:
> > ffffffff80133030: 67bdff50 daddiu sp,sp,-176
> > ffffffff80133034: ffbe00a0 sd s8,160(sp)
> > ffffffff80133038: 03a0f02d move s8,sp
> > ffffffff8013303c: ffbf00a8 sd ra,168(sp)
> > ffffffff80133040: ffb70098 sd s7,152(sp)
> > ffffffff80133044: ffb60090 sd s6,144(sp)
> > ffffffff80133048: ffb50088 sd s5,136(sp)
> > ffffffff8013304c: ffb40080 sd s4,128(sp)
> > ffffffff80133050: ffb30078 sd s3,120(sp)
> > ffffffff80133054: ffb20070 sd s2,112(sp)
> > ffffffff80133058: ffb10068 sd s1,104(sp)
> > ffffffff8013305c: ffb00060 sd s0,96(sp)
> > ffffffff80133060: 03e0082d move at,ra
> > ffffffff80133064: 0c042930 jal ffffffff8010a4c0 <_mcount>
> > ffffffff80133068: 00020021 nop
> >
> > but we can not get the exact stack address(which saved ra) directly in
> > _mcount, we need to search the content of at register in the stack space
> > or search the "s{d,w} ra, offset(sp)" instruction in the text. 'Cause we
> > can not prove there is only a match in the stack space, so, we search
> > the text instead.
> >
> > as we can see, if the first instruction above "move at, ra" is "move s8,
> > sp"(move fp, sp), it is a leaf function, so we hijack the at register
>
> Are you sure it will always be the first instruction for leaf registers.
> You may want to search for that instruction and stop on it. If you have
> not yet found the storage of ra in the stack, then you know it is a leaf
> function.
>
> > directly via put &return_to_handler into it, and otherwise, we search
> > the "s{d,w} ra, offset(sp)" instruction to get the stack offset, and
> > then the stack address. we use the above copy_process() as an example,
> > we at last find "ffbf00a8", 0xa8 is the stack offset, we plus it with
> > s8(fp), that is the stack address, we hijack the content via writing the
> > &return_to_handler in.
> >
> > Signed-off-by: Wu Zhangjin <[email protected]>
> > ---
> > arch/mips/Kconfig | 1 +
> > arch/mips/kernel/ftrace.c | 192 ++++++++++++++++++++++++++++++++++++++++
> > arch/mips/kernel/mcount.S | 55 +++++++++++-
> > arch/mips/kernel/vmlinux.lds.S | 1 +
> > 4 files changed, 248 insertions(+), 1 deletions(-)
> >
> > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> > index 147fbbc..de690fd 100644
> > --- a/arch/mips/Kconfig
> > +++ b/arch/mips/Kconfig
> > @@ -8,6 +8,7 @@ config MIPS
> > select HAVE_FUNCTION_TRACE_MCOUNT_TEST
> > select HAVE_DYNAMIC_FTRACE
> > select HAVE_FTRACE_MCOUNT_RECORD
> > + select HAVE_FUNCTION_GRAPH_TRACER
> > # Horrible source of confusion. Die, die, die ...
> > select EMBEDDED
> > select RTC_LIB if !LEMOTE_FULOONG2E
> > diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
> > index 1e44865..fddee5b 100644
> > --- a/arch/mips/kernel/ftrace.c
> > +++ b/arch/mips/kernel/ftrace.c
> > @@ -13,6 +13,8 @@
> > #include <linux/ftrace.h>
> >
> > #include <asm/cacheflush.h>
> > +#include <asm/asm.h>
> > +#include <asm/asm-offsets.h>
> >
> > #ifdef CONFIG_DYNAMIC_FTRACE
> >
> > @@ -105,3 +107,193 @@ int __init ftrace_dyn_arch_init(void *data)
> > return 0;
> > }
> > #endif /* CONFIG_DYNAMIC_FTRACE */
> > +
> > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > +
> > +#ifdef CONFIG_DYNAMIC_FTRACE
> > +
> > +#define JMP 0x08000000 /* jump to target directly */
> > +extern void ftrace_graph_call(void);
> > +
> > +int ftrace_enable_ftrace_graph_caller(void)
> > +{
> > + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> > + unsigned char old[MCOUNT_INSN_SIZE], *new;
> > + int ret;
> > +
> > + /* j ftrace_stub */
> > + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> > + new = ftrace_call_replace(JMP, (unsigned long)ftrace_graph_caller);
> > +
> > + ret = ftrace_modify_code(ip, old, new);
> > +
> > + return ret;
> > +}
> > +
> > +int ftrace_disable_ftrace_graph_caller(void)
> > +{
> > + unsigned long ip = (unsigned long)(&ftrace_graph_call);
> > + unsigned char old[MCOUNT_INSN_SIZE], *new;
> > + int ret;
> > +
> > + /* j ftrace_graph_caller */
> > + memcpy(old, (unsigned long *)ip, MCOUNT_INSN_SIZE);
> > + new = ftrace_call_replace(JMP, (unsigned long)ftrace_stub);
> > +
> > + ret = ftrace_modify_code(ip, old, new);
> > +
> > + return ret;
> > +}
> > +
> > +#endif /* !CONFIG_DYNAMIC_FTRACE */
> > +
> > +#define S_RA (0x2fbf << 16) /* 32bit: afbf, 64bit: ffbf */
> > +/* This is only available when enabled -fno-omit-frame-pointer with CONFIG_FRAME_POINTER=y */
> > +#define MOV_FP_SP 0x03a0f021 /* 32bit: 0x03a0f021, 64bit: 0x03a0f02d */
> > +#define STACK_OFFSET_MASK 0xfff /* stack offset range: 0 ~ PT_SIZE(304) */
> > +
> > +unsigned long ftrace_get_parent_addr(unsigned long self_addr,
> > + unsigned long parent,
> > + unsigned long parent_addr,
> > + unsigned long fp)
> > +{
> > + unsigned long sp, ip, ra;
> > + unsigned int code;
> > +
> > + /* move to the instruction "move ra, at" */
> > + ip = self_addr - 8;
> > +
> > + /* search the text until finding the "move s8, sp" instruction or
> > + * "s{d,w} ra, offset(sp)" instruction */
> > + do {
> > + ip -= 4;
> > + /* read the text we want to match */
> > + if (probe_kernel_read(&code, (void *)ip, 4)) {
>
> I thought you had issues with using probe_kernel_read here?
>
> > + WARN_ON(1);
> > + panic("read the text failure\n");
> > + }
> > +
> > + /* if the first instruction above "move at, ra" is "move
> > + * s8(fp), sp", means the function is a leaf */
>
> Ah, the comment is incorrect. We do search for the move and stop there.
> So it is not the first instruction. The comment should read...
>
> If we hit the "move s8(fp), sp" instruction before finding where the
> ra is stored, then this is a leaf function and it does not store the
> ra on the stack.
>
>
> > + if ((code & MOV_FP_SP) == MOV_FP_SP)
> > + return parent_addr;
> > + } while (((code & S_RA) != S_RA));
> > +
> > + sp = fp + (code & STACK_OFFSET_MASK);
> > + ra = *(unsigned long *)sp;
> > +
> > + if (ra == parent)
> > + return sp;
> > + else
> > + panic
> > + ("failed on getting stack address of ra\n: addr: 0x%lx, code: 0x%x\n",
> > + ip, code);
>
> I would not panic. I would disable the tracing, and not do the change.
> The code should work regardless.
>
> > +}
> > +
> > +/*
> > + * Hook the return address and push it in the stack of return addrs
> > + * in current thread info.
> > + */
> > +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
> > + unsigned long fp)
> > +{
> > + unsigned long old;
> > + int faulted;
> > + struct ftrace_graph_ent trace;
> > + unsigned long return_hooker = (unsigned long)
> > + &return_to_handler;
> > +
> > + /*
> > + * Protect against fault, even if it shouldn't
> > + * happen. This tool is too much intrusive to
> > + * ignore such a protection.
> > + *
> > + * old = *parent;
> > + */
> > + asm volatile (
> > + "1: " STR(PTR_L) " %[old], 0(%[parent])\n"
> > + " li %[faulted], 0\n"
> > + "2:\n"
> > +
> > + ".section .fixup, \"ax\"\n"
> > + "3: li %[faulted], 1\n"
> > + " j 2b\n"
> > + ".previous\n"
> > +
> > + ".section\t__ex_table,\"a\"\n\t"
> > + STR(PTR) "\t1b, 3b\n\t"
> > + ".previous\n"
> > +
> > + : [old] "=&r" (old), [faulted] "=r" (faulted)
> > + : [parent] "r" (parent)
> > + : "memory"
> > + );
> > +
> > + if (unlikely(faulted)) {
> > + ftrace_graph_stop();
> > + WARN_ON(1);
>
> See, this is how we kill the tracing.
>
> > + return;
> > + }
> > +
> > + /* The argument *parent only work for leaf function, we need to check
> > + * whether the function is leaf, if not, we need to get the real stack
> > + * address which stored it.
> > + *
> > + * and here, we must stop tracing before calling probe_kernel_read().
> > + * after calling it, restart tracing. otherwise, it will hang there.*/
> > + tracing_stop();
>
> Ah, here you stop tracing. I still prefer to use a direct asm. This has
> a little overhead.
>
> Also, this completely negates the above asm, which basically gets the
> parent the "x86" way.
>
> > + parent =
> > + (unsigned long *)ftrace_get_parent_addr(self_addr, old,
> > + (unsigned long)parent, fp);
> > + tracing_start();
> > +
> > + if (unlikely(atomic_read(&current->tracing_graph_pause)))
> > + return;
> > +
> > + /*
> > + * Protect against fault, even if it shouldn't
> > + * happen. This tool is too much intrusive to
> > + * ignore such a protection.
> > + *
> > + * *parent = return_hooker;
> > + */
> > + asm volatile (
> > + "1: " STR(PTR_S) " %[return_hooker], 0(%[parent])\n"
> > + " li %[faulted], 0\n"
> > + "2:\n"
> > +
> > + ".section .fixup, \"ax\"\n"
> > + "3: li %[faulted], 1\n"
> > + " j 2b\n"
> > + ".previous\n"
> > +
> > + ".section\t__ex_table,\"a\"\n\t"
> > + STR(PTR) "\t1b, 3b\n\t"
> > + ".previous\n"
> > +
> > + : [faulted] "=r" (faulted)
> > + : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
> > + : "memory"
> > + );
>
> Hmm, the above should only be done for non leaf functions.
>
> > +
> > + if (unlikely(faulted)) {
> > + ftrace_graph_stop();
> > + WARN_ON(1);
> > + return;
> > + }
> > +
> > + if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
> > + -EBUSY) {
> > + *parent = old;
>
> And this fails for leaf functions too.
>
> > + return;
> > + }
> > +
> > + trace.func = self_addr;
> > +
> > + /* Only trace if the calling function expects to */
> > + if (!ftrace_graph_entry(&trace)) {
> > + current->curr_ret_stack--;
> > + *parent = old;
>
> ditto
>
> > + }
> > +}
> > +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> > diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
> > index 30637e6..356e81c 100644
> > --- a/arch/mips/kernel/mcount.S
> > +++ b/arch/mips/kernel/mcount.S
> > @@ -89,6 +89,14 @@ ftrace_call:
> > nop
> >
> > MCOUNT_RESTORE_REGS
> > +
> > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > + .globl ftrace_graph_call
> > +ftrace_graph_call:
> > + j ftrace_stub
> > + nop
> > +#endif
> > +
> > .globl ftrace_stub
> > ftrace_stub:
> > RETURN_BACK
> > @@ -106,7 +114,15 @@ NESTED(_mcount, PT_SIZE, ra)
> > PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> > bne t0, t1, static_trace
> > nop
> > -
> > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > + PTR_L t2, ftrace_graph_return
> > + bne t0, t2, ftrace_graph_caller
> > + nop
> > + PTR_LA t0, ftrace_graph_entry_stub
> > + PTR_L t2, ftrace_graph_entry
> > + bne t0, t2, ftrace_graph_caller
> > + nop
> > +#endif
> > j ftrace_stub
> > nop
> >
> > @@ -125,5 +141,42 @@ ftrace_stub:
> >
> > #endif /* ! CONFIG_DYNAMIC_FTRACE */
> >
> > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > +
> > +NESTED(ftrace_graph_caller, PT_SIZE, ra)
> > + MCOUNT_SAVE_REGS
> > +
> > + PTR_LA a0, PT_R1(sp) /* arg1: &AT -> a0 */
>
> Ah! You load the address of the stack you stored the ra with!
>
> Good idea. I guess the leaf function update will work there too!
>
> > + move a1, ra /* arg2: next ip, selfaddr */
> > + PTR_SUBU a1, MCOUNT_INSN_SIZE
> > + move a2, fp /* arg3: frame pointer */
> > + jal prepare_ftrace_return
> > + nop
> > +
> > + MCOUNT_RESTORE_REGS
> > + RETURN_BACK
> > + END(ftrace_graph_caller)
> > +
> > + .align 2
> > + .globl return_to_handler
> > +return_to_handler:
> > + PTR_SUBU sp, PT_SIZE
> > + PTR_S v0, PT_R2(sp)
> > + PTR_S v1, PT_R3(sp)
> > +
> > + jal ftrace_return_to_handler
> > + nop
> > +
> > + /* restore the real parent address: v0 -> ra */
> > + move ra, v0
> > +
> > + PTR_L v0, PT_R2(sp)
> > + PTR_L v1, PT_R3(sp)
> > + PTR_ADDIU sp, PT_SIZE
> > +
> > + jr ra
> > + nop
> > +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> > +
> > .set at
> > .set reorder
> > diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
> > index 162b299..f25df73 100644
> > --- a/arch/mips/kernel/vmlinux.lds.S
> > +++ b/arch/mips/kernel/vmlinux.lds.S
> > @@ -46,6 +46,7 @@ SECTIONS
> > SCHED_TEXT
> > LOCK_TEXT
> > KPROBES_TEXT
> > + IRQENTRY_TEXT
>
> This looks like it should be in a separate patch. I don't see where you
> explain this?
>
This is used to fix this error:
kernel/built-in.o: In function `print_graph_irq':
trace_functions_graph.c:(.text+0x82f40): undefined reference to
`__irqentry_text_start'
trace_functions_graph.c:(.text+0x82f48): undefined reference to
`__irqentry_text_start'
trace_functions_graph.c:(.text+0x82f70): undefined reference to
`__irqentry_text_end'
trace_functions_graph.c:(.text+0x82f74): undefined reference to
`__irqentry_text_end'
> -- Steve
>
> > *(.text.*)
> > *(.fixup)
> > *(.gnu.warning)
>
On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> On Wed, 2009-10-21 at 22:34 +0800, Wu Zhangjin wrote:
>
> > +++ b/arch/mips/kernel/mcount.S
> > @@ -0,0 +1,94 @@
> > +/*
> > + * the mips-specific _mcount implementation
> > + *
> > + * This file is subject to the terms and conditions of the GNU General Public
> > + * License. See the file "COPYING" in the main directory of this archive for
> > + * more details.
> > + *
> > + * Copyright (C) 2009 DSLab, Lanzhou University, China
> > + * Author: Wu Zhangjin <[email protected]>
> > + */
> > +
> > +#include <asm/regdef.h>
> > +#include <asm/stackframe.h>
> > +#include <asm/ftrace.h>
> > +
> > + .text
> > + .set noreorder
> > + .set noat
> > +
> > + /* since there is a "addiu sp,sp,-8" before "jal _mcount" in 32bit */
> > + .macro RESTORE_SP_FOR_32BIT
> > +#ifdef CONFIG_32BIT
> > + PTR_ADDIU sp, 8
> > +#endif
> > + .endm
> > +
> > + .macro MCOUNT_SAVE_REGS
> > + PTR_SUBU sp, PT_SIZE
> > + PTR_S ra, PT_R31(sp)
> > + PTR_S AT, PT_R1(sp)
> > + PTR_S a0, PT_R4(sp)
> > + PTR_S a1, PT_R5(sp)
> > + PTR_S a2, PT_R6(sp)
> > + PTR_S a3, PT_R7(sp)
> > +#ifdef CONFIG_64BIT
> > + PTR_S a4, PT_R8(sp)
> > + PTR_S a5, PT_R9(sp)
> > + PTR_S a6, PT_R10(sp)
> > + PTR_S a7, PT_R11(sp)
> > +#endif
> > + .endm
> > +
> > + .macro MCOUNT_RESTORE_REGS
> > + PTR_L ra, PT_R31(sp)
> > + PTR_L AT, PT_R1(sp)
> > + PTR_L a0, PT_R4(sp)
> > + PTR_L a1, PT_R5(sp)
> > + PTR_L a2, PT_R6(sp)
> > + PTR_L a3, PT_R7(sp)
> > +#ifdef CONFIG_64BIT
> > + PTR_L a4, PT_R8(sp)
> > + PTR_L a5, PT_R9(sp)
> > + PTR_L a6, PT_R10(sp)
> > + PTR_L a7, PT_R11(sp)
> > +#endif
> > + PTR_ADDIU sp, PT_SIZE
> > +.endm
> > +
> > + .macro MCOUNT_SET_ARGS
> > + move a0, ra /* arg1: next ip, selfaddr */
> > + move a1, AT /* arg2: the caller's next ip, parent */
> > + PTR_SUBU a0, MCOUNT_INSN_SIZE
> > + .endm
> > +
> > + .macro RETURN_BACK
> > + jr ra
> > + move ra, AT
> > + .endm
> > +
> > +NESTED(_mcount, PT_SIZE, ra)
> > + RESTORE_SP_FOR_32BIT
> > + PTR_LA t0, ftrace_stub
> > + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
>
> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> the dynamics of C function ABI.
So, perhaps we can use the saved registers(a0,a1...) instead.
Regards!
FYI:
One of the comments made at the kernel summit, was to have people trim
email when sending. It makes it a lot easier to reply then to scroll
down lots of lines of quoted text just to read something that only
references part of the email.
Don't be afraid to cut out quoted text. It does help (as I did in this
reply).
On Fri, 2009-10-23 at 01:39 +0800, Wu Zhangjin wrote:
> > > .set reorder
> > > diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
> > > index 162b299..f25df73 100644
> > > --- a/arch/mips/kernel/vmlinux.lds.S
> > > +++ b/arch/mips/kernel/vmlinux.lds.S
> > > @@ -46,6 +46,7 @@ SECTIONS
> > > SCHED_TEXT
> > > LOCK_TEXT
> > > KPROBES_TEXT
> > > + IRQENTRY_TEXT
> >
> > This looks like it should be in a separate patch. I don't see where you
> > explain this?
> >
>
> This is used to fix this error:
>
> kernel/built-in.o: In function `print_graph_irq':
> trace_functions_graph.c:(.text+0x82f40): undefined reference to
> `__irqentry_text_start'
> trace_functions_graph.c:(.text+0x82f48): undefined reference to
> `__irqentry_text_start'
> trace_functions_graph.c:(.text+0x82f70): undefined reference to
> `__irqentry_text_end'
> trace_functions_graph.c:(.text+0x82f74): undefined reference to
> `__irqentry_text_end'
Right, and this should be in a separate patch, showing this error in the
change log.
Thanks,
-- Steve
On Fri, 2009-10-23 at 01:47 +0800, Wu Zhangjin wrote:
> On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> > Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> > the dynamics of C function ABI.
>
> So, perhaps we can use the saved registers(a0,a1...) instead.
I don't know. I was just asking.
-- Steve
On Thu, 2009-10-22 at 09:16 -0700, David Daney wrote:
> Steven Rostedt wrote:
> > Functions that run off into another function?? I guess the compiler
> > could do that, but with -pg enable, I would think is broken.
> >
>
> Use of GCC-4.5's __builtin_unreachable() can lead to this, as well as
> functions that call noreturn functions.
But still. Should that unreachable code have a "save ra to stack"? If
not, this method is still safe.
-- Steve
On Thu, 2009-10-22 at 13:59 -0400, Steven Rostedt wrote:
> On Fri, 2009-10-23 at 01:47 +0800, Wu Zhangjin wrote:
> > On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
>
> > > Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> > > the dynamics of C function ABI.
> >
> > So, perhaps we can use the saved registers(a0,a1...) instead.
>
> I don't know. I was just asking.
I just used a0,a1,a2 instead of t0,t1,t2, we are safe to use them for
they have been saved/restored.
Thanks!
Wu Zhangjin wrote:
> On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
[...]
>>> +
>>> +NESTED(_mcount, PT_SIZE, ra)
>>> + RESTORE_SP_FOR_32BIT
>>> + PTR_LA t0, ftrace_stub
>>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
>> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
>> the dynamics of C function ABI.
>
> So, perhaps we can use the saved registers(a0,a1...) instead.
>
a0..a7 may not always be saved.
You can use at, v0, v1 and all the temporary registers. Note that for
the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
that at == $1 and contains the callers ra. For a 32-bit kernel you can
add $8, $9, $10, and $11
This whole thing seems a little fragile.
I think it might be a good idea to get input from Richard Sandiford,
and/or Adam Nemet about this approach (so I add them to the CC).
This e-mail thread starts here:
http://www.linux-mips.org/archives/linux-mips/2009-10/msg00286.html
and here:
http://www.linux-mips.org/archives/linux-mips/2009-10/msg00290.html
David Daney
> Regards!
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
Hi,
On Thu, 2009-10-22 at 11:34 -0700, David Daney wrote:
> Wu Zhangjin wrote:
> > On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> [...]
> >>> +
> >>> +NESTED(_mcount, PT_SIZE, ra)
> >>> + RESTORE_SP_FOR_32BIT
> >>> + PTR_LA t0, ftrace_stub
> >>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> >> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> >> the dynamics of C function ABI.
> >
> > So, perhaps we can use the saved registers(a0,a1...) instead.
> >
>
> a0..a7 may not always be saved.
I mean I have saved/restored them as the _mcount of /lib/libc.so.6 for
MIPS did, so I can use them safely.
>
> You can use at, v0, v1 and all the temporary registers. Note that for
> the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
> kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
> that at == $1 and contains the callers ra. For a 32-bit kernel you can
> add $8, $9, $10, and $11
>
In a generic function, they should be okay, but _mcount is a little
special, so I'm not sure whether these rules apply to it.
> This whole thing seems a little fragile.
>
> I think it might be a good idea to get input from Richard Sandiford,
> and/or Adam Nemet about this approach (so I add them to the CC).
>
> This e-mail thread starts here:
>
> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00286.html
>
> and here:
>
> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00290.html
looking forward to their reply, thanks!
Regards!
Wu Zhangjin
David Daney writes:
> Wu Zhangjin wrote:
> > On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> [...]
> >>> +
> >>> +NESTED(_mcount, PT_SIZE, ra)
> >>> + RESTORE_SP_FOR_32BIT
> >>> + PTR_LA t0, ftrace_stub
> >>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> >> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> >> the dynamics of C function ABI.
> >
> > So, perhaps we can use the saved registers(a0,a1...) instead.
> >
>
> a0..a7 may not always be saved.
>
> You can use at, v0, v1 and all the temporary registers. Note that for
> the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
> kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
> that at == $1 and contains the callers ra. For a 32-bit kernel you can
> add $8, $9, $10, and $11
>
> This whole thing seems a little fragile.
Maybe scanning the instructions should stop at the beginning of the function
based on the kernel's symbol table. I am not sure if we can establish any
other stopping condition without affecting performance too much.
Speaking of performance, -pg also affects the instruction scheduling freedom
of the compiler in the prologue. With profiling, we limit optimizations not
to move instructions in and out of the prologue.
Also note that for functions invoked via tail call you won't get an exit
event. E.g. if bar is tail-called from foo:
foo entered
bar entered
foo/bar exited
However, this is not MIPS-specific and you can always disable tail calls
with -fno-optimize-sibling-calls.
Adam
On Thu, 2009-10-22 at 13:30 -0700, Adam Nemet wrote:
> Maybe scanning the instructions should stop at the beginning of the function
> based on the kernel's symbol table. I am not sure if we can establish any
> other stopping condition without affecting performance too much.
How would we check the kernel symbol table? Remember, this happens at
_every_ function call.
>
> Speaking of performance, -pg also affects the instruction scheduling freedom
> of the compiler in the prologue. With profiling, we limit optimizations not
> to move instructions in and out of the prologue.
>
> Also note that for functions invoked via tail call you won't get an exit
> event. E.g. if bar is tail-called from foo:
>
> foo entered
> bar entered
> foo/bar exited
>
> However, this is not MIPS-specific and you can always disable tail calls
> with -fno-optimize-sibling-calls.
The question is, would bar have a _mcount call? So far, we have not had
any issues with this on either x86 nor PPC.
/me knocks on wood.
-- Steve
On Thu, Oct 22, 2009 at 04:52:06PM -0400, Steven Rostedt wrote:
> On Thu, 2009-10-22 at 13:30 -0700, Adam Nemet wrote:
> >
> > Speaking of performance, -pg also affects the instruction scheduling freedom
> > of the compiler in the prologue. With profiling, we limit optimizations not
> > to move instructions in and out of the prologue.
> >
> > Also note that for functions invoked via tail call you won't get an exit
> > event. E.g. if bar is tail-called from foo:
> >
> > foo entered
> > bar entered
> > foo/bar exited
> >
> > However, this is not MIPS-specific and you can always disable tail calls
> > with -fno-optimize-sibling-calls.
Ouch..
> The question is, would bar have a _mcount call? So far, we have not had
> any issues with this on either x86 nor PPC.
Nothing would prevent that I guess. I mean, we are doing a very specific
use of -pg, and common uses wouldn't require to disable the mcount call on
bar in this situation, so it's not something that -pg is supposed to care
about.
> /me knocks on wood.
Me too (but not so hard...)
Steven Rostedt writes:
> On Thu, 2009-10-22 at 13:30 -0700, Adam Nemet wrote:
> > Also note that for functions invoked via tail call you won't get an exit
> > event. E.g. if bar is tail-called from foo:
> >
> > foo entered
> > bar entered
> > foo/bar exited
> >
> > However, this is not MIPS-specific and you can always disable tail calls
> > with -fno-optimize-sibling-calls.
>
> The question is, would bar have a _mcount call? So far, we have not had
> any issues with this on either x86 nor PPC.
Yes, bar will have an _mcount call. The difference is that bar will return to
foo's caller directly rather than to foo first.
Adam
On Thu, 2009-10-22 at 14:29 -0700, Adam Nemet wrote:
> Steven Rostedt writes:
> > On Thu, 2009-10-22 at 13:30 -0700, Adam Nemet wrote:
> > > Also note that for functions invoked via tail call you won't get an exit
> > > event. E.g. if bar is tail-called from foo:
> > >
> > > foo entered
> > > bar entered
> > > foo/bar exited
> > >
> > > However, this is not MIPS-specific and you can always disable tail calls
> > > with -fno-optimize-sibling-calls.
> >
> > The question is, would bar have a _mcount call? So far, we have not had
> > any issues with this on either x86 nor PPC.
>
> Yes, bar will have an _mcount call. The difference is that bar will return to
> foo's caller directly rather than to foo first.
I guess the best bet is to have CONFIG_FUNCTION_GRAPH enable
-fno-optimize-sibling-calls and be done with it.
-- Steve
David Daney <[email protected]> writes:
> Wu Zhangjin wrote:
>> On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> [...]
>>>> +
>>>> +NESTED(_mcount, PT_SIZE, ra)
>>>> + RESTORE_SP_FOR_32BIT
>>>> + PTR_LA t0, ftrace_stub
>>>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
>>> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
>>> the dynamics of C function ABI.
>>
>> So, perhaps we can use the saved registers(a0,a1...) instead.
>>
>
> a0..a7 may not always be saved.
>
> You can use at, v0, v1 and all the temporary registers. Note that for
> the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
> kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
> that at == $1 and contains the callers ra. For a 32-bit kernel you can
> add $8, $9, $10, and $11
>
> This whole thing seems a little fragile.
>
> I think it might be a good idea to get input from Richard Sandiford,
> and/or Adam Nemet about this approach (so I add them to the CC).
>
> This e-mail thread starts here:
>
> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00286.html
>
> and here:
>
> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00290.html
I'm not sure that the "search for a save of RA" thing is really a good idea.
The last version of that seemed to be "assume that any register stores
will be in a block that immediately precedes the move into RA", but even
if that's true now, it might not be in future. And as Wu Zhangjin says,
it doesn't cope with long calls, where the target address is loaded
into a temporary register before the call.
FWIW, I'd certainly be happy to make GCC pass an additional parameter
to _mcount. The parameter could give the address of the return slot,
or null for leaf functions. In almost all cases[*], there would be
no overhead, since the move would go in the delay slot of the call.
[*] Meaning when the frame is <=32k. ;) I'm guessing you never
get anywhere near that, and if you did, the scan thing wouldn't
work anyway.
The new behaviour could be controlled by a command-line option,
which would also give linux a cheap way of checking whether the
feature is available.
Richard
On Thu, 2009-10-22 at 17:55 -0400, Steven Rostedt wrote:
> On Thu, 2009-10-22 at 14:29 -0700, Adam Nemet wrote:
> > Steven Rostedt writes:
> > > On Thu, 2009-10-22 at 13:30 -0700, Adam Nemet wrote:
> > > > Also note that for functions invoked via tail call you won't get an exit
> > > > event. E.g. if bar is tail-called from foo:
> > > >
> > > > foo entered
> > > > bar entered
> > > > foo/bar exited
> > > >
> > > > However, this is not MIPS-specific and you can always disable tail calls
> > > > with -fno-optimize-sibling-calls.
> > >
> > > The question is, would bar have a _mcount call? So far, we have not had
> > > any issues with this on either x86 nor PPC.
> >
> > Yes, bar will have an _mcount call. The difference is that bar will return to
> > foo's caller directly rather than to foo first.
>
> I guess the best bet is to have CONFIG_FUNCTION_GRAPH enable
> -fno-optimize-sibling-calls and be done with it.
>
Hello, This did for us:
Makefile:
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
KBUILD_CFLAGS += -fomit-frame-pointer
endif
So, the only thing we need to do is enabling CONFIG_FRAME_POINTER.
Seems it was selected by FTRACE by default.
Regards,
Wu Zhangjin
On Thu, 2009-10-22 at 11:34 -0700, David Daney wrote:
> Wu Zhangjin wrote:
> > On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
> [...]
> >>> +
> >>> +NESTED(_mcount, PT_SIZE, ra)
> >>> + RESTORE_SP_FOR_32BIT
> >>> + PTR_LA t0, ftrace_stub
> >>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
> >> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
> >> the dynamics of C function ABI.
> >
> > So, perhaps we can use the saved registers(a0,a1...) instead.
> >
>
> a0..a7 may not always be saved.
>
> You can use at, v0, v1 and all the temporary registers. Note that for
> the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
> kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
> that at == $1 and contains the callers ra. For a 32-bit kernel you can
> add $8, $9, $10, and $11
before the profiling code (jal _mcount), there are only the necessary
stack operations; nobody has touched the GPRs, so I think we are safe
to use any of them — that's why the t0,t1 I used did not fail. And
because a0,a1 are used as the arguments to the real profiling
function, I will go back to using t0,t1 — as David Daney said, $12,
$13 in 64-bit and $8, $9 in 32-bit.
Thanks!
Regards,
Wu Zhangjin
Hi,
On Thu, 2009-10-22 at 23:17 +0100, Richard Sandiford wrote:
> David Daney <[email protected]> writes:
[...]
> > and here:
> >
> > http://www.linux-mips.org/archives/linux-mips/2009-10/msg00290.html
>
> I'm not sure that the "search for a save of RA" thing is really a good idea.
> The last version of that seemed to be "assume that any register stores
> will be in a block that immediately precedes the move into RA", but even
> if that's true now, it might not be in future. And as Wu Zhangjin says,
> it doesn't cope with long calls, where the target address is loaded
> into a temporary register before the call.
>
-mlong-calls works with the current implementation of the static function
tracer and the function graph tracer for MIPS — I just tried them — and
module tracing works by default with -mlong-calls. Let's have a look at
the code dumped with -mlong-calls; there are only a few differences.
ffffffff80241520 <copy_process>:
ffffffff80241520: 67bdff40 daddiu sp,sp,-192
ffffffff80241524: ffbe00b0 sd s8,176(sp)
ffffffff80241528: 03a0f02d move s8,sp
ffffffff8024152c: ffbf00b8 sd ra,184(sp)
ffffffff80241530: ffb700a8 sd s7,168(sp)
ffffffff80241534: ffb600a0 sd s6,160(sp)
ffffffff80241538: ffb50098 sd s5,152(sp)
ffffffff8024153c: ffb40090 sd s4,144(sp)
ffffffff80241540: ffb30088 sd s3,136(sp)
ffffffff80241544: ffb20080 sd s2,128(sp)
ffffffff80241548: ffb10078 sd s1,120(sp)
ffffffff8024154c: ffb00070 sd s0,112(sp)
ffffffff80241550: 3c038021 lui v1,0x8021
ffffffff80241554: 64631750 daddiu v1,v1,5968
ffffffff80241558: 03e0082d move at,ra
ffffffff8024155c: 0060f809 jalr v1
so, the only left job is making dynamic function tracer work with
-mlong-calls, I think it's not that complex, after using -mlong-calls,
we need to search "move at,ra; jalr v1" instead of "jal _mcount", and
also, some relative job need to do. will try to make it work next week.
> FWIW, I'd certainly be happy to make GCC pass an additional parameter
> to _mcount. The parameter could give the address of the return slot,
> or null for leaf functions. In almost all cases[*], there would be
> no overhead, since the move would go in the delay slot of the call.
>
> [*] Meaning when the frame is <=32k. ;) I'm guessing you never
> get anywhere near that, and if you did, the scan thing wouldn't
> work anyway.
>
> The new behaviour could be controlled by a command-line option,
> which would also give linux a cheap way of checking whether the
> feature is available.
I like your suggestion, and I have tried to make gcc do something like
this before your reply.
orig:
move at,ra
jal _mcount
new:
sd ra,184(sp)
...
move at, ra
jal _mcount
lui ra, 184 --> This is new
so, in a non-leaf function, the at register stored the stack offset of
the return address(range from 0 to PT_SIZE). in a leaf function, it is
the return address itself(at least bigger than PT_SIZE). we are easier
to distinguish them. and only a few lines of source code need to be
added for gcc.
Regards,
Wu Zhangjin
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 153502)
+++ gcc/doc/invoke.texi (working copy)
@@ -709,7 +709,7 @@ Objective-C and Objective-C++ Dialects}.
-mbranch-cost=@var{num} -mbranch-likely -mno-branch-likely @gol
-mfp-exceptions -mno-fp-exceptions @gol
-mvr4130-align -mno-vr4130-align -msynci -mno-synci @gol
--mrelax-pic-calls -mno-relax-pic-calls}
+-mrelax-pic-calls -mno-relax-pic-calls -mmcount-raloc}
@emph{MMIX Options}
@gccoptlist{-mlibfuncs -mno-libfuncs -mepsilon -mno-epsilon -mabi=gnu @gol
@@ -14192,6 +14192,20 @@ an assembler and a linker that supports
directive and @code{-mexplicit-relocs} is in effect. With
@code{-mno-explicit-relocs}, this optimization can be performed by the
assembler and the linker alone without help from the compiler.
+
+@item -mmcount-raloc
+@itemx -mno-mcount-raloc
+@opindex mmcount-raloc
+@opindex mno-mcount-raloc
+Emit (do not emit) code to pass the offset (from the stack pointer) of
+the return address save location to @code{_mcount}. The default is
+@option{-mno-mcount-raloc}.
+
+@option{-mmcount-raloc} can be used in conjunction with @option{-pg}
+and a specially coded @code{_mcount} function to record function exit
+by replacing the return address on the stack with a pointer to the
+function exit profiling function.
+
@end table
@node MMIX Options
Index: gcc/config/mips/mips.opt
===================================================================
--- gcc/config/mips/mips.opt (revision 153502)
+++ gcc/config/mips/mips.opt (working copy)
@@ -208,6 +208,10 @@ mlong64
Target Report RejectNegative Mask(LONG64)
Use a 64-bit long type
+mmcount-raloc
+Target Report Var(TARGET_RALOC)
+Pass the offset from sp of the ra save location to _mcount in $12
+
mmemcpy
Target Report Mask(MEMCPY)
Don't optimize block moves
Index: gcc/config/mips/mips-protos.h
===================================================================
--- gcc/config/mips/mips-protos.h (revision 153502)
+++ gcc/config/mips/mips-protos.h (working copy)
@@ -344,5 +344,6 @@ extern bool mips_eh_uses (unsigned int);
extern bool mips_epilogue_uses (unsigned int);
extern void mips_final_prescan_insn (rtx, rtx *, int);
extern int mips_trampoline_code_size (void);
+extern void mips_function_profiler (FILE *);
#endif /* ! GCC_MIPS_PROTOS_H */
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c (revision 153502)
+++ gcc/config/mips/mips.c (working copy)
@@ -319,6 +319,9 @@ struct GTY(()) mips_frame_info {
HOST_WIDE_INT acc_sp_offset;
HOST_WIDE_INT cop0_sp_offset;
+ /* Similar, but the value passed to _mcount. */
+ HOST_WIDE_INT ra_fp_offset;
+
/* The offset of arg_pointer_rtx from the bottom of the frame. */
HOST_WIDE_INT arg_pointer_offset;
@@ -9616,6 +9619,9 @@ mips_for_each_saved_gpr_and_fpr (HOST_WI
for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
+ /* Record the ra offset for use by mips_function_profiler. */
+ if (regno == RETURN_ADDR_REGNUM)
+ cfun->machine->frame.ra_fp_offset = offset + sp_offset;
mips_save_restore_reg (word_mode, regno, offset, fn);
offset -= UNITS_PER_WORD;
}
@@ -16088,6 +16094,86 @@ mips_trampoline_init (rtx m_tramp, tree
emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
emit_insn (gen_clear_cache (addr, end_addr));
}
+
+/* Return true if the loading of $12 can be done in the jal/jalr delay
+ slot. We can do this only if the jal will expand to a single
+ instruction and only a single instruction is required to load the
+ value to $12. */
+
+static bool mips_raloc_in_delay_slot_p(void)
+{
+ return (SMALL_OPERAND_UNSIGNED(cfun->machine->frame.ra_fp_offset)
+ && !TARGET_USE_GOT);
+}
+
+/* Implement FUNCTION_PROFILER. */
+
+void mips_function_profiler (FILE *file)
+{
+ int emit_small_ra_offset;
+
+ emit_small_ra_offset = 0;
+
+ if (TARGET_MIPS16)
+ sorry ("mips16 function profiling");
+ if (TARGET_LONG_CALLS)
+ {
+ /* For TARGET_LONG_CALLS use $3 for the address of _mcount. */
+ if (Pmode == DImode)
+ fprintf (file, "\tdla\t%s,_mcount\n", reg_names[3]);
+ else
+ fprintf (file, "\tla\t%s,_mcount\n", reg_names[3]);
+ }
+ if (TARGET_RALOC)
+ {
+ /* If TARGET_RALOC load $12 with the offset of the ra save
+ location. */
+ if (mips_raloc_in_delay_slot_p())
+ emit_small_ra_offset = 1;
+ else
+ {
+ if (Pmode == DImode)
+ fprintf (file, "\tdli\t%s,%d\t\t# offset of ra\n", reg_names[12],
+ cfun->machine->frame.ra_fp_offset);
+ else
+ fprintf (file, "\tli\t%s,%d\t\t# offset of ra\n", reg_names[12],
+ cfun->machine->frame.ra_fp_offset);
+ }
+ }
+ mips_push_asm_switch (&mips_noat);
+ fprintf (file, "\tmove\t%s,%s\t\t# save current return address\n",
+ reg_names[AT_REGNUM], reg_names[RETURN_ADDR_REGNUM]);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[2],
+ reg_names[STATIC_CHAIN_REGNUM]);
+ if (!TARGET_NEWABI)
+ {
+ fprintf (file,
+ "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n",
+ TARGET_64BIT ? "dsubu" : "subu",
+ reg_names[STACK_POINTER_REGNUM],
+ reg_names[STACK_POINTER_REGNUM],
+ Pmode == DImode ? 16 : 8);
+ }
+ if (emit_small_ra_offset)
+ mips_push_asm_switch (&mips_noreorder);
+ if (TARGET_LONG_CALLS)
+ fprintf (file, "\tjalr\t%s\n", reg_names[3]);
+ else
+ fprintf (file, "\tjal\t_mcount\n");
+ if (emit_small_ra_offset)
+ {
+ fprintf (file, "\tori\t%s,%s,%d\t\t# offset of ra\n",
+ reg_names[12], reg_names[0], cfun->machine->frame.ra_fp_offset);
+ mips_pop_asm_switch (&mips_noreorder);
+ }
+ mips_pop_asm_switch (&mips_noat);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM],
+ reg_names[2]);
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
Index: gcc/config/mips/mips.h
===================================================================
--- gcc/config/mips/mips.h (revision 153502)
+++ gcc/config/mips/mips.h (working copy)
@@ -2372,44 +2372,7 @@ typedef struct mips_args {
/* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry. */
-#define FUNCTION_PROFILER(FILE, LABELNO) \
-{ \
- if (TARGET_MIPS16) \
- sorry ("mips16 function profiling"); \
- if (TARGET_LONG_CALLS) \
- { \
- /* For TARGET_LONG_CALLS use $3 for the address of _mcount. */ \
- if (Pmode == DImode) \
- fprintf (FILE, "\tdla\t%s,_mcount\n", reg_names[GP_REG_FIRST + 3]); \
- else \
- fprintf (FILE, "\tla\t%s,_mcount\n", reg_names[GP_REG_FIRST + 3]); \
- } \
- mips_push_asm_switch (&mips_noat); \
- fprintf (FILE, "\tmove\t%s,%s\t\t# save current return address\n", \
- reg_names[AT_REGNUM], reg_names[RETURN_ADDR_REGNUM]); \
- /* _mcount treats $2 as the static chain register. */ \
- if (cfun->static_chain_decl != NULL) \
- fprintf (FILE, "\tmove\t%s,%s\n", reg_names[2], \
- reg_names[STATIC_CHAIN_REGNUM]); \
- if (!TARGET_NEWABI) \
- { \
- fprintf (FILE, \
- "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n", \
- TARGET_64BIT ? "dsubu" : "subu", \
- reg_names[STACK_POINTER_REGNUM], \
- reg_names[STACK_POINTER_REGNUM], \
- Pmode == DImode ? 16 : 8); \
- } \
- if (TARGET_LONG_CALLS) \
- fprintf (FILE, "\tjalr\t%s\n", reg_names[GP_REG_FIRST + 3]); \
- else \
- fprintf (FILE, "\tjal\t_mcount\n"); \
- mips_pop_asm_switch (&mips_noat); \
- /* _mcount treats $2 as the static chain register. */ \
- if (cfun->static_chain_decl != NULL) \
- fprintf (FILE, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM], \
- reg_names[2]); \
-}
+#define FUNCTION_PROFILER(FILE, LABELNO) mips_function_profiler ((FILE))
/* The profiler preserves all interesting registers, including $31. */
#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) false
Thanks for the patch.
David Daney <[email protected]> writes:
> Richard Sandiford wrote:
>> David Daney <[email protected]> writes:
>>> Wu Zhangjin wrote:
>>>> On Wed, 2009-10-21 at 11:24 -0400, Steven Rostedt wrote:
>>> [...]
>>>>>> +
>>>>>> +NESTED(_mcount, PT_SIZE, ra)
>>>>>> + RESTORE_SP_FOR_32BIT
>>>>>> + PTR_LA t0, ftrace_stub
>>>>>> + PTR_L t1, ftrace_trace_function /* please don't use t1 later, safe? */
>>>>> Is t0 and t1 safe for mcount to use? Remember, mcount does not follow
>>>>> the dynamics of C function ABI.
>>>> So, perhaps we can use the saved registers(a0,a1...) instead.
>>>>
>>> a0..a7 may not always be saved.
>>>
>>> You can use at, v0, v1 and all the temporary registers. Note that for
>>> the 64-bit ABIs sometimes the names t0-t4 shadow a4-a7. So for a 64-bit
>>> kernel, you can use: $1, $2, $3, $12, $13, $14, $15, $24, $25, noting
>>> that at == $1 and contains the callers ra. For a 32-bit kernel you can
>>> add $8, $9, $10, and $11
>>>
>>> This whole thing seems a little fragile.
>>>
>>> I think it might be a good idea to get input from Richard Sandiford,
>>> and/or Adam Nemet about this approach (so I add them to the CC).
>>>
>>> This e-mail thread starts here:
>>>
>>> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00286.html
>>>
>>> and here:
>>>
>>> http://www.linux-mips.org/archives/linux-mips/2009-10/msg00290.html
>>
>> I'm not sure that the "search for a save of RA" thing is really a good idea.
>> The last version of that seemed to be "assume that any register stores
>> will be in a block that immediately precedes the move into RA", but even
>> if that's true now, it might not be in future. And as Wu Zhangjin says,
>> it doesn't cope with long calls, where the target address is loaded
>> into a temporary register before the call.
>>
>> FWIW, I'd certainly be happy to make GCC pass an additional parameter
>> to _mcount. The parameter could give the address of the return slot,
>> or null for leaf functions. In almost all cases[*], there would be
>> no overhead, since the move would go in the delay slot of the call.
>>
>> [*] Meaning when the frame is <=32k. ;) I'm guessing you never
>> get anywhere near that, and if you did, the scan thing wouldn't
>> work anyway.
>>
>> The new behaviour could be controlled by a command-line option,
>> which would also give linux a cheap way of checking whether the
>> feature is available.
>>
>
> How about this patch, I think it does what you suggest.
>
> When we pass -pg -mmcount-raloc, the address of the return address
> relative to sp is passed in $12 to _mcount. If the return address is
> not saved, $12 will be zero. I think this will work as registers are
> never saved with an offset of zero. $12 is a temporary register that is
> not part of the ABI.
Hmm, well, the suggestion was to pass a pointer rather than an offset,
but both you and Wu Zhangjin seem to prefer the offset. Is there a
reason for that? I suggested a pointer because
(a) they're more C-like
(b) they're just as quick and easy to compute
(c) MIPS doesn't have indexed addresses (well, except for a few
special cases) so the callee is going to have to compute the
pointer at some stage anyway
(It sounds from Wu Zhangjin's reply like he'd already suggested the
offset thing before I sent my message. If so, sorry for not using
that earlier message as context.)
> + if (TARGET_RALOC)
> + {
> + /* If TARGET_RALOC load $12 with the offset of the ra save
> + location. */
> + if (mips_raloc_in_delay_slot_p())
> + emit_small_ra_offset = 1;
> + else
> + {
> + if (Pmode == DImode)
> + fprintf (file, "\tdli\t%s,%d\t\t# offset of ra\n", reg_names[12],
> + cfun->machine->frame.ra_fp_offset);
> + else
> + fprintf (file, "\tli\t%s,%d\t\t# offset of ra\n", reg_names[12],
> + cfun->machine->frame.ra_fp_offset);
> + }
> + }
We shouldn't need to do the delay slot dance. With either the pointer
((D)LA) or offset ((D)LI) approach, the macro won't use $at, so we can
insert the new code immediately before the jump, leaving the assembler
to fill the delay slot. This is simpler and should mean that the delay
slot gets filled more often in the multi-insn-macro cases.
Looks good otherwise, but I'd be interested in other suggestions for
the option name. I kept misreading "raloc" as a typo for "reloc".
Richard
Hi,
On Sat, 2009-10-24 at 10:12 +0100, Richard Sandiford wrote:
> Thanks for the patch.
[...]
> > How about this patch, I think it does what you suggest.
> >
> > When we pass -pg -mmcount-raloc, the address of the return address
> > relative to sp is passed in $12 to _mcount. If the return address is
> > not saved, $12 will be zero. I think this will work as registers are
> > never saved with an offset of zero. $12 is a temporary register that is
> > not part of the ABI.
>
> Hmm, well, the suggestion was to pass a pointer rather than an offset,
> but both you and Wu Zhangjin seem to prefer the offset. Is there a
> reason for that? I suggested a pointer because
>
> (a) they're more C-like
> (b) they're just as quick and easy to compute
> (c) MIPS doesn't have indexed addresses (well, except for a few
> special cases) so the callee is going to have to compute the
> pointer at some stage anyway
>
Agree with you.
Without -fno-omit-frame-pointer, we would also need to calculate the
frame pointer and then add the offset to it. With a pointer, we can get
it directly; it may need one more instruction (lui..., addiu...) to
load the pointer, but in the end the pointer will save more time
for us :-)
so, David, could you please use pointer instead? and then I will test it
asap(cloning the latest gcc currently). thanks!
> (It sounds from Wu Zhangjin's reply like he'd alread suggested the
> offset thing before I sent my message. If so, sorry for not using
> that earlier message as context.)
>
It doesn't matter, Seems at that time, you were not added in the CC
list, but added by David Daney later.
> > + if (TARGET_RALOC)
> > + {
> > + /* If TARGET_RALOC load $12 with the offset of the ra save
> > + location. */
> > + if (mips_raloc_in_delay_slot_p())
> > + emit_small_ra_offset = 1;
> > + else
> > + {
> > + if (Pmode == DImode)
> > + fprintf (file, "\tdli\t%s,%d\t\t# offset of ra\n", reg_names[12],
> > + cfun->machine->frame.ra_fp_offset);
> > + else
> > + fprintf (file, "\tli\t%s,%d\t\t# offset of ra\n", reg_names[12],
> > + cfun->machine->frame.ra_fp_offset);
> > + }
> > + }
>
> We shouldn't need to do the delay slot dance. With either the pointer
> ((D)LA) or offset ((D)LI) approach, the macro won't use $at, so we can
> insert the new code immediately before the jump, leaving the assembler
> to fill the delay slot. This is simpler and should mean that the delay
> slot gets filled more often in the multi-insn-macro cases.
>
> Looks good otherwise, but I'd be interested in other suggestions for
> the option name. I kept misreading "raloc" as a typo for "reloc".
>
The same misreading to me, what about -mmcount-ra-loc? add one "-", or
-mcount-ra-location?
BTW: Just made dynamic function tracer for MIPS support module tracing
with the help of -mlong-calls. after some more tests, I will send it as
-v5 revision later. hope the -v6 revision work with this new feature of
gcc from David Daney.
Regards,
Wu Zhangjin
(Added linux-mips mailing list and the other people to the CC list.)
On Thu, 2009-10-22 at 04:37 -0700, pajko wrote:
[...]
> >
>
> All this stuff can be avoided having PROFILE_BEFORE_PROLOGUE enabled in GCC
> (gcc/config/mips/mips.h), like it is done one several other architectures.
> Some of them even require it to be set for a working _mcount.
> Putting the call of _mcount before the function prologue should make no harm
> (it's working for me), and this way RA can be hooked for function graph
> tracing
> before it is saved to stack in the function prologue. Thus there will be no
> difference between leaf and non-leaf functions.
Good idea! It seems PROFILE_BEFORE_PROLOGUE is commented out by default
in gcc/config/mips/mips.h of gcc 4.4:
/* #define PROFILE_BEFORE_PROLOGUE */
If we enable this macro, the handling will be the same for non-leaf and
leaf functions, so David's patch to gcc is not needed anymore.
> This change is no big deal as GCC has to be patched anyway to get dynamic
> ftracing (and with 4.2.1 I had to patch it to get ftracing in modules
> working
> - using -mlong-calls -, but it's possible that this is fixed in the newer
> releases).
>
do you mean if enabling PROFILE_BEFORE_PROLOGUE, there will be some
problems with module support using -mlong-calls?
I have made dynamic ftrace support module with -mlong-calls, I think, if
we move the codes before prologue, it should have no side effect.
lui v1,HI16bit (&_mcount -> v1)
daddiu v1,v1,LOW16bit
move at,ra
jalr v1
Regards,
Wu Zhangjin
On Sun, Oct 25, 2009 at 11:48 AM, Wu Zhangjin <[email protected]> wrote:
>
> do you mean if enabling PROFILE_BEFORE_PROLOGUE, there will be some
> problems with module support using -mlong-calls?
>
No, there are no problems. I've tested it on friday, and function
graph tracing was working correctly.
I meant to say that the 4.2.1 we use does not generate correct profile
calls from kernel modules. Maybe this issue was fixed in newer
releases; I did not check. I applied a patch (I don't remember where
I found it — maybe it was created by you) to our toolchain
several months ago.
I was thinking about dynamic tracing, and I think a toolchain patch
can be avoided completely. We only need to make difference between
"jal _mcount" and "jalr v1"-style mcount calls when replacing them
with "nop" instructions in the code-patching function called by
ftrace_convert_nops(). This can be done in 2 ways:
1) keeping old instructions - takes extra memory, not an option
2) using 2 separate instructions to replace with. One of them could be
the normal NOP instruction, which expands to "sll r0, r0, 0". For the
other we could use "sll r0, r0, 1" but as it has already special
meaning (SSNOP) a better candidate could be something like "sll r1,
r1, 0". This way we can decide which instruction to patch in when
tracing is enabled for a function, eg. when the code patcher
encounters a "sll r0, r0, 0" it emits a function call using JAL and
when it encounters "sll r1, r1, 0" it emits a function call using
"JALR v1".
Regards,
Patrik
On Sun, 2009-10-25 at 14:37 +0100, Patrik Kluba wrote:
> On Sun, Oct 25, 2009 at 11:48 AM, Wu Zhangjin <[email protected]> wrote:
> >
> > do you mean if enabling PROFILE_BEFORE_PROLOGUE, there will be some
> > problems with module support using -mlong-calls?
> >
>
> No, there are no problems. I've tested it on friday, and function
> graph tracing was working correctly.
> I meant to say that 4.2.1 we use does not generate correct profile
> calls from kernel modules. Maybe this issue was fixed in newer
> releases, I did not check. I've applied a patch (don't remember where
> have I found that, maybe it was created by you) to our toolchain
> several months ago.
I have never sent a patch to gcc before :-) but perhaps somebody has
fixed it for us. So, the remaining job is hoping somebody enables
PROFILE_BEFORE_PROLOGUE for MIPS in the next version of gcc if there is
no side effect, and then we can hijack the return address of non-leaf &
leaf functions directly in the same way in _mcount.
>
> I was thinking about dynamic tracing, and I think a toolchain patch
> can be avoided completely. We only need to make difference between
> "jal _mcount" and "jalr v1"-style mcount calls when replacing them
> with "nop" instructions in the code-patching function called by
> ftrace_convert_nops(). This can be done in 2 ways:
> 1) keeping old instructions - takes extra memory, not an option
> 2) using 2 separate instructions to replace with. One of them could be
> the normal NOP instruction, which expands to "sll r0, r0, 0". For the
> other we could use "sll r0, r0, 1" but as it has already special
> meaning (SSNOP) a better candidate could be something like "sll r1,
> r1, 0". This way we can decide which instruction to patch in when
> tracing is enabled for a function, eg. when the code patcher
> encounters a "sll r0, r0, 0" it emits a function call using JAL and
> when it encounters "sll r1, r1, 0" it emits a function call using
> "JALR v1".
If we only consider dynamic tracing, no patch for gcc is needed;
-mlong-calls is enough. I have done it via a "stupid" trick, and will send
the patchset out asap :-)
Regards,
Wu Zhangjin
Wu Zhangjin <[email protected]> writes:
> (Added linux-mips mailing list and the other people to the CC list.)
>
> On Thu, 2009-10-22 at 04:37 -0700, pajko wrote:
> [...]
>> >
>>
>> All this stuff can be avoided having PROFILE_BEFORE_PROLOGUE enabled in GCC
>> (gcc/config/mips/mips.h), like it is done one several other architectures.
>> Some of them even require it to be set for a working _mcount.
>> Putting the call of _mcount before the function prologue should make no harm
>> (it's working for me), and this way RA can be hooked for function graph
>> tracing
>> before it is saved to stack in the function prologue. Thus there will be no
>> difference between leaf and non-leaf functions.
>
> Good idea! Seems PROFILE_BEFORE_PROLOGUE is commented by default in
> gcc/config/mips/mips.h of gcc 4.4:
>
> /* #define PROFILE_BEFORE_PROLOGUE */
>
> if we enable this macro, the handling will be the same to non-leaf and
> leaf function, so, David's patch to gcc is not need again.
Defining PROFILE_BEFORE_PROLOGUE isn't correct for abicalls code,
because "jal _mcount" is a macro that loads _mcount from the
GOT into $25. We don't have access to $28 at the beginning of
the function, and we mustn't clobber the incoming value of $25.
So we could only make this change for non-abicalls code.
It's then a choice between (a) having new non-abicalls-specific
behaviour or (b) going with David's patch. The advantage of
(a) is that the linux code is slightly simpler. The disadvantage
is that it makes the _mcount interface differ between -mabicalls
and -mno-abicalls. And IMO the disadvantage outweighs the advantage.
If this new behaviour is useful for linux, it could easily be useful
for userspace too. And with the new PLT support, non-shared abicalls
code is supposed to be link-compatible with non-abicalls code.
I think David's patch is the way to go.
Richard