2014-04-26 08:40:16

by Ding Tianhong

[permalink] [raw]
Subject: [PATCH] arm64: add OProfile support

Add OProfile support for arm64, using the perf backend, and falling back
to generic timer based sampling if the PMU interrupt is not supported.

I have tested this patch on Cortex-A53 and Cortex-A57 motherboards; OProfile
works well with either the PMU irq or the arch timer irq.

Signed-off-by: Xinwei Hu <[email protected]>
Signed-off-by: Ding Tianhong <[email protected]>
---
arch/arm64/Kconfig | 1 +
arch/arm64/Makefile | 2 +
arch/arm64/include/asm/stacktrace.h | 1 +
arch/arm64/kernel/perf_event.c | 13 ++++-
arch/arm64/oprofile/Makefile | 13 +++++
arch/arm64/oprofile/common.c | 106 ++++++++++++++++++++++++++++++++++++
6 files changed, 135 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/oprofile/Makefile
create mode 100644 arch/arm64/oprofile/common.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e6e4d37..f711445 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -40,6 +40,7 @@ config ARM64
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK
select HAVE_PATA_PLATFORM
+ select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2fceb71..6bb3d66 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -48,6 +48,8 @@ core-$(CONFIG_XEN) += arch/arm64/xen/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)

+drivers-$(CONFIG_OPROFILE) += arch/arm64/oprofile/
+
# Default target when executing plain make
KBUILD_IMAGE := Image.gz
KBUILD_DTBS := dtbs
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 7318f6d..fec7e84 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -19,6 +19,7 @@
struct stackframe {
unsigned long fp;
unsigned long sp;
+ unsigned long lr;
unsigned long pc;
};

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index baf5afb..a8fd1c1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -51,6 +51,15 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *cpu_pmu;

+/*
+ * Report the name of the CPU PMU, or NULL while cpu_pmu has not been
+ * set (it is only assigned at runtime once the CPU type is known).
+ */
+const char *perf_pmu_name(void)
+{
+	return cpu_pmu ? cpu_pmu->name : NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
int
armpmu_get_max_events(void)
{
@@ -640,7 +649,7 @@ enum armv8_pmuv3_perf_types {
ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03,
ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04,
ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
- ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11,
+ ARMV8_PMUV3_PERFCTR_CPU_CYCLES = 0x11,
ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12,

/* At least one of the following is required. */
@@ -672,6 +681,8 @@ enum armv8_pmuv3_perf_types {
ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19,
ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A,
ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D,
+
+ ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0XFF,
};

/* PMUv3 HW events mapping. */
diff --git a/arch/arm64/oprofile/Makefile b/arch/arm64/oprofile/Makefile
new file mode 100644
index 0000000..b2215c6
--- /dev/null
+++ b/arch/arm64/oprofile/Makefile
@@ -0,0 +1,13 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
+oprofile-y := $(DRIVER_OBJS) common.o
diff --git a/arch/arm64/oprofile/common.c b/arch/arm64/oprofile/common.c
new file mode 100644
index 0000000..7d1c19c
--- /dev/null
+++ b/arch/arm64/oprofile/common.c
@@ -0,0 +1,106 @@
+/**
+ * @file common.c
+ *
+ * @remark Copyright 2004 Oprofile Authors
+ * @remark Copyright 2010 ARM Ltd.
+ * @remark Read the file COPYING
+ *
+ * @author Zwane Mwaikambo
+ * @author Will Deacon [move to perf]
+ */
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/oprofile.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <asm/stacktrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/perf_event.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/*
+ * Return the CPU type string for oprofile, taken straight from the name
+ * of the perf PMU. The result is NULL when perf_pmu_name() has no PMU
+ * to report.
+ */
+char *op_name_from_perf_id(void)
+{
+	/*
+	 * perf_pmu_name() returns a const-qualified string while this
+	 * function's return type is plain char *. Make the qualifier drop
+	 * explicit instead of relying on an implicit conversion that the
+	 * compiler diagnoses. Callers must treat the string as read-only.
+	 */
+	return (char *)perf_pmu_name();
+}
+#endif
+
+/*
+ * Per-frame callback passed to walk_stackframe().
+ *
+ * @frame: frame being visited; its pc is logged via oprofile_add_trace().
+ * @d:     points to an unsigned int holding the number of entries that
+ *         may still be recorded; decremented for every entry logged.
+ *
+ * Returns non-zero once the remaining budget is zero, zero otherwise.
+ */
+static int report_trace(struct stackframe *frame, void *d)
+{
+	unsigned int *remaining = d;
+
+	/* Budget already used up: record nothing more. */
+	if (*remaining == 0)
+		return 1;
+
+	oprofile_add_trace(frame->pc);
+	*remaining -= 1;
+
+	return !*remaining;
+}
+
+/*
+ * AArch64 (AAPCS64) frame record. The frame pointer (x29) holds the
+ * address of a two-word record: the caller's frame pointer followed by
+ * the saved link register. Unlike the 32-bit ARM APCS layout there is
+ * no saved sp, and the record starts at fp rather than ending there, so
+ * it is read in place (no "fp - 1" adjustment).
+ */
+struct frame_tail {
+	struct frame_tail __user *fp;
+	unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Read the user-space frame record at @tail and log its saved return
+ * address with oprofile_add_trace().
+ *
+ * Returns the caller's frame record, or NULL when the record cannot be
+ * read or the chain does not move towards higher addresses.
+ */
+static struct frame_tail __user *user_backtrace(struct frame_tail __user *tail)
+{
+	struct frame_tail buftail;
+
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
+		return NULL;
+
+	oprofile_add_trace(buftail.lr);
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses); anything else would loop forever
+	 * or walk into garbage.
+	 */
+	if (tail >= buftail.fp)
+		return NULL;
+
+	return buftail.fp;
+}
+
+/*
+ * oprofile backtrace hook: log up to @depth return addresses for the
+ * context described by @regs.
+ *
+ * Kernel-mode samples are unwound with walk_stackframe(). AArch64
+ * user-mode samples are unwound by following the frame records that
+ * start at x29.
+ */
+static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct frame_tail __user *tail;
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+
+		frame.fp = regs->regs[29];
+		frame.sp = regs->sp;
+		frame.lr = regs->regs[30];
+		frame.pc = regs->pc;
+		walk_stackframe(&frame, report_trace, &depth);
+		return;
+	}
+
+	/*
+	 * AArch32 (compat) tasks do not keep an AAPCS64 frame record in
+	 * regs[29]; walking them with the 64-bit layout would only log
+	 * garbage, so no backtrace is produced for them.
+	 */
+	if (compat_user_mode(regs))
+		return;
+
+	tail = (struct frame_tail __user *)regs->regs[29];
+
+	/*
+	 * A frame record is two 8-byte words, so a valid record address is
+	 * at least 8-byte aligned; stop as soon as the chain violates that.
+	 */
+	while (depth-- && tail && !((unsigned long)tail & 0x7))
+		tail = user_backtrace(tail);
+}
+
+/*
+ * Architecture init hook for oprofile.
+ *
+ * The backtrace callback is installed before oprofile_perf_init() is
+ * called, so backtraces are also available in timer mode. The result of
+ * oprofile_perf_init() is returned unchanged.
+ */
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int ret;
+
+	ops->backtrace = arm_backtrace;
+	ret = oprofile_perf_init(ops);
+
+	return ret;
+}
+
+/*
+ * Architecture exit hook for oprofile: the only arch-side work is
+ * delegating to oprofile_perf_exit().
+ */
+void oprofile_arch_exit(void)
+{
+	oprofile_perf_exit();
+}
--
1.8.0


2014-04-26 09:23:15

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH] arm64: add OProfile support

On 26 Apr 2014, at 09:38, Ding Tianhong <[email protected]> wrote:
> Add OProfile support for arm64, using the perf backend, and failing back
> to generic timer based sampling if PMU interrupt is not supported.
>
> I have test this patch on Cortex-A53 and Cortex-A57 motherboard, the OProfile
> could work well by PMU irq or arch timer irq.

This came up before a few times and we also had an implementation but
decided not to merge it. We should rather get the user space oprofile to
use the perf kernel API.

That’s an old thread, it may have even made it into mainline oprofile
but I haven’t followed the development:

http://marc.info/?l=oprofile-list&m=133002515616302&w=2

Catalin

2014-04-26 10:23:22

by Ding Tianhong

[permalink] [raw]
Subject: Re: [PATCH] arm64: add OProfile support

On 2014/4/26 17:23, Catalin Marinas wrote:
> On 26 Apr 2014, at 09:38, Ding Tianhong <[email protected]> wrote:
>> Add OProfile support for arm64, using the perf backend, and failing back
>> to generic timer based sampling if PMU interrupt is not supported.
>>
>> I have test this patch on Cortex-A53 and Cortex-A57 motherboard, the OProfile
>> could work well by PMU irq or arch timer irq.
>
> This came up before a few times and we also had an implementation but
> decided not to merge it. We should rather get the user space oprofile to
> use the perf kernel API.
>
> That’s an old thread, it may have even made it into mainline oprofile
> but I haven’t followed the development:
>
> http://marc.info/?l=oprofile-list&m=133002515616302&w=2
>
> Catalin
>
Ok, I will check it and then decide the next step, thanks for your feedback.

Regards
Ding

2014-04-28 02:33:05

by Ding Tianhong

[permalink] [raw]
Subject: Re: [PATCH] arm64: add OProfile support

On 2014/4/26 18:22, Ding Tianhong wrote:
> On 2014/4/26 17:23, Catalin Marinas wrote:
>> On 26 Apr 2014, at 09:38, Ding Tianhong <[email protected]> wrote:
>>> Add OProfile support for arm64, using the perf backend, and failing back
>>> to generic timer based sampling if PMU interrupt is not supported.
>>>
>>> I have test this patch on Cortex-A53 and Cortex-A57 motherboard, the OProfile
>>> could work well by PMU irq or arch timer irq.
>>
>> This came up before a few times and we also had an implementation but
>> decided not to merge it. We should rather get the user space oprofile to
>> use the perf kernel API.
>>
>> That’s an old thread, it may have even made it into mainline oprofile
>> but I haven’t followed the development:
>>
>> http://marc.info/?l=oprofile-list&m=133002515616302&w=2
>>
>> Catalin
>>

Hi Catalin:

Sorry, so far I have not been able to find the implementation of oprofile support for aarch64 that
was not merged. I also still have a question: the existing code only supports oprofile via the arch
timer event, not via PMU events. This patch only adds HW PMU support for oprofile, which is more
accurate and stable. Could you give me some more advice? I would appreciate your help.

Regards
Ding

> Ok, I will check it and then decide the next step, thanks for your feedback.
>
> Regards
> Ding
>

2014-04-28 19:08:57

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH] arm64: add OProfile support

On Mon, Apr 28, 2014 at 03:32:04AM +0100, Ding Tianhong wrote:
> On 2014/4/26 18:22, Ding Tianhong wrote:
> > On 2014/4/26 17:23, Catalin Marinas wrote:
> >> On 26 Apr 2014, at 09:38, Ding Tianhong <[email protected]> wrote:
> >>> Add OProfile support for arm64, using the perf backend, and failing back
> >>> to generic timer based sampling if PMU interrupt is not supported.
> >>>
> >>> I have test this patch on Cortex-A53 and Cortex-A57 motherboard, the OProfile
> >>> could work well by PMU irq or arch timer irq.
> >>
> >> This came up before a few times and we also had an implementation but
> >> decided not to merge it. We should rather get the user space oprofile to
> >> use the perf kernel API.
> >>
> >> That’s an old thread, it may have even made it into mainline oprofile
> >> but I haven’t followed the development:
> >>
> >> http://marc.info/?l=oprofile-list&m=133002515616302&w=2
> >>
> >> Catalin
> >>
>
> Hi Cadtalin:
>
> Sorry I could not find the implementation that not to merge the orpfile support for aarch64 till now, and
> I still have questions that the existing code only support oprofile by arch timer event, but not
> PMU event, this patch only add HW PMU support for oprofile, it is more accurate and stable, can you
> give me more advise and appreciate for your help.

Just use the operf tool, which is part of the oprofile project. It uses the
perf syscall under the hood and doesn't need any kernel-side additions over
perf.

Will