Hello,
this series adds support for hardware performance counters in counting
and sampling modes as well as for page fault counting. Perf IRQ is handled
as NMI when configured properly (as the only topmost medium-priority
interrupt).
It reorganizes stack tracing code to share it between old oprofile and new
perf interfaces, and improves kernel stack traces both for builtin debug
functions and for gdb.
There's also fixes and cleanups for the areas touched by the new features.
Changes v1->v2:
- make continuous stack changes conditional;
- replace l32i/s32i + add used to access spilled registers with l32e/s32e;
- use -EINVAL instead of -ENOENT for invalid PMU event configuratons;
- fix kernel register spilling code;
- optionally treat perf IRQ as NMI.
Max Filippov (13):
xtensa: clean up Kconfig dependencies for custom cores
xtensa: keep exception/interrupt stack continuous
xtensa: move oprofile stack tracing to stacktrace.c
xtensa: select PERF_USE_VMALLOC for cache-aliasing configurations
xtensa: add profiling IRQ type to xtensa_irq_map
xtensa: count software page fault perf events
xtensa: implement counting and sampling perf events
perf tools: xtensa: add DWARF register names
xtensa: reorganize irq flags tracing
xtensa: fix kernel register spilling
xtensa: don't touch EXC_TABLE_FIXUP in _switch_to
xtensa: implement fake NMI
xtensa: drop unused irq_err_count
arch/xtensa/Kconfig | 22 +-
arch/xtensa/include/asm/atomic.h | 10 +-
arch/xtensa/include/asm/cmpxchg.h | 4 +-
arch/xtensa/include/asm/irqflags.h | 22 +-
arch/xtensa/include/asm/processor.h | 31 ++-
arch/xtensa/include/asm/stacktrace.h | 8 +
arch/xtensa/include/asm/traps.h | 29 +-
arch/xtensa/kernel/Makefile | 1 +
arch/xtensa/kernel/entry.S | 197 ++++++++++----
arch/xtensa/kernel/irq.c | 17 +-
arch/xtensa/kernel/perf_event.c | 454 +++++++++++++++++++++++++++++++
arch/xtensa/kernel/stacktrace.c | 167 +++++++++++-
arch/xtensa/kernel/traps.c | 31 ++-
arch/xtensa/kernel/vectors.S | 10 +-
arch/xtensa/mm/fault.c | 7 +
arch/xtensa/oprofile/backtrace.c | 158 +----------
tools/perf/arch/xtensa/Build | 1 +
tools/perf/arch/xtensa/Makefile | 3 +
tools/perf/arch/xtensa/util/Build | 1 +
tools/perf/arch/xtensa/util/dwarf-regs.c | 25 ++
20 files changed, 954 insertions(+), 244 deletions(-)
create mode 100644 arch/xtensa/kernel/perf_event.c
create mode 100644 tools/perf/arch/xtensa/Build
create mode 100644 tools/perf/arch/xtensa/Makefile
create mode 100644 tools/perf/arch/xtensa/util/Build
create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c
--
1.8.1.4
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/Kconfig | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 87be10e..ba22699 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -61,9 +61,7 @@ config TRACE_IRQFLAGS_SUPPORT
def_bool y
config MMU
- bool
- default n if !XTENSA_VARIANT_CUSTOM
- default XTENSA_VARIANT_MMU if XTENSA_VARIANT_CUSTOM
+ def_bool n
config VARIANT_IRQ_SWITCH
def_bool n
@@ -71,9 +69,6 @@ config VARIANT_IRQ_SWITCH
config HAVE_XTENSA_GPIO32
def_bool n
-config MAY_HAVE_SMP
- def_bool n
-
menu "Processor type and features"
choice
@@ -100,7 +95,6 @@ config XTENSA_VARIANT_DC233C
config XTENSA_VARIANT_CUSTOM
bool "Custom Xtensa processor configuration"
- select MAY_HAVE_SMP
select HAVE_XTENSA_GPIO32
help
Select this variant to use a custom Xtensa processor configuration.
@@ -126,6 +120,7 @@ config XTENSA_VARIANT_MMU
bool "Core variant has a Full MMU (TLB, Pages, Protection, etc)"
depends on XTENSA_VARIANT_CUSTOM
default y
+ select MMU
help
Build a Conventional Kernel with full MMU support,
ie: it supports a TLB with auto-loading, page protection.
@@ -143,7 +138,7 @@ source "kernel/Kconfig.preempt"
config HAVE_SMP
bool "System Supports SMP (MX)"
- depends on MAY_HAVE_SMP
+ depends on XTENSA_VARIANT_CUSTOM
select XTENSA_MX
help
This option is use to indicate that the system-on-a-chip (SOC)
--
1.8.1.4
Restore original a0 in the kernel exception stack frame. This way it
looks like the frame that got interrupt/exception did alloca (copy a0 and
a1 potentially spilled under old stack to the new location as well) to
save registers and then did a call to handler. The point where
interrupt/exception was taken is not in the stack chain, only in pt_regs
(call4 from that address can be simulated to keep it in the stack trace).
Signed-off-by: Max Filippov <[email protected]>
---
Changes v1->v2:
- make continuous stack changes conditional;
- replace l32i/s32i + add used to access spilled registers with l32e/s32e.
arch/xtensa/kernel/entry.S | 59 ++++++++++++++++++++++++++++++++++------------
1 file changed, 44 insertions(+), 15 deletions(-)
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 82bbfa5..b64075f 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -28,6 +28,10 @@
#include <asm/tlbflush.h>
#include <variant/tie-asm.h>
+#if defined(CONFIG_DEBUG_KERNEL) || defined(CONFIG_STACKTRACE_SUPPORT)
+#define XTENSA_CONTINUOUS_STACK
+#endif
+
/* Unimplemented features. */
#undef KERNEL_STACK_OVERFLOW_CHECK
@@ -219,6 +223,10 @@ _user_exception:
2: /* Now, jump to the common exception handler. */
+#ifdef XTENSA_CONTINUOUS_STACK
+ movi a0, 0 # terminate user stack trace with 0
+ wsr a0, depc
+#endif
j common_exception
ENDPROC(user_exception)
@@ -264,6 +272,24 @@ ENTRY(kernel_exception)
.globl _kernel_exception
_kernel_exception:
+ /* Copy spill slots of a0 and a1 to imitate movsp
+ * in order to keep exception stack continuous
+ */
+#ifdef XTENSA_CONTINUOUS_STACK
+ /* Reload previous stack pointer: l32e relative to the current stack
+ * won't reach it, because kernel exception stack frame is definitely
+ * larger than l32e range (64 bytes).
+ */
+ l32i a0, a1, PT_AREG1
+ l32e a3, a0, -16
+ l32e a0, a0, -12
+ s32e a3, a1, -16
+ s32e a0, a1, -12
+
+ l32i a0, a1, PT_AREG0 # restore saved a0
+ wsr a0, depc
+#endif
+
/* Save SAR and turn off single stepping */
movi a2, 0
@@ -346,12 +372,12 @@ common_exception:
s32i a0, a1, PT_EXCCAUSE
s32i a3, a2, EXC_TABLE_FIXUP
- /* All unrecoverable states are saved on stack, now, and a1 is valid,
- * so we can allow exceptions and interrupts (*) again.
- * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
+ /* All unrecoverable states are saved on stack, now, and a1 is valid.
+ * Now we can allow exceptions again. In case we've got an interrupt
+ * PS.INTLEVEL is set to LOCKLEVEL disabling furhter interrupts,
+ * otherwise it's left unchanged.
*
- * (*) We only allow interrupts if they were previously enabled and
- * we're not handling an IRQ
+ * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
*/
rsr a3, ps
@@ -362,28 +388,32 @@ common_exception:
moveqz a3, a2, a0 # a3 = LOCKLEVEL iff interrupt
movi a2, 1 << PS_WOE_BIT
or a3, a3, a2
- rsr a0, exccause
+ rsr a2, exccause
+#ifdef XTENSA_CONTINUOUS_STACK
+ /* restore return address (or 0 if return to userspace) */
+ rsr a0, depc
+#endif
xsr a3, ps
s32i a3, a1, PT_PS # save ps
/* Save lbeg, lend */
- rsr a2, lbeg
+ rsr a4, lbeg
rsr a3, lend
- s32i a2, a1, PT_LBEG
+ s32i a4, a1, PT_LBEG
s32i a3, a1, PT_LEND
/* Save SCOMPARE1 */
#if XCHAL_HAVE_S32C1I
- rsr a2, scompare1
- s32i a2, a1, PT_SCOMPARE1
+ rsr a3, scompare1
+ s32i a3, a1, PT_SCOMPARE1
#endif
/* Save optional registers. */
- save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+ save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
#ifdef CONFIG_TRACE_IRQFLAGS
l32i a4, a1, PT_DEPC
@@ -391,8 +421,7 @@ common_exception:
* while PS.EXCM was set, i.e. interrupts disabled.
*/
bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
- l32i a4, a1, PT_EXCCAUSE
- bnei a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
+ bnei a2, EXCCAUSE_LEVEL1_INTERRUPT, 1f
/* We came here with an interrupt means interrupts were enabled
* and we've just disabled them.
*/
@@ -407,8 +436,8 @@ common_exception:
rsr a4, excsave1
mov a6, a1 # pass stack frame
- mov a7, a0 # pass EXCCAUSE
- addx4 a4, a0, a4
+ mov a7, a2 # pass EXCCAUSE
+ addx4 a4, a2, a4
l32i a4, a4, EXC_TABLE_DEFAULT # load handler
/* Call the second-level handler */
--
1.8.1.4
Old oprofile interface will share user stack tracing with new perf
interface. Move oprofile user/kernel stack tracing to stacktrace.c to
make it possible.
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/stacktrace.h | 8 ++
arch/xtensa/kernel/stacktrace.c | 167 ++++++++++++++++++++++++++++++++++-
arch/xtensa/oprofile/backtrace.c | 158 ++-------------------------------
3 files changed, 182 insertions(+), 151 deletions(-)
diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h
index 6a05fcb..fe06e8e 100644
--- a/arch/xtensa/include/asm/stacktrace.h
+++ b/arch/xtensa/include/asm/stacktrace.h
@@ -33,4 +33,12 @@ void walk_stackframe(unsigned long *sp,
int (*fn)(struct stackframe *frame, void *data),
void *data);
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+ int (*kfn)(struct stackframe *frame, void *data),
+ int (*ufn)(struct stackframe *frame, void *data),
+ void *data);
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+ int (*ufn)(struct stackframe *frame, void *data),
+ void *data);
+
#endif /* _XTENSA_STACKTRACE_H */
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7d2c317..7538d80 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -1,11 +1,12 @@
/*
- * arch/xtensa/kernel/stacktrace.c
+ * Kernel and userspace stack tracing.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2001 - 2013 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
*/
#include <linux/export.h>
#include <linux/sched.h>
@@ -13,6 +14,170 @@
#include <asm/stacktrace.h>
#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
+
+/* Address of common_exception_return, used to check the
+ * transition from kernel to user space.
+ */
+extern int common_exception_return;
+
+/* A struct that maps to the part of the frame containing the a0 and
+ * a1 registers.
+ */
+struct frame_start {
+ unsigned long a0;
+ unsigned long a1;
+};
+
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+ int (*ufn)(struct stackframe *frame, void *data),
+ void *data)
+{
+ unsigned long windowstart = regs->windowstart;
+ unsigned long windowbase = regs->windowbase;
+ unsigned long a0 = regs->areg[0];
+ unsigned long a1 = regs->areg[1];
+ unsigned long pc = regs->pc;
+ struct stackframe frame;
+ int index;
+
+ if (!depth--)
+ return;
+
+ frame.pc = pc;
+ frame.sp = a1;
+
+ if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+ return;
+
+ /* Two steps:
+ *
+ * 1. Look through the register window for the
+ * previous PCs in the call trace.
+ *
+ * 2. Look on the stack.
+ */
+
+ /* Step 1. */
+ /* Rotate WINDOWSTART to move the bit corresponding to
+ * the current window to the bit #0.
+ */
+ windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
+
+ /* Look for bits that are set, they correspond to
+ * valid windows.
+ */
+ for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
+ if (windowstart & (1 << index)) {
+ /* Get the PC from a0 and a1. */
+ pc = MAKE_PC_FROM_RA(a0, pc);
+ /* Read a0 and a1 from the
+ * corresponding position in AREGs.
+ */
+ a0 = regs->areg[index * 4];
+ a1 = regs->areg[index * 4 + 1];
+
+ frame.pc = pc;
+ frame.sp = a1;
+
+ if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+ return;
+ }
+
+ /* Step 2. */
+ /* We are done with the register window, we need to
+ * look through the stack.
+ */
+ if (!depth)
+ return;
+
+ /* Start from the a1 register. */
+ /* a1 = regs->areg[1]; */
+ while (a0 != 0 && depth--) {
+ struct frame_start frame_start;
+ /* Get the location for a1, a0 for the
+ * previous frame from the current a1.
+ */
+ unsigned long *psp = (unsigned long *)a1;
+
+ psp -= 4;
+
+ /* Check if the region is OK to access. */
+ if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
+ return;
+ /* Copy a1, a0 from user space stack frame. */
+ if (__copy_from_user_inatomic(&frame_start, psp,
+ sizeof(frame_start)))
+ return;
+
+ pc = MAKE_PC_FROM_RA(a0, pc);
+ a0 = frame_start.a0;
+ a1 = frame_start.a1;
+
+ frame.pc = pc;
+ frame.sp = a1;
+
+ if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+ return;
+ }
+}
+EXPORT_SYMBOL(xtensa_backtrace_user);
+
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+ int (*kfn)(struct stackframe *frame, void *data),
+ int (*ufn)(struct stackframe *frame, void *data),
+ void *data)
+{
+ unsigned long pc = regs->depc > VALID_DOUBLE_EXCEPTION_ADDRESS ?
+ regs->depc : regs->pc;
+ unsigned long sp_start, sp_end;
+ unsigned long a0 = regs->areg[0];
+ unsigned long a1 = regs->areg[1];
+
+ sp_start = a1 & ~(THREAD_SIZE - 1);
+ sp_end = sp_start + THREAD_SIZE;
+
+ /* Spill the register window to the stack first. */
+ spill_registers();
+
+ /* Read the stack frames one by one and create the PC
+ * from the a0 and a1 registers saved there.
+ */
+ while (a1 > sp_start && a1 < sp_end && depth--) {
+ struct stackframe frame;
+ unsigned long *psp = (unsigned long *)a1;
+
+ frame.pc = pc;
+ frame.sp = a1;
+
+ if (kernel_text_address(pc) && kfn(&frame, data))
+ return;
+
+ if (pc == (unsigned long)&common_exception_return) {
+ regs = (struct pt_regs *)a1;
+ if (user_mode(regs)) {
+ if (ufn == NULL)
+ return;
+ xtensa_backtrace_user(regs, depth, ufn, data);
+ return;
+ }
+ a0 = regs->areg[0];
+ a1 = regs->areg[1];
+ continue;
+ }
+
+ sp_start = a1;
+
+ pc = MAKE_PC_FROM_RA(a0, pc);
+ a0 = *(psp - 4);
+ a1 = *(psp - 3);
+ }
+}
+EXPORT_SYMBOL(xtensa_backtrace_kernel);
+
+#endif
void walk_stackframe(unsigned long *sp,
int (*fn)(struct stackframe *frame, void *data),
diff --git a/arch/xtensa/oprofile/backtrace.c b/arch/xtensa/oprofile/backtrace.c
index 5f03a59..8f95203 100644
--- a/arch/xtensa/oprofile/backtrace.c
+++ b/arch/xtensa/oprofile/backtrace.c
@@ -2,168 +2,26 @@
* @file backtrace.c
*
* @remark Copyright 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
* @remark Read the file COPYING
*
*/
#include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
#include <asm/ptrace.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
+#include <asm/stacktrace.h>
-/* Address of common_exception_return, used to check the
- * transition from kernel to user space.
- */
-extern int common_exception_return;
-
-/* A struct that maps to the part of the frame containing the a0 and
- * a1 registers.
- */
-struct frame_start {
- unsigned long a0;
- unsigned long a1;
-};
-
-static void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth)
-{
- unsigned long windowstart = regs->windowstart;
- unsigned long windowbase = regs->windowbase;
- unsigned long a0 = regs->areg[0];
- unsigned long a1 = regs->areg[1];
- unsigned long pc = MAKE_PC_FROM_RA(a0, regs->pc);
- int index;
-
- /* First add the current PC to the trace. */
- if (pc != 0 && pc <= TASK_SIZE)
- oprofile_add_trace(pc);
- else
- return;
-
- /* Two steps:
- *
- * 1. Look through the register window for the
- * previous PCs in the call trace.
- *
- * 2. Look on the stack.
- */
-
- /* Step 1. */
- /* Rotate WINDOWSTART to move the bit corresponding to
- * the current window to the bit #0.
- */
- windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
-
- /* Look for bits that are set, they correspond to
- * valid windows.
- */
- for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
- if (windowstart & (1 << index)) {
- /* Read a0 and a1 from the
- * corresponding position in AREGs.
- */
- a0 = regs->areg[index * 4];
- a1 = regs->areg[index * 4 + 1];
- /* Get the PC from a0 and a1. */
- pc = MAKE_PC_FROM_RA(a0, pc);
-
- /* Add the PC to the trace. */
- if (pc != 0 && pc <= TASK_SIZE)
- oprofile_add_trace(pc);
- else
- return;
- }
-
- /* Step 2. */
- /* We are done with the register window, we need to
- * look through the stack.
- */
- if (depth > 0) {
- /* Start from the a1 register. */
- /* a1 = regs->areg[1]; */
- while (a0 != 0 && depth--) {
-
- struct frame_start frame_start;
- /* Get the location for a1, a0 for the
- * previous frame from the current a1.
- */
- unsigned long *psp = (unsigned long *)a1;
- psp -= 4;
-
- /* Check if the region is OK to access. */
- if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
- return;
- /* Copy a1, a0 from user space stack frame. */
- if (__copy_from_user_inatomic(&frame_start, psp,
- sizeof(frame_start)))
- return;
-
- a0 = frame_start.a0;
- a1 = frame_start.a1;
- pc = MAKE_PC_FROM_RA(a0, pc);
-
- if (pc != 0 && pc <= TASK_SIZE)
- oprofile_add_trace(pc);
- else
- return;
- }
- }
-}
-
-static void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth)
+static int xtensa_backtrace_cb(struct stackframe *frame, void *data)
{
- unsigned long pc = regs->pc;
- unsigned long *psp;
- unsigned long sp_start, sp_end;
- unsigned long a0 = regs->areg[0];
- unsigned long a1 = regs->areg[1];
-
- sp_start = a1 & ~(THREAD_SIZE-1);
- sp_end = sp_start + THREAD_SIZE;
-
- /* Spill the register window to the stack first. */
- spill_registers();
-
- /* Read the stack frames one by one and create the PC
- * from the a0 and a1 registers saved there.
- */
- while (a1 > sp_start && a1 < sp_end && depth--) {
- pc = MAKE_PC_FROM_RA(a0, pc);
-
- /* Add the PC to the trace. */
- oprofile_add_trace(pc);
- if (pc == (unsigned long) &common_exception_return) {
- regs = (struct pt_regs *)a1;
- if (user_mode(regs)) {
- pc = regs->pc;
- if (pc != 0 && pc <= TASK_SIZE)
- oprofile_add_trace(pc);
- else
- return;
- return xtensa_backtrace_user(regs, depth);
- }
- a0 = regs->areg[0];
- a1 = regs->areg[1];
- continue;
- }
-
- psp = (unsigned long *)a1;
-
- a0 = *(psp - 4);
- a1 = *(psp - 3);
-
- if (a1 <= (unsigned long)psp)
- return;
-
- }
- return;
+ oprofile_add_trace(frame->pc);
+ return 0;
}
void xtensa_backtrace(struct pt_regs * const regs, unsigned int depth)
{
if (user_mode(regs))
- xtensa_backtrace_user(regs, depth);
+ xtensa_backtrace_user(regs, depth, xtensa_backtrace_cb, NULL);
else
- xtensa_backtrace_kernel(regs, depth);
+ xtensa_backtrace_kernel(regs, depth, xtensa_backtrace_cb,
+ xtensa_backtrace_cb, NULL);
}
--
1.8.1.4
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index ba22699..3c57934 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -20,6 +20,7 @@ config XTENSA
select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
+ select PERF_USE_VMALLOC
select VIRT_TO_BUS
help
Xtensa processors are 32-bit RISC machines designed by Tensilica
--
1.8.1.4
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/irq.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 3eee94f..32b6056 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -106,6 +106,12 @@ int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
irq_set_chip_and_handler_name(irq, irq_chip,
handle_percpu_irq, "timer");
irq_clear_status_flags(irq, IRQ_LEVEL);
+#ifdef XCHAL_INTTYPE_MASK_PROFILING
+ } else if (mask & XCHAL_INTTYPE_MASK_PROFILING) {
+ irq_set_chip_and_handler_name(irq, irq_chip,
+ handle_percpu_irq, "profiling");
+ irq_set_status_flags(irq, IRQ_LEVEL);
+#endif
} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
/* XCHAL_INTTYPE_MASK_NMI */
irq_set_chip_and_handler_name(irq, irq_chip,
--
1.8.1.4
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/mm/fault.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 9e3571a..76360a2 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
@@ -142,6 +143,12 @@ good_area:
}
up_read(&mm->mmap_sem);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (flags & VM_FAULT_MAJOR)
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ else if (flags & VM_FAULT_MINOR)
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+
return;
/* Something tried to access memory that isn't in our memory map..
--
1.8.1.4
Xtensa Performance Monitor Module has up to 8 32 bit wide performance
counters. Each counter may be enabled independently and can count any
single type of hardware performance events. Event counting may be enabled
and disabled globally (per PMM).
Each counter has status register with bits indicating if the counter has
been overflown and may be programmed to raise profiling IRQ on overflow.
This IRQ is used to rewind counters and allow for counting more than 2^32
samples for counting events and to report samples for sampling events.
For more details see Tensilica Debug User's Guide, chapter 8
"Performance monitor module".
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Max Filippov <[email protected]>
---
Changes v1->v2:
- use -EINVAL instead of -ENOENT for invalid PMU event configuratons.
arch/xtensa/Kconfig | 10 +
arch/xtensa/kernel/Makefile | 1 +
arch/xtensa/kernel/perf_event.c | 450 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 461 insertions(+)
create mode 100644 arch/xtensa/kernel/perf_event.c
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 3c57934..0e92885 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
Build a Conventional Kernel with full MMU support,
ie: it supports a TLB with auto-loading, page protection.
+config XTENSA_VARIANT_HAVE_PERF_EVENTS
+ bool "Core variant has Performance Monitor Module"
+ depends on XTENSA_VARIANT_CUSTOM
+ default n
+ help
+ Enable if core variant has Performance Monitor Module with
+ External Registers Interface.
+
+ If unsure, say N.
+
config XTENSA_UNALIGNED_USER
bool "Unaligned memory access in use space"
help
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index d3a0f0f..547a757 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_SMP) += smp.o mxhead.o
+obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
AFLAGS_head.o += -mtext-section-literals
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
new file mode 100644
index 0000000..b44df3c
--- /dev/null
+++ b/arch/xtensa/kernel/perf_event.c
@@ -0,0 +1,450 @@
+/*
+ * Xtensa Performance Monitor Module driver
+ * See Tensilica Debug User's Guide for PMU registers documentation.
+ *
+ * Copyright (C) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+/* Global control/status for all perf counters */
+#define XTENSA_PMU_PMG 0x1000
+/* Perf counter values */
+#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
+/* Perf counter control registers */
+#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
+/* Perf counter status registers */
+#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)
+
+#define XTENSA_PMU_PMG_PMEN 0x1
+
+#define XTENSA_PMU_COUNTER_MASK 0xffffffffULL
+#define XTENSA_PMU_COUNTER_MAX 0x7fffffff
+
+#define XTENSA_PMU_PMCTRL_INTEN 0x00000001
+#define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008
+#define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0
+#define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8
+#define XTENSA_PMU_PMCTRL_SELECT 0x00001f00
+#define XTENSA_PMU_PMCTRL_MASK_SHIFT 16
+#define XTENSA_PMU_PMCTRL_MASK 0xffff0000
+
+#define XTENSA_PMU_MASK(select, mask) \
+ (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
+ ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
+ XTENSA_PMU_PMCTRL_TRACELEVEL | \
+ XTENSA_PMU_PMCTRL_INTEN)
+
+#define XTENSA_PMU_PMSTAT_OVFL 0x00000001
+#define XTENSA_PMU_PMSTAT_INTASRT 0x00000010
+
+struct xtensa_pmu_events {
+ /* Array of events currently on this core */
+ struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
+ /* Bitmap of used hardware counters */
+ unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
+};
+static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
+
+static const u32 xtensa_hw_ctl[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1),
+ [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff),
+ [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1),
+ [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1),
+ /* Taken and non-taken branches + taken loop ends */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490),
+ /* Instruction-related + other global stall cycles */
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
+ /* Data-related global stall cycles */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff),
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+
+static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1),
+ [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2),
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1),
+ [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2),
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1),
+ [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2),
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1),
+ [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8),
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1),
+ [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8),
+ },
+ },
+};
+
+static int xtensa_pmu_cache_event(u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result;
+ int ret;
+
+ cache_type = (config >> 0) & 0xff;
+ cache_op = (config >> 8) & 0xff;
+ cache_result = (config >> 16) & 0xff;
+
+ if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
+ cache_op >= C(OP_MAX) ||
+ cache_result >= C(RESULT_MAX))
+ return -EINVAL;
+
+ ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
+
+ if (ret == 0)
+ return -EINVAL;
+
+ return ret;
+}
+
+static inline uint32_t xtensa_pmu_read_counter(int idx)
+{
+ return get_er(XTENSA_PMU_PM(idx));
+}
+
+static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
+{
+ set_er(v, XTENSA_PMU_PM(idx));
+}
+
+static void xtensa_perf_event_update(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
+{
+ uint64_t prev_raw_count, new_raw_count;
+ int64_t delta;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+}
+
+static bool xtensa_perf_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
+{
+ bool rc = false;
+ s64 left;
+
+ if (!is_sampling_event(event)) {
+ left = XTENSA_PMU_COUNTER_MAX;
+ } else {
+ s64 period = hwc->sample_period;
+
+ left = local64_read(&hwc->period_left);
+ if (left <= -period) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ rc = true;
+ } else if (left <= 0) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ rc = true;
+ }
+ if (left > XTENSA_PMU_COUNTER_MAX)
+ left = XTENSA_PMU_COUNTER_MAX;
+ }
+
+ local64_set(&hwc->prev_count, -left);
+ xtensa_pmu_write_counter(idx, -left);
+ perf_event_update_userpage(event);
+
+ return rc;
+}
+
+static void xtensa_pmu_enable(struct pmu *pmu)
+{
+ set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static void xtensa_pmu_disable(struct pmu *pmu)
+{
+ set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static int xtensa_pmu_event_init(struct perf_event *event)
+{
+ int ret;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
+ xtensa_hw_ctl[event->attr.config] == 0)
+ return -EINVAL;
+ event->hw.config = xtensa_hw_ctl[event->attr.config];
+ return 0;
+
+ case PERF_TYPE_HW_CACHE:
+ ret = xtensa_pmu_cache_event(event->attr.config);
+ if (ret < 0)
+ return ret;
+ event->hw.config = ret;
+ return 0;
+
+ case PERF_TYPE_RAW:
+ /* Not 'previous counter' select */
+ if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
+ (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
+ return -EINVAL;
+ event->hw.config = (event->attr.config &
+ (XTENSA_PMU_PMCTRL_KRNLCNT |
+ XTENSA_PMU_PMCTRL_TRACELEVEL |
+ XTENSA_PMU_PMCTRL_SELECT |
+ XTENSA_PMU_PMCTRL_MASK)) |
+ XTENSA_PMU_PMCTRL_INTEN;
+ return 0;
+
+ default:
+ return -ENOENT;
+ }
+}
+
+/*
+ * Starts/Stops a counter present on the PMU. The PMI handler
+ * should stop the counter when perf_event_overflow() returns
+ * !0. ->start() will be used to continue.
+ */
+static void xtensa_pmu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (WARN_ON_ONCE(idx == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD) {
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+ xtensa_perf_event_set_period(event, hwc, idx);
+ }
+
+ hwc->state = 0;
+
+ set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
+}
+
+static void xtensa_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ set_er(0, XTENSA_PMU_PMCTRL(idx));
+ set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
+ XTENSA_PMU_PMSTAT(idx));
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) &&
+ !(event->hw.state & PERF_HES_UPTODATE)) {
+ xtensa_perf_event_update(event, &event->hw, idx);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+}
+
+/*
+ * Adds/Removes a counter to/from the PMU, can be done inside
+ * a transaction, see the ->*_txn() methods.
+ */
+static int xtensa_pmu_add(struct perf_event *event, int flags)
+{
+ struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (__test_and_set_bit(idx, ev->used_mask)) {
+ idx = find_first_zero_bit(ev->used_mask,
+ XCHAL_NUM_PERF_COUNTERS);
+ if (idx == XCHAL_NUM_PERF_COUNTERS)
+ return -EAGAIN;
+
+ __set_bit(idx, ev->used_mask);
+ hwc->idx = idx;
+ }
+ ev->event[idx] = event;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ xtensa_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+ return 0;
+}
+
+static void xtensa_pmu_del(struct perf_event *event, int flags)
+{
+ struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+
+ xtensa_pmu_stop(event, PERF_EF_UPDATE);
+ __clear_bit(event->hw.idx, ev->used_mask);
+ perf_event_update_userpage(event);
+}
+
+static void xtensa_pmu_read(struct perf_event *event)
+{
+ xtensa_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+ struct perf_callchain_entry *entry = data;
+
+ perf_callchain_store(entry, frame->pc);
+ return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+ callchain_trace, NULL, entry);
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+ callchain_trace, entry);
+}
+
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+ unsigned i;
+
+ local_irq_save(flags);
+ pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
+ get_er(XTENSA_PMU_PMG));
+ for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
+ pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
+ i, get_er(XTENSA_PMU_PM(i)),
+ i, get_er(XTENSA_PMU_PMCTRL(i)),
+ i, get_er(XTENSA_PMU_PMSTAT(i)));
+ local_irq_restore(flags);
+}
+
+static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+{
+ irqreturn_t rc = IRQ_NONE;
+ struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+ unsigned i;
+
+ for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
+ i < XCHAL_NUM_PERF_COUNTERS;
+ i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
+ uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
+ struct perf_event *event = ev->event[i];
+ struct hw_perf_event *hwc = &event->hw;
+ u64 last_period;
+
+ if (!(v & XTENSA_PMU_PMSTAT_OVFL))
+ continue;
+
+ set_er(v, XTENSA_PMU_PMSTAT(i));
+ xtensa_perf_event_update(event, hwc, i);
+ last_period = hwc->last_period;
+ if (xtensa_perf_event_set_period(event, hwc, i)) {
+ struct perf_sample_data data;
+ struct pt_regs *regs = get_irq_regs();
+
+ perf_sample_data_init(&data, 0, last_period);
+ if (perf_event_overflow(event, &data, regs))
+ xtensa_pmu_stop(event, 0);
+ }
+
+ rc = IRQ_HANDLED;
+ }
+ return rc;
+}
+
+static struct pmu xtensa_pmu = {
+ .pmu_enable = xtensa_pmu_enable,
+ .pmu_disable = xtensa_pmu_disable,
+ .event_init = xtensa_pmu_event_init,
+ .add = xtensa_pmu_add,
+ .del = xtensa_pmu_del,
+ .start = xtensa_pmu_start,
+ .stop = xtensa_pmu_stop,
+ .read = xtensa_pmu_read,
+};
+
+static void xtensa_pmu_setup(void)
+{
+ unsigned i;
+
+ set_er(0, XTENSA_PMU_PMG);
+ for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
+ set_er(0, XTENSA_PMU_PMCTRL(i));
+ set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
+ }
+}
+
+static int xtensa_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *data)
+{
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_STARTING:
+ xtensa_pmu_setup();
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init xtensa_pmu_init(void)
+{
+ int ret;
+ int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
+
+ perf_cpu_notifier(xtensa_pmu_notifier);
+ ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
+ "pmu", NULL);
+ if (ret < 0)
+ return ret;
+
+ ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
+ if (ret)
+ free_irq(irq, NULL);
+
+ return ret;
+}
+early_initcall(xtensa_pmu_init);
--
1.8.1.4
Cc: Peter Zijlstra <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Max Filippov <[email protected]>
---
tools/perf/arch/xtensa/Build | 1 +
tools/perf/arch/xtensa/Makefile | 3 +++
tools/perf/arch/xtensa/util/Build | 1 +
tools/perf/arch/xtensa/util/dwarf-regs.c | 25 +++++++++++++++++++++++++
4 files changed, 30 insertions(+)
create mode 100644 tools/perf/arch/xtensa/Build
create mode 100644 tools/perf/arch/xtensa/Makefile
create mode 100644 tools/perf/arch/xtensa/util/Build
create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644
index 0000000..54afe4a
--- /dev/null
+++ b/tools/perf/arch/xtensa/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644
index 0000000..7fbca17
--- /dev/null
+++ b/tools/perf/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644
index 0000000..954e287
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
--
1.8.1.4
entry.s only disables IRQs on hardware IRQ, move trace_hardirqs_off call
into do_interrupt. Check actual intlevel that will be restored on return
from exception handler to decide if trace_hardirqs_on should be called.
Annotate IRQ on/off points in the TIF_* handling loop on return from
exception handler.
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/entry.S | 39 ++++++++++++++-------------------------
arch/xtensa/kernel/traps.c | 5 ++++-
2 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b64075f..d060fc2 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -415,21 +415,6 @@ common_exception:
save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
-#ifdef CONFIG_TRACE_IRQFLAGS
- l32i a4, a1, PT_DEPC
- /* Double exception means we came here with an exception
- * while PS.EXCM was set, i.e. interrupts disabled.
- */
- bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
- bnei a2, EXCCAUSE_LEVEL1_INTERRUPT, 1f
- /* We came here with an interrupt means interrupts were enabled
- * and we've just disabled them.
- */
- movi a4, trace_hardirqs_off
- callx4 a4
-1:
-#endif
-
/* Go to second-level dispatcher. Set up parameters to pass to the
* exception handler and call the exception handler.
*/
@@ -450,6 +435,10 @@ common_exception_return:
1:
rsil a2, LOCKLEVEL
+#ifdef CONFIG_TRACE_IRQFLAGS
+ movi a4, trace_hardirqs_off
+ callx4 a4
+#endif
/* Jump if we are returning from kernel exceptions. */
@@ -474,6 +463,10 @@ common_exception_return:
/* Call do_signal() */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ movi a4, trace_hardirqs_on
+ callx4 a4
+#endif
rsil a2, 0
movi a4, do_notify_resume # int do_notify_resume(struct pt_regs*)
mov a6, a1
@@ -482,6 +475,10 @@ common_exception_return:
3: /* Reschedule */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ movi a4, trace_hardirqs_on
+ callx4 a4
+#endif
rsil a2, 0
movi a4, schedule # void schedule (void)
callx4 a4
@@ -510,16 +507,8 @@ common_exception_return:
6:
4:
#ifdef CONFIG_TRACE_IRQFLAGS
- l32i a4, a1, PT_DEPC
- /* Double exception means we came here with an exception
- * while PS.EXCM was set, i.e. interrupts disabled.
- */
- bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
- l32i a4, a1, PT_EXCCAUSE
- bnei a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
- /* We came here with an interrupt means interrupts were enabled
- * and we'll reenable them on return.
- */
+ extui a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+ bgei a4, LOCKLEVEL, 1f
movi a4, trace_hardirqs_on
callx4 a4
1:
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9d2f45f..a1b5bd2 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -211,8 +211,11 @@ void do_interrupt(struct pt_regs *regs)
XCHAL_INTLEVEL6_MASK,
XCHAL_INTLEVEL7_MASK,
};
- struct pt_regs *old_regs = set_irq_regs(regs);
+ struct pt_regs *old_regs;
+ trace_hardirqs_off();
+
+ old_regs = set_irq_regs(regs);
irq_enter();
for (;;) {
--
1.8.1.4
call12 can't be safely used as the first call in the inline function,
because the compiler does not extend the stack frame of the bounding
function accordingly, which may result in corruption of local variables.
If a call needs to be done, do call8 first followed by call12.
For pure assembly code in _switch_to increase stack frame size of the
bounding function.
Cc: [email protected]
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/traps.h | 29 +++++++++++++++++++----------
arch/xtensa/kernel/entry.S | 4 ++--
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 677bfcf..28f33a8 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -25,30 +25,39 @@ static inline void spill_registers(void)
{
#if XCHAL_NUM_AREGS > 16
__asm__ __volatile__ (
- " call12 1f\n"
+ " call8 1f\n"
" _j 2f\n"
" retw\n"
" .align 4\n"
"1:\n"
+#if XCHAL_NUM_AREGS == 32
+ " _entry a1, 32\n"
+ " addi a8, a0, 3\n"
+ " _entry a1, 16\n"
+ " mov a12, a12\n"
+ " retw\n"
+#else
" _entry a1, 48\n"
- " addi a12, a0, 3\n"
-#if XCHAL_NUM_AREGS > 32
- " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n"
+ " call12 1f\n"
+ " retw\n"
+ " .align 4\n"
+ "1:\n"
+ " .rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n"
" _entry a1, 48\n"
" mov a12, a0\n"
" .endr\n"
-#endif
- " _entry a1, 48\n"
+ " _entry a1, 16\n"
#if XCHAL_NUM_AREGS % 12 == 0
- " mov a8, a8\n"
-#elif XCHAL_NUM_AREGS % 12 == 4
" mov a12, a12\n"
-#elif XCHAL_NUM_AREGS % 12 == 8
+#elif XCHAL_NUM_AREGS % 12 == 4
" mov a4, a4\n"
+#elif XCHAL_NUM_AREGS % 12 == 8
+ " mov a8, a8\n"
#endif
" retw\n"
+#endif
"2:\n"
- : : : "a12", "a13", "memory");
+ : : : "a8", "a9", "memory");
#else
__asm__ __volatile__ (
" mov a12, a12\n"
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index d060fc2..5771d5c 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1838,7 +1838,7 @@ ENDPROC(system_call)
mov a12, a0
.endr
#endif
- _entry a1, 48
+ _entry a1, 16
#if XCHAL_NUM_AREGS % 12 == 0
mov a8, a8
#elif XCHAL_NUM_AREGS % 12 == 4
@@ -1862,7 +1862,7 @@ ENDPROC(system_call)
ENTRY(_switch_to)
- entry a1, 16
+ entry a1, 48
mov a11, a3 # and 'next' (a3)
--
1.8.1.4
There's no way _switch_to can produce double exceptions now, don't
enter/leave EXC_TABLE_FIXUP critical section.
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/entry.S | 4 ----
1 file changed, 4 deletions(-)
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 5771d5c..309e71b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1883,9 +1883,7 @@ ENTRY(_switch_to)
/* Disable ints while we manipulate the stack pointer. */
rsil a14, LOCKLEVEL
- rsr a3, excsave1
rsync
- s32i a3, a3, EXC_TABLE_FIXUP /* enter critical section */
/* Switch CPENABLE */
@@ -1906,9 +1904,7 @@ ENTRY(_switch_to)
*/
rsr a3, excsave1 # exc_table
- movi a6, 0
addi a7, a5, PT_REGS_OFFSET
- s32i a6, a3, EXC_TABLE_FIXUP
s32i a7, a3, EXC_TABLE_KSTK
/* restore context of the task 'next' */
--
1.8.1.4
In case perf IRQ is the highest of the medium-level IRQs, and is alone
on its level, it may be treated as NMI:
- LOCKLEVEL is defined to be one level less than EXCM level,
- IRQ masking never lowers current IRQ level,
- new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
IRQ; new second level exception handler, do_nmi, assigned to it
handles it as NMI,
- atomic operations in configurations without s32c1i still need to mask
all interrupts.
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/include/asm/atomic.h | 10 ++--
arch/xtensa/include/asm/cmpxchg.h | 4 +-
arch/xtensa/include/asm/irqflags.h | 22 ++++++++-
arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
arch/xtensa/kernel/entry.S | 93 +++++++++++++++++++++++++++++++------
arch/xtensa/kernel/irq.c | 8 ++++
arch/xtensa/kernel/perf_event.c | 6 ++-
arch/xtensa/kernel/traps.c | 26 +++++++++++
arch/xtensa/kernel/vectors.S | 10 +++-
9 files changed, 183 insertions(+), 27 deletions(-)
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 00b7d46..ebcd1f6 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -29,7 +29,7 @@
*
* Locking interrupts looks like this:
*
- * rsil a15, LOCKLEVEL
+ * rsil a15, TOPLEVEL
* <code>
* wsr a15, PS
* rsync
@@ -106,7 +106,7 @@ static inline void atomic_##op(int i, atomic_t * v) \
unsigned int vval; \
\
__asm__ __volatile__( \
- " rsil a15, "__stringify(LOCKLEVEL)"\n"\
+ " rsil a15, "__stringify(TOPLEVEL)"\n"\
" l32i %0, %2, 0\n" \
" " #op " %0, %0, %1\n" \
" s32i %0, %2, 0\n" \
@@ -124,7 +124,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
unsigned int vval; \
\
__asm__ __volatile__( \
- " rsil a15,"__stringify(LOCKLEVEL)"\n" \
+ " rsil a15,"__stringify(TOPLEVEL)"\n" \
" l32i %0, %2, 0\n" \
" " #op " %0, %0, %1\n" \
" s32i %0, %2, 0\n" \
@@ -272,7 +272,7 @@ static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
unsigned int vval;
__asm__ __volatile__(
- " rsil a15,"__stringify(LOCKLEVEL)"\n"
+ " rsil a15,"__stringify(TOPLEVEL)"\n"
" l32i %0, %2, 0\n"
" xor %1, %4, %3\n"
" and %0, %0, %4\n"
@@ -306,7 +306,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
unsigned int vval;
__asm__ __volatile__(
- " rsil a15,"__stringify(LOCKLEVEL)"\n"
+ " rsil a15,"__stringify(TOPLEVEL)"\n"
" l32i %0, %2, 0\n"
" or %0, %0, %1\n"
" s32i %0, %2, 0\n"
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 370b26f..201e900 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -34,7 +34,7 @@ __cmpxchg_u32(volatile int *p, int old, int new)
return new;
#else
__asm__ __volatile__(
- " rsil a15, "__stringify(LOCKLEVEL)"\n"
+ " rsil a15, "__stringify(TOPLEVEL)"\n"
" l32i %0, %1, 0\n"
" bne %0, %2, 1f\n"
" s32i %3, %1, 0\n"
@@ -123,7 +123,7 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
#else
unsigned long tmp;
__asm__ __volatile__(
- " rsil a15, "__stringify(LOCKLEVEL)"\n"
+ " rsil a15, "__stringify(TOPLEVEL)"\n"
" l32i %0, %1, 0\n"
" s32i %2, %1, 0\n"
" wsr a15, ps\n"
diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h
index ea36674..8e090c7 100644
--- a/arch/xtensa/include/asm/irqflags.h
+++ b/arch/xtensa/include/asm/irqflags.h
@@ -6,6 +6,7 @@
* for more details.
*
* Copyright (C) 2001 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
*/
#ifndef _XTENSA_IRQFLAGS_H
@@ -23,8 +24,27 @@ static inline unsigned long arch_local_save_flags(void)
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
- asm volatile("rsil %0, "__stringify(LOCKLEVEL)
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+ unsigned long tmp;
+
+ asm volatile("rsr %0, ps\t\n"
+ "extui %1, %0, 0, 4\t\n"
+ "bgei %1, "__stringify(LOCKLEVEL)", 1f\t\n"
+ "rsil %0, "__stringify(LOCKLEVEL)"\n"
+ "1:"
+ : "=a" (flags), "=a" (tmp) :: "memory");
+#else
+ asm volatile("rsr %0, ps\t\n"
+ "or %0, %0, %1\t\n"
+ "xsr %0, ps\t\n"
+ "rsync"
+ : "=&a" (flags) : "a" (LOCKLEVEL) : "memory");
+#endif
+#else
+ asm volatile("rsil %0, "__stringify(LOCKLEVEL)
: "=a" (flags) :: "memory");
+#endif
return flags;
}
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index b61bdf0..9ed9b4a 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -1,11 +1,10 @@
/*
- * include/asm-xtensa/processor.h
- *
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2001 - 2008 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
*/
#ifndef _XTENSA_PROCESSOR_H
@@ -45,6 +44,14 @@
#define STACK_TOP_MAX STACK_TOP
/*
+ * General exception cause assigned to fake NMI. Fake NMI needs to be handled
+ * differently from other interrupts, but it uses common kernel entry/exit
+ * code.
+ */
+
+#define EXCCAUSE_MAPPED_NMI 62
+
+/*
* General exception cause assigned to debug exceptions. Debug exceptions go
* to their own vector, rather than the general exception vectors (user,
* kernel, double); and their specific causes are reported via DEBUGCAUSE
@@ -65,10 +72,30 @@
#define VALID_DOUBLE_EXCEPTION_ADDRESS 64
+#define XTENSA_INT_LEVEL(a) _XTENSA_INT_LEVEL(a)
+#define _XTENSA_INT_LEVEL(a) XCHAL_INT##a##_LEVEL
+
+#define XTENSA_INTLEVEL_MASK(a) _XTENSA_INTLEVEL_MASK(a)
+#define _XTENSA_INTLEVEL_MASK(a) (XCHAL_INTLEVEL##a##_MASK)
+
+#define IS_POW2(v) (((v) & ((v) - 1)) == 0)
+
+#define PROFILING_INTLEVEL XTENSA_INT_LEVEL(XCHAL_PROFILING_INTERRUPT)
+
/* LOCKLEVEL defines the interrupt level that masks all
* general-purpose interrupts.
*/
+#if defined(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) && \
+ defined(XCHAL_PROFILING_INTERRUPT) && \
+ PROFILING_INTLEVEL == XCHAL_EXCM_LEVEL && \
+ XCHAL_EXCM_LEVEL > 1 && \
+ IS_POW2(XTENSA_INTLEVEL_MASK(PROFILING_INTLEVEL))
+#define LOCKLEVEL (XCHAL_EXCM_LEVEL - 1)
+#else
#define LOCKLEVEL XCHAL_EXCM_LEVEL
+#endif
+#define TOPLEVEL XCHAL_EXCM_LEVEL
+#define XTENSA_FAKE_NMI (LOCKLEVEL < TOPLEVEL)
/* WSBITS and WBBITS are the width of the WINDOWSTART and WINDOWBASE
* registers
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 309e71b..dd766ed 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1,6 +1,4 @@
/*
- * arch/xtensa/kernel/entry.S
- *
* Low-level exception handling
*
* This file is subject to the terms and conditions of the GNU General Public
@@ -8,6 +6,7 @@
* for more details.
*
* Copyright (C) 2004 - 2008 by Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
*
* Chris Zankel <[email protected]>
*
@@ -79,6 +78,27 @@
#endif
.endm
+
+ .macro irq_save flags tmp
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+ rsr \flags, ps
+ extui \tmp, \flags, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+ bgei \tmp, LOCKLEVEL, 99f
+ rsil \tmp, LOCKLEVEL
+99:
+#else
+ movi \tmp, LOCKLEVEL
+ rsr \flags, ps
+ or \flags, \flags, \tmp
+ xsr \flags, ps
+ rsync
+#endif
+#else
+ rsil \flags, LOCKLEVEL
+#endif
+ .endm
+
/* ----------------- DEFAULT FIRST LEVEL EXCEPTION HANDLERS ----------------- */
/*
@@ -366,11 +386,11 @@ common_exception:
/* It is now save to restore the EXC_TABLE_FIXUP variable. */
- rsr a0, exccause
+ rsr a2, exccause
movi a3, 0
- rsr a2, excsave1
- s32i a0, a1, PT_EXCCAUSE
- s32i a3, a2, EXC_TABLE_FIXUP
+ rsr a0, excsave1
+ s32i a2, a1, PT_EXCCAUSE
+ s32i a3, a0, EXC_TABLE_FIXUP
/* All unrecoverable states are saved on stack, now, and a1 is valid.
* Now we can allow exceptions again. In case we've got an interrupt
@@ -381,21 +401,48 @@ common_exception:
*/
rsr a3, ps
- addi a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
- movi a2, LOCKLEVEL
+ s32i a3, a1, PT_PS # save ps
+
+#if XTENSA_FAKE_NMI
+ /* Correct PS needs to be saved in the PT_PS:
+ * - in case of exception or level-1 interrupt it's in the PS,
+ * and is already saved.
+ * - in case of medium level interrupt it's in the excsave2.
+ */
+ movi a0, EXCCAUSE_MAPPED_NMI
+ extui a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+ beq a2, a0, .Lmedium_level_irq
+ bnei a2, EXCCAUSE_LEVEL1_INTERRUPT, .Lexception
+ beqz a3, .Llevel1_irq # level-1 IRQ sets ps.intlevel to 0
+
+.Lmedium_level_irq:
+ rsr a0, excsave2
+ s32i a0, a1, PT_PS # save medium-level interrupt ps
+ bgei a3, LOCKLEVEL, .Lexception
+
+.Llevel1_irq:
+ movi a3, LOCKLEVEL
+
+.Lexception:
+ movi a0, 1 << PS_WOE_BIT
+ or a3, a3, a0
+#else
+ addi a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT
+ movi a0, LOCKLEVEL
extui a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
# a3 = PS.INTLEVEL
- moveqz a3, a2, a0 # a3 = LOCKLEVEL iff interrupt
+ moveqz a3, a0, a2 # a3 = LOCKLEVEL iff interrupt
movi a2, 1 << PS_WOE_BIT
or a3, a3, a2
rsr a2, exccause
+#endif
+
#ifdef XTENSA_CONTINUOUS_STACK
/* restore return address (or 0 if return to userspace) */
rsr a0, depc
#endif
- xsr a3, ps
-
- s32i a3, a1, PT_PS # save ps
+ wsr a3, ps
+ rsync # PS.WOE => rsync => overflow
/* Save lbeg, lend */
@@ -433,8 +480,13 @@ common_exception:
.global common_exception_return
common_exception_return:
+#if XTENSA_FAKE_NMI
+ l32i a2, a1, PT_EXCCAUSE
+ movi a3, EXCCAUSE_MAPPED_NMI
+ beq a2, a3, .LNMIexit
+#endif
1:
- rsil a2, LOCKLEVEL
+ irq_save a2, a3
#ifdef CONFIG_TRACE_IRQFLAGS
movi a4, trace_hardirqs_off
callx4 a4
@@ -497,6 +549,12 @@ common_exception_return:
j 1b
#endif
+#if XTENSA_FAKE_NMI
+.LNMIexit:
+ l32i a3, a1, PT_PS
+ _bbci.l a3, PS_UM_BIT, 4f
+#endif
+
5:
#ifdef CONFIG_DEBUG_TLB_SANITY
l32i a4, a1, PT_DEPC
@@ -1579,6 +1637,13 @@ ENTRY(fast_second_level_miss)
rfde
9: l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0
+ bnez a0, 8b
+
+ /* Even more unlikely case active_mm == 0.
+ * We can get here with NMI in the middle of context_switch that
+ * touches vmalloc area.
+ */
+ movi a0, init_mm
j 8b
#if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -1882,7 +1947,7 @@ ENTRY(_switch_to)
/* Disable ints while we manipulate the stack pointer. */
- rsil a14, LOCKLEVEL
+ irq_save a14, a3
rsync
/* Switch CPENABLE */
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 32b6056..8d4f5de 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -29,6 +29,7 @@
#include <asm/platform.h>
atomic_t irq_err_count;
+DECLARE_PER_CPU(unsigned long, nmi_count);
asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
{
@@ -57,11 +58,18 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
int arch_show_interrupts(struct seq_file *p, int prec)
{
+ unsigned cpu __maybe_unused;
#ifdef CONFIG_SMP
show_ipi_list(p, prec);
#endif
seq_printf(p, "%*s: ", prec, "ERR");
seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
+#ifdef XTENSA_FAKE_NMI
+ seq_printf(p, "%*s:", prec, "NMI");
+ for_each_online_cpu(cpu)
+ seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
+ seq_puts(p, " Non-maskable interrupts\n");
+#endif
return 0;
}
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index b44df3c..54f0118 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -359,7 +359,7 @@ void perf_event_print_debug(void)
local_irq_restore(flags);
}
-static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
irqreturn_t rc = IRQ_NONE;
struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
@@ -436,10 +436,14 @@ static int __init xtensa_pmu_init(void)
int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
perf_cpu_notifier(xtensa_pmu_notifier);
+#if XTENSA_FAKE_NMI
+ enable_irq(irq);
+#else
ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
"pmu", NULL);
if (ret < 0)
return ret;
+#endif
ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
if (ret)
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index a1b5bd2..42d441f 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -62,6 +62,7 @@ extern void fast_coprocessor(void);
extern void do_illegal_instruction (struct pt_regs*);
extern void do_interrupt (struct pt_regs*);
+extern void do_nmi(struct pt_regs *);
extern void do_unaligned_user (struct pt_regs*);
extern void do_multihit (struct pt_regs*, unsigned long);
extern void do_page_fault (struct pt_regs*, unsigned long);
@@ -146,6 +147,9 @@ COPROCESSOR(6),
#if XTENSA_HAVE_COPROCESSOR(7)
COPROCESSOR(7),
#endif
+#if XTENSA_FAKE_NMI
+{ EXCCAUSE_MAPPED_NMI, 0, do_nmi },
+#endif
{ EXCCAUSE_MAPPED_DEBUG, 0, do_debug },
{ -1, -1, 0 }
@@ -199,6 +203,28 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
extern void do_IRQ(int, struct pt_regs *);
+#if XTENSA_FAKE_NMI
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
+
+DEFINE_PER_CPU(unsigned long, nmi_count);
+
+void do_nmi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs;
+
+ if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
+ trace_hardirqs_off();
+
+ old_regs = set_irq_regs(regs);
+ nmi_enter();
+ ++*this_cpu_ptr(&nmi_count);
+ xtensa_pmu_irq_handler(0, NULL);
+ nmi_exit();
+ set_irq_regs(old_regs);
+}
+#endif
+
void do_interrupt(struct pt_regs *regs)
{
static const unsigned int_level_mask[] = {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 1b397a9..abcdb52 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -627,7 +627,11 @@ ENTRY(_Level\level\()InterruptVector)
wsr a0, excsave2
rsr a0, epc\level
wsr a0, epc1
+ .if \level <= LOCKLEVEL
movi a0, EXCCAUSE_LEVEL1_INTERRUPT
+ .else
+ movi a0, EXCCAUSE_MAPPED_NMI
+ .endif
wsr a0, exccause
rsr a0, eps\level
# branch to user or kernel vector
@@ -682,11 +686,13 @@ ENDPROC(_WindowOverflow4)
.align 4
_SimulateUserKernelVectorException:
addi a0, a0, (1 << PS_EXCM_BIT)
+#if !XTENSA_FAKE_NMI
wsr a0, ps
+#endif
bbsi.l a0, PS_UM_BIT, 1f # branch if user mode
- rsr a0, excsave2 # restore a0
+ xsr a0, excsave2 # restore a0
j _KernelExceptionVector # simulate kernel vector exception
-1: rsr a0, excsave2 # restore a0
+1: xsr a0, excsave2 # restore a0
j _UserExceptionVector # simulate user vector exception
#endif
--
1.8.1.4
Signed-off-by: Max Filippov <[email protected]>
---
arch/xtensa/kernel/irq.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 8d4f5de..4228603 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -28,7 +28,6 @@
#include <asm/uaccess.h>
#include <asm/platform.h>
-atomic_t irq_err_count;
DECLARE_PER_CPU(unsigned long, nmi_count);
asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
@@ -62,8 +61,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
#ifdef CONFIG_SMP
show_ipi_list(p, prec);
#endif
- seq_printf(p, "%*s: ", prec, "ERR");
- seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
#ifdef XTENSA_FAKE_NMI
seq_printf(p, "%*s:", prec, "NMI");
for_each_online_cpu(cpu)
--
1.8.1.4
On Sat, Jul 18, 2015 at 11:30 AM, Max Filippov <[email protected]> wrote:
> In case perf IRQ is the highest of the medium-level IRQs, and is alone
> on its level, it may be treated as NMI:
> - LOCKLEVEL is defined to be one level less than EXCM level,
> - IRQ masking never lowers current IRQ level,
> - new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
> IRQ; new second level exception handler, do_nmi, assigned to it
> handles it as NMI,
> - atomic operations in configurations without s32c1i still need to mask
> all interrupts.
>
> Cc: Peter Zijlstra <[email protected]>
> Signed-off-by: Max Filippov <[email protected]>
> ---
> arch/xtensa/include/asm/atomic.h | 10 ++--
> arch/xtensa/include/asm/cmpxchg.h | 4 +-
> arch/xtensa/include/asm/irqflags.h | 22 ++++++++-
> arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
> arch/xtensa/kernel/entry.S | 93 +++++++++++++++++++++++++++++++------
> arch/xtensa/kernel/irq.c | 8 ++++
> arch/xtensa/kernel/perf_event.c | 6 ++-
> arch/xtensa/kernel/traps.c | 26 +++++++++++
> arch/xtensa/kernel/vectors.S | 10 +++-
> 9 files changed, 183 insertions(+), 27 deletions(-)
Ping?
--
Thanks.
-- Max
On Sat, Jul 18, 2015 at 11:30:15AM +0300, Max Filippov wrote:
> In case perf IRQ is the highest of the medium-level IRQs, and is alone
> on its level, it may be treated as NMI:
> - LOCKLEVEL is defined to be one level less than EXCM level,
> - IRQ masking never lowers current IRQ level,
> - new fake exception cause code, EXCCAUSE_MAPPED_NMI is assigned to that
> IRQ; new second level exception handler, do_nmi, assigned to it
> handles it as NMI,
> - atomic operations in configurations without s32c1i still need to mask
> all interrupts.
>
> Cc: Peter Zijlstra <[email protected]>
> Signed-off-by: Max Filippov <[email protected]>
> ---
> arch/xtensa/include/asm/atomic.h | 10 ++--
> arch/xtensa/include/asm/cmpxchg.h | 4 +-
> arch/xtensa/include/asm/irqflags.h | 22 ++++++++-
> arch/xtensa/include/asm/processor.h | 31 ++++++++++++-
> arch/xtensa/kernel/entry.S | 93 +++++++++++++++++++++++++++++++------
> arch/xtensa/kernel/irq.c | 8 ++++
> arch/xtensa/kernel/perf_event.c | 6 ++-
> arch/xtensa/kernel/traps.c | 26 +++++++++++
> arch/xtensa/kernel/vectors.S | 10 +++-
> 9 files changed, 183 insertions(+), 27 deletions(-)
Looks about right, I've not really gone over the asm bits through.
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> counters. Each counter may be enabled independently and can count any
> single type of hardware performance events. Event counting may be enabled
> and disabled globally (per PMM).
> Each counter has status register with bits indicating if the counter has
> been overflown and may be programmed to raise profiling IRQ on overflow.
> This IRQ is used to rewind counters and allow for counting more than 2^32
> samples for counting events and to report samples for sampling events.
>
> For more details see Tensilica Debug User's Guide, chapter 8
> "Performance monitor module".
Has this gone via PeterZ? I added the tools/ bits in my perf/core
branch, will go in next pull req,
- Arnaldo
> Cc: Peter Zijlstra <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Signed-off-by: Max Filippov <[email protected]>
> ---
> Changes v1->v2:
> - use -EINVAL instead of -ENOENT for invalid PMU event configuratons.
>
> arch/xtensa/Kconfig | 10 +
> arch/xtensa/kernel/Makefile | 1 +
> arch/xtensa/kernel/perf_event.c | 450 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 461 insertions(+)
> create mode 100644 arch/xtensa/kernel/perf_event.c
>
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index 3c57934..0e92885 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -126,6 +126,16 @@ config XTENSA_VARIANT_MMU
> Build a Conventional Kernel with full MMU support,
> ie: it supports a TLB with auto-loading, page protection.
>
> +config XTENSA_VARIANT_HAVE_PERF_EVENTS
> + bool "Core variant has Performance Monitor Module"
> + depends on XTENSA_VARIANT_CUSTOM
> + default n
> + help
> + Enable if core variant has Performance Monitor Module with
> + External Registers Interface.
> +
> + If unsure, say N.
> +
> config XTENSA_UNALIGNED_USER
> bool "Unaligned memory access in use space"
> help
> diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
> index d3a0f0f..547a757 100644
> --- a/arch/xtensa/kernel/Makefile
> +++ b/arch/xtensa/kernel/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
> obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
> obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
> obj-$(CONFIG_SMP) += smp.o mxhead.o
> +obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
>
> AFLAGS_head.o += -mtext-section-literals
>
> diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
> new file mode 100644
> index 0000000..b44df3c
> --- /dev/null
> +++ b/arch/xtensa/kernel/perf_event.c
> @@ -0,0 +1,450 @@
> +/*
> + * Xtensa Performance Monitor Module driver
> + * See Tensilica Debug User's Guide for PMU registers documentation.
> + *
> + * Copyright (C) 2015 Cadence Design Systems Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/irqdomain.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/perf_event.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/processor.h>
> +#include <asm/stacktrace.h>
> +
> +/* Global control/status for all perf counters */
> +#define XTENSA_PMU_PMG 0x1000
> +/* Perf counter values */
> +#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
> +/* Perf counter control registers */
> +#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
> +/* Perf counter status registers */
> +#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)
> +
> +#define XTENSA_PMU_PMG_PMEN 0x1
> +
> +#define XTENSA_PMU_COUNTER_MASK 0xffffffffULL
> +#define XTENSA_PMU_COUNTER_MAX 0x7fffffff
> +
> +#define XTENSA_PMU_PMCTRL_INTEN 0x00000001
> +#define XTENSA_PMU_PMCTRL_KRNLCNT 0x00000008
> +#define XTENSA_PMU_PMCTRL_TRACELEVEL 0x000000f0
> +#define XTENSA_PMU_PMCTRL_SELECT_SHIFT 8
> +#define XTENSA_PMU_PMCTRL_SELECT 0x00001f00
> +#define XTENSA_PMU_PMCTRL_MASK_SHIFT 16
> +#define XTENSA_PMU_PMCTRL_MASK 0xffff0000
> +
> +#define XTENSA_PMU_MASK(select, mask) \
> + (((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
> + ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
> + XTENSA_PMU_PMCTRL_TRACELEVEL | \
> + XTENSA_PMU_PMCTRL_INTEN)
> +
> +#define XTENSA_PMU_PMSTAT_OVFL 0x00000001
> +#define XTENSA_PMU_PMSTAT_INTASRT 0x00000010
> +
> +struct xtensa_pmu_events {
> + /* Array of events currently on this core */
> + struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
> + /* Bitmap of used hardware counters */
> + unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
> +};
> +static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
> +
> +static const u32 xtensa_hw_ctl[] = {
> + [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1),
> + [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff),
> + [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1),
> + [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1),
> + /* Taken and non-taken branches + taken loop ends */
> + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490),
> + /* Instruction-related + other global stall cycles */
> + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff),
> + /* Data-related global stall cycles */
> + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff),
> +};
> +
> +#define C(_x) PERF_COUNT_HW_CACHE_##_x
> +
> +static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
> + [C(L1D)] = {
> + [C(OP_READ)] = {
> + [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1),
> + [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2),
> + },
> + [C(OP_WRITE)] = {
> + [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1),
> + [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2),
> + },
> + },
> + [C(L1I)] = {
> + [C(OP_READ)] = {
> + [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1),
> + [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2),
> + },
> + },
> + [C(DTLB)] = {
> + [C(OP_READ)] = {
> + [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1),
> + [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8),
> + },
> + },
> + [C(ITLB)] = {
> + [C(OP_READ)] = {
> + [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1),
> + [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8),
> + },
> + },
> +};
> +
> +static int xtensa_pmu_cache_event(u64 config)
> +{
> + unsigned int cache_type, cache_op, cache_result;
> + int ret;
> +
> + cache_type = (config >> 0) & 0xff;
> + cache_op = (config >> 8) & 0xff;
> + cache_result = (config >> 16) & 0xff;
> +
> + if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
> + cache_op >= C(OP_MAX) ||
> + cache_result >= C(RESULT_MAX))
> + return -EINVAL;
> +
> + ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
> +
> + if (ret == 0)
> + return -EINVAL;
> +
> + return ret;
> +}
> +
> +static inline uint32_t xtensa_pmu_read_counter(int idx)
> +{
> + return get_er(XTENSA_PMU_PM(idx));
> +}
> +
> +static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
> +{
> + set_er(v, XTENSA_PMU_PM(idx));
> +}
> +
> +static void xtensa_perf_event_update(struct perf_event *event,
> + struct hw_perf_event *hwc, int idx)
> +{
> + uint64_t prev_raw_count, new_raw_count;
> + int64_t delta;
> +
> + do {
> + prev_raw_count = local64_read(&hwc->prev_count);
> + new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
> + } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
> + new_raw_count) != prev_raw_count);
> +
> + delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
> +
> + local64_add(delta, &event->count);
> + local64_sub(delta, &hwc->period_left);
> +}
> +
> +static bool xtensa_perf_event_set_period(struct perf_event *event,
> + struct hw_perf_event *hwc, int idx)
> +{
> + bool rc = false;
> + s64 left;
> +
> + if (!is_sampling_event(event)) {
> + left = XTENSA_PMU_COUNTER_MAX;
> + } else {
> + s64 period = hwc->sample_period;
> +
> + left = local64_read(&hwc->period_left);
> + if (left <= -period) {
> + left = period;
> + local64_set(&hwc->period_left, left);
> + hwc->last_period = period;
> + rc = true;
> + } else if (left <= 0) {
> + left += period;
> + local64_set(&hwc->period_left, left);
> + hwc->last_period = period;
> + rc = true;
> + }
> + if (left > XTENSA_PMU_COUNTER_MAX)
> + left = XTENSA_PMU_COUNTER_MAX;
> + }
> +
> + local64_set(&hwc->prev_count, -left);
> + xtensa_pmu_write_counter(idx, -left);
> + perf_event_update_userpage(event);
> +
> + return rc;
> +}
> +
> +static void xtensa_pmu_enable(struct pmu *pmu)
> +{
> + set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static void xtensa_pmu_disable(struct pmu *pmu)
> +{
> + set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
> +}
> +
> +static int xtensa_pmu_event_init(struct perf_event *event)
> +{
> + int ret;
> +
> + switch (event->attr.type) {
> + case PERF_TYPE_HARDWARE:
> + if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
> + xtensa_hw_ctl[event->attr.config] == 0)
> + return -EINVAL;
> + event->hw.config = xtensa_hw_ctl[event->attr.config];
> + return 0;
> +
> + case PERF_TYPE_HW_CACHE:
> + ret = xtensa_pmu_cache_event(event->attr.config);
> + if (ret < 0)
> + return ret;
> + event->hw.config = ret;
> + return 0;
> +
> + case PERF_TYPE_RAW:
> + /* Not 'previous counter' select */
> + if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
> + (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
> + return -EINVAL;
> + event->hw.config = (event->attr.config &
> + (XTENSA_PMU_PMCTRL_KRNLCNT |
> + XTENSA_PMU_PMCTRL_TRACELEVEL |
> + XTENSA_PMU_PMCTRL_SELECT |
> + XTENSA_PMU_PMCTRL_MASK)) |
> + XTENSA_PMU_PMCTRL_INTEN;
> + return 0;
> +
> + default:
> + return -ENOENT;
> + }
> +}
> +
> +/*
> + * Starts/Stops a counter present on the PMU. The PMI handler
> + * should stop the counter when perf_event_overflow() returns
> + * !0. ->start() will be used to continue.
> + */
> +static void xtensa_pmu_start(struct perf_event *event, int flags)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> +
> + if (WARN_ON_ONCE(idx == -1))
> + return;
> +
> + if (flags & PERF_EF_RELOAD) {
> + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> + xtensa_perf_event_set_period(event, hwc, idx);
> + }
> +
> + hwc->state = 0;
> +
> + set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
> +}
> +
> +static void xtensa_pmu_stop(struct perf_event *event, int flags)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> +
> + if (!(hwc->state & PERF_HES_STOPPED)) {
> + set_er(0, XTENSA_PMU_PMCTRL(idx));
> + set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
> + XTENSA_PMU_PMSTAT(idx));
> + hwc->state |= PERF_HES_STOPPED;
> + }
> +
> + if ((flags & PERF_EF_UPDATE) &&
> + !(event->hw.state & PERF_HES_UPTODATE)) {
> + xtensa_perf_event_update(event, &event->hw, idx);
> + event->hw.state |= PERF_HES_UPTODATE;
> + }
> +}
> +
> +/*
> + * Adds/Removes a counter to/from the PMU, can be done inside
> + * a transaction, see the ->*_txn() methods.
> + */
> +static int xtensa_pmu_add(struct perf_event *event, int flags)
> +{
> + struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> +
> + if (__test_and_set_bit(idx, ev->used_mask)) {
> + idx = find_first_zero_bit(ev->used_mask,
> + XCHAL_NUM_PERF_COUNTERS);
> + if (idx == XCHAL_NUM_PERF_COUNTERS)
> + return -EAGAIN;
> +
> + __set_bit(idx, ev->used_mask);
> + hwc->idx = idx;
> + }
> + ev->event[idx] = event;
> +
> + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> + if (flags & PERF_EF_START)
> + xtensa_pmu_start(event, PERF_EF_RELOAD);
> +
> + perf_event_update_userpage(event);
> + return 0;
> +}
> +
> +static void xtensa_pmu_del(struct perf_event *event, int flags)
> +{
> + struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> +
> + xtensa_pmu_stop(event, PERF_EF_UPDATE);
> + __clear_bit(event->hw.idx, ev->used_mask);
> + perf_event_update_userpage(event);
> +}
> +
> +static void xtensa_pmu_read(struct perf_event *event)
> +{
> + xtensa_perf_event_update(event, &event->hw, event->hw.idx);
> +}
> +
> +static int callchain_trace(struct stackframe *frame, void *data)
> +{
> + struct perf_callchain_entry *entry = data;
> +
> + perf_callchain_store(entry, frame->pc);
> + return 0;
> +}
> +
> +void perf_callchain_kernel(struct perf_callchain_entry *entry,
> + struct pt_regs *regs)
> +{
> + xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
> + callchain_trace, NULL, entry);
> +}
> +
> +void perf_callchain_user(struct perf_callchain_entry *entry,
> + struct pt_regs *regs)
> +{
> + xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
> + callchain_trace, entry);
> +}
> +
> +void perf_event_print_debug(void)
> +{
> + unsigned long flags;
> + unsigned i;
> +
> + local_irq_save(flags);
> + pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
> + get_er(XTENSA_PMU_PMG));
> + for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
> + pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
> + i, get_er(XTENSA_PMU_PM(i)),
> + i, get_er(XTENSA_PMU_PMCTRL(i)),
> + i, get_er(XTENSA_PMU_PMSTAT(i)));
> + local_irq_restore(flags);
> +}
> +
> +static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
> +{
> + irqreturn_t rc = IRQ_NONE;
> + struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
> + unsigned i;
> +
> + for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
> + i < XCHAL_NUM_PERF_COUNTERS;
> + i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
> + uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
> + struct perf_event *event = ev->event[i];
> + struct hw_perf_event *hwc = &event->hw;
> + u64 last_period;
> +
> + if (!(v & XTENSA_PMU_PMSTAT_OVFL))
> + continue;
> +
> + set_er(v, XTENSA_PMU_PMSTAT(i));
> + xtensa_perf_event_update(event, hwc, i);
> + last_period = hwc->last_period;
> + if (xtensa_perf_event_set_period(event, hwc, i)) {
> + struct perf_sample_data data;
> + struct pt_regs *regs = get_irq_regs();
> +
> + perf_sample_data_init(&data, 0, last_period);
> + if (perf_event_overflow(event, &data, regs))
> + xtensa_pmu_stop(event, 0);
> + }
> +
> + rc = IRQ_HANDLED;
> + }
> + return rc;
> +}
> +
> +static struct pmu xtensa_pmu = {
> + .pmu_enable = xtensa_pmu_enable,
> + .pmu_disable = xtensa_pmu_disable,
> + .event_init = xtensa_pmu_event_init,
> + .add = xtensa_pmu_add,
> + .del = xtensa_pmu_del,
> + .start = xtensa_pmu_start,
> + .stop = xtensa_pmu_stop,
> + .read = xtensa_pmu_read,
> +};
> +
> +static void xtensa_pmu_setup(void)
> +{
> + unsigned i;
> +
> + set_er(0, XTENSA_PMU_PMG);
> + for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
> + set_er(0, XTENSA_PMU_PMCTRL(i));
> + set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
> + }
> +}
> +
> +static int xtensa_pmu_notifier(struct notifier_block *self,
> + unsigned long action, void *data)
> +{
> + switch (action & ~CPU_TASKS_FROZEN) {
> + case CPU_STARTING:
> + xtensa_pmu_setup();
> + break;
> +
> + default:
> + break;
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +static int __init xtensa_pmu_init(void)
> +{
> + int ret;
> + int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
> +
> + perf_cpu_notifier(xtensa_pmu_notifier);
> + ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
> + "pmu", NULL);
> + if (ret < 0)
> + return ret;
> +
> + ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
> + if (ret)
> + free_irq(irq, NULL);
> +
> + return ret;
> +}
> +early_initcall(xtensa_pmu_init);
> --
> 1.8.1.4
Commit-ID: 74d4582f430a797564f92fbff0bd3a21945528b7
Gitweb: http://git.kernel.org/tip/74d4582f430a797564f92fbff0bd3a21945528b7
Author: Max Filippov <[email protected]>
AuthorDate: Sat, 18 Jul 2015 11:30:11 +0300
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Thu, 6 Aug 2015 16:45:05 -0300
perf tools xtensa: Add DWARF register names
Signed-off-by: Max Filippov <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: Marc Gauthier <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/arch/{powerpc => xtensa}/Build | 0
tools/perf/arch/{arm => xtensa}/Makefile | 0
tools/perf/arch/{sh => xtensa}/util/Build | 0
tools/perf/arch/xtensa/util/dwarf-regs.c | 25 +++++++++++++++++++++++++
4 files changed, 25 insertions(+)
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/xtensa/Build
similarity index 100%
copy from tools/perf/arch/powerpc/Build
copy to tools/perf/arch/xtensa/Build
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/xtensa/Makefile
similarity index 100%
copy from tools/perf/arch/arm/Makefile
copy to tools/perf/arch/xtensa/Makefile
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/xtensa/util/Build
similarity index 100%
copy from tools/perf/arch/sh/util/Build
copy to tools/perf/arch/xtensa/util/Build
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 0000000..4dba76b
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> > counters. Each counter may be enabled independently and can count any
> > single type of hardware performance events. Event counting may be enabled
> > and disabled globally (per PMM).
> > Each counter has status register with bits indicating if the counter has
> > been overflown and may be programmed to raise profiling IRQ on overflow.
> > This IRQ is used to rewind counters and allow for counting more than 2^32
> > samples for counting events and to report samples for sampling events.
> >
> > For more details see Tensilica Debug User's Guide, chapter 8
> > "Performance monitor module".
>
> Has this gone via PeterZ? I added the tools/ bits in my perf/core
> branch, will go in next pull req,
I was thinking it would go through the xtensa tree. Looks fine at a
quick glance and they can actually test it.
On Fri, Aug 7, 2015 at 12:13 PM, Peter Zijlstra <[email protected]> wrote:
> On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
>> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
>> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
>> > counters. Each counter may be enabled independently and can count any
>> > single type of hardware performance events. Event counting may be enabled
>> > and disabled globally (per PMM).
>> > Each counter has status register with bits indicating if the counter has
>> > been overflown and may be programmed to raise profiling IRQ on overflow.
>> > This IRQ is used to rewind counters and allow for counting more than 2^32
>> > samples for counting events and to report samples for sampling events.
>> >
>> > For more details see Tensilica Debug User's Guide, chapter 8
>> > "Performance monitor module".
>>
>> Has this gone via PeterZ? I added the tools/ bits in my perf/core
>> branch, will go in next pull req,
>
> I was thinking it would go through the xtensa tree. Looks fine at a
> quick glance and they can actually test it.
Right, had the same idea.
--
Thanks.
-- Max
Em Fri, Aug 07, 2015 at 03:03:02PM +0300, Max Filippov escreveu:
> On Fri, Aug 7, 2015 at 12:13 PM, Peter Zijlstra <[email protected]> wrote:
> > On Thu, Aug 06, 2015 at 04:46:32PM -0300, Arnaldo Carvalho de Melo wrote:
> >> Em Sat, Jul 18, 2015 at 11:30:10AM +0300, Max Filippov escreveu:
> >> > Xtensa Performance Monitor Module has up to 8 32 bit wide performance
> >> > counters. Each counter may be enabled independently and can count any
> >> Has this gone via PeterZ? I added the tools/ bits in my perf/core
> >> branch, will go in next pull req,
> > I was thinking it would go through the xtensa tree. Looks fine at a
> > quick glance and they can actually test it.
> Right, had the same idea.
Ok, when you do that, i.e. get the kernel bits in, the tooling part will
be there, as it was already pulled by Ingo, i.e. its in tip.git and thus
should hit 4.3.
- ARnaldo