Add two more software events that are common to many CPUs.
Alignment faults: When a load or store is not aligned properly.
Emulation faults: When an instruction is emulated in software.
Both cause a very significant slowdown (100x or worse), so identifying and
fixing them is very important.
Signed-off-by: Anton Blanchard <[email protected]>
---
Index: linux.trees.git/include/linux/perf_event.h
===================================================================
--- linux.trees.git.orig/include/linux/perf_event.h 2009-10-16 11:17:17.000000000 +1100
+++ linux.trees.git/include/linux/perf_event.h 2009-10-16 11:17:29.000000000 +1100
@@ -102,6 +102,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */
};
Index: linux.trees.git/include/linux/perf_counter.h
===================================================================
--- linux.trees.git.orig/include/linux/perf_counter.h 2009-10-16 11:16:10.000000000 +1100
+++ linux.trees.git/include/linux/perf_counter.h 2009-10-16 11:17:29.000000000 +1100
@@ -106,6 +106,8 @@ enum perf_sw_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */
};
Index: linux.trees.git/kernel/perf_event.c
===================================================================
--- linux.trees.git.orig/kernel/perf_event.c 2009-10-16 11:17:17.000000000 +1100
+++ linux.trees.git/kernel/perf_event.c 2009-10-16 11:17:29.000000000 +1100
@@ -4255,6 +4255,8 @@ static const struct pmu *sw_perf_event_i
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS:
+ case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+ case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
Index: linux.trees.git/tools/perf/design.txt
===================================================================
--- linux.trees.git.orig/tools/perf/design.txt 2009-10-16 11:16:10.000000000 +1100
+++ linux.trees.git/tools/perf/design.txt 2009-10-16 11:17:29.000000000 +1100
@@ -137,6 +137,8 @@ enum sw_event_ids {
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
};
Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
Index: linux.trees.git/tools/perf/util/parse-events.c
===================================================================
--- linux.trees.git.orig/tools/perf/util/parse-events.c 2009-10-16 11:16:10.000000000 +1100
+++ linux.trees.git/tools/perf/util/parse-events.c 2009-10-16 11:17:29.000000000 +1100
@@ -47,6 +47,8 @@ static struct event_symbol event_symbols
{ CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
{ CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
+ { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
+ { CSW(EMULATION_FAULTS), "emulation-faults", "" },
};
#define __PERF_EVENT_FIELD(config, name) \
@@ -75,6 +77,8 @@ static const char *sw_event_names[] = {
"CPU-migrations",
"minor-faults",
"major-faults",
+ "alignment-faults",
+ "emulation-faults",
};
#define MAX_ALIASES 8
perf_event wants a separate event for alignment and emulation faults,
so create another PPC_WARN_* define. This will make it easy to hook in
perf_event at one spot.
We pass in regs which will be required for these events.
Signed-off-by: Anton Blanchard <[email protected]>
---
Index: linux.trees.git/arch/powerpc/include/asm/emulated_ops.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/emulated_ops.h 2009-09-01 15:11:03.000000000 +1000
+++ linux.trees.git/arch/powerpc/include/asm/emulated_ops.h 2009-09-14 09:47:23.000000000 +1000
@@ -57,7 +57,7 @@ extern u32 ppc_warn_emulated;
extern void ppc_warn_emulated_print(const char *type);
-#define PPC_WARN_EMULATED(type) \
+#define __PPC_WARN_EMULATED(type) \
do { \
atomic_inc(&ppc_emulated.type.val); \
if (ppc_warn_emulated) \
@@ -66,8 +66,12 @@ extern void ppc_warn_emulated_print(cons
#else /* !CONFIG_PPC_EMULATED_STATS */
-#define PPC_WARN_EMULATED(type) do { } while (0)
+#define __PPC_WARN_EMULATED(type) do { } while (0)
+#define __PPC_WARN_ALIGNMENT(type) do { } while (0)
#endif /* !CONFIG_PPC_EMULATED_STATS */
+#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type)
+#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type)
+
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
Index: linux.trees.git/arch/powerpc/kernel/align.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/align.c 2009-09-01 15:11:03.000000000 +1000
+++ linux.trees.git/arch/powerpc/kernel/align.c 2009-09-14 09:47:23.000000000 +1000
@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
- PPC_WARN_EMULATED(spe);
+ PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
flags |= SPLT;
nb = 8;
}
- PPC_WARN_EMULATED(vsx);
+ PPC_WARN_ALIGNMENT(vsx, regs);
return emulate_vsx(addr, reg, areg, regs, flags, nb);
}
#endif
@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
* the exception of DCBZ which is handled as a special case here
*/
if (instr == DCBZ) {
- PPC_WARN_EMULATED(dcbz);
+ PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
}
if (unlikely(nb == 0))
@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
* function
*/
if (flags & M) {
- PPC_WARN_EMULATED(multiple);
+ PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
}
@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */
if (nb == 16) {
- PPC_WARN_EMULATED(fp_pair);
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
return emulate_fp_pair(addr, reg, flags);
}
- PPC_WARN_EMULATED(unaligned);
+ PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else
* get it from register values
Index: linux.trees.git/arch/powerpc/kernel/traps.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/traps.c 2009-09-01 15:11:03.000000000 +1000
+++ linux.trees.git/arch/powerpc/kernel/traps.c 2009-09-14 09:47:23.000000000 +1000
@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt
/* Emulate the mfspr rD, PVR. */
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
- PPC_WARN_EMULATED(mfpvr);
+ PPC_WARN_EMULATED(mfpvr, regs);
rd = (instword >> 21) & 0x1f;
regs->gpr[rd] = mfspr(SPRN_PVR);
return 0;
@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt
/* Emulating the dcba insn is just a no-op. */
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
- PPC_WARN_EMULATED(dcba);
+ PPC_WARN_EMULATED(dcba, regs);
return 0;
}
@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt
int shift = (instword >> 21) & 0x1c;
unsigned long msk = 0xf0000000UL >> shift;
- PPC_WARN_EMULATED(mcrxr);
+ PPC_WARN_EMULATED(mcrxr, regs);
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
regs->xer &= ~0xf0000000UL;
return 0;
@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt
/* Emulate load/store string insn. */
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
- PPC_WARN_EMULATED(string);
+ PPC_WARN_EMULATED(string, regs);
return emulate_string_inst(regs, instword);
}
/* Emulate the popcntb (Population Count Bytes) instruction. */
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
- PPC_WARN_EMULATED(popcntb);
+ PPC_WARN_EMULATED(popcntb, regs);
return emulate_popcntb_inst(regs, instword);
}
/* Emulate isel (Integer Select) instruction */
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
- PPC_WARN_EMULATED(isel);
+ PPC_WARN_EMULATED(isel, regs);
return emulate_isel(regs, instword);
}
@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *r
#ifdef CONFIG_MATH_EMULATION
errcode = do_mathemu(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(math);
+ PPC_WARN_EMULATED(math, regs);
switch (errcode) {
case 0:
@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *r
#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
errcode = Soft_emulate_8xx(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(8xx);
+ PPC_WARN_EMULATED(8xx, regs);
switch (errcode) {
case 0:
@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_
flush_altivec_to_thread(current);
- PPC_WARN_EMULATED(altivec);
+ PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
regs->nip += 4; /* skip emulated instruction */
Hook up the alignment-faults and emulation-faults events for powerpc.
Signed-off-by: Anton Blanchard <[email protected]>
---
Index: linux.trees.git/arch/powerpc/include/asm/emulated_ops.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/emulated_ops.h 2009-09-22 13:45:07.000000000 +1000
+++ linux.trees.git/arch/powerpc/include/asm/emulated_ops.h 2009-09-22 13:45:27.000000000 +1000
@@ -19,6 +19,7 @@
#define _ASM_POWERPC_EMULATED_OPS_H
#include <asm/atomic.h>
+#include <linux/perf_event.h>
#ifdef CONFIG_PPC_EMULATED_STATS
@@ -71,7 +72,18 @@ extern void ppc_warn_emulated_print(cons
#endif /* !CONFIG_PPC_EMULATED_STATS */
-#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type)
-#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type)
+#define PPC_WARN_EMULATED(type, regs) \
+ do { \
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
+ 1, 0, regs, 0); \
+ __PPC_WARN_EMULATED(type); \
+ } while (0)
+
+#define PPC_WARN_ALIGNMENT(type, regs) \
+ do { \
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
+ 1, 0, regs, regs->dar); \
+ __PPC_WARN_EMULATED(type); \
+ } while (0)
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
Hi Anton,
On Sun, 18 Oct 2009 22:11:26 +1100 Anton Blanchard <[email protected]> wrote:
>
> @@ -66,8 +66,12 @@ extern void ppc_warn_emulated_print(cons
>
> #else /* !CONFIG_PPC_EMULATED_STATS */
>
> -#define PPC_WARN_EMULATED(type) do { } while (0)
> +#define __PPC_WARN_EMULATED(type) do { } while (0)
> +#define __PPC_WARN_ALIGNMENT(type) do { } while (0)
I think this last line is not needed?
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
On Sun, 2009-10-18 at 22:13 +1100, Anton Blanchard wrote:
> Hook up the alignment-faults and emulation-faults events for powerpc.
>
> Signed-off-by: Anton Blanchard <[email protected]>
> ---
>
> Index: linux.trees.git/arch/powerpc/include/asm/emulated_ops.h
> ===================================================================
> --- linux.trees.git.orig/arch/powerpc/include/asm/emulated_ops.h 2009-09-22 13:45:07.000000000 +1000
> +++ linux.trees.git/arch/powerpc/include/asm/emulated_ops.h 2009-09-22 13:45:27.000000000 +1000
> @@ -19,6 +19,7 @@
> #define _ASM_POWERPC_EMULATED_OPS_H
>
> #include <asm/atomic.h>
> +#include <linux/perf_event.h>
>
>
> #ifdef CONFIG_PPC_EMULATED_STATS
> @@ -71,7 +72,18 @@ extern void ppc_warn_emulated_print(cons
>
> #endif /* !CONFIG_PPC_EMULATED_STATS */
>
> -#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type)
> -#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type)
> +#define PPC_WARN_EMULATED(type, regs) \
> + do { \
> + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
> + 1, 0, regs, 0); \
> + __PPC_WARN_EMULATED(type); \
> + } while (0)
> +
> +#define PPC_WARN_ALIGNMENT(type, regs) \
> + do { \
> + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
> + 1, 0, regs, regs->dar); \
> + __PPC_WARN_EMULATED(type); \
> + } while (0)
Does that work with perfxxx configured off?
cheers
Hi,
> > +#define PPC_WARN_ALIGNMENT(type, regs) \
> > + do { \
> > + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
> > + 1, 0, regs, regs->dar); \
> > + __PPC_WARN_EMULATED(type); \
> > + } while (0)
>
> Does that work with perfxxx configured off?
Yeah, perf_event.h defines an empty version if it's configured off:
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
Anton
* Anton Blanchard <[email protected]> wrote:
> Hook up the alignment-faults and emulation-faults events for powerpc.
>
> Signed-off-by: Anton Blanchard <[email protected]>
nice.
The first patch is for perf events - it would be nice if we could do the
two PowerPC changes via the perf events tree - that would speed up the
upstream availability of this new feature. Ben, what do you think?
Ingo
Hi Stephen,
> I think this last line is not needed?
Right you are!
Anton
--
perf_event wants a separate event for alignment and emulation faults,
so create a separate PPC_WARN_ALIGNMENT define alongside
PPC_WARN_EMULATED. This will make it easy to hook in perf_event at one spot.
We pass in regs, which will be required for these events.
Signed-off-by: Anton Blanchard <[email protected]>
---
Updated to remove unused define, as suggested by Stephen.
Index: linux.trees.git/arch/powerpc/include/asm/emulated_ops.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/emulated_ops.h 2009-10-19 09:46:58.000000000 +1100
+++ linux.trees.git/arch/powerpc/include/asm/emulated_ops.h 2009-10-21 14:57:00.000000000 +1100
@@ -57,7 +57,7 @@ extern u32 ppc_warn_emulated;
extern void ppc_warn_emulated_print(const char *type);
-#define PPC_WARN_EMULATED(type) \
+#define __PPC_WARN_EMULATED(type) \
do { \
atomic_inc(&ppc_emulated.type.val); \
if (ppc_warn_emulated) \
@@ -66,8 +66,11 @@ extern void ppc_warn_emulated_print(cons
#else /* !CONFIG_PPC_EMULATED_STATS */
-#define PPC_WARN_EMULATED(type) do { } while (0)
+#define __PPC_WARN_EMULATED(type) do { } while (0)
#endif /* !CONFIG_PPC_EMULATED_STATS */
+#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type)
+#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type)
+
#endif /* _ASM_POWERPC_EMULATED_OPS_H */
Index: linux.trees.git/arch/powerpc/kernel/align.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/align.c 2009-10-19 09:46:58.000000000 +1100
+++ linux.trees.git/arch/powerpc/kernel/align.c 2009-10-21 10:21:35.000000000 +1100
@@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
- PPC_WARN_EMULATED(spe);
+ PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
@@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs)
flags |= SPLT;
nb = 8;
}
- PPC_WARN_EMULATED(vsx);
+ PPC_WARN_ALIGNMENT(vsx, regs);
return emulate_vsx(addr, reg, areg, regs, flags, nb);
}
#endif
@@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs)
* the exception of DCBZ which is handled as a special case here
*/
if (instr == DCBZ) {
- PPC_WARN_EMULATED(dcbz);
+ PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
}
if (unlikely(nb == 0))
@@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs)
* function
*/
if (flags & M) {
- PPC_WARN_EMULATED(multiple);
+ PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
}
@@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */
if (nb == 16) {
- PPC_WARN_EMULATED(fp_pair);
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
return emulate_fp_pair(addr, reg, flags);
}
- PPC_WARN_EMULATED(unaligned);
+ PPC_WARN_ALIGNMENT(unaligned, regs);
/* If we are loading, get the data from user space, else
* get it from register values
Index: linux.trees.git/arch/powerpc/kernel/traps.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/traps.c 2009-10-19 09:46:58.000000000 +1100
+++ linux.trees.git/arch/powerpc/kernel/traps.c 2009-10-21 10:21:35.000000000 +1100
@@ -759,7 +759,7 @@ static int emulate_instruction(struct pt
/* Emulate the mfspr rD, PVR. */
if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
- PPC_WARN_EMULATED(mfpvr);
+ PPC_WARN_EMULATED(mfpvr, regs);
rd = (instword >> 21) & 0x1f;
regs->gpr[rd] = mfspr(SPRN_PVR);
return 0;
@@ -767,7 +767,7 @@ static int emulate_instruction(struct pt
/* Emulating the dcba insn is just a no-op. */
if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
- PPC_WARN_EMULATED(dcba);
+ PPC_WARN_EMULATED(dcba, regs);
return 0;
}
@@ -776,7 +776,7 @@ static int emulate_instruction(struct pt
int shift = (instword >> 21) & 0x1c;
unsigned long msk = 0xf0000000UL >> shift;
- PPC_WARN_EMULATED(mcrxr);
+ PPC_WARN_EMULATED(mcrxr, regs);
regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
regs->xer &= ~0xf0000000UL;
return 0;
@@ -784,19 +784,19 @@ static int emulate_instruction(struct pt
/* Emulate load/store string insn. */
if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
- PPC_WARN_EMULATED(string);
+ PPC_WARN_EMULATED(string, regs);
return emulate_string_inst(regs, instword);
}
/* Emulate the popcntb (Population Count Bytes) instruction. */
if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
- PPC_WARN_EMULATED(popcntb);
+ PPC_WARN_EMULATED(popcntb, regs);
return emulate_popcntb_inst(regs, instword);
}
/* Emulate isel (Integer Select) instruction */
if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
- PPC_WARN_EMULATED(isel);
+ PPC_WARN_EMULATED(isel, regs);
return emulate_isel(regs, instword);
}
@@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *r
#ifdef CONFIG_MATH_EMULATION
errcode = do_mathemu(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(math);
+ PPC_WARN_EMULATED(math, regs);
switch (errcode) {
case 0:
@@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *r
#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
errcode = Soft_emulate_8xx(regs);
if (errcode >= 0)
- PPC_WARN_EMULATED(8xx);
+ PPC_WARN_EMULATED(8xx, regs);
switch (errcode) {
case 0:
@@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_
flush_altivec_to_thread(current);
- PPC_WARN_EMULATED(altivec);
+ PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
regs->nip += 4; /* skip emulated instruction */