From: Andi Kleen <[email protected]>
The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.
This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.
Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)
This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre Sandy Bridge/Silvermont
CPUs)
I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.
I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on to of this patch.
Cc: [email protected]
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/include/asm/perf_event.h | 8 +++
arch/x86/kernel/cpu/perf_event.h | 18 +++++--
arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++++++------------------------
3 files changed, 39 insertions(+), 75 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
ARCH_PERFMON_EVENTSEL_EDGE | \
ARCH_PERFMON_EVENTSEL_INV | \
ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS \
+ (ARCH_PERFMON_EVENTSEL_EDGE | \
+ ARCH_PERFMON_EVENTSEL_INV | \
+ ARCH_PERFMON_EVENTSEL_CMASK | \
+ ARCH_PERFMON_EVENTSEL_ANY | \
+ ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \
+ HSW_IN_TX | \
+ HSW_IN_TX_CHECKPOINTED)
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..70273e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
#define INTEL_PLD_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
#define INTEL_PST_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
- __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \
+ EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+ __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..0e22ce6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
};
struct event_constraint intel_slm_pebs_event_constraints[] = {
- INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
- INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
- INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
- INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
- INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
- INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
- INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
- INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0x1),
EVENT_CONSTRAINT_END
};
@@ -624,68 +606,34 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
- INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
- INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
EVENT_CONSTRAINT_END
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
- INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
- INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
- INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
- INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
- INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
- /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
- /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
- INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
- /* MEM_UOPS_RETIRED.SPLIT_STORES */
- INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
- INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
- INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
- INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
- /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
- INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
- INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
- INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
- /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
- INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
- INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
-
+ INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
+ /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* Allow all events as PEBS with no flags */
+ /* We allow DATALA for all PEBS events, will be 0 if not supported */
+ INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
EVENT_CONSTRAINT_END
};
--
1.9.3
From: Andi Kleen <[email protected]>
Haswell supports reporting the data address for a range
of PEBS events, including:
UOPS_RETIRED.ALL
MEM_UOPS_RETIRED.STLB_MISS_LOADS
MEM_UOPS_RETIRED.STLB_MISS_STORES
MEM_UOPS_RETIRED.LOCK_LOADS
MEM_UOPS_RETIRED.SPLIT_LOADS
MEM_UOPS_RETIRED.SPLIT_STORES
MEM_UOPS_RETIRED.ALL_LOADS
MEM_UOPS_RETIRED.ALL_STORES
MEM_LOAD_UOPS_RETIRED.L1_HIT
MEM_LOAD_UOPS_RETIRED.L2_HIT
MEM_LOAD_UOPS_RETIRED.L3_HIT
MEM_LOAD_UOPS_RETIRED.L1_MISS
MEM_LOAD_UOPS_RETIRED.L2_MISS
MEM_LOAD_UOPS_RETIRED.L3_MISS
MEM_LOAD_UOPS_RETIRED.HIT_LFB
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE
MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
This facility was already enabled earlier with the original Haswell
perf changes.
However these addresses were always reports as stores by perf, which is wrong,
as they could be loads too. The hardware does not distinguish loads and stores
for these instructions, so there's no (cheap) way for the profiler
to find out.
Change the type to PERF_MEM_OP_NA instead.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0e22ce6..c5d83ab 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -114,7 +114,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
dse.val = 0;
- dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_op = PERF_MEM_OP_NA;
dse.mem_lvl = PERF_MEM_LVL_NA;
/*
--
1.9.3
On Tue, Jul 8, 2014 at 12:37 AM, Andi Kleen <[email protected]> wrote:
> From: Andi Kleen <[email protected]>
>
> The basic idea is that it does not make sense to list all PEBS
> events individually. The list is very long, sometimes outdated
> and the hardware doesn't need it. If an event does not support
> PEBS it will just not count, there is no security issue.
>
> This vastly simplifies the PEBS event selection. It also
> speeds up the scheduling because the scheduler doesn't
> have to walk as many constraints.
>
> Bugs fixed:
> - We do not allow setting forbidden flags with PEBS anymore
> (SDM 18.9.4), except for the special cycle event.
> This is done using a new constraint macro that also
> matches on the event flags.
> - We now allow DataLA on all Haswell events, not just
> a small subset. In general all PEBS events that tag memory
> accesses support DataLA on Haswell. Otherwise the reported
> address is just zero. This allows address profiling
> on vastly more events.
> - We did not allow all PEBS events on Haswell:
> We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
> MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)
>
> This includes the changes proposed by Stephane earlier and obsoletes
> his patchkit (except for some changes on pre Sandy Bridge/Silvermont
> CPUs)
>
> I only did Sandy Bridge and Silvermont and later so far, mostly because these
> are the parts I could directly confirm the hardware behavior with hardware
> architects. Also I do not believe the older CPUs have any
> missing events in their PEBS list, so there's no pressing
> need to change them.
>
> I did not implement the flag proposed by Peter to allow
> setting forbidden flags. If really needed this could
> be implemented on to of this patch.
>
> Cc: [email protected]
> v2: Fix broken store events on SNB/IVB (Stephane Eranian)
> Update description.
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> arch/x86/include/asm/perf_event.h | 8 +++
> arch/x86/kernel/cpu/perf_event.h | 18 +++++--
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++++++------------------------
> 3 files changed, 39 insertions(+), 75 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8249df4..8dfc9fd 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -51,6 +51,14 @@
> ARCH_PERFMON_EVENTSEL_EDGE | \
> ARCH_PERFMON_EVENTSEL_INV | \
> ARCH_PERFMON_EVENTSEL_CMASK)
> +#define X86_ALL_EVENT_FLAGS \
> + (ARCH_PERFMON_EVENTSEL_EDGE | \
> + ARCH_PERFMON_EVENTSEL_INV | \
> + ARCH_PERFMON_EVENTSEL_CMASK | \
> + ARCH_PERFMON_EVENTSEL_ANY | \
> + ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \
> + HSW_IN_TX | \
> + HSW_IN_TX_CHECKPOINTED)
> #define AMD64_RAW_EVENT_MASK \
> (X86_RAW_EVENT_MASK | \
> AMD64_EVENTSEL_EVENT)
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index a22a34e9..70273e8 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -262,16 +262,24 @@ struct cpu_hw_events {
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> #define INTEL_PLD_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
>
> #define INTEL_PST_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
>
> -/* DataLA version of store sampling without extra enable bit. */
> -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
> - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +/* Event constraint, but match on all event flags too. */
> +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
> +
> +/* Check only flags, but allow all event/umask */
> +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \
> + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
> +
The first argument is not flags but rather 'code'. This is confusing
otherwise.
> +/* Same as above, but enable DataLA */
> +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
> + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
> HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
>
> /*
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..0e22ce6 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
> };
>
> struct event_constraint intel_slm_pebs_event_constraints[] = {
> - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
> - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
> - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
> - INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
> - INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
> - INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
> - INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
> - INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0x1),
^^^^^ that does not work!
As I said before, you need to pass 0x0 here to catch all non-pebs events.
> EVENT_CONSTRAINT_END
> };
>
> @@ -624,68 +606,34 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
>
> struct event_constraint intel_snb_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
> INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
> - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
> - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
Ditto here.
> EVENT_CONSTRAINT_END
> };
>
> struct event_constraint intel_ivb_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
> INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
> - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + INTEL_ALL_EVENT_CONSTRAINT(0xffff, 0xf),
Ditto here.
> EVENT_CONSTRAINT_END
> };
>
> struct event_constraint intel_hsw_pebs_event_constraints[] = {
> INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> - INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> - INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> - INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
> - INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> - INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> - INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
> - INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
> - /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
> - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
> - /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
> - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
> - /* MEM_UOPS_RETIRED.SPLIT_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
> - INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
> - INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
> - /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
> - INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
> - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
> - INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
> - /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
> - INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
> - /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
> - INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
> - INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
> - INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
> -
> + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
> + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
> + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
> + /* Allow all events as PEBS with no flags */
> + /* We allow DATALA for all PEBS events, will be 0 if not supported */
> + INTEL_ALL_EVENT_CONSTRAINT_DATALA(0, 0xf),
I think you allow for all events, not just PEBS events.
> EVENT_CONSTRAINT_END
> };
>
> --
> 1.9.3
>