2013-06-26 07:42:09

by Sukadev Bhattiprolu

[permalink] [raw]
Subject: [RFC][PATCH 1/3][v2] perf: Move PERF_MEM*SHIFT macros up the block


From: Sukadev Bhattiprolu <[email protected]>
Date: Fri, 21 Jun 2013 14:03:01 -0700
Subject: [RFC][PATCH 1/3][v2] perf: Move PERF_MEM*SHIFT macros up the block

Values for the PERF_MEM_*SHIFT macros depend on the block before
them, so it would be more readable if they are in the beginning
of the block.

Signed-off-by: Sukadev Bhattiprolu <[email protected]>
---
include/uapi/linux/perf_event.h | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index fb104e5..52697a3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -632,14 +632,15 @@ union perf_mem_data_src {
};

/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_SHIFT 0
#define PERF_MEM_OP_NA 0x01 /* not available */
#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
#define PERF_MEM_OP_STORE 0x04 /* store instruction */
#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0

/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_SHIFT 5
#define PERF_MEM_LVL_NA 0x01 /* not available */
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
@@ -654,22 +655,22 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5

/* snoop mode */
+#define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOP_NA 0x01 /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19

/* locked instruction */
+#define PERF_MEM_LOCK_SHIFT 24
#define PERF_MEM_LOCK_NA 0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24

/* TLB access */
+#define PERF_MEM_TLB_SHIFT 26
#define PERF_MEM_TLB_NA 0x01 /* not available */
#define PERF_MEM_TLB_HIT 0x02 /* hit level */
#define PERF_MEM_TLB_MISS 0x04 /* miss level */
@@ -677,7 +678,6 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_L2 0x10 /* L2 */
#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26

#define PERF_MEM_S(a, s) \
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--
1.7.1


2013-06-26 07:43:27

by Sukadev Bhattiprolu

[permalink] [raw]
Subject: [RFC][PATCH 3/3] perf/Power7: Export DCACHE_SRC field to userspace


From: Sukadev Bhattiprolu <[email protected]>
Date: Tue, 25 Jun 2013 17:12:02 -0700
Subject: [RFC][PATCH 3/3] perf/Power7: Export DCACHE_SRC field to userspace

On Power7, the DCACHE_SRC field (bits 9..12) in the MMCRA register
identifies the source from which a data-cache miss for a marked instruction
was satisfied.

Map this source to the architecture-neutral memory hierarchy levels
and add to the sample record so the source information is available to
user space.

Arch-neutral levels Power7 levels
-----------------------------------------------------------------------
local LVL_L2 local (same core) L2 (FROM_L2)
local LVL_L3 local (same core) L3 (FROM_L3)

1-hop XLVL_REM_L2_CCE1* different core on same chip (FROM_L2.1)
1-hop XLVL_REM_L3_CCE1* different core on same chip (FROM_L3.1)

2-hops LVL_REM_CCE2 remote (different chip, same node) (FROM_RL2L3)
3-hops XLVL_REM_CCE3* distant (different node) (FROM_DL2L3)

1-hop LVL_REM_RAM1 unused
2-hops LVL_REM_RAM2 remote (different chip, same node) (FROM_RMEM)

3-hops XLVL_REM_RAM3* distant (different node) (FROM_DMEM)

As shown above, Power7 supports one extra level in the cache hierarchy (i.e.,
a total of 3 hops). To maintain consistency in terminology (i.e., 2-hops = remote,
3-hops = distant), we propose leaving REM_RAM1 unused on Power7 and adding
two new levels, REM_CCE3 and REM_RAM3.

Further, in the REM_CCE1 case, Power7 can also identify if the data came from
the L2 or L3 cache of another core on the same chip. To describe this add the
levels:

PERF_MEM_XLVL_REM_L2_CCE1
PERF_MEM_XLVL_REM_L3_CCE1

Finally, in the REM_CCE1 and REM_CCE2 cases, Power7 also indicates whether
the entry found in the remote cache was modified (dirty). So we add a new
state

PERF_MEM_XLVL_CCE_DIRTY

Testing:

memarray is a simple test case that creates a large 2D array and accesses
elements in the array in strides of varying length. Using the perf event
PM_MRK_LD_MISS_L1_CYC (r4003e) with memarray, we get samples like:

4989704543010 0x1470 [0x38]: PERF_RECORD_SAMPLE(IP, 1): 10816/10816:
0xc0000000001ef514 period: 1 addr: 0xc0000001e64538d8
. data_src: 0x400
... thread: memarray:10816
...... dso: [kernel.kallsyms]

4990265034542 0x9a00 [0x38]: PERF_RECORD_SAMPLE(IP, 3): 10816/10816:
0x54696c period: 2996 addr: 0x8000000064a3001a
. data_src: 0x800
... thread: memarray:10816
...... dso: [hypervisor]

4990505534586 0xd2e0 [0x38]: PERF_RECORD_SAMPLE(IP, 1): 10816/10816:
0xc00000000019bb28 period: 2979 addr: 0xc0000001fffaba40
. data_src: 0x200000000000
... thread: memarray:10816
...... dso: [kernel.kallsyms]

where, the 'data_src' values indicate:

0x400 PERF_MEM_LVL_L2, FROM_L2
0x800 PERF_MEM_LVL_L3, FROM_L3
0x200000000000 PERF_MEM_XLVL_REM_L2_CCE1 FROM_L2.1_SHR

Signed-off-by: Sukadev Bhattiprolu <[email protected]>
---

Changelog[v2]:
[Stephane Eranian] Define new levels rather than ORing the L2 and L3
with REM_CCE1 and REM_CCE2.
[Stephane Eranian] allocate a bit PERF_MEM_XLVL_NA for architectures
that don't use the ->mem_xlvl field.
Insert the TLB patch ahead so the new TLB bits are contiguous with
existing TLB bits.

arch/powerpc/perf/power7-pmu.c | 44 +++++++++++++++++++++++++++++++++++++++
include/uapi/linux/perf_event.h | 13 ++++++++++-
2 files changed, 56 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index c1cac96..8cb4cbc 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -209,6 +209,10 @@ static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt;
}

+#define POWER7_MMCRA_DCACHE_MISS (0x1LL << 55)
+#define POWER7_MMCRA_DCACHE_SRC_SHIFT 51
+#define POWER7_MMCRA_DCACHE_SRC_MASK (0xFLL << POWER7_MMCRA_DCACHE_SRC_SHIFT)
+
#define POWER7_MMCRA_MDTLB_MISS (0x1LL << 50)
#define POWER7_MMCRA_MDTLB_SRC_SHIFT 46
#define POWER7_MMCRA_MDTLB_SRC_MASK (0xFLL << POWER7_MMCRA_MDTLB_SRC_SHIFT)
@@ -231,6 +235,7 @@ static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
*/
#define P(a, b) PERF_MEM_S(a, b)
#define TD(a, b) (P(TLB, CCE_DIRTY) | P(a, b))
+#define XD(a, b) (P(XLVL, CCE_DIRTY) | P(a, b))

static u64 mdtlb_src_map[] = {
P(TLB, L2), /* 00: FROM_L2 */
@@ -258,6 +263,37 @@ static u64 mdtlb_src_map[] = {
P(TLB, NA), /* 15: Reserved */
};

+/*
+ * Similar to mdtlb_src_map[] table above, use dcache_src_map[] to map
+ * the Power7 DCACHE_SRC field (bits 9..12) in MMCRA register to the
+ * Linux memory hierarchy levels.
+ */
+static u64 dcache_src_map[] = {
+ P(LVL, L2), /* 00: FROM_L2 */
+ P(LVL, L3), /* 01: FROM_L3 */
+
+ P(LVL, NA), /* 02: Reserved */
+ P(LVL, NA), /* 03: Reserved */
+
+ P(XLVL, REM_L2_CCE1), /* 04: FROM_L2.1_SHR */
+ XD(XLVL, REM_L2_CCE1), /* 05: FROM_L2.1_MOD */
+
+ P(XLVL, REM_L3_CCE1), /* 06: FROM_L3.1_SHR */
+ XD(XLVL, REM_L3_CCE1), /* 07: FROM_L3.1_MOD */
+
+ P(LVL, REM_CCE2), /* 08: FROM_RL2L3_SHR */
+ XD(LVL, REM_CCE2), /* 09: FROM_RL2L3_MOD */
+
+ P(XLVL, REM_CCE3), /* 10: FROM_DL2L3_SHR */
+ XD(XLVL, REM_CCE3), /* 11: FROM_DL2L3_MOD */
+
+ P(LVL, LOC_RAM), /* 12: FROM_LMEM */
+ P(LVL, REM_RAM2), /* 13: FROM_RMEM */
+ P(XLVL, REM_RAM3), /* 14: FROM_DMEM */
+
+ P(LVL, NA), /* 15: Reserved */
+};
+
static void power7_get_mem_data_src(union perf_mem_data_src *dsrc,
struct pt_regs *regs)
{
@@ -270,6 +306,14 @@ static void power7_get_mem_data_src(union perf_mem_data_src *dsrc,

dsrc->val |= mdtlb_src_map[idx];
}
+
+ if (mmcra & POWER7_MMCRA_DCACHE_MISS) {
+ idx = mmcra & POWER7_MMCRA_DCACHE_SRC_MASK;
+ idx >>= POWER7_MMCRA_DCACHE_SRC_SHIFT;
+
+ dsrc->val |= dcache_src_map[idx];
+ }
+
}

/*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 815ee12..149b33d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -627,7 +627,8 @@ union perf_mem_data_src {
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
mem_dtlb:17, /* tlb access */
- mem_rsvd:21;
+ mem_xlvl:6, /* memory hierarchy levels contd */
+ mem_rsvd:15;
};
};

@@ -655,6 +656,7 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+/* memory hierarchy levels continued/extended below */

/* snoop mode */
#define PERF_MEM_SNOOP_SHIFT 19
@@ -689,6 +691,15 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_REM_RAM3 0x8000 /* Remote DRAM (3 hops) */
#define PERF_MEM_TLB_CCE_DIRTY 0x10000 /* Remote cache entry hit, but dirty */

+/* Extended levels i.e. continuation of PERF_MEM_LVL* values above. */
+#define PERF_MEM_XLVL_SHIFT 43
+#define PERF_MEM_XLVL_NA 0x01 /* not available */
+#define PERF_MEM_XLVL_CCE_DIRTY 0x02 /* cache entry hit, was dirty */
+#define PERF_MEM_XLVL_REM_L2_CCE1 0x04 /* Remote L2-cache (1 hop) */
+#define PERF_MEM_XLVL_REM_L3_CCE1 0x08 /* Remote L3-cache (1 hop) */
+#define PERF_MEM_XLVL_REM_CCE3 0x10 /* Remote cache (3 hops) */
+#define PERF_MEM_XLVL_REM_RAM3 0x20 /* Remote DRAM (3 hops) */
+
#define PERF_MEM_S(a, s) \
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

--
1.7.1

2013-06-26 07:53:21

by Sukadev Bhattiprolu

[permalink] [raw]
Subject: [RFC][PATCH 2/3][v2] perf/Power7: Export MDTLB_SRC fields to userspace


From: Sukadev Bhattiprolu <[email protected]>
Date: Tue, 25 Jun 2013 15:50:18 -0700
Subject: [RFC][PATCH 2/3][v2] perf/Power7: Export MDTLB_SRC fields to userspace

Power7 saves the "perf-event vector" information in the mmcra register.
Included in this event vector is a "marked-data-TLB source", MDTLB_SRC,
field which identifies where in the memory-hierarchy the data for a TLB
miss was eventually found.

Use the 'struct perf_mem_data_src' to export the MDTLB_SRC field to
user space.

The mapping between the Power7 hierarchy levels and the arch-neutral levels
is, unfortunately, not trivial. Some existing arch-neutral levels are unused
in Power (eg: TLB_L1, TLB_WK, TLB_OS). But, Power7 provides several other
levels for the MDTLB_SRC, so this patch proposes adding new arch-neutral
levels.

Arch-neutral levels Power7 levels
-----------------------------------------------------------------------
local TLB_L2 local (same core) L2 (FROM_L2)
local TLB_L3 local (same core) L3 (FROM_L3)

1-hop TLB_REM_L2_CCE1* different core on same chip (FROM_L2.1)
1-hop TLB_REM_L3_CCE1* different core on same chip (FROM_L3.1)

2-hops TLB_REM_CCE2* remote (different chip, same node) (FROM_RL2L3)
3-hops TLB_REM_CCE3* distant (different node) (FROM_DL2L3)

1-hop TLB_REM_RAM1* unused
2-hops TLB_REM_RAM2* remote (different chip, same node) (FROM_RMEM)
3-hops TLB_REM_RAM3* distant (different node) (FROM_DMEM)

* proposed new levels.

As shown above, Power7 supports one extra level in the cache hierarchy (i.e.,
a total of 3 hops). To maintain consistency in terminology (i.e., 2-hops = remote,
3-hops = distant), we propose leaving REM_RAM1 unused on Power7 and adding
another level, REM_RAM3.

Further, in the above REM_CCE1 case, Power7 can also identify if the data came
from the L2 or L3 cache of another core on the same chip. To describe this
add the levels:

PERF_MEM_TLB_REM_L2_CCE1
PERF_MEM_TLB_REM_L3_CCE1

Finally, in the REM_CCE1 and REM_CCE2 cases, Power7 also indicates whether
the entry found in the remote cache was modified (dirty). So we add a new
state

PERF_MEM_TLB_CCE_DIRTY

Signed-off-by: Sukadev Bhattiprolu <[email protected]>
---

Changelog[v2]:
- Address the MDTLB_SRC field before addressing the DCACHE_SRC field
since we can then keep the new ->mem_dtlb bits contiguous.
(DCACHE_SRC needs a new field, ->mem_xlvl, in struct perf_mem_data_src,
which will be added in the next patch.)

arch/powerpc/include/asm/perf_event_server.h | 2 +
arch/powerpc/perf/core-book3s.c | 4 ++
arch/powerpc/perf/power7-pmu.c | 64 ++++++++++++++++++++++++++
include/uapi/linux/perf_event.h | 14 +++++-
4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index f265049..30488f5 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -37,6 +37,8 @@ struct power_pmu {
void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
+ void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
+ struct pt_regs *regs);
u32 flags;
const struct attribute_group **attr_groups;
int n_generic;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29c6482..e0e0848 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1627,6 +1627,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
data.br_stack = &cpuhw->bhrb_stack;
}

+ if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+ ppmu->get_mem_data_src)
+ ppmu->get_mem_data_src(&data.data_src, regs);
+
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
}
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 3c475d6..c1cac96 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -209,6 +209,69 @@ static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt;
}

+#define POWER7_MMCRA_MDTLB_MISS (0x1LL << 50)
+#define POWER7_MMCRA_MDTLB_SRC_SHIFT 46
+#define POWER7_MMCRA_MDTLB_SRC_MASK (0xFLL << POWER7_MMCRA_MDTLB_SRC_SHIFT)
+
+/*
+ * Map MDTLB_SRC fields to the Linux memory hierarchy levels.
+ *
+ * Bits 14..17 in the MMCRA indicate the source of a marked-data-TLB miss,
+ * with each of the 16 possible values referring to a specific source. Eg: if
+ * the 4-bits have the value 1 (0b0001), the mdtlb entry was found in the
+ * local L3 cache.
+ *
+ * We use the table, mdtlb_src_map, to map the value in this field, to
+ * PERF_MEM_TLB_L3, the arch-neutral representation of TLB L3 cache.
+ *
+ * Architecture neutral to Power7 hierarchy levels:
+ * 1-hop = different core on same chip (L2.1 or L3.1)
+ * 2-hops = remote (different chip on same node, RL2L3, RMEM)
+ * 3-hops = distant (different node, DL2L3, DMEM)
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define TD(a, b) (P(TLB, CCE_DIRTY) | P(a, b))
+
+static u64 mdtlb_src_map[] = {
+ P(TLB, L2), /* 00: FROM_L2 */
+ P(TLB, L3), /* 01: FROM_L3 */
+
+ P(TLB, NA), /* 02: Reserved */
+ P(TLB, NA), /* 03: Reserved */
+
+ P(TLB, REM_L2_CCE1), /* 04: FROM_L2.1_SHR */
+ TD(TLB, REM_L2_CCE1), /* 05: FROM_L2.1_MOD */
+
+ P(TLB, REM_L3_CCE1), /* 06: FROM_L3.1_SHR */
+ TD(TLB, REM_L3_CCE1), /* 07: FROM_L3.1_MOD */
+
+ P(TLB, REM_CCE2), /* 08: FROM_RL2L3_SHR */
+ TD(TLB, REM_CCE2), /* 09: FROM_RL2L3_MOD */
+
+ P(TLB, REM_CCE3), /* 10: FROM_DL2L3_SHR */
+ TD(TLB, REM_CCE3), /* 11: FROM_DL2L3_MOD */
+
+ P(TLB, LOC_RAM), /* 12: FROM_LMEM */
+ P(TLB, REM_RAM2), /* 13: FROM_RMEM */
+ P(TLB, REM_RAM3), /* 14: FROM_DMEM */
+
+ P(TLB, NA), /* 15: Reserved */
+};
+
+static void power7_get_mem_data_src(union perf_mem_data_src *dsrc,
+ struct pt_regs *regs)
+{
+ u64 idx;
+ u64 mmcra = regs->dsisr;
+
+ if (mmcra & POWER7_MMCRA_MDTLB_MISS) {
+ idx = mmcra & POWER7_MMCRA_MDTLB_SRC_MASK;
+ idx >>= POWER7_MMCRA_MDTLB_SRC_SHIFT;
+
+ dsrc->val |= mdtlb_src_map[idx];
+ }
+}
+
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
@@ -447,6 +510,7 @@ static struct power_pmu power7_pmu = {
.compute_mmcr = power7_compute_mmcr,
.get_constraint = power7_get_constraint,
.get_alternatives = power7_get_alternatives,
+ .get_mem_data_src = power7_get_mem_data_src,
.disable_pmc = power7_disable_pmc,
.flags = PPMU_ALT_SIPR,
.attr_groups = power7_pmu_attr_groups,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 52697a3..815ee12 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -626,8 +626,8 @@ union perf_mem_data_src {
mem_lvl:14, /* memory hierarchy level */
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_rsvd:31;
+ mem_dtlb:17, /* tlb access */
+ mem_rsvd:21;
};
};

@@ -678,6 +678,16 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_L2 0x10 /* L2 */
#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_L3 0x80
+#define PERF_MEM_TLB_REM_L2_CCE1 0x100 /* Remote L2 cache (1 hop) */
+#define PERF_MEM_TLB_REM_L3_CCE1 0x200 /* Remote L3 cache (1 hop) */
+#define PERF_MEM_TLB_REM_CCE2 0x400 /* Remote cache (2 hops) */
+#define PERF_MEM_TLB_REM_CCE3 0x800 /* Remote cache (3 hops) */
+#define PERF_MEM_TLB_LOC_RAM 0x1000 /* Local DRAM */
+#define PERF_MEM_TLB_REM_RAM1 0x2000 /* Remote DRAM (1 hop) */
+#define PERF_MEM_TLB_REM_RAM2 0x4000 /* Remote DRAM (2 hops) */
+#define PERF_MEM_TLB_REM_RAM3 0x8000 /* Remote DRAM (3 hops) */
+#define PERF_MEM_TLB_CCE_DIRTY 0x10000 /* Remote cache entry hit, but dirty */

#define PERF_MEM_S(a, s) \
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
--
1.7.1

2013-07-02 21:26:59

by Stephane Eranian

[permalink] [raw]
Subject: Re: [RFC][PATCH 1/3][v2] perf: Move PERF_MEM*SHIFT macros up the block

On Wed, Jun 26, 2013 at 9:41 AM, Sukadev Bhattiprolu
<[email protected]> wrote:
>
> From: Sukadev Bhattiprolu <[email protected]>
> Date: Fri, 21 Jun 2013 14:03:01 -0700
> Subject: [RFC][PATCH 1/3][v2] perf: Move PERF_MEM*SHIFT macros up the block
>
> Values for the PERF_MEM_*SHIFT macros depend on the block before
> them, so it would be more readable if they are in the beginning
> of the block.
>
> Signed-off-by: Sukadev Bhattiprolu <[email protected]>

Reviewed-by: Stephane Eranian <[email protected]>

> ---
> include/uapi/linux/perf_event.h | 10 +++++-----
> 1 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index fb104e5..52697a3 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -632,14 +632,15 @@ union perf_mem_data_src {
> };
>
> /* type of opcode (load/store/prefetch,code) */
> +#define PERF_MEM_OP_SHIFT 0
> #define PERF_MEM_OP_NA 0x01 /* not available */
> #define PERF_MEM_OP_LOAD 0x02 /* load instruction */
> #define PERF_MEM_OP_STORE 0x04 /* store instruction */
> #define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
> #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
> -#define PERF_MEM_OP_SHIFT 0
>
> /* memory hierarchy (memory level, hit or miss) */
> +#define PERF_MEM_LVL_SHIFT 5
> #define PERF_MEM_LVL_NA 0x01 /* not available */
> #define PERF_MEM_LVL_HIT 0x02 /* hit level */
> #define PERF_MEM_LVL_MISS 0x04 /* miss level */
> @@ -654,22 +655,22 @@ union perf_mem_data_src {
> #define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
> #define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
> #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
> -#define PERF_MEM_LVL_SHIFT 5
>
> /* snoop mode */
> +#define PERF_MEM_SNOOP_SHIFT 19
> #define PERF_MEM_SNOOP_NA 0x01 /* not available */
> #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
> #define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
> #define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
> #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
> -#define PERF_MEM_SNOOP_SHIFT 19
>
> /* locked instruction */
> +#define PERF_MEM_LOCK_SHIFT 24
> #define PERF_MEM_LOCK_NA 0x01 /* not available */
> #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
> -#define PERF_MEM_LOCK_SHIFT 24
>
> /* TLB access */
> +#define PERF_MEM_TLB_SHIFT 26
> #define PERF_MEM_TLB_NA 0x01 /* not available */
> #define PERF_MEM_TLB_HIT 0x02 /* hit level */
> #define PERF_MEM_TLB_MISS 0x04 /* miss level */
> @@ -677,7 +678,6 @@ union perf_mem_data_src {
> #define PERF_MEM_TLB_L2 0x10 /* L2 */
> #define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
> #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
> -#define PERF_MEM_TLB_SHIFT 26
>
> #define PERF_MEM_S(a, s) \
> (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
> --
> 1.7.1
>