2013-04-16 15:54:37

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 0/5] powerpc, perf: BHRB based branch stack enablement on POWER8

Branch History Rolling Buffer (BHRB) is a new PMU feaure in IBM
POWER8 processor which records the branch instructions inside the execution
pipeline. This patchset enables the basic functionality of the feature through
generic perf branch stack sampling framework.

Sample output
-------------
$./perf record -b top
$./perf report

Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol
# ........ ....... .................... ...................................... .................... ...................................
#

7.82% top libc-2.11.2.so [k] _IO_vfscanf libc-2.11.2.so [k] _IO_vfscanf
6.17% top libc-2.11.2.so [k] _IO_vfscanf [unknown] [k] 00000000
2.37% top [unknown] [k] 0xf7aafb30 [unknown] [k] 00000000
1.80% top [unknown] [k] 0x0fe07978 libc-2.11.2.so [k] _IO_vfscanf
1.60% top libc-2.11.2.so [k] _IO_vfscanf [kernel.kallsyms] [k] .do_task_stat
1.20% top [kernel.kallsyms] [k] .do_task_stat [kernel.kallsyms] [k] .do_task_stat
1.02% top libc-2.11.2.so [k] vfprintf libc-2.11.2.so [k] vfprintf
0.92% top top [k] _init [unknown] [k] 0x0fe037f4

Changes in V2
--------------
- Added copyright messages to the newly created files
- Modified couple of commit messages

Anshuman Khandual (5):
powerpc, perf: Add new BHRB related instructions on POWER8
powerpc, perf: Add basic assembly code to read BHRB entries on POWER8
powerpc, perf: Add new BHRB related generic functions, data and flags
powerpc, perf: Define BHRB generic functions, data and flags for POWER8
powerpc, perf: Enable branch stack sampling framework support with BHRB

arch/powerpc/include/asm/perf_event_server.h | 6 ++
arch/powerpc/include/asm/ppc-opcode.h | 7 ++
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/bhrb.S | 44 +++++++++++++
arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++-
arch/powerpc/perf/perf_event_bhrb.c | 85 ++++++++++++++++++++++++
arch/powerpc/perf/power8-pmu.c | 57 ++++++++++++++++-
7 files changed, 292 insertions(+), 5 deletions(-)
create mode 100644 arch/powerpc/perf/bhrb.S
create mode 100644 arch/powerpc/perf/perf_event_bhrb.c

--
1.7.11.7


2013-04-16 15:54:47

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB

This patch provides basic enablement for perf branch stack sampling framework
on POWER8 processor with a new PMU feature called BHRB (Branch History Rolling
Buffer).

Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++++++++++--
arch/powerpc/perf/perf_event_bhrb.c | 85 ++++++++++++++++++++++++++++++++
2 files changed, 178 insertions(+), 3 deletions(-)
create mode 100644 arch/powerpc/perf/perf_event_bhrb.c

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4ac6e64..f4d1347 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -19,6 +19,8 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>

+#define BHRB_MAX_ENTRIES 32
+
struct cpu_hw_events {
int n_events;
int n_percpu;
@@ -38,11 +40,21 @@ struct cpu_hw_events {

unsigned int group_flag;
int n_txn_start;
+
+ /* BHRB bits */
+ u64 bhrb_filter; /* BHRB HW branch filter */
+ int bhrb_users;
+ void *bhrb_context;
+ struct perf_branch_stack bhrb_stack;
+ struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
};
+
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

struct power_pmu *ppmu;

+#include "perf_event_bhrb.c"
+
/*
* Normally, to ignore kernel events we set the FCS (freeze counters
* in supervisor mode) bit in MMCR0, but if the kernel runs with the
@@ -858,6 +870,9 @@ static void power_pmu_enable(struct pmu *pmu)
}

out:
+ if (cpuhw->bhrb_users)
+ ppmu->config_bhrb(cpuhw->bhrb_filter);
+
local_irq_restore(flags);
}

@@ -888,6 +903,47 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}

+/* Reset all possible BHRB entries */
+static void power_pmu_bhrb_reset(void)
+{
+ asm volatile(PPC_CLRBHRB);
+}
+
+void power_pmu_bhrb_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ /* Clear BHRB if we changed task context to avoid data leaks */
+ if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
+ power_pmu_bhrb_reset();
+ cpuhw->bhrb_context = event->ctx;
+ }
+ cpuhw->bhrb_users++;
+}
+
+void power_pmu_bhrb_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ if (!ppmu->bhrb_nr)
+ return;
+
+ cpuhw->bhrb_users--;
+ WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+
+ if (!cpuhw->disabled && !cpuhw->bhrb_users) {
+ /* BHRB cannot be turned off when other
+ * events are active on the PMU.
+ */
+
+ /* avoid stale pointer */
+ cpuhw->bhrb_context = NULL;
+ }
+}
+
/*
* Add a event to the PMU.
* If all events are not already frozen, then we disable and
@@ -947,6 +1003,9 @@ nocheck:

ret = 0;
out:
+ if (has_branch_stack(event))
+ power_pmu_bhrb_enable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
@@ -999,6 +1058,9 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
}

+ if (has_branch_stack(event))
+ power_pmu_bhrb_disable(event);
+
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@@ -1117,6 +1179,15 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}

+/* Called from ctxsw to prevent one process's branch entries to
+ * mingle with the other process's entries during context switch.
+ */
+void power_pmu_flush_branch_stack(void)
+{
+ if (ppmu->bhrb_nr)
+ power_pmu_bhrb_reset();
+}
+
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
@@ -1231,9 +1302,11 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;

- /* does not support taken branch sampling */
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (has_branch_stack(event)) {
+ /* PMU has BHRB enabled */
+ if (!(ppmu->flags & PPMU_BHRB))
+ return -EOPNOTSUPP;
+ }

switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
@@ -1314,6 +1387,15 @@ static int power_pmu_event_init(struct perf_event *event)

cpuhw = &get_cpu_var(cpu_hw_events);
err = power_check_constraints(cpuhw, events, cflags, n + 1);
+
+ if (has_branch_stack(event)) {
+ cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if(cpuhw->bhrb_filter == -1)
+ return -EOPNOTSUPP;
+ }
+
put_cpu_var(cpu_hw_events);
if (err)
return -EINVAL;
@@ -1372,6 +1454,7 @@ struct pmu power_pmu = {
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
.event_idx = power_pmu_event_idx,
+ .flush_branch_stack = power_pmu_flush_branch_stack,
};


@@ -1433,6 +1516,13 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);

+ if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ struct cpu_hw_events *cpuhw;
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+ power_pmu_bhrb_read(cpuhw);
+ data.br_stack = &cpuhw->bhrb_stack;
+ }
+
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
}
diff --git a/arch/powerpc/perf/perf_event_bhrb.c b/arch/powerpc/perf/perf_event_bhrb.c
new file mode 100644
index 0000000..961d31c
--- /dev/null
+++ b/arch/powerpc/perf/perf_event_bhrb.c
@@ -0,0 +1,85 @@
+/*
+ * Processing BHRB entries
+ *
+ * Copyright 2013 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define BHRB_TARGET 0x0000000000000002
+#define BHRB_PREDICTION 0x0000000000000001
+#define BHRB_EA 0xFFFFFFFFFFFFFFFC
+
+extern unsigned long int read_bhrb(int n);
+
+void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+{
+ u64 val;
+ u64 addr;
+ int r_index, u_index, target, pred;
+
+ r_index = 0;
+ u_index = 0;
+ while (r_index < ppmu->bhrb_nr) {
+ /* Assembly read function */
+ val = read_bhrb(r_index);
+
+ /* Terminal marker: End of valid BHRB entries */
+ if (val == 0) {
+ break;
+ } else {
+ /* BHRB field break up */
+ addr = val & BHRB_EA;
+ pred = val & BHRB_PREDICTION;
+ target = val & BHRB_TARGET;
+
+ /* Probable Missed entry: Not applicable for POWER8 */
+ if ((addr == 0) && (target == 0) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Real Missed entry: Power8 based missed entry */
+ if ((addr == 0) && (target == 1) && (pred == 1)) {
+ r_index++;
+ continue;
+ }
+
+ /* Reserved condition: Not a valid entry */
+ if ((addr == 0) && (target == 1) && (pred == 0)) {
+ r_index++;
+ continue;
+ }
+
+ /* Is a target address */
+ if (val & BHRB_TARGET) {
+ /* First address cannot be a target address */
+ if (r_index == 0) {
+ r_index++;
+ continue;
+ }
+
+ /* Update target address for the previous entry */
+ cpuhw->bhrb_entries[u_index - 1].to = addr;
+ cpuhw->bhrb_entries[u_index - 1].mispred = pred;
+ cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;
+
+ /* Dont increment u_index */
+ r_index++;
+ } else {
+ /* Update address, flags for current entry */
+ cpuhw->bhrb_entries[u_index].from = addr;
+ cpuhw->bhrb_entries[u_index].mispred = pred;
+ cpuhw->bhrb_entries[u_index].predicted = ~pred;
+
+ /* Successfully popullated one entry */
+ u_index++;
+ r_index++;
+ }
+ }
+ }
+ cpuhw->bhrb_stack.nr = u_index;
+ return;
+}
--
1.7.11.7

2013-04-16 15:55:09

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 1/5] powerpc, perf: Add new BHRB related instructions on POWER8

This patch adds new instructions support for reading various
BHRB entries and also clearing the BHRB buffer.

Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/powerpc/include/asm/ppc-opcode.h | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 8752bc8..93ae5a1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -82,6 +82,7 @@
#define __REGA0_R31 31

/* sorted alphabetically */
+#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
@@ -297,6 +298,12 @@
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)

+/* BHRB instructions */
+#define PPC_CLRBHRB stringify_in_c(.long 0x7c00035c)
+#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \
+ __PPC_RS(r) | \
+ (((n) & 0x1f) << 11))
+
/* Transactional memory instructions */
#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT)
#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \
--
1.7.11.7

2013-04-16 15:55:12

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 3/5] powerpc, perf: Add new BHRB related generic functions, data and flags

This patch adds some new BHRB related generic functions, data and flags

Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/powerpc/include/asm/perf_event_server.h | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 57b42da..3f0c15c 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -33,6 +33,8 @@ struct power_pmu {
unsigned long *valp);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
+ u64 (*bhrb_filter_map)(u64 branch_sample_type);
+ void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
u32 flags;
@@ -42,6 +44,9 @@ struct power_pmu {
int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+ /* BHRB entries in the PMU */
+ int bhrb_nr;
};

/*
@@ -54,6 +59,7 @@ struct power_pmu {
#define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */
#define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
#define PPMU_HAS_SIER 0x00000040 /* Has SIER */
+#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */

/*
* Values for flags to get_alternatives()
--
1.7.11.7

2013-04-16 15:55:16

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 4/5] powerpc, perf: Define BHRB generic functions, data and flags for POWER8

Defines BHRB functions, data and flags for POWER8

Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/powerpc/perf/power8-pmu.c | 57 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 106ae0b..153408c 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -109,6 +109,16 @@
#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */

+/* MMCRA IFM bits - POWER8 */
+#define POWER8_MMCRA_IFM1 0x0000000040000000UL
+#define POWER8_MMCRA_IFM2 0x0000000080000000UL
+#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
+
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
/*
* Layout of constraint bits:
*
@@ -428,6 +438,48 @@ static int power8_generic_events[] = {
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
};

+static u64 power8_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0;
+ u64 br_privilege = branch_sample_type & ONLY_PLM;
+
+ /* BHRB and regular PMU events share the same prvillege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. So privilege state filter criteria for BHRB
+ * and the companion PMU events has to be the same. As a default
+ * "perf record" tool sets all privillege bits ON when no filter
+ * criteria is provided in the command line. So as along as all
+ * privillege bits are ON or they are OFF, we are good to go.
+ */
+ if ((br_privilege != 7) && (br_privilege != 0))
+ return -1;
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+ /* Every thing else is unsupported */
+ return -1;
+}
+
+static void power8_config_bhrb(u64 pmu_bhrb_filter)
+{
+ /* Enable BHRB filter in PMU */
+ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
static struct power_pmu power8_pmu = {
.name = "POWER8",
.n_counter = 6,
@@ -435,12 +487,15 @@ static struct power_pmu power8_pmu = {
.add_fields = POWER8_ADD_FIELDS,
.test_adder = POWER8_TEST_ADDER,
.compute_mmcr = power8_compute_mmcr,
+ .config_bhrb = power8_config_bhrb,
+ .bhrb_filter_map = power8_bhrb_filter_map,
.get_constraint = power8_get_constraint,
.disable_pmc = power8_disable_pmc,
- .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER,
+ .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
.attr_groups = power8_pmu_attr_groups,
+ .bhrb_nr = 32,
};

static int __init init_power8_pmu(void)
--
1.7.11.7

2013-04-16 15:55:42

by Anshuman Khandual

[permalink] [raw]
Subject: [PATCH V2 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8

This patch adds the basic assembly code to read BHRB entries

Signed-off-by: Anshuman Khandual <[email protected]>
---
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/bhrb.S | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/perf/bhrb.S

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 472db18..510fae1 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

obj-$(CONFIG_PERF_EVENTS) += callchain.o

-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
power8-pmu.o
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
new file mode 100644
index 0000000..0e8a018
--- /dev/null
+++ b/arch/powerpc/perf/bhrb.S
@@ -0,0 +1,44 @@
+/*
+ * Basic assembly code to read BHRB entries
+ *
+ * Copyright 2013 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+ .text
+
+.balign 8
+
+/* r3 = n (where n = [0-1023])
+ * The maximum number of BHRB entries supported with PPC_MFBHRBE instruction
+ * is 1024. We have limited number of table entries here as POWER8 implements
+ * 32 BHRB entries.
+ */
+
+/* .global read_bhrb */
+_GLOBAL(read_bhrb)
+ cmpldi r3,1023
+ bgt 1f
+ ld r4,bhrb_table@got(r2)
+ sldi r3,r3,3
+ add r3,r4,r3
+ mtctr r3
+ bctr
+1: li r3,0
+ blr
+
+#define MFBHRB_TABLE1(n) PPC_MFBHRBE(R3,n); blr
+#define MFBHRB_TABLE2(n) MFBHRB_TABLE1(n); MFBHRB_TABLE1(n+1)
+#define MFBHRB_TABLE4(n) MFBHRB_TABLE2(n); MFBHRB_TABLE2(n+2)
+#define MFBHRB_TABLE8(n) MFBHRB_TABLE4(n); MFBHRB_TABLE4(n+4)
+#define MFBHRB_TABLE16(n) MFBHRB_TABLE8(n); MFBHRB_TABLE8(n+8)
+#define MFBHRB_TABLE32(n) MFBHRB_TABLE16(n); MFBHRB_TABLE16(n+16)
+
+bhrb_table:
+ MFBHRB_TABLE32(0)
--
1.7.11.7

2013-04-16 17:22:57

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH V2 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8

> +/* r3 = n (where n = [0-1023])
> + * The maximum number of BHRB entries supported with PPC_MFBHRBE
> instruction
> + * is 1024. We have limited number of table entries here as POWER8
> implements
> + * 32 BHRB entries.
> + */
> +
> +/* .global read_bhrb */
> +_GLOBAL(read_bhrb)
> + cmpldi r3,1023

This should be 31, since that is the last entry in the table below.

> + bgt 1f
> + ld r4,bhrb_table@got(r2)
> + sldi r3,r3,3
> + add r3,r4,r3
> + mtctr r3
> + bctr
> +1: li r3,0
> + blr
> +
> +#define MFBHRB_TABLE1(n) PPC_MFBHRBE(R3,n); blr
> +#define MFBHRB_TABLE2(n) MFBHRB_TABLE1(n); MFBHRB_TABLE1(n+1)
> +#define MFBHRB_TABLE4(n) MFBHRB_TABLE2(n); MFBHRB_TABLE2(n+2)
> +#define MFBHRB_TABLE8(n) MFBHRB_TABLE4(n); MFBHRB_TABLE4(n+4)
> +#define MFBHRB_TABLE16(n) MFBHRB_TABLE8(n); MFBHRB_TABLE8(n+8)
> +#define MFBHRB_TABLE32(n) MFBHRB_TABLE16(n); MFBHRB_TABLE16(n+16)
> +
> +bhrb_table:
> + MFBHRB_TABLE32(0)


Segher

2013-04-17 04:01:06

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V2 2/5] powerpc, perf: Add basic assembly code to read BHRB entries on POWER8

On 04/16/2013 10:53 PM, Segher Boessenkool wrote:
>> +/* r3 = n (where n = [0-1023])
>> + * The maximum number of BHRB entries supported with PPC_MFBHRBE
>> instruction
>> + * is 1024. We have limited number of table entries here as POWER8
>> implements
>> + * 32 BHRB entries.
>> + */
>> +
>> +/* .global read_bhrb */
>> +_GLOBAL(read_bhrb)
>> + cmpldi r3,1023
>
> This should be 31, since that is the last entry in the table below.

Hey Segher,

Would fix this in the next version. Thanks for pointing it out.

Regards
Anshuman

2013-04-17 07:08:56

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH V2 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB

On Tue, Apr 16, 2013 at 09:24:10PM +0530, Anshuman Khandual wrote:
> This patch provides basic enablement for perf branch stack sampling framework
> on POWER8 processor with a new PMU feature called BHRB (Branch History Rolling
> Buffer).
>
> Signed-off-by: Anshuman Khandual <[email protected]>
> ---
> arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++++++++++--
> arch/powerpc/perf/perf_event_bhrb.c | 85 ++++++++++++++++++++++++++++++++
> 2 files changed, 178 insertions(+), 3 deletions(-)
> create mode 100644 arch/powerpc/perf/perf_event_bhrb.c
>
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 4ac6e64..f4d1347 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -19,6 +19,8 @@
> #include <asm/firmware.h>
> #include <asm/ptrace.h>
>
> +#define BHRB_MAX_ENTRIES 32
> +
> struct cpu_hw_events {
> int n_events;
> int n_percpu;
> @@ -38,11 +40,21 @@ struct cpu_hw_events {
>
> unsigned int group_flag;
> int n_txn_start;
> +
> + /* BHRB bits */
> + u64 bhrb_filter; /* BHRB HW branch filter */
> + int bhrb_users;
> + void *bhrb_context;
> + struct perf_branch_stack bhrb_stack;
> + struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
> };
> +
> DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
>
> struct power_pmu *ppmu;
>
> +#include "perf_event_bhrb.c"
> +

Um, why are you doing that?

If it's code that should be in a header, put it in a header. If it's C
code, add it to the Makefile.

cheers

2013-04-17 12:08:14

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V2 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB

On 04/17/2013 12:38 PM, Michael Ellerman wrote:
> On Tue, Apr 16, 2013 at 09:24:10PM +0530, Anshuman Khandual wrote:
>> This patch provides basic enablement for perf branch stack sampling framework
>> on POWER8 processor with a new PMU feature called BHRB (Branch History Rolling
>> Buffer).
>>
>> Signed-off-by: Anshuman Khandual <[email protected]>
>> ---
>> arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++++++++++--
>> arch/powerpc/perf/perf_event_bhrb.c | 85 ++++++++++++++++++++++++++++++++
>> 2 files changed, 178 insertions(+), 3 deletions(-)
>> create mode 100644 arch/powerpc/perf/perf_event_bhrb.c
>>
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 4ac6e64..f4d1347 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -19,6 +19,8 @@
>> #include <asm/firmware.h>
>> #include <asm/ptrace.h>
>>
>> +#define BHRB_MAX_ENTRIES 32
>> +
>> struct cpu_hw_events {
>> int n_events;
>> int n_percpu;
>> @@ -38,11 +40,21 @@ struct cpu_hw_events {
>>
>> unsigned int group_flag;
>> int n_txn_start;
>> +
>> + /* BHRB bits */
>> + u64 bhrb_filter; /* BHRB HW branch filter */
>> + int bhrb_users;
>> + void *bhrb_context;
>> + struct perf_branch_stack bhrb_stack;
>> + struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
>> };
>> +
>> DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
>>
>> struct power_pmu *ppmu;
>>
>> +#include "perf_event_bhrb.c"
>> +
>
> Um, why are you doing that?
>

There was no specific reason for that.

> If it's code that should be in a header, put it in a header. If it's C
> code, add it to the Makefile.

Sure I would get the new code in the Makefile. Thanks!

Regards
Anshuman

>
> cheers
> _______________________________________________
> Linuxppc-dev mailing list
> [email protected]
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>

2013-04-18 02:59:20

by Anshuman Khandual

[permalink] [raw]
Subject: Re: [PATCH V2 5/5] powerpc, perf: Enable branch stack sampling framework support with BHRB

On 04/17/2013 05:37 PM, Anshuman Khandual wrote:
> On 04/17/2013 12:38 PM, Michael Ellerman wrote:
>> On Tue, Apr 16, 2013 at 09:24:10PM +0530, Anshuman Khandual wrote:
>>> This patch provides basic enablement for perf branch stack sampling framework
>>> on POWER8 processor with a new PMU feature called BHRB (Branch History Rolling
>>> Buffer).
>>>
>>> Signed-off-by: Anshuman Khandual <[email protected]>
>>> ---
>>> arch/powerpc/perf/core-book3s.c | 96 +++++++++++++++++++++++++++++++++++--
>>> arch/powerpc/perf/perf_event_bhrb.c | 85 ++++++++++++++++++++++++++++++++
>>> 2 files changed, 178 insertions(+), 3 deletions(-)
>>> create mode 100644 arch/powerpc/perf/perf_event_bhrb.c
>>>
>>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>>> index 4ac6e64..f4d1347 100644
>>> --- a/arch/powerpc/perf/core-book3s.c
>>> +++ b/arch/powerpc/perf/core-book3s.c
>>> @@ -19,6 +19,8 @@
>>> #include <asm/firmware.h>
>>> #include <asm/ptrace.h>
>>>
>>> +#define BHRB_MAX_ENTRIES 32
>>> +
>>> struct cpu_hw_events {
>>> int n_events;
>>> int n_percpu;
>>> @@ -38,11 +40,21 @@ struct cpu_hw_events {
>>>
>>> unsigned int group_flag;
>>> int n_txn_start;
>>> +
>>> + /* BHRB bits */
>>> + u64 bhrb_filter; /* BHRB HW branch filter */
>>> + int bhrb_users;
>>> + void *bhrb_context;
>>> + struct perf_branch_stack bhrb_stack;
>>> + struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
>>> };
>>> +
>>> DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
>>>
>>> struct power_pmu *ppmu;
>>>
>>> +#include "perf_event_bhrb.c"
>>> +
>>
>> Um, why are you doing that?
>>
>
> There was no specific reason for that.
>

Ahh, I remember it now. The function in the new file uses "cpu_hw_events" structure
which is passed during "record_and_restart" data capture phase. Right now cpu_hw_events is
not defined in the header file but inside core-book3s.c itself. Solution to this problem
could be any of these.

(0) Move all the code from the new file perf_event_bhrb.c into core-book3s.c
(1) Move cpu_hw_events structure to perf_event_server.h
(2) Create additional BHRB processing function inside struct power_pmu and
define it for P8 inside power8_pmu.c

Regards
Anshuman