This series adds support for the Control Transfer Records (CTR) extension
on the RISC-V platform. The CTR extension is similar to Arch LBR on x86 and
BRBE on Arm. The extension is in a stable state but not yet frozen; the
latest release can be found at [0].
The CTR extension provides a method to record a limited branch history in
register-accessible internal core storage. CTR allows branches to be
recorded selectively using a filter bitmask. On a counter overflow, CTR
stops recording and the kernel samples the recorded branches in the
overflow handler. CTR also supports a RASEMU mode, in which a call is
recorded on every function call and an entry is removed from the buffer on
every return.
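For illustration only (not part of this series), the sketch below shows how
user space would request branch records through the generic perf ABI; with
CTR present, the driver added here maps the branch_sample_type filter
bitmask onto the CTR filter configuration. All names used are from the
standard perf UAPI.

/*
 * Hypothetical user-space sketch: open a cycles event that samples branch
 * records. Nothing here is CTR-specific; the kernel picks the backing
 * branch-record mechanism (CTR on RISC-V with this series applied).
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* Filter bitmask: any type of user-space branch. */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_ANY;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0 /* self */, -1 /* any CPU */, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* Run the workload, then read samples from the mmap'd ring buffer. */
	close(fd);
	return 0;
}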
The CTR extension depends on a couple of other extensions:
1. S[m|s]csrind: The indirect CSR extension [1], which defines additional
   ([M|S|VS]IREG2-[M|S|VS]IREG6) registers to work around the size
   limitation of the RISC-V CSR address space. CTR accesses the ctrsource,
   ctrtarget and ctrdata CSRs using the Sscsrind extension (a rough sketch
   of this access pattern follows this list).
2. Smstateen: The mstateen bit[54] controls S-mode access to the CTR
   extension.
3. Sscofpmf: Counter overflow and privilege-mode filtering [2].
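A rough sketch of the indirect-CSR access pattern, for context only: the
series itself uses the csr_ind_read() helper from <asm/csr_ind.h>, and the
CSR_SISELECT/CSR_SIREG usage and CTR_ENTRIES_FIRST select value shown here
are assumptions based on the Sscsrind and CTR specs and the prerequisite
patches.

/* Kernel-context sketch; assumes <asm/csr.h> definitions are available. */
static u64 read_ctr_entry_source(unsigned int idx)
{
	/* Select CTR entry 'idx' through the indirect-CSR select register. */
	csr_write(CSR_SISELECT, CTR_ENTRIES_FIRST + idx);

	/* sireg/sireg2/sireg3 then alias ctrsource/ctrtarget/ctrdata. */
	return csr_read(CSR_SIREG);
}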
The series is based on the Smcdeleg/Ssccfg counter delegation extension [3]
patches [4]. CTR itself doesn't depend on counter delegation support; the
rebase is mainly to pick up the Smcsrind patches.
The last patch updates the perf tool to allow processing 256 entries.
Without it, perf appears to consider such samples corrupted and discards
them.
A quick guide [5] describes how to set up and run a basic perf demo on
Linux using the CTR extension.
The QEMU patches can be found here:
https://github.com/rajnesh-kanwal/qemu/tree/ctr_upstream
The OpenSBI patch can be found here:
https://github.com/rajnesh-kanwal/opensbi/tree/ctr_upstream
The Linux kernel patches can be found here:
https://github.com/rajnesh-kanwal/linux/tree/ctr_upstream
[0]: https://github.com/riscv/riscv-control-transfer-records/release
[1]: https://github.com/riscv/riscv-indirect-csr-access
[2]: https://github.com/riscvarchive/riscv-count-overflow/tree/main
[3]: https://github.com/riscv/riscv-smcdeleg-ssccfg
[4]: https://lore.kernel.org/lkml/[email protected]/
[5]: https://github.com/rajnesh-kanwal/linux/wiki/Running-CTR-basic-demo-on-QEMU-RISC%E2%80%90V-Virt-machine
Rajnesh Kanwal (6):
perf: Increase the maximum number of samples to 256.
riscv: perf: Add Control transfer records CSR definitions.
riscv: perf: Add Control Transfer Records extension parsing
riscv: perf: Add infrastructure for Control Transfer Record
riscv: perf: Add driver for Control Transfer Records Ext.
riscv: perf: Integrate CTR Ext support in riscv_pmu_dev driver
MAINTAINERS | 1 +
arch/riscv/include/asm/csr.h | 83 ++++++
arch/riscv/include/asm/hwcap.h | 4 +
arch/riscv/kernel/cpufeature.c | 2 +
drivers/perf/Kconfig | 11 +
drivers/perf/Makefile | 1 +
drivers/perf/riscv_ctr.c | 469 ++++++++++++++++++++++++++++++++
drivers/perf/riscv_pmu_common.c | 18 +-
drivers/perf/riscv_pmu_dev.c | 84 +++++-
include/linux/perf/riscv_pmu.h | 49 ++++
tools/perf/util/machine.c | 21 +-
11 files changed, 722 insertions(+), 21 deletions(-)
create mode 100644 drivers/perf/riscv_ctr.c
--
2.34.1
To support the Control Transfer Records (CTR) extension, we need to extend
the riscv_pmu framework with some basic infrastructure for branch stack
sampling. Subsequent patches will use this to add CTR support to the
riscv_pmu_dev driver.
With CTR, branches are stored in a hardware FIFO, which is sampled by
software when perf events overflow. A task may be context-switched between
overflows, and to avoid leaking samples we need to clear the previous
task's records when a task is context-switched in. To do this we use the
pmu::sched_task() callback added in this patch.
Signed-off-by: Rajnesh Kanwal <[email protected]>
---
drivers/perf/riscv_pmu_common.c | 15 +++++++++++++++
drivers/perf/riscv_pmu_dev.c | 9 +++++++++
include/linux/perf/riscv_pmu.h | 16 ++++++++++++++++
3 files changed, 40 insertions(+)
diff --git a/drivers/perf/riscv_pmu_common.c b/drivers/perf/riscv_pmu_common.c
index b4efdddb2ad9..e794675e4944 100644
--- a/drivers/perf/riscv_pmu_common.c
+++ b/drivers/perf/riscv_pmu_common.c
@@ -159,6 +159,19 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
return GENMASK_ULL(cwidth, 0);
}
+static void riscv_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ bool sched_in)
+{
+ struct riscv_pmu *pmu;
+
+ if (!pmu_ctx)
+ return;
+
+ pmu = to_riscv_pmu(pmu_ctx->pmu);
+ if (pmu->sched_task)
+ pmu->sched_task(pmu_ctx, sched_in);
+}
+
u64 riscv_pmu_event_update(struct perf_event *event)
{
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
@@ -406,6 +419,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
for_each_possible_cpu(cpuid) {
cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
cpuc->n_events = 0;
+ cpuc->ctr_users = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
}
@@ -419,6 +433,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
.start = riscv_pmu_start,
.stop = riscv_pmu_stop,
.read = riscv_pmu_read,
+ .sched_task = riscv_pmu_sched_task,
};
return pmu;
diff --git a/drivers/perf/riscv_pmu_dev.c b/drivers/perf/riscv_pmu_dev.c
index 5ca8a909f3ab..40ae5fc897a3 100644
--- a/drivers/perf/riscv_pmu_dev.c
+++ b/drivers/perf/riscv_pmu_dev.c
@@ -670,6 +670,14 @@ static void rvpmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
hwc->idx, sbi_err_map_linux_errno(ret.error));
}
+static void pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ bool sched_in)
+{
+ struct riscv_pmu *pmu = to_riscv_pmu(pmu_ctx->pmu);
+
+ /* Call CTR specific Sched hook. */
+}
+
static int rvpmu_sbi_find_num_ctrs(void)
{
struct sbiret ret;
@@ -1494,6 +1502,7 @@ static int rvpmu_device_probe(struct platform_device *pdev)
pmu->event_mapped = rvpmu_event_mapped;
pmu->event_unmapped = rvpmu_event_unmapped;
pmu->csr_index = rvpmu_csr_index;
+ pmu->sched_task = pmu_sched_task;
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 425edd6685a9..5a6b840018bd 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -33,6 +33,13 @@
#define RISCV_PMU_CYCLE_FIXED_CTR_MASK 0x01
#define RISCV_PMU_INSTRUCTION_FIXED_CTR_MASK 0x04
+#define MAX_BRANCH_RECORDS 256
+
+struct branch_records {
+ struct perf_branch_stack branch_stack;
+ struct perf_branch_entry branch_entries[MAX_BRANCH_RECORDS];
+};
+
struct cpu_hw_events {
/* currently enabled events */
int n_events;
@@ -44,6 +51,12 @@ struct cpu_hw_events {
DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
/* currently enabled firmware counters */
DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
+
+ /* Saved branch records. */
+ struct branch_records *branches;
+
+ /* Active events requesting branch records */
+ int ctr_users;
};
struct riscv_pmu {
@@ -64,10 +77,13 @@ struct riscv_pmu {
void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
uint8_t (*csr_index)(struct perf_event *event);
+ void (*sched_task)(struct perf_event_pmu_context *ctx, bool sched_in);
struct cpu_hw_events __percpu *hw_events;
struct hlist_node node;
struct notifier_block riscv_pm_nb;
+
+ unsigned int ctr_depth;
};
#define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
--
2.34.1
This adds support for the CTR extension defined in [0]. The extension
allows recording up to the last 256 branch records.
The CTR extension depends on the S[m|s]csrind and Sscofpmf extensions.
Signed-off-by: Rajnesh Kanwal <[email protected]>
---
MAINTAINERS | 1 +
drivers/perf/Kconfig | 11 +
drivers/perf/Makefile | 1 +
drivers/perf/riscv_ctr.c | 469 +++++++++++++++++++++++++++++++++
include/linux/perf/riscv_pmu.h | 33 +++
5 files changed, 515 insertions(+)
create mode 100644 drivers/perf/riscv_ctr.c
diff --git a/MAINTAINERS b/MAINTAINERS
index d6b42d5f62da..868e4b0808ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19056,6 +19056,7 @@ M: Atish Patra <[email protected]>
R: Anup Patel <[email protected]>
L: [email protected]
S: Supported
+F: drivers/perf/riscv_ctr.c
F: drivers/perf/riscv_pmu_common.c
F: drivers/perf/riscv_pmu_dev.c
F: drivers/perf/riscv_pmu_legacy.c
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 3c37577b25f7..cca6598be739 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -110,6 +110,17 @@ config ANDES_CUSTOM_PMU
If you don't know what to do here, say "Y".
+config RISCV_CTR
+ bool "Enable support for Control Transfer Records (CTR)"
+ depends on PERF_EVENTS && RISCV_PMU
+ default y
+ help
+ Enable support for Control Transfer Records (CTR) which
+ allows recording branches, Jumps, Calls, returns etc taken in an
+ execution path. This also supports privilege based filtering. It
+ captures additional relevant information such as cycle count,
+ branch misprediction etc.
+
config ARM_PMU_ACPI
depends on ARM_PMU && ACPI
def_bool y
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index ba809cc069d5..364b1f66f410 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_RISCV_PMU_COMMON) += riscv_pmu_common.o
obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
obj-$(CONFIG_RISCV_PMU) += riscv_pmu_dev.o
obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
+obj-$(CONFIG_RISCV_CTR) += riscv_ctr.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/riscv_ctr.c b/drivers/perf/riscv_ctr.c
new file mode 100644
index 000000000000..95fda1edda4f
--- /dev/null
+++ b/drivers/perf/riscv_ctr.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Control transfer records extension Helpers.
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ *
+ * Author: Rajnesh Kanwal <[email protected]>
+ */
+
+#define pr_fmt(fmt) "CTR: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/cpufeature.h>
+#include <asm/hwcap.h>
+#include <asm/csr_ind.h>
+#include <asm/csr.h>
+
+#define CTR_BRANCH_FILTERS_INH (CTRCTL_EXCINH | \
+ CTRCTL_INTRINH | \
+ CTRCTL_TRETINH | \
+ CTRCTL_TKBRINH | \
+ CTRCTL_INDCALL_INH | \
+ CTRCTL_DIRCALL_INH | \
+ CTRCTL_INDJUMP_INH | \
+ CTRCTL_DIRJUMP_INH | \
+ CTRCTL_CORSWAP_INH | \
+ CTRCTL_RET_INH | \
+ CTRCTL_INDOJUMP_INH | \
+ CTRCTL_DIROJUMP_INH)
+
+#define CTR_BRANCH_ENABLE_BITS (CTRCTL_KERNEL_ENABLE | CTRCTL_U_ENABLE)
+
+/* Branch filters not-supported by CTR extension. */
+#define CTR_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \
+ PERF_SAMPLE_BRANCH_IN_TX | \
+ PERF_SAMPLE_BRANCH_PRIV_SAVE | \
+ PERF_SAMPLE_BRANCH_NO_TX | \
+ PERF_SAMPLE_BRANCH_COUNTERS)
+
+/* Branch filters supported by CTR extension. */
+#define CTR_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \
+ PERF_SAMPLE_BRANCH_KERNEL | \
+ PERF_SAMPLE_BRANCH_HV | \
+ PERF_SAMPLE_BRANCH_ANY | \
+ PERF_SAMPLE_BRANCH_ANY_CALL | \
+ PERF_SAMPLE_BRANCH_ANY_RETURN | \
+ PERF_SAMPLE_BRANCH_IND_CALL | \
+ PERF_SAMPLE_BRANCH_COND | \
+ PERF_SAMPLE_BRANCH_IND_JUMP | \
+ PERF_SAMPLE_BRANCH_HW_INDEX | \
+ PERF_SAMPLE_BRANCH_NO_FLAGS | \
+ PERF_SAMPLE_BRANCH_NO_CYCLES | \
+ PERF_SAMPLE_BRANCH_CALL_STACK | \
+ PERF_SAMPLE_BRANCH_CALL | \
+ PERF_SAMPLE_BRANCH_TYPE_SAVE)
+
+#define CTR_PERF_BRANCH_FILTERS (CTR_ALLOWED_BRANCH_FILTERS | \
+ CTR_EXCLUDE_BRANCH_FILTERS)
+
+static u64 allowed_filters __read_mostly;
+
+struct ctr_regset {
+ unsigned long src;
+ unsigned long target;
+ unsigned long ctr_data;
+};
+
+static inline u64 get_ctr_src_reg(unsigned int ctr_idx)
+{
+ return csr_ind_read(CSR_IREG, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline u64 get_ctr_tgt_reg(unsigned int ctr_idx)
+{
+ return csr_ind_read(CSR_IREG2, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline u64 get_ctr_data_reg(unsigned int ctr_idx)
+{
+ return csr_ind_read(CSR_IREG3, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline bool ctr_record_valid(u64 ctr_src)
+{
+ return !!FIELD_GET(CTRSOURCE_VALID, ctr_src);
+}
+
+static inline int ctr_get_mispredict(u64 ctr_target)
+{
+ return FIELD_GET(CTRTARGET_MISP, ctr_target);
+}
+
+static inline unsigned int ctr_get_cycles(u64 ctr_data)
+{
+ const unsigned int cce = FIELD_GET(CTRDATA_CCE_MASK, ctr_data);
+ const unsigned int ccm = FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
+
+ if (ctr_data & CTRDATA_CCV)
+ return 0;
+
+ /* Formula to calculate cycles from spec: (2^12 + CCM) << CCE-1 */
+ if (cce > 0)
+ return (4096 + ccm) << (cce - 1);
+
+ return FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
+}
+
+static inline unsigned int ctr_get_type(u64 ctr_data)
+{
+ return FIELD_GET(CTRDATA_TYPE_MASK, ctr_data);
+}
+
+static inline unsigned int ctr_get_depth(u64 ctr_depth)
+{
+ /* Depth table from CTR Spec: 2.4 sctrdepth.
+ *
+ * sctrdepth.depth Depth
+ * 000 - 16
+ * 001 - 32
+ * 010 - 64
+ * 011 - 128
+ * 100 - 256
+ *
+ * Depth = 16 * 2 ^ (ctrdepth.depth)
+ * or
+ * Depth = 16 << ctrdepth.depth.
+ */
+ return 16 << FIELD_GET(SCTRDEPTH_MASK, ctr_depth);
+}
+
+/* Reads CTR entry at idx and stores it in entry struct. */
+static bool capture_ctr_regset(struct ctr_regset *entry, unsigned int idx)
+{
+ entry->src = get_ctr_src_reg(idx);
+
+ if (!ctr_record_valid(entry->src))
+ return false;
+
+ entry->src = entry->src & (~CTRSOURCE_VALID);
+ entry->target = get_ctr_tgt_reg(idx);
+ entry->ctr_data = get_ctr_data_reg(idx);
+
+ return true;
+}
+
+static u64 branch_type_to_ctr(int branch_type)
+{
+ u64 config = CTR_BRANCH_FILTERS_INH | CTRCTL_LCOFIFRZ;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_USER)
+ config |= CTRCTL_U_ENABLE;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+ config |= CTRCTL_KERNEL_ENABLE;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_HV) {
+ if (riscv_isa_extension_available(NULL, h))
+ config |= CTRCTL_KERNEL_ENABLE;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+ config &= ~CTR_BRANCH_FILTERS_INH;
+ return config;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ config &= ~CTRCTL_INDCALL_INH;
+ config &= ~CTRCTL_DIRCALL_INH;
+ config &= ~CTRCTL_EXCINH;
+ config &= ~CTRCTL_INTRINH;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ config &= ~(CTRCTL_RET_INH | CTRCTL_TRETINH);
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ config &= ~CTRCTL_INDCALL_INH;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_COND)
+ config &= ~CTRCTL_TKBRINH;
+
+ if (branch_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+ config &= ~(CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH |
+ CTRCTL_RET_INH);
+ config |= CTRCTL_RASEMU;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) {
+ config &= ~CTRCTL_INDJUMP_INH;
+ config &= ~CTRCTL_INDOJUMP_INH;
+ }
+
+ if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+ config &= ~CTRCTL_DIRCALL_INH;
+
+ return config;
+}
+
+static const int ctr_perf_map[] = {
+ [CTRDATA_TYPE_NONE] = PERF_BR_UNKNOWN,
+ [CTRDATA_TYPE_EXCEPTION] = PERF_BR_SYSCALL,
+ [CTRDATA_TYPE_INTERRUPT] = PERF_BR_IRQ,
+ [CTRDATA_TYPE_TRAP_RET] = PERF_BR_ERET,
+ [CTRDATA_TYPE_NONTAKEN_BRANCH] = PERF_BR_COND,
+ [CTRDATA_TYPE_TAKEN_BRANCH] = PERF_BR_COND,
+ [CTRDATA_TYPE_RESERVED_6] = PERF_BR_UNKNOWN,
+ [CTRDATA_TYPE_RESERVED_7] = PERF_BR_UNKNOWN,
+ [CTRDATA_TYPE_INDIRECT_CALL] = PERF_BR_IND_CALL,
+ [CTRDATA_TYPE_DIRECT_CALL] = PERF_BR_CALL,
+ [CTRDATA_TYPE_INDIRECT_JUMP] = PERF_BR_UNCOND,
+ [CTRDATA_TYPE_DIRECT_JUMP] = PERF_BR_UNKNOWN,
+ [CTRDATA_TYPE_CO_ROUTINE_SWAP] = PERF_BR_UNKNOWN,
+ [CTRDATA_TYPE_RETURN] = PERF_BR_RET,
+ [CTRDATA_TYPE_OTHER_INDIRECT_JUMP] = PERF_BR_IND,
+ [CTRDATA_TYPE_OTHER_DIRECT_JUMP] = PERF_BR_UNKNOWN,
+};
+
+static void ctr_set_perf_entry_type(struct perf_branch_entry *entry,
+ u64 ctr_data)
+{
+ int ctr_type = ctr_get_type(ctr_data);
+
+ entry->type = ctr_perf_map[ctr_type];
+ if (entry->type == PERF_BR_UNKNOWN)
+ pr_warn("%d - unknown branch type captured\n", ctr_type);
+}
+
+static void capture_ctr_flags(struct perf_branch_entry *entry,
+ struct perf_event *event, u64 ctr_data,
+ u64 ctr_target)
+{
+ if (branch_sample_type(event))
+ ctr_set_perf_entry_type(entry, ctr_data);
+
+ if (!branch_sample_no_cycles(event))
+ entry->cycles = ctr_get_cycles(ctr_data);
+
+ if (!branch_sample_no_flags(event)) {
+ entry->abort = 0;
+ entry->mispred = ctr_get_mispredict(ctr_target);
+ entry->predicted = !entry->mispred;
+ }
+
+ if (branch_sample_priv(event))
+ entry->priv = PERF_BR_PRIV_UNKNOWN;
+}
+
+
+static void ctr_regset_to_branch_entry(struct cpu_hw_events *cpuc,
+ struct perf_event *event,
+ struct ctr_regset *regset,
+ unsigned int idx)
+{
+ struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx];
+
+ perf_clear_branch_entry_bitfields(entry);
+ entry->from = regset->src;
+ entry->to = regset->target & (~CTRTARGET_MISP);
+ capture_ctr_flags(entry, event, regset->ctr_data, regset->target);
+}
+
+static void ctr_read_entries(struct cpu_hw_events *cpuc,
+ struct perf_event *event,
+ unsigned int depth)
+{
+ struct ctr_regset entry = {};
+ u64 ctr_ctl;
+ int i;
+
+ ctr_ctl = csr_read_clear(CSR_CTRCTL, CTR_BRANCH_ENABLE_BITS);
+
+ for (i = 0; i < depth; i++) {
+ if (!capture_ctr_regset(&entry, i))
+ break;
+
+ ctr_regset_to_branch_entry(cpuc, event, &entry, i);
+ }
+
+ csr_set(CSR_CTRCTL, ctr_ctl & CTR_BRANCH_ENABLE_BITS);
+
+ cpuc->branches->branch_stack.nr = i;
+ cpuc->branches->branch_stack.hw_idx = 0;
+}
+
+bool riscv_pmu_ctr_valid(struct perf_event *event)
+{
+ u64 branch_type = event->attr.branch_sample_type;
+
+ if (branch_type & ~allowed_filters) {
+ pr_debug_once("Requested branch filters not supported 0x%llx\n",
+ branch_type & ~allowed_filters);
+ return false;
+ }
+
+ return true;
+}
+
+void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int depth = to_riscv_pmu(event->pmu)->ctr_depth;
+
+ ctr_read_entries(cpuc, event, depth);
+
+ /* Clear frozen bit. */
+ csr_clear(CSR_SCTRSTATUS, SCTRSTATUS_FROZEN);
+}
+
+static void riscv_pmu_ctr_clear(void)
+{
+ /* FIXME: Replace with sctrclr instruction once support is merged
+ * into toolchain.
+ */
+ asm volatile(".4byte 0x10400073\n" ::: "memory");
+ csr_write(CSR_SCTRSTATUS, 0);
+}
+
+/*
+ * On context switch in, we need to make sure no samples from previous user
+ * are left in the CTR.
+ *
+ * On ctxswin, sched_in = true, called after the PMU has started
+ * On ctxswout, sched_in = false, called before the PMU is stopped
+ */
+void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ bool sched_in)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(pmu_ctx->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+
+ if (cpuc->ctr_users && sched_in)
+ riscv_pmu_ctr_clear();
+}
+
+void riscv_pmu_ctr_enable(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+ u64 branch_type = event->attr.branch_sample_type;
+ u64 ctr;
+
+ if (!cpuc->ctr_users++ && !event->total_time_running)
+ riscv_pmu_ctr_clear();
+
+ ctr = branch_type_to_ctr(branch_type);
+ csr_write(CSR_CTRCTL, ctr);
+
+ perf_sched_cb_inc(event->pmu);
+}
+
+void riscv_pmu_ctr_disable(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+
+ /* Clear CTRCTL to disable the recording. */
+ csr_write(CSR_CTRCTL, 0);
+
+ cpuc->ctr_users--;
+ WARN_ON_ONCE(cpuc->ctr_users < 0);
+
+ perf_sched_cb_dec(event->pmu);
+}
+
+/*
+ * Check for hardware supported perf filters here. To avoid missing
+ * any new added filter in perf, we do a BUILD_BUG_ON check, so make sure
+ * to update CTR_ALLOWED_BRANCH_FILTERS or CTR_EXCLUDE_BRANCH_FILTERS
+ * defines when adding support for it in below function.
+ */
+static void __init check_available_filters(void)
+{
+ u64 ctr_ctl;
+
+ /*
+ * Ensure both perf branch filter allowed and exclude
+ * masks are always in sync with the generic perf ABI.
+ */
+ BUILD_BUG_ON(CTR_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
+
+ allowed_filters = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_KERNEL |
+ PERF_SAMPLE_BRANCH_ANY |
+ PERF_SAMPLE_BRANCH_HW_INDEX |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_TYPE_SAVE;
+
+ csr_write(CSR_CTRCTL, ~0);
+ ctr_ctl = csr_read(CSR_CTRCTL);
+
+ if (riscv_isa_extension_available(NULL, h))
+ allowed_filters |= PERF_SAMPLE_BRANCH_HV;
+
+ if (ctr_ctl & (CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH))
+ allowed_filters |= PERF_SAMPLE_BRANCH_ANY_CALL;
+
+ if (ctr_ctl & (CTRCTL_RET_INH | CTRCTL_TRETINH))
+ allowed_filters |= PERF_SAMPLE_BRANCH_ANY_RETURN;
+
+ if (ctr_ctl & CTRCTL_INDCALL_INH)
+ allowed_filters |= PERF_SAMPLE_BRANCH_IND_CALL;
+
+ if (ctr_ctl & CTRCTL_TKBRINH)
+ allowed_filters |= PERF_SAMPLE_BRANCH_COND;
+
+ if (ctr_ctl & CTRCTL_RASEMU)
+ allowed_filters |= PERF_SAMPLE_BRANCH_CALL_STACK;
+
+ if (ctr_ctl & (CTRCTL_INDOJUMP_INH | CTRCTL_INDJUMP_INH))
+ allowed_filters |= PERF_SAMPLE_BRANCH_IND_JUMP;
+
+ if (ctr_ctl & CTRCTL_DIRCALL_INH)
+ allowed_filters |= PERF_SAMPLE_BRANCH_CALL;
+}
+
+void riscv_pmu_ctr_starting_cpu(void)
+{
+ if (!riscv_isa_extension_available(NULL, SxCTR) ||
+ !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+ !riscv_isa_extension_available(NULL, SxCSRIND))
+ return;
+
+ /* Set depth to maximum. */
+ csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
+}
+
+void riscv_pmu_ctr_dying_cpu(void)
+{
+ if (!riscv_isa_extension_available(NULL, SxCTR) ||
+ !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+ !riscv_isa_extension_available(NULL, SxCSRIND))
+ return;
+
+ /* Clear and reset CTR CSRs. */
+ csr_write(CSR_SCTRDEPTH, 0);
+ csr_write(CSR_CTRCTL, 0);
+ riscv_pmu_ctr_clear();
+}
+
+void __init riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
+{
+ if (!riscv_isa_extension_available(NULL, SxCTR) ||
+ !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+ !riscv_isa_extension_available(NULL, SxCSRIND))
+ return;
+
+ check_available_filters();
+
+ /* Set depth to maximum. */
+ csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
+ riscv_pmu->ctr_depth = ctr_get_depth(csr_read(CSR_SCTRDEPTH));
+
+ pr_info("Perf CTR available, with %d depth\n", riscv_pmu->ctr_depth);
+}
+
+void __init riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)
+{
+ if (!riscv_pmu_ctr_supported(riscv_pmu))
+ return;
+
+ csr_write(CSR_SCTRDEPTH, 0);
+ csr_write(CSR_CTRCTL, 0);
+ riscv_pmu_ctr_clear();
+ riscv_pmu->ctr_depth = 0;
+}
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 5a6b840018bd..455d2386936f 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -104,6 +104,39 @@ struct riscv_pmu *riscv_pmu_alloc(void);
int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
#endif
+static inline bool riscv_pmu_ctr_supported(struct riscv_pmu *pmu)
+{
+ return !!pmu->ctr_depth;
+}
+
#endif /* CONFIG_RISCV_PMU_COMMON */
+#ifdef CONFIG_RISCV_CTR
+
+bool riscv_pmu_ctr_valid(struct perf_event *event);
+void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event);
+void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void riscv_pmu_ctr_enable(struct perf_event *event);
+void riscv_pmu_ctr_disable(struct perf_event *event);
+void riscv_pmu_ctr_dying_cpu(void);
+void riscv_pmu_ctr_starting_cpu(void);
+void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu);
+void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu);
+
+#else
+
+static inline bool riscv_pmu_ctr_valid(struct perf_event *event) { return false; }
+static inline void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc,
+ struct perf_event *event) { }
+static inline void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *,
+ bool sched_in) { }
+static inline void riscv_pmu_ctr_enable(struct perf_event *event) { }
+static inline void riscv_pmu_ctr_disable(struct perf_event *event) { }
+static inline void riscv_pmu_ctr_dying_cpu(void) { }
+static inline void riscv_pmu_ctr_starting_cpu(void) { }
+static inline void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu) { }
+static inline void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu) { }
+
+#endif /* CONFIG_RISCV_CTR */
+
#endif /* _RISCV_PMU_H */
--
2.34.1
This integrates the recently added CTR extension support into the
riscv_pmu_dev driver to enable branch stack sampling using PMU events.
This mainly adds CTR enable/disable callbacks in the rvpmu_ctr_stop() and
rvpmu_ctr_start() functions to start/stop branch recording along with the
event.
The PMU overflow handler rvpmu_ovf_handler() is also updated to sample the
CTR entries when an overflow occurs for an event programmed to record
branches. The recorded entries are fed to core perf for further processing.
Signed-off-by: Rajnesh Kanwal <[email protected]>
---
drivers/perf/riscv_pmu_common.c | 3 +-
drivers/perf/riscv_pmu_dev.c | 77 +++++++++++++++++++++++++++------
2 files changed, 65 insertions(+), 15 deletions(-)
diff --git a/drivers/perf/riscv_pmu_common.c b/drivers/perf/riscv_pmu_common.c
index e794675e4944..e1f3a33b479f 100644
--- a/drivers/perf/riscv_pmu_common.c
+++ b/drivers/perf/riscv_pmu_common.c
@@ -326,8 +326,7 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;
- /* driver does not support branch stack sampling */
- if (has_branch_stack(event))
+ if (has_branch_stack(event) && !riscv_pmu_ctr_supported(rvpmu))
return -EOPNOTSUPP;
hwc->flags = 0;
diff --git a/drivers/perf/riscv_pmu_dev.c b/drivers/perf/riscv_pmu_dev.c
index 40ae5fc897a3..1b2c04c35bed 100644
--- a/drivers/perf/riscv_pmu_dev.c
+++ b/drivers/perf/riscv_pmu_dev.c
@@ -675,7 +675,7 @@ static void pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
{
struct riscv_pmu *pmu = to_riscv_pmu(pmu_ctx->pmu);
- /* Call CTR specific Sched hook. */
+ riscv_pmu_ctr_sched_task(pmu_ctx, sched_in);
}
static int rvpmu_sbi_find_num_ctrs(void)
@@ -935,17 +935,25 @@ static irqreturn_t rvpmu_ovf_handler(int irq, void *dev)
hw_evt = &event->hw;
riscv_pmu_event_update(event);
perf_sample_data_init(&data, 0, hw_evt->last_period);
- if (riscv_pmu_event_set_period(event)) {
- /*
- * Unlike other ISAs, RISC-V don't have to disable interrupts
- * to avoid throttling here. As per the specification, the
- * interrupt remains disabled until the OF bit is set.
- * Interrupts are enabled again only during the start.
- * TODO: We will need to stop the guest counters once
- * virtualization support is added.
- */
- perf_event_overflow(event, &data, regs);
+ if (!riscv_pmu_event_set_period(event))
+ continue;
+
+ if (needs_branch_stack(event)) {
+ riscv_pmu_ctr_consume(cpu_hw_evt, event);
+ perf_sample_save_brstack(
+ &data, event,
+ &cpu_hw_evt->branches->branch_stack, NULL);
}
+
+ /*
+ * Unlike other ISAs, RISC-V don't have to disable interrupts
+ * to avoid throttling here. As per the specification, the
+ * interrupt remains disabled until the OF bit is set.
+ * Interrupts are enabled again only during the start.
+ * TODO: We will need to stop the guest counters once
+ * virtualization support is added.
+ */
+ perf_event_overflow(event, &data, regs);
}
rvpmu_start_overflow_mask(pmu, overflowed_ctrs);
@@ -1103,10 +1111,12 @@ static void rvpmu_ctr_start(struct perf_event *event, u64 ival)
else
rvpmu_sbi_ctr_start(event, ival);
-
if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
rvpmu_set_scounteren((void *)event);
+
+ if (needs_branch_stack(event))
+ riscv_pmu_ctr_enable(event);
}
static void rvpmu_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -1128,6 +1138,9 @@ static void rvpmu_ctr_stop(struct perf_event *event, unsigned long flag)
} else {
rvpmu_sbi_ctr_stop(event, flag);
}
+
+ if (needs_branch_stack(event))
+ riscv_pmu_ctr_disable(event);
}
static int rvpmu_find_ctrs(void)
@@ -1161,6 +1174,9 @@ static int rvpmu_find_ctrs(void)
static int rvpmu_event_map(struct perf_event *event, u64 *econfig)
{
+ if (needs_branch_stack(event) && !riscv_pmu_ctr_valid(event))
+ return -EOPNOTSUPP;
+
if (static_branch_likely(&riscv_pmu_cdeleg_available) && !pmu_sbi_is_fw_event(event))
return rvpmu_deleg_event_map(event, econfig);
else
@@ -1207,6 +1223,8 @@ static int rvpmu_starting_cpu(unsigned int cpu, struct hlist_node *node)
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
+ riscv_pmu_ctr_starting_cpu();
+
return 0;
}
@@ -1218,6 +1236,7 @@ static int rvpmu_dying_cpu(unsigned int cpu, struct hlist_node *node)
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
+ riscv_pmu_ctr_dying_cpu();
return 0;
}
@@ -1331,6 +1350,29 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
+static int branch_records_alloc(struct riscv_pmu *pmu)
+{
+ struct branch_records __percpu *tmp_alloc_ptr;
+ struct branch_records *records;
+ struct cpu_hw_events *events;
+ int cpu;
+
+ if (!riscv_pmu_ctr_supported(pmu))
+ return 0;
+
+ tmp_alloc_ptr = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
+ if (!tmp_alloc_ptr)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ events = per_cpu_ptr(pmu->hw_events, cpu);
+ records = per_cpu_ptr(tmp_alloc_ptr, cpu);
+ events->branches = records;
+ }
+
+ return 0;
+}
+
static void rvpmu_event_init(struct perf_event *event)
{
/*
@@ -1490,6 +1532,12 @@ static int rvpmu_device_probe(struct platform_device *pdev)
pmu->pmu.attr_groups = riscv_cdeleg_pmu_attr_groups;
else
pmu->pmu.attr_groups = riscv_sbi_pmu_attr_groups;
+
+ riscv_pmu_ctr_init(pmu);
+ ret = branch_records_alloc(pmu);
+ if (ret)
+ goto out_ctr_finish;
+
pmu->cmask = cmask;
pmu->ctr_start = rvpmu_ctr_start;
pmu->ctr_stop = rvpmu_ctr_stop;
@@ -1506,7 +1554,7 @@ static int rvpmu_device_probe(struct platform_device *pdev)
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
if (ret)
- return ret;
+ goto out_ctr_finish;
ret = riscv_pm_pmu_register(pmu);
if (ret)
@@ -1523,6 +1571,9 @@ static int rvpmu_device_probe(struct platform_device *pdev)
out_unregister:
riscv_pmu_destroy(pmu);
+out_ctr_finish:
+ riscv_pmu_ctr_finish(pmu);
+
out_free:
kfree(pmu);
return ret;
--
2.34.1