Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752162AbdLFLsQ (ORCPT ); Wed, 6 Dec 2017 06:48:16 -0500 Received: from [195.214.232.25] ([195.214.232.25]:38570 "EHLO relay.sw.ru" rhost-flags-FAIL-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1751066AbdLFLsM (ORCPT ); Wed, 6 Dec 2017 06:48:12 -0500 From: Jan Dakinevich To: linux-kernel@vger.kernel.org Cc: Jan Dakinevich , "Denis V . Lunev" , Roman Kagan , Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Thomas Gleixner , "H. Peter Anvin" , x86@kernel.org, Paolo Bonzini , =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= , Andi Kleen , Kan Liang , Stephane Eranian , Zhou Chengming , Sebastian Andrzej Siewior , Colin King , Greg Kroah-Hartman , Jin Yao , kvm@vger.kernel.org Subject: [PATCH RFC 1/2] perf/x86/intel: make reusable LBR initialization code Date: Wed, 6 Dec 2017 14:43:02 +0300 Message-Id: <1512560585-27263-2-git-send-email-jan.dakinevich@virtuozzo.com> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1512560585-27263-1-git-send-email-jan.dakinevich@virtuozzo.com> References: <1512560585-27263-1-git-send-email-jan.dakinevich@virtuozzo.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 27709 Lines: 884 This patch introduces globally visible intel_pmu_lbr_fill() routine, which gathers information which LBR MSRs are support for specific CPU family/model. It is supposed that the routine would be used in KVM code, using guest CPU information as an input. By this reason, it should not have any side effect which could affect host system. * LBR information moved to separate structure `struct x86_pmu_lbr'; * All family-specific tweaks on gathered information are applied only for global x86_pmu.lbr to keep current perf initialization behavior. Signed-off-by: Jan Dakinevich --- arch/x86/events/core.c | 8 +- arch/x86/events/intel/core.c | 59 +++------ arch/x86/events/intel/lbr.c | 272 +++++++++++++++++++++++++------------- arch/x86/events/perf_event.h | 27 +--- arch/x86/include/asm/perf_event.h | 11 ++ 5 files changed, 215 insertions(+), 162 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d332..72fc174 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -370,7 +370,7 @@ int x86_add_exclusive(unsigned int what) * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. * LBR and BTS are still mutually exclusive. */ - if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) + if (x86_pmu.lbr.pt_coexist && what == x86_lbr_exclusive_pt) return 0; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { @@ -393,7 +393,7 @@ int x86_add_exclusive(unsigned int what) void x86_del_exclusive(unsigned int what) { - if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) + if (x86_pmu.lbr.pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); @@ -496,7 +496,7 @@ int x86_pmu_max_precise(void) precise++; /* Support for IP fixup */ - if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) + if (x86_pmu.lbr.nr || x86_pmu.intel_cap.pebs_format >= 2) precise++; if (x86_pmu.pebs_prec_dist) @@ -1311,7 +1311,7 @@ void perf_event_print_debug(void) rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } - if (x86_pmu.lbr_nr) { + if (x86_pmu.lbr.nr) { rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4..2ac799d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2182,7 +2182,7 @@ static void intel_pmu_reset(void) } /* Reset LBRs and LBR freezing */ - if (x86_pmu.lbr_nr) { + if (x86_pmu.lbr.nr) { update_debugctlmsr(get_debugctlmsr() & ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); } @@ -3262,7 +3262,7 @@ static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { + if (x86_pmu.extra_regs || x86_pmu.lbr.sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) goto err; @@ -3343,7 +3343,7 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->shared_regs->refcnt++; } - if (x86_pmu.lbr_sel_map) + if (x86_pmu.lbr.sel_map) cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { @@ -3595,9 +3595,9 @@ static void intel_snb_check_microcode(void) static bool is_lbr_from(unsigned long msr) { - unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + unsigned long lbr_from_nr = x86_pmu.lbr.from + x86_pmu.lbr.nr; - return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; + return x86_pmu.lbr.from <= msr && msr < lbr_from_nr; } /* @@ -3814,7 +3814,7 @@ static ssize_t branches_show(struct device *cdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr.nr); } static DEVICE_ATTR_RO(branches); @@ -3915,6 +3915,8 @@ __init int intel_pmu_init(void) intel_ds_init(); + intel_pmu_lbr_init(); + x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ /* @@ -3934,8 +3936,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - intel_pmu_lbr_init_core(); - x86_pmu.event_constraints = intel_core2_event_constraints; x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; pr_cont("Core2 events, "); @@ -3950,8 +3950,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_nhm(); - x86_pmu.event_constraints = intel_nehalem_event_constraints; x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; @@ -3983,8 +3981,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - intel_pmu_lbr_init_atom(); - x86_pmu.event_constraints = intel_gen_event_constraints; x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_core2; @@ -4000,8 +3996,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_slm(); - x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; @@ -4019,8 +4013,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_skl(); - x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; x86_pmu.extra_regs = intel_glm_extra_regs; @@ -4031,7 +4023,6 @@ __init int intel_pmu_init(void) */ x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.cpu_events = glm_events_attrs; extra_attr = slm_format_attr; @@ -4045,8 +4036,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_skl(); - x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints; x86_pmu.extra_regs = intel_glm_extra_regs; @@ -4056,7 +4045,6 @@ __init int intel_pmu_init(void) */ x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.get_event_constraints = glp_get_event_constraints; x86_pmu.cpu_events = glm_events_attrs; @@ -4075,8 +4063,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_nhm(); - x86_pmu.event_constraints = intel_westmere_event_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; @@ -4107,8 +4093,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_snb(); - x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; @@ -4148,8 +4132,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_snb(); - x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; @@ -4184,8 +4166,6 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_hsw(); - x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; @@ -4198,7 +4178,6 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; x86_pmu.cpu_events = get_hsw_events_attrs(); - x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; pr_cont("Haswell events, "); @@ -4223,8 +4202,6 @@ __init int intel_pmu_init(void) hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; - intel_pmu_lbr_init_hsw(); - x86_pmu.event_constraints = intel_bdw_event_constraints; x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; @@ -4250,7 +4227,6 @@ __init int intel_pmu_init(void) slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_knl(); x86_pmu.event_constraints = intel_slm_event_constraints; x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; @@ -4272,7 +4248,6 @@ __init int intel_pmu_init(void) x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_skl(); /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ event_attr_td_recovery_bubbles.event_str_noht = @@ -4365,19 +4340,19 @@ __init int intel_pmu_init(void) * Check all LBT MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) - x86_pmu.lbr_nr = 0; - for (i = 0; i < x86_pmu.lbr_nr; i++) { - if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && - check_msr(x86_pmu.lbr_to + i, 0xffffUL))) - x86_pmu.lbr_nr = 0; + if (x86_pmu.lbr.nr && !check_msr(x86_pmu.lbr.tos, 0x3UL)) + x86_pmu.lbr.nr = 0; + for (i = 0; i < x86_pmu.lbr.nr; i++) { + if (!(check_msr(x86_pmu.lbr.from + i, 0xffffUL) && + check_msr(x86_pmu.lbr.to + i, 0xffffUL))) + x86_pmu.lbr.nr = 0; } x86_pmu.caps_attrs = intel_pmu_caps_attrs; - if (x86_pmu.lbr_nr) { + if (x86_pmu.lbr.nr) { x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); - pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + pr_cont("%d-deep LBR, ", x86_pmu.lbr.nr); } /* @@ -4390,7 +4365,7 @@ __init int intel_pmu_init(void) er->extra_msr_access = check_msr(er->msr, 0x11UL); /* Disable LBR select mapping */ if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) - x86_pmu.lbr_sel_map = NULL; + x86_pmu.lbr.sel_map = NULL; } } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index ae64d0b..f6dba2d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "../perf_event.h" @@ -167,7 +168,7 @@ static void __intel_pmu_lbr_enable(bool pmi) * did not change. */ if (cpuc->lbr_sel) - lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; + lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr.sel_mask; if (!pmi && cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, lbr_select); @@ -198,17 +199,17 @@ static void intel_pmu_lbr_reset_32(void) { int i; - for (i = 0; i < x86_pmu.lbr_nr; i++) - wrmsrl(x86_pmu.lbr_from + i, 0); + for (i = 0; i < x86_pmu.lbr.nr; i++) + wrmsrl(x86_pmu.lbr.from + i, 0); } static void intel_pmu_lbr_reset_64(void) { int i; - for (i = 0; i < x86_pmu.lbr_nr; i++) { - wrmsrl(x86_pmu.lbr_from + i, 0); - wrmsrl(x86_pmu.lbr_to + i, 0); + for (i = 0; i < x86_pmu.lbr.nr; i++) { + wrmsrl(x86_pmu.lbr.from + i, 0); + wrmsrl(x86_pmu.lbr.to + i, 0); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + i, 0); } @@ -216,7 +217,7 @@ static void intel_pmu_lbr_reset_64(void) void intel_pmu_lbr_reset(void) { - if (!x86_pmu.lbr_nr) + if (!x86_pmu.lbr.nr) return; if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) @@ -232,7 +233,7 @@ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; - rdmsrl(x86_pmu.lbr_tos, tos); + rdmsrl(x86_pmu.lbr.tos, tos); return tos; } @@ -306,19 +307,19 @@ static u64 lbr_from_signext_quirk_rd(u64 val) static inline void wrlbr_from(unsigned int idx, u64 val) { val = lbr_from_signext_quirk_wr(val); - wrmsrl(x86_pmu.lbr_from + idx, val); + wrmsrl(x86_pmu.lbr.from + idx, val); } static inline void wrlbr_to(unsigned int idx, u64 val) { - wrmsrl(x86_pmu.lbr_to + idx, val); + wrmsrl(x86_pmu.lbr.to + idx, val); } static inline u64 rdlbr_from(unsigned int idx) { u64 val; - rdmsrl(x86_pmu.lbr_from + idx, val); + rdmsrl(x86_pmu.lbr.from + idx, val); return lbr_from_signext_quirk_rd(val); } @@ -327,7 +328,7 @@ static inline u64 rdlbr_to(unsigned int idx) { u64 val; - rdmsrl(x86_pmu.lbr_to + idx, val); + rdmsrl(x86_pmu.lbr.to + idx, val); return val; } @@ -344,7 +345,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) return; } - mask = x86_pmu.lbr_nr - 1; + mask = x86_pmu.lbr.nr - 1; tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; @@ -354,7 +355,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } - wrmsrl(x86_pmu.lbr_tos, tos); + wrmsrl(x86_pmu.lbr.tos, tos); task_ctx->lbr_stack_state = LBR_NONE; } @@ -369,7 +370,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) return; } - mask = x86_pmu.lbr_nr - 1; + mask = x86_pmu.lbr.nr - 1; tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; @@ -424,7 +425,7 @@ void intel_pmu_lbr_add(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; - if (!x86_pmu.lbr_nr) + if (!x86_pmu.lbr.nr) return; cpuc->br_sel = event->hw.branch_reg.reg; @@ -463,7 +464,7 @@ void intel_pmu_lbr_del(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; - if (!x86_pmu.lbr_nr) + if (!x86_pmu.lbr.nr) return; if (branch_user_callstack(cpuc->br_sel) && @@ -495,11 +496,11 @@ void intel_pmu_lbr_disable_all(void) static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) { - unsigned long mask = x86_pmu.lbr_nr - 1; + unsigned long mask = x86_pmu.lbr.nr - 1; u64 tos = intel_pmu_lbr_tos(); int i; - for (i = 0; i < x86_pmu.lbr_nr; i++) { + for (i = 0; i < x86_pmu.lbr.nr; i++) { unsigned long lbr_idx = (tos - i) & mask; union { struct { @@ -509,7 +510,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) u64 lbr; } msr_lastbranch; - rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + rdmsrl(x86_pmu.lbr.from + lbr_idx, msr_lastbranch.lbr); cpuc->lbr_entries[i].from = msr_lastbranch.from; cpuc->lbr_entries[i].to = msr_lastbranch.to; @@ -532,12 +533,12 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { bool need_info = false; - unsigned long mask = x86_pmu.lbr_nr - 1; + unsigned long mask = x86_pmu.lbr.nr - 1; int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; int out = 0; - int num = x86_pmu.lbr_nr; + int num = x86_pmu.lbr.nr; if (cpuc->lbr_sel) { need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); @@ -595,7 +596,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) * If the abort just happened outside the window * the extra entry cannot be removed. */ - if (abort && x86_pmu.lbr_double_abort && out > 0) + if (abort && x86_pmu.lbr.double_abort && out > 0) out--; cpuc->lbr_entries[out].from = from; @@ -711,7 +712,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if (!(br_type & (1ULL << i))) continue; - v = x86_pmu.lbr_sel_map[i]; + v = x86_pmu.lbr.sel_map[i]; if (v == LBR_NOT_SUPP) return -EOPNOTSUPP; @@ -729,7 +730,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) * But the 10th bit LBR_CALL_STACK does not operate * in suppress mode. */ - reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); + reg->config = mask ^ (x86_pmu.lbr.sel_mask & ~LBR_CALL_STACK); if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && @@ -746,7 +747,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * no LBR on this PMU */ - if (!x86_pmu.lbr_nr) + if (!x86_pmu.lbr.nr) return -EOPNOTSUPP; /* @@ -759,7 +760,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * setup HW LBR filter, if any */ - if (x86_pmu.lbr_sel_map) + if (x86_pmu.lbr.sel_map) ret = intel_pmu_setup_hw_lbr_filter(event); return ret; @@ -1091,12 +1092,12 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { }; /* core */ -void __init intel_pmu_lbr_init_core(void) +static void intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 4; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_CORE_FROM; - x86_pmu.lbr_to = MSR_LBR_CORE_TO; + lbr->nr = 4; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_CORE_FROM; + lbr->to = MSR_LBR_CORE_TO; /* * SW branch filter usage: @@ -1105,15 +1106,15 @@ void __init intel_pmu_lbr_init_core(void) } /* nehalem/westmere */ -void __init intel_pmu_lbr_init_nhm(void) +static void intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; + lbr->nr = 16; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_NHM_FROM; + lbr->to = MSR_LBR_NHM_TO; - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = nhm_lbr_sel_map; /* * SW branch filter usage: @@ -1125,15 +1126,15 @@ void __init intel_pmu_lbr_init_nhm(void) } /* sandy bridge */ -void __init intel_pmu_lbr_init_snb(void) +static void intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; + lbr->nr = 16; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_NHM_FROM; + lbr->to = MSR_LBR_NHM_TO; - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = snb_lbr_sel_map; + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = snb_lbr_sel_map; /* * SW branch filter usage: @@ -1144,30 +1145,27 @@ void __init intel_pmu_lbr_init_snb(void) } /* haswell */ -void intel_pmu_lbr_init_hsw(void) +static void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; + lbr->nr = 16; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_NHM_FROM; + lbr->to = MSR_LBR_NHM_TO; - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - - if (lbr_from_signext_quirk_needed()) - static_branch_enable(&lbr_from_quirk_key); + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = hsw_lbr_sel_map; } /* skylake */ -__init void intel_pmu_lbr_init_skl(void) +static void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 32; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; + lbr->nr = 32; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_NHM_FROM; + lbr->to = MSR_LBR_NHM_TO; - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = hsw_lbr_sel_map; /* * SW branch filter usage: @@ -1178,23 +1176,12 @@ __init void intel_pmu_lbr_init_skl(void) } /* atom */ -void __init intel_pmu_lbr_init_atom(void) +static void intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr) { - /* - * only models starting at stepping 10 seems - * to have an operational LBR which can freeze - * on PMU interrupt - */ - if (boot_cpu_data.x86_model == 28 - && boot_cpu_data.x86_mask < 10) { - pr_cont("LBR disabled due to erratum"); - return; - } - - x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_CORE_FROM; - x86_pmu.lbr_to = MSR_LBR_CORE_TO; + lbr->nr = 8; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_CORE_FROM; + lbr->to = MSR_LBR_CORE_TO; /* * SW branch filter usage: @@ -1203,31 +1190,128 @@ void __init intel_pmu_lbr_init_atom(void) } /* slm */ -void __init intel_pmu_lbr_init_slm(void) +static void intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr) { - x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_CORE_FROM; - x86_pmu.lbr_to = MSR_LBR_CORE_TO; + lbr->nr = 8; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_CORE_FROM; + lbr->to = MSR_LBR_CORE_TO; - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = nhm_lbr_sel_map; /* * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* Knights Landing */ -void intel_pmu_lbr_init_knl(void) +static void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr) +{ + lbr->nr = 8; + lbr->tos = MSR_LBR_TOS; + lbr->from = MSR_LBR_NHM_FROM; + lbr->to = MSR_LBR_NHM_TO; + + lbr->sel_mask = LBR_SEL_MASK; + lbr->sel_map = snb_lbr_sel_map; +} + +static void __intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model) +{ + bool apply_tweaks = lbr == &x86_pmu.lbr; + + memset(lbr, 0, sizeof(struct x86_pmu_lbr)); + + if (family != 0x6) + return; + + switch (model) { + case INTEL_FAM6_CORE_YONAH: + break; + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + intel_pmu_lbr_init_core(lbr); + break; + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + intel_pmu_lbr_init_nhm(lbr); + break; + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: + /* + * only models starting at stepping 10 seems + * to have an operational LBR which can freeze + * on PMU interrupt + */ + if (apply_tweaks && + boot_cpu_data.x86_model == INTEL_FAM6_ATOM_PINEVIEW && + boot_cpu_data.x86_mask < 10) { + pr_cont("LBR disabled due to erratum"); + return; + } + intel_pmu_lbr_init_atom(lbr); + break; + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: + intel_pmu_lbr_init_slm(lbr); + break; + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: + case INTEL_FAM6_ATOM_GEMINI_LAKE: + lbr->pt_coexist = true; + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + intel_pmu_lbr_init_skl(lbr); + break; + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + intel_pmu_lbr_init_snb(lbr); + break; + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + lbr->double_abort = true; + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + intel_pmu_lbr_init_hsw(lbr); + if (apply_tweaks && lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); + break; + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + intel_pmu_lbr_init_knl(lbr); + break; + } +} + +void __init intel_pmu_lbr_init(void) { - x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = MSR_LBR_TOS; - x86_pmu.lbr_from = MSR_LBR_NHM_FROM; - x86_pmu.lbr_to = MSR_LBR_NHM_TO; + __intel_pmu_lbr_fill(&x86_pmu.lbr, boot_cpu_data.x86, + boot_cpu_data.x86_model); +} - x86_pmu.lbr_sel_mask = LBR_SEL_MASK; - x86_pmu.lbr_sel_map = snb_lbr_sel_map; +void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model) +{ + __intel_pmu_lbr_fill(lbr, family, model); } +EXPORT_SYMBOL_GPL(intel_pmu_lbr_fill); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f7aaadf..0c436ac 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -627,12 +627,7 @@ struct x86_pmu { /* * Intel LBR */ - unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ - int lbr_nr; /* hardware stack size */ - u64 lbr_sel_mask; /* LBR_SELECT valid bits */ - const int *lbr_sel_map; /* lbr_select mappings */ - bool lbr_double_abort; /* duplicated lbr aborts */ - bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + struct x86_pmu_lbr lbr; /* * Intel PT/LBR/BTS are exclusive @@ -711,8 +706,8 @@ extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) { - return x86_pmu.lbr_sel_map && - x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; + return x86_pmu.lbr.sel_map && + x86_pmu.lbr.sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; } DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -955,21 +950,9 @@ void intel_pmu_lbr_disable_all(void); void intel_pmu_lbr_read(void); -void intel_pmu_lbr_init_core(void); +void intel_pmu_lbr_init(void); -void intel_pmu_lbr_init_nhm(void); - -void intel_pmu_lbr_init_atom(void); - -void intel_pmu_lbr_init_slm(void); - -void intel_pmu_lbr_init_snb(void); - -void intel_pmu_lbr_init_hsw(void); - -void intel_pmu_lbr_init_skl(void); - -void intel_pmu_lbr_init_knl(void); +void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model); void intel_pmu_pebs_data_source_nhm(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f5408..6fc722f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -125,6 +125,15 @@ struct x86_pmu_capability { int events_mask_len; }; +struct x86_pmu_lbr { + unsigned long tos, from, to; /* MSR base regs */ + int nr; /* hardware stack size */ + u64 sel_mask; /* LBR_SELECT valid bits */ + const int *sel_map; /* lbr_select mappings */ + bool double_abort; /* duplicated lbr aborts */ + bool pt_coexist; /* (LBR|BTS) may coexist with PT */ +}; + /* * Fixed-purpose performance events: */ @@ -300,4 +309,6 @@ static inline void perf_check_microcode(void) { } #define arch_perf_out_copy_user copy_from_user_nmi +extern void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model); + #endif /* _ASM_X86_PERF_EVENT_H */ -- 2.1.4