Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751952AbcDOPIs (ORCPT ); Fri, 15 Apr 2016 11:08:48 -0400 Received: from mga09.intel.com ([134.134.136.24]:57377 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751879AbcDOPIq (ORCPT ); Fri, 15 Apr 2016 11:08:46 -0400 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.24,487,1455004800"; d="scan'208";a="955755006" From: kan.liang@intel.com To: peterz@infradead.org, tglx@linutronix.de, linux-kernel@vger.kernel.org Cc: ak@linux.intel.com, Kan Liang Subject: [PATCH 1/1] x86, perf: Add Goldmont support Date: Fri, 15 Apr 2016 00:42:47 -0700 Message-Id: <1460706167-45320-1-git-send-email-kan.liang@intel.com> X-Mailer: git-send-email 2.5.0 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8165 Lines: 262 From: Kan Liang Add perf core PMU support for Intel Goldmont CPU cores. - The init code is based on Silvermont. - There is a new cache event list, based on the Silvermont cache event list. - Goldmont has 32 LBR entries. It also uses new LBRv6 format, which report the cycle information using upper 16-bit of the LBR_TO. - It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS for precise cycles. For details, please refer to the latest SDM058. http://www.intel.com/content/dam/www/public/us/en/documents/manuals/ 64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf? cache=true Signed-off-by: Kan Liang --- arch/x86/events/intel/core.c | 157 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/events/intel/ds.c | 6 ++ arch/x86/events/intel/lbr.c | 13 +++- arch/x86/events/perf_event.h | 2 + 4 files changed, 177 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 68fa55b..931c13a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; +static struct extra_reg intel_glm_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1), + EVENT_EXTRA_END +}; + +#define GLM_DEMAND_DATA_RD BIT_ULL(0) +#define GLM_DEMAND_RFO BIT_ULL(1) +#define GLM_ANY_RESPONSE BIT_ULL(16) +#define GLM_SNP_NONE_OR_MISS BIT_ULL(33) +#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD +#define GLM_DEMAND_WRITE GLM_DEMAND_RFO +#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) +#define GLM_LLC_ACCESS GLM_ANY_RESPONSE +#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM) +#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM) + +static __initconst const u64 glm_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ + [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ + [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ + [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +static __initconst const u64 glm_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_READ| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_READ| + GLM_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_WRITE| + GLM_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_ACCESS, + [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH| + GLM_LLC_MISS, + }, + }, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -3456,6 +3590,29 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; + case 92: /* 14nm Atom "Goldmont" */ + case 95: /* 14nm Atom "Goldmont Denverton" */ + memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; + x86_pmu.extra_regs = intel_glm_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + * :pp is identical to :ppp + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + pr_cont("Goldmont events, "); + break; + case 37: /* 32nm Westmere */ case 44: /* 32nm Westmere-EP */ case 47: /* 32nm Westmere-EX */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8584b90..7ce9f3f 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_glm_pebs_event_constraints[] = { + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6c3b7c1..ad26ca7 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -14,7 +14,8 @@ enum { LBR_FORMAT_EIP_FLAGS = 0x03, LBR_FORMAT_EIP_FLAGS2 = 0x04, LBR_FORMAT_INFO = 0x05, - LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO, + LBR_FORMAT_TIME = 0x06, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, }; static enum { @@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) abort = !!(info & LBR_INFO_ABORT); cycles = (info & LBR_INFO_CYCLES); } + + if (lbr_format == LBR_FORMAT_TIME) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + cycles = ((to >> 48) & LBR_INFO_CYCLES); + + to = (u64)((((s64)to) << 16) >> 16); + } + if (lbr_flags & LBR_EIP_FLAGS) { mis = !!(from & LBR_FROM_FLAG_MISPRED); pred = !mis; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ad4dc7f..8b78481 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -859,6 +859,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[]; extern struct event_constraint intel_slm_pebs_event_constraints[]; +extern struct event_constraint intel_glm_pebs_event_constraints[]; + extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; -- 2.5.0