Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753925AbcCAXsE (ORCPT ); Tue, 1 Mar 2016 18:48:04 -0500 Received: from mga09.intel.com ([134.134.136.24]:38975 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753865AbcCAXsA (ORCPT ); Tue, 1 Mar 2016 18:48:00 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.22,524,1449561600"; d="scan'208";a="662132597" From: Vikas Shivappa To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com, tglx@linutronix.de, mingo@kernel.org, peterz@infradead.org, ravi.v.shankar@intel.com, tony.luck@intel.com, fenghua.yu@intel.com, h.peter.anvin@intel.com Subject: [PATCH 3/6] x86/mbm: Intel Memory B/W Monitoring enumeration and init Date: Tue, 1 Mar 2016 15:48:25 -0800 Message-Id: <1456876108-28770-4-git-send-email-vikas.shivappa@linux.intel.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1456876108-28770-1-git-send-email-vikas.shivappa@linux.intel.com> References: <1456876108-28770-1-git-send-email-vikas.shivappa@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9857 Lines: 284 The MBM init patch enumerates the Intel (Memory b/w monitoring)MBM and initializes the perf events and datastructures for monitoring the memory b/w. Its based on original patch series by Tony Luck and Kanaka Juvva. Memory bandwidth monitoring(MBM) provides OS/VMM a way to monitor bandwidth from one level of cache to another. The current patches support L3 external bandwidth monitoring. It supports both 'local bandwidth' and 'total bandwidth' monitoring for the socket. Local bandwidth measures the amount of data sent through the memory controller on the socket and total b/w measures the total system bandwidth. Extending the cache quality of service monitoring(CQM) we add four more events to the perf infrastructure: intel_cqm_llc/local_bytes - bytes sent through local socket memory controller intel_cqm_llc/total_bytes - total L3 external bytes sent intel_cqm_llc/local_bw - Current local b/w intel_cqm_llc/total_bw - current total b/w The tasks are associated with a Resouce Monitoring ID(RMID) just like in cqm and OS uses a MSR write to indicate the RMID of the task during scheduling. Reviewed-by: Tony Luck Signed-off-by: Vikas Shivappa --- arch/x86/include/asm/cpufeature.h | 2 + arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 157 ++++++++++++++++++++++++++++- 3 files changed, 158 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7ad8c94..9b4233e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -241,6 +241,8 @@ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fa05680..13af76e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -635,7 +635,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_F_1_EDX] = edx; - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { + if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || + ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || + (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 5666171..cf08a0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -15,6 +15,7 @@ static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ +static bool cqm_enabled, mbm_enabled; /** * struct intel_pqr_state - State cache for the PQR MSR @@ -42,6 +43,30 @@ struct intel_pqr_state { * interrupts disabled, which is sufficient for the protection. */ static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +/** + * struct sample - mbm event's (local or total) data + * @interval_start Time this interval began + * @interval_bytes #bytes in this interval + * @total_bytes #bytes since we began monitoring + * @prev_msr previous value of MSR + * @bandwidth bytes/sec in previous completed interval + */ +struct sample { + ktime_t interval_start; + u64 interval_bytes; + u64 total_bytes; + u64 prev_msr; + u64 bandwidth; +}; + +/* + * samples profiled for total memory bandwidth type events + */ +static struct sample *mbm_total; +/* + * samples profiled for local memory bandwidth type events + */ +static struct sample *mbm_local; /* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. @@ -1152,6 +1177,28 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); +EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02"); +EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1"); +EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB"); +EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6"); + +EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03"); +EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1"); +EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB"); +EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6"); + +EVENT_ATTR_STR(total_bw, intel_cqm_total_bw, "event=0x04"); +EVENT_ATTR_STR(total_bw.per-pkg, intel_cqm_total_bw_pkg, "1"); +EVENT_ATTR_STR(total_bw.unit, intel_cqm_total_bw_unit, "MB/sec"); +EVENT_ATTR_STR(total_bw.scale, intel_cqm_total_bw_scale, "1e-6"); +EVENT_ATTR_STR(total_bw.snapshot, intel_cqm_total_bw_snapshot, "1"); + +EVENT_ATTR_STR(local_bw, intel_cqm_local_bw, "event=0x05"); +EVENT_ATTR_STR(local_bw.per-pkg, intel_cqm_local_bw_pkg, "1"); +EVENT_ATTR_STR(local_bw.unit, intel_cqm_local_bw_unit, "MB/sec"); +EVENT_ATTR_STR(local_bw.scale, intel_cqm_local_bw_scale, "1e-6"); +EVENT_ATTR_STR(local_bw.snapshot, intel_cqm_local_bw_snapshot, "1"); + static struct attribute *intel_cqm_events_attr[] = { EVENT_PTR(intel_cqm_llc), EVENT_PTR(intel_cqm_llc_pkg), @@ -1161,9 +1208,58 @@ static struct attribute *intel_cqm_events_attr[] = { NULL, }; +static struct attribute *intel_mbm_events_attr[] = { + EVENT_PTR(intel_cqm_total_bytes), + EVENT_PTR(intel_cqm_local_bytes), + EVENT_PTR(intel_cqm_total_bw), + EVENT_PTR(intel_cqm_local_bw), + EVENT_PTR(intel_cqm_total_bytes_pkg), + EVENT_PTR(intel_cqm_local_bytes_pkg), + EVENT_PTR(intel_cqm_total_bw_pkg), + EVENT_PTR(intel_cqm_local_bw_pkg), + EVENT_PTR(intel_cqm_total_bytes_unit), + EVENT_PTR(intel_cqm_local_bytes_unit), + EVENT_PTR(intel_cqm_total_bw_unit), + EVENT_PTR(intel_cqm_local_bw_unit), + EVENT_PTR(intel_cqm_total_bytes_scale), + EVENT_PTR(intel_cqm_local_bytes_scale), + EVENT_PTR(intel_cqm_total_bw_scale), + EVENT_PTR(intel_cqm_local_bw_scale), + EVENT_PTR(intel_cqm_total_bw_snapshot), + EVENT_PTR(intel_cqm_local_bw_snapshot), + NULL, +}; + +static struct attribute *intel_cmt_mbm_events_attr[] = { + EVENT_PTR(intel_cqm_llc), + EVENT_PTR(intel_cqm_total_bytes), + EVENT_PTR(intel_cqm_local_bytes), + EVENT_PTR(intel_cqm_total_bw), + EVENT_PTR(intel_cqm_local_bw), + EVENT_PTR(intel_cqm_llc_pkg), + EVENT_PTR(intel_cqm_total_bytes_pkg), + EVENT_PTR(intel_cqm_local_bytes_pkg), + EVENT_PTR(intel_cqm_total_bw_pkg), + EVENT_PTR(intel_cqm_local_bw_pkg), + EVENT_PTR(intel_cqm_llc_unit), + EVENT_PTR(intel_cqm_total_bytes_unit), + EVENT_PTR(intel_cqm_local_bytes_unit), + EVENT_PTR(intel_cqm_total_bw_unit), + EVENT_PTR(intel_cqm_local_bw_unit), + EVENT_PTR(intel_cqm_llc_scale), + EVENT_PTR(intel_cqm_total_bytes_scale), + EVENT_PTR(intel_cqm_local_bytes_scale), + EVENT_PTR(intel_cqm_total_bw_scale), + EVENT_PTR(intel_cqm_local_bw_scale), + EVENT_PTR(intel_cqm_llc_snapshot), + EVENT_PTR(intel_cqm_total_bw_snapshot), + EVENT_PTR(intel_cqm_local_bw_snapshot), + NULL, +}; + static struct attribute_group intel_cqm_events_group = { .name = "events", - .attrs = intel_cqm_events_attr, + .attrs = NULL, }; PMU_FORMAT_ATTR(event, "config:0-7"); @@ -1320,12 +1416,47 @@ static const struct x86_cpu_id intel_cqm_match[] = { {} }; +static const struct x86_cpu_id intel_mbm_local_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL }, + {} +}; + +static const struct x86_cpu_id intel_mbm_total_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL }, + {} +}; + +static int intel_mbm_init(void) +{ + int ret = 0, array_size, maxid = cqm_max_rmid + 1; + + array_size = sizeof(struct sample) * maxid * topology_max_packages(); + mbm_local = kmalloc(array_size, GFP_KERNEL); + if (!mbm_local) + return -ENOMEM; + + mbm_total = kmalloc(array_size, GFP_KERNEL); + if (!mbm_total) { + kfree(mbm_local); + ret = -ENOMEM; + } + + return ret; +} + static int __init intel_cqm_init(void) { char *str = NULL, scale[20]; int i, cpu, ret; - if (!x86_match_cpu(intel_cqm_match)) + if (x86_match_cpu(intel_cqm_match)) + cqm_enabled = true; + + if (x86_match_cpu(intel_mbm_local_match) && + x86_match_cpu(intel_mbm_total_match)) + mbm_enabled = true; + + if (!cqm_enabled && !mbm_enabled) return -ENODEV; cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale; @@ -1382,12 +1513,27 @@ static int __init intel_cqm_init(void) cqm_pick_event_reader(i); } + if (mbm_enabled) + ret = intel_mbm_init(); + if (ret && !cqm_enabled) + goto out; + + if (cqm_enabled && mbm_enabled) + intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr; + else if (!cqm_enabled && mbm_enabled) + intel_cqm_events_group.attrs = intel_mbm_events_attr; + else if (cqm_enabled && !mbm_enabled) + intel_cqm_events_group.attrs = intel_cqm_events_attr; + ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); if (ret) { pr_err("Intel CQM perf registration failed: %d\n", ret); goto out; } else { - pr_info("Intel CQM monitoring enabled\n"); + if (cqm_enabled) + pr_info("Intel CQM monitoring enabled\n"); + if (mbm_enabled) + pr_info("Intel MBM enabled\n"); } /* @@ -1397,8 +1543,11 @@ static int __init intel_cqm_init(void) __perf_cpu_notifier(intel_cqm_cpu_notifier); out: cpu_notifier_register_done(); - if (ret) + if (ret) { + mbm_enabled = false; + cqm_enabled = false; kfree(str); + } return ret; } -- 1.9.1