From: Vikas Shivappa
To: vikas.shivappa@intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org, peterz@infradead.org,
	matt.fleming@intel.com, will.auld@intel.com, glenn.p.williamson@intel.com,
	kanaka.d.juvva@intel.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 6/9] x86/intel_rdt: Add support for cache bit mask management
Date: Thu, 6 Aug 2015 14:55:14 -0700
Message-Id: <1438898117-3692-7-git-send-email-vikas.shivappa@linux.intel.com>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1438898117-3692-1-git-send-email-vikas.shivappa@linux.intel.com>
References: <1438898117-3692-1-git-send-email-vikas.shivappa@linux.intel.com>

Add a file, l3_cbm, to the intel_rdt cgroup which represents the cache
capacity bit mask for the cgroup. Tasks in the cgroup are allowed to fill
the portion of the L3 cache represented by the cgroup's l3_cbm file. The
bit mask typically maps to ways in the cache, but the exact mapping is
hardware implementation specific.

The l3_cbm is backed by one of the IA32_L3_MASK_n MSRs, so any update to
l3_cbm results in an MSR write to the appropriate IA32_L3_MASK_n. The
IA32_L3_MASK_n MSRs are per package, but l3_cbm represents the global
value of that MSR across all packages.

When a child cgroup is created it inherits the CLOSid and the l3_cbm from
its parent. When a user changes the default l3_cbm for a cgroup, a new
CLOSid may be allocated if that l3_cbm is not already in use. If the new
l3_cbm is already in use, the reference count for that CLOSid <-> l3_cbm
pairing is incremented instead. Changing 'l3_cbm' may fail with -ENOSPC
once the kernel runs out of CLOSids. Users can create as many cgroups as
they want, but the number of different l3_cbm values in use at the same
time is limited by the maximum number of CLOSids. The kernel maintains a
CLOSid <-> l3_cbm table which counts the cgroups using each CLOSid.

Reusing CLOSids for cgroups with the same bitmask also has the following
advantages:
- It makes optimal use of the scarce CLOSids.
- During context switch, the PQR MSR is written only when a task with a
  different bitmask is scheduled in.
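For illustration, here is a rough usage sketch of the interface described
above. The mount point, the group name and the 'intel_rdt.' file prefix
under cgroupfs are assumptions for this example, not something mandated by
this patch:

	# Mount the intel_rdt cgroup controller (mount point is assumed).
	mount -t cgroup -o intel_rdt intel_rdt /sys/fs/cgroup/intel_rdt

	# Create a group and give it a contiguous four-bit capacity mask.
	mkdir /sys/fs/cgroup/intel_rdt/group1
	echo 0xf > /sys/fs/cgroup/intel_rdt/group1/intel_rdt.l3_cbm

	# A non-contiguous mask such as 0x5 is rejected with -EINVAL, and a
	# mask not used by any other group may fail with -ENOSPC once all
	# CLOSids are in use.
	echo 0x5 > /sys/fs/cgroup/intel_rdt/group1/intel_rdt.l3_cbm

Because groups with identical masks share a CLOSid, only a write that
introduces a new mask consumes a CLOSid and triggers IA32_L3_MASK_n writes
on each package.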
Signed-off-by: Vikas Shivappa
---
 arch/x86/include/asm/intel_rdt.h |   3 +
 arch/x86/kernel/cpu/intel_rdt.c  | 202 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index a887004..58bac91 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#define MAX_CBM_LENGTH			32
+#define IA32_L3_CBM_BASE		0xc90
+#define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
 
 struct rdt_subsys_info {
 	unsigned long *closmap;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 52e1fd6..115f136 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -34,6 +34,13 @@ static struct clos_cbm_table *cctable;
 static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
+/*
+ * Mask of CPUs for writing CBM values. We only need one CPU per-socket.
+ */
+static cpumask_t rdt_cpumask;
+
+#define rdt_for_each_child(pos_css, parent_ir)		\
+	css_for_each_child((pos_css), &(parent_ir)->css)
 
 static inline void closid_get(u32 closid)
 {
@@ -117,12 +124,192 @@ static void intel_rdt_css_free(struct cgroup_subsys_state *css)
 	mutex_unlock(&rdt_group_mutex);
 }
 
+static int intel_cache_alloc_cbm_read(struct seq_file *m, void *v)
+{
+	struct intel_rdt *ir = css_rdt(seq_css(m));
+
+	seq_printf(m, "%08lx\n", cctable[ir->closid].l3_cbm);
+
+	return 0;
+}
+
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+	unsigned long maxcbm = MAX_CBM_LENGTH;
+	unsigned long first_bit, zero_bit;
+
+	if (!var)
+		return false;
+
+	first_bit = find_first_bit(&var, maxcbm);
+	zero_bit = find_next_zero_bit(&var, maxcbm, first_bit);
+
+	if (find_next_bit(&var, maxcbm, zero_bit) < maxcbm)
+		return false;
+
+	return true;
+}
+
+static int cbm_validate(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+	struct cgroup_subsys_state *css;
+	struct intel_rdt *par, *c;
+	unsigned long *cbm_tmp;
+	int err = 0;
+
+	if (!cbm_is_contiguous(cbmvalue)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	par = parent_rdt(ir);
+	cbm_tmp = &cctable[par->closid].l3_cbm;
+	if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rcu_read_lock();
+	rdt_for_each_child(css, ir) {
+		c = css_rdt(css);
+		cbm_tmp = &cctable[c->closid].l3_cbm;
+		if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
+			rcu_read_unlock();
+			err = -EINVAL;
+			goto out_err;
+		}
+	}
+	rcu_read_unlock();
out_err:
+
+	return err;
+}
+
+static bool cbm_search(unsigned long cbm, u32 *closid)
+{
+	u32 maxid = boot_cpu_data.x86_cache_max_closid;
+	u32 i;
+
+	for (i = 0; i < maxid; i++) {
+		if (bitmap_equal(&cbm, &cctable[i].l3_cbm, MAX_CBM_LENGTH)) {
+			*closid = i;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void closcbm_map_dump(void)
+{
+	u32 i;
+
+	pr_debug("CBMMAP\n");
+	for (i = 0; i < boot_cpu_data.x86_cache_max_closid; i++) {
+		pr_debug("l3_cbm: 0x%x,clos_refcnt: %u\n",
+			 (unsigned int)cctable[i].l3_cbm, cctable[i].clos_refcnt);
+	}
+}
+
+static void cbm_cpu_update(void *info)
+{
+	u32 closid = (u32) info;
+
+	wrmsrl(CBM_FROM_INDEX(closid), cctable[closid].l3_cbm);
+}
+
+/*
+ * cbm_update_all() - Update the cache bit mask for all packages.
+ */
+static inline void cbm_update_all(u32 closid)
+{
+	on_each_cpu_mask(&rdt_cpumask, cbm_cpu_update, (void *)closid, 1);
+}
+
+/*
+ * intel_cache_alloc_cbm_write() - Validates and writes the
+ * cache bit mask(cbm) to the IA32_L3_MASK_n
+ * and also store the same in the cctable.
+ *
+ * CLOSids are reused for cgroups which have same bitmask.
+ * This helps to use the scant CLOSids optimally. This also
+ * implies that at context switch write to PQR-MSR is done
+ * only when a task with a different bitmask is scheduled in.
+ */
+static int intel_cache_alloc_cbm_write(struct cgroup_subsys_state *css,
+				       struct cftype *cft, u64 cbmvalue)
+{
+	u32 max_cbm = boot_cpu_data.x86_cache_max_cbm_len;
+	struct intel_rdt *ir = css_rdt(css);
+	u64 max_mask;
+	int err = 0;
+	u32 closid;
+
+	if (ir == &rdt_root_group)
+		return -EPERM;
+
+	/*
+	 * Need global mutex as cbm write may allocate a closid.
+	 */
+	mutex_lock(&rdt_group_mutex);
+
+	max_mask = (1ULL << max_cbm) - 1;
+	if (cbmvalue & ~max_mask) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cbmvalue == cctable[ir->closid].l3_cbm)
+		goto out;
+
+	err = cbm_validate(ir, cbmvalue);
+	if (err)
+		goto out;
+
+	/*
+	 * Try to get a reference for a different CLOSid and release the
+	 * reference to the current CLOSid.
+	 * Need to put down the reference here and get it back in case we
+	 * run out of closids. Otherwise we run into a problem when
+	 * we could be using the last closid that could have been available.
+	 */
+	closid_put(ir->closid);
+	if (cbm_search(cbmvalue, &closid)) {
+		ir->closid = closid;
+		closid_get(closid);
+	} else {
+		closid = ir->closid;
+		err = closid_alloc(ir);
+		if (err) {
+			closid_get(ir->closid);
+			goto out;
+		}
+
+		cctable[ir->closid].l3_cbm = cbmvalue;
+		cbm_update_all(ir->closid);
+	}
+	closcbm_map_dump();
+out:
+	mutex_unlock(&rdt_group_mutex);
+
+	return err;
+}
+
+static inline void rdt_cpumask_update(int cpu)
+{
+	static cpumask_t tmp;
+
+	cpumask_and(&tmp, &rdt_cpumask, topology_core_cpumask(cpu));
+	if (cpumask_empty(&tmp))
+		cpumask_set_cpu(cpu, &rdt_cpumask);
+}
+
 static int __init intel_rdt_late_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	static struct clos_cbm_table *cct;
 	u32 maxid, max_cbm_len;
-	int err = 0, size;
+	int err = 0, size, i;
 
 	if (!cpu_has(c, X86_FEATURE_CAT_L3)) {
 		rdt_root_group.css.ss->disabled = 1;
@@ -152,6 +339,9 @@ static int __init intel_rdt_late_init(void)
 	cct->l3_cbm = (1ULL << max_cbm_len) - 1;
 	cct->clos_refcnt = 1;
 
+	for_each_online_cpu(i)
+		rdt_cpumask_update(i);
+
 	pr_info("Intel cache allocation enabled\n");
 out_err:
 
@@ -160,8 +350,18 @@ out_err:
 
 late_initcall(intel_rdt_late_init);
 
+static struct cftype rdt_files[] = {
+	{
+		.name		= "l3_cbm",
+		.seq_show	= intel_cache_alloc_cbm_read,
+		.write_u64	= intel_cache_alloc_cbm_write,
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys intel_rdt_cgrp_subsys = {
 	.css_alloc		= intel_rdt_css_alloc,
 	.css_free		= intel_rdt_css_free,
+	.legacy_cftypes		= rdt_files,
 	.early_init		= 0,
 };
-- 
1.9.1