Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1762318AbcLPXOY (ORCPT ); Fri, 16 Dec 2016 18:14:24 -0500 Received: from mga02.intel.com ([134.134.136.20]:36940 "EHLO mga02.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757808AbcLPXNg (ORCPT ); Fri, 16 Dec 2016 18:13:36 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.33,360,1477983600"; d="scan'208";a="40673357" From: Vikas Shivappa To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com Cc: linux-kernel@vger.kernel.org, x86@kernel.org, tglx@linutronix.de, peterz@infradead.org, ravi.v.shankar@intel.com, tony.luck@intel.com, fenghua.yu@intel.com, andi.kleen@intel.com, davidcc@google.com, eranian@google.com, hpa@zytor.com Subject: [PATCH 11/14] x86/cqm: Add failure on open and read Date: Fri, 16 Dec 2016 15:13:05 -0800 Message-Id: <1481929988-31569-12-git-send-email-vikas.shivappa@linux.intel.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1481929988-31569-1-git-send-email-vikas.shivappa@linux.intel.com> References: <1481929988-31569-1-git-send-email-vikas.shivappa@linux.intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9839 Lines: 355 To provide reliable output to the user, cqm returns an error when it does not have enough RMIDs to monitor, depending upon the mode the user chooses. This also takes care to not overuse RMIDs. The default is LAZY mode. NOLAZY mode: This patch adds a file mon_mask in the perf_cgroup which indicates the packages for which the user wants guaranteed monitoring. For such cgroup events, RMIDs are assigned at event creation and we fail if enough RMIDs are not present. This is basically a NOLAZY allocation of RMIDs. This mode can be used in real time scenarios where the user is sure that the tasks that are monitored are scheduled. LAZY mode: If the user did not enable the NOLAZY mode, RMIDs are allocated only when tasks are actually scheduled. 
Upon failure to obtain RMIDs, a failure is indicated on read. A typical use case for this mode is to start monitoring cgroups which do not yet have any tasks in them, where such cgroups are part of a large number of monitored cgroups - that way we do not overuse RMIDs. The patch is based on David Carrillo-Cisneros' patches in the cqm2 series. Signed-off-by: Vikas Shivappa --- arch/x86/events/intel/cqm.c | 145 +++++++++++++++++++++++++++++--- arch/x86/events/intel/cqm.h | 1 + arch/x86/include/asm/intel_rdt_common.h | 7 +- 3 files changed, 141 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 85162aa..e0d4017 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -78,6 +78,11 @@ struct sample { */ static cpumask_t cqm_cpumask; +/* + * Mask of online sockets. + */ +static cpumask_t cqm_pkgmask; + struct pkg_data **cqm_pkgs_data; struct cgrp_cqm_info cqm_rootcginfo; @@ -110,6 +115,14 @@ bool __rmid_valid(u32 rmid) return true; } +static inline bool __rmid_valid_raw(u32 rmid) +{ + if (rmid > cqm_max_rmid) + return false; + + return true; +} + static u64 __rmid_read(u32 rmid) { u64 val; @@ -159,16 +172,19 @@ u32 __get_rmid(int domain) { struct list_head *cqm_flist; struct cqm_rmid_entry *entry; + struct pkg_data *pdata; lockdep_assert_held(&cache_lock); - cqm_flist = &cqm_pkgs_data[domain]->cqm_rmid_free_lru; + pdata = cqm_pkgs_data[domain]; + cqm_flist = &pdata->cqm_rmid_free_lru; if (list_empty(cqm_flist)) return INVALID_RMID; entry = list_first_entry(cqm_flist, struct cqm_rmid_entry, list); list_del(&entry->list); + pdata->rmid_used_count++; return entry->rmid; } @@ -344,6 +360,7 @@ static void __intel_cqm_rmid_reuse(void) */ list_del(&entry->list); list_add_tail(&entry->list, flist); + pdata->rmid_used_count--; } end: @@ -607,6 +624,33 @@ static int cqm_assign_rmid(struct perf_event *event, u32 *rmid) return 0; } +static inline int check_min_rmids(struct cgrp_cqm_info *cqm_info) +{ + int 
pkg = cpumask_first_and(&cqm_info->mon_mask, &cqm_pkgmask); + + for (; pkg < nr_cpu_ids; + pkg = cpumask_next_and(pkg, &cqm_info->mon_mask, &cqm_pkgmask)) { + if (cqm_pkgs_data[pkg]->rmid_used_count >= cqm_max_rmid) + return -EINVAL; + } + + return 0; +} + +static inline void alloc_min_rmids(struct cgrp_cqm_info *cqm_info) +{ + int pkg = cpumask_first_and(&cqm_info->mon_mask, &cqm_pkgmask); + u32 rmid; + + for ( ; pkg < nr_cpu_ids; + pkg = cpumask_next_and(pkg, &cqm_info->mon_mask, &cqm_pkgmask)) { + + rmid = __get_rmid(pkg); + if (__rmid_valid(rmid)) + cqm_info->rmid[pkg] = rmid; + } +} + /* * Find a group and setup RMID. * @@ -642,6 +686,14 @@ static int intel_cqm_setup_event(struct perf_event *event, event->hw.cqm_rmid = cqm_info->rmid; return 0; } + + /* + * For cgroups which must have RMIDs check if enough + * RMIDs are available. + */ + if (cpumask_weight(&cqm_info->mon_mask) && + check_min_rmids(cqm_info)) + return -EINVAL; } #endif @@ -656,6 +708,11 @@ static int intel_cqm_setup_event(struct perf_event *event, cqm_assign_rmid(event, event->hw.cqm_rmid); +#ifdef CONFIG_CGROUP_PERF + if (event->cgrp && cpumask_weight(&cqm_info->mon_mask)) + alloc_min_rmids(cqm_info); +#endif + return 0; } @@ -896,16 +953,16 @@ static u64 intel_cqm_event_count(struct perf_event *event) return __perf_event_count(event); } -void alloc_needed_pkg_rmid(u32 *cqm_rmid) +u32 alloc_needed_pkg_rmid(u32 *cqm_rmid) { unsigned long flags; u32 rmid; if (WARN_ON(!cqm_rmid)) - return; + return -EINVAL; if (cqm_rmid == cqm_rootcginfo.rmid || cqm_rmid[pkg_id]) - return; + return 0; raw_spin_lock_irqsave(&cache_lock, flags); @@ -914,6 +971,8 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid) cqm_rmid[pkg_id] = rmid; raw_spin_unlock_irqrestore(&cache_lock, flags); + + return rmid; } static void intel_cqm_event_start(struct perf_event *event, int mode) @@ -925,10 +984,8 @@ static void intel_cqm_event_start(struct perf_event *event, int mode) event->hw.cqm_state &= ~PERF_HES_STOPPED; - if 
(is_task_event(event)) { - alloc_needed_pkg_rmid(event->hw.cqm_rmid); + if (is_task_event(event)) state->next_task_rmid = event->hw.cqm_rmid[pkg_id]; - } } static void intel_cqm_event_stop(struct perf_event *event, int mode) @@ -944,11 +1001,19 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) static int intel_cqm_event_add(struct perf_event *event, int mode) { + u32 rmid; + event->hw.cqm_state = PERF_HES_STOPPED; - if ((mode & PERF_EF_START)) + /* + * If Lazy RMID alloc fails indicate the error to the user. + */ + if ((mode & PERF_EF_START)) { + rmid = alloc_needed_pkg_rmid(event->hw.cqm_rmid); + if (!__rmid_valid_raw(rmid)) + return -EINVAL; intel_cqm_event_start(event, mode); - + } return 0; } @@ -1426,12 +1491,67 @@ static int cqm_cont_monitoring_write_u64(struct cgroup_subsys_state *css, return ret; } +static int cqm_mon_mask_seq_show(struct seq_file *sf, void *v) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&cache_lock, flags); + seq_printf(sf, "%*pbl\n", + cpumask_pr_args(&css_to_cqm_info(seq_css(sf))->mon_mask)); + raw_spin_unlock_irqrestore(&cache_lock, flags); + + return 0; +} + +static ssize_t cqm_mon_mask_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + cpumask_var_t tmp_cpus, tmp_cpus1; + struct cgrp_cqm_info *cqm_info; + unsigned long flags; + int ret = 0; + + buf = strstrip(buf); + + if (!zalloc_cpumask_var(&tmp_cpus, GFP_KERNEL) || + !zalloc_cpumask_var(&tmp_cpus1, GFP_KERNEL)) { + ret = -ENOMEM; + goto out; + } + + ret = cpulist_parse(buf, tmp_cpus); + if (ret) + goto out; + + if (cpumask_andnot(tmp_cpus1, tmp_cpus, &cqm_pkgmask)) { + ret = -EINVAL; + goto out; + } + + raw_spin_lock_irqsave(&cache_lock, flags); + cqm_info = css_to_cqm_info(of_css(of)); + cpumask_copy(&cqm_info->mon_mask, tmp_cpus); + raw_spin_unlock_irqrestore(&cache_lock, flags); + +out: + free_cpumask_var(tmp_cpus); + free_cpumask_var(tmp_cpus1); + + return ret ?: nbytes; +} + struct cftype 
perf_event_cgrp_arch_subsys_cftypes[] = { { .name = "cqm_cont_monitoring", .read_u64 = cqm_cont_monitoring_read_u64, .write_u64 = cqm_cont_monitoring_write_u64, }, + { + .name = "cqm_mon_mask", + .seq_show = cqm_mon_mask_seq_show, + .write = cqm_mon_mask_write, + .max_write_len = (100U + 6 * NR_CPUS), + }, {} /* terminate */ }; @@ -1449,8 +1569,10 @@ static inline void cqm_pick_event_reader(int cpu) /* First online cpu in package becomes the reader */ reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu)); - if (reader >= nr_cpu_ids) + if (reader >= nr_cpu_ids) { cpumask_set_cpu(cpu, &cqm_cpumask); + cpumask_set_cpu(pkg_id, &cqm_pkgmask); + } } static int intel_cqm_cpu_starting(unsigned int cpu) @@ -1482,6 +1604,8 @@ static int intel_cqm_cpu_exit(unsigned int cpu) if (target < nr_cpu_ids) cpumask_set_cpu(target, &cqm_cpumask); + else + cpumask_clear_cpu(pkg_id, &cqm_pkgmask); return 0; } @@ -1562,6 +1686,7 @@ static int pkg_data_init_cpu(int cpu) */ entry = __rmid_entry(0, curr_pkgid); list_del(&entry->list); + pkg_data->rmid_used_count++; cqm_rootcginfo.rmid = kzalloc(sizeof(u32) * cqm_socket_max, GFP_KERNEL); if (!cqm_rootcginfo.rmid) { diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h index 4415497..063956d 100644 --- a/arch/x86/events/intel/cqm.h +++ b/arch/x86/events/intel/cqm.h @@ -32,6 +32,7 @@ struct pkg_data { atomic_t reuse_scheduled; int rmid_work_cpu; + int rmid_used_count; }; #endif #endif diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h index 6424322..39fa4fb 100644 --- a/arch/x86/include/asm/intel_rdt_common.h +++ b/arch/x86/include/asm/intel_rdt_common.h @@ -29,7 +29,7 @@ struct intel_pqr_state { u32 __get_rmid(int domain); bool __rmid_valid(u32 rmid); -void alloc_needed_pkg_rmid(u32 *cqm_rmid); +u32 alloc_needed_pkg_rmid(u32 *cqm_rmid); struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk); extern struct cgrp_cqm_info cqm_rootcginfo; @@ -42,7 +42,9 @@ struct 
intel_pqr_state { * @cont_mon Continuous monitoring flag * @mon_enabled Whether monitoring is enabled * @level Level in the cgroup tree. Root is level 0. - * @rmid The rmids of the cgroup. + * @rmid The rmids of the cgroup. + * @mon_mask Package mask to indicate packages which + * must have RMIDs (guaranteed cqm monitoring). * @mfa 'Monitoring for ancestor' points to the cqm_info * of the ancestor the cgroup is monitoring for. 'Monitoring for ancestor' * means you will use an ancestors RMID at sched_in if you are @@ -79,6 +81,7 @@ struct cgrp_cqm_info { bool mon_enabled; int level; u32 *rmid; + struct cpumask mon_mask; struct cgrp_cqm_info *mfa; struct list_head tskmon_rlist; }; -- 1.9.1