Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753913AbbGBOyt (ORCPT ); Thu, 2 Jul 2015 10:54:49 -0400 Received: from userp1040.oracle.com ([156.151.31.81]:45749 "EHLO userp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751236AbbGBOyA (ORCPT ); Thu, 2 Jul 2015 10:54:00 -0400 From: Boris Ostrovsky To: david.vrabel@citrix.com, konrad.wilk@oracle.com Cc: dietmar.hahn@ts.fujitsu.com, jbeulich@suse.com, kevin.tian@intel.com, xen-devel@lists.xen.org, linux-kernel@vger.kernel.org, boris.ostrovsky@oracle.com Subject: [PATCH v5 6/6] xen/PMU: PMU emulation code Date: Thu, 2 Jul 2015 10:53:36 -0400 Message-Id: <1435848816-3323-7-git-send-email-boris.ostrovsky@oracle.com> X-Mailer: git-send-email 1.8.1.4 In-Reply-To: <1435848816-3323-1-git-send-email-boris.ostrovsky@oracle.com> References: <1435848816-3323-1-git-send-email-boris.ostrovsky@oracle.com> X-Source-IP: userv0022.oracle.com [156.151.31.74] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9355 Lines: 332 Add PMU emulation code that runs when we are processing a PMU interrupt. This code will allow us not to trap to hypervisor on each MSR/LVTPC access (of which there may be quite a few in the handler). Signed-off-by: Boris Ostrovsky --- arch/x86/xen/pmu.c | 214 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 185 insertions(+), 29 deletions(-) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 69a0b68..4c09167 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -13,11 +13,20 @@ /* x86_pmu.handle_irq definition */ #include "../kernel/cpu/perf_event.h" +#define XENPMU_IRQ_PROCESSING 1 +struct xenpmu { + /* Shared page between hypervisor and domain */ + struct xen_pmu_data *xenpmu_data; -/* Shared page between hypervisor and domain */ -DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared); -#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id()) + uint8_t flags; +}; +DEFINE_PER_CPU(struct xenpmu, xenpmu_shared); +#define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data) +#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags) +/* Macro for computing address of a PMU MSR bank */ +#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \ + (uintptr_t)ctxt->field)) /* AMD PMU */ #define F15H_NUM_COUNTERS 6 @@ -169,19 +178,124 @@ static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) } } -bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) +static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type, + int index, bool is_read) { + uint64_t *reg = NULL; + struct xen_pmu_intel_ctxt *ctxt; + uint64_t *fix_counters; + struct xen_pmu_cntr_pair *arch_cntr_pair; + struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + uint8_t xenpmu_flags = get_xenpmu_flags(); + + if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) + return false; + + ctxt = &xenpmu_data->pmu.c.intel; + + switch (msr) { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + reg = &ctxt->global_ovf_ctrl; + break; + case MSR_CORE_PERF_GLOBAL_STATUS: + reg = &ctxt->global_status; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + reg = &ctxt->global_ctrl; + break; + case MSR_CORE_PERF_FIXED_CTR_CTRL: + reg = &ctxt->fixed_ctrl; + break; + default: + switch (type) { + case MSR_TYPE_COUNTER: + fix_counters = field_offset(ctxt, fixed_counters); + reg = &fix_counters[index]; + break; + case MSR_TYPE_ARCH_COUNTER: + arch_cntr_pair = field_offset(ctxt, arch_counters); + reg = &arch_cntr_pair[index].counter; + break; + case MSR_TYPE_ARCH_CTRL: + arch_cntr_pair = field_offset(ctxt, arch_counters); + reg = &arch_cntr_pair[index].control; + break; + default: + return false; + } + } + + if (reg) { + if (is_read) + *val = *reg; + else { + *reg = *val; + + if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL) + ctxt->global_status &= (~(*val)); + } + return true; + } + + return false; +} + +static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) +{ + uint64_t *reg = NULL; + int i, off = 0; + struct xen_pmu_amd_ctxt *ctxt; + uint64_t *counter_regs, *ctrl_regs; + struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + uint8_t xenpmu_flags = get_xenpmu_flags(); + + if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) + return false; + + if (k7_counters_mirrored && + ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3))) + msr = get_fam15h_addr(msr); + + ctxt = &xenpmu_data->pmu.c.amd; + for (i = 0; i < amd_num_counters; i++) { + if (msr == amd_ctrls_base + off) { + ctrl_regs = field_offset(ctxt, ctrls); + reg = &ctrl_regs[i]; + break; + } else if (msr == amd_counters_base + off) { + counter_regs = field_offset(ctxt, counters); + reg = &counter_regs[i]; + break; + } + off += amd_msr_step; + } + + if (reg) { + if (is_read) + *val = *reg; + else + *reg = *val; + + return true; + } + return false; +} + +bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) +{ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { if (is_amd_pmu_msr(msr)) { - *val = native_read_msr_safe(msr, err); + if (!xen_amd_pmu_emulate(msr, val, 1)) + *val = native_read_msr_safe(msr, err); return true; } } else { int type, index; if (is_intel_pmu_msr(msr, &type, &index)) { - *val = native_read_msr_safe(msr, err); + if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) + *val = native_read_msr_safe(msr, err); return true; } } @@ -191,16 +305,20 @@ bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) { + uint64_t val = ((uint64_t)high << 32) | low; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { if (is_amd_pmu_msr(msr)) { - *err = native_write_msr_safe(msr, low, high); + if (!xen_amd_pmu_emulate(msr, &val, 0)) + *err = native_write_msr_safe(msr, low, high); return true; } } else { int type, index; if (is_intel_pmu_msr(msr, &type, &index)) { - *err = native_write_msr_safe(msr, low, high); + if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) + *err = native_write_msr_safe(msr, low, high); return true; } } @@ -210,24 +328,52 @@ bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) unsigned long long xen_amd_read_pmc(int counter) { - uint32_t msr; - int err; + struct xen_pmu_amd_ctxt *ctxt; + uint64_t *counter_regs; + struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + uint8_t xenpmu_flags = get_xenpmu_flags(); + + if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) { + uint32_t msr; + int err; - msr = amd_counters_base + (counter * amd_msr_step); - return native_read_msr_safe(msr, &err); + msr = amd_counters_base + (counter * amd_msr_step); + return native_read_msr_safe(msr, &err); + } + + ctxt = &xenpmu_data->pmu.c.amd; + counter_regs = field_offset(ctxt, counters); + return counter_regs[counter]; } unsigned long long xen_intel_read_pmc(int counter) { - int err; - uint32_t msr; + struct xen_pmu_intel_ctxt *ctxt; + uint64_t *fixed_counters; + struct xen_pmu_cntr_pair *arch_cntr_pair; + struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + uint8_t xenpmu_flags = get_xenpmu_flags(); - if (counter & (1<pmu.c.intel; + if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) { + fixed_counters = field_offset(ctxt, fixed_counters); + return fixed_counters[counter & 0xffff]; + } + + arch_cntr_pair = field_offset(ctxt, arch_counters); + return arch_cntr_pair[counter].counter; } unsigned long long xen_read_pmc(int counter) @@ -249,6 +395,10 @@ int pmu_apic_update(uint32_t val) } xenpmu_data->pmu.l.lapic_lvtpc = val; + + if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING) + return 0; + ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL); return ret; @@ -329,29 +479,34 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) int err, ret = IRQ_NONE; struct pt_regs regs; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + uint8_t xenpmu_flags = get_xenpmu_flags(); if (!xenpmu_data) { WARN_ONCE(1, "%s: pmudata not initialized\n", __func__); return ret; } - err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL); - if (err) { - WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err); - return ret; - } - + this_cpu_ptr(&xenpmu_shared)->flags = + xenpmu_flags | XENPMU_IRQ_PROCESSING; xen_convert_regs(&xenpmu_data->pmu.r.regs, ®s, xenpmu_data->pmu.pmu_flags); if (x86_pmu.handle_irq(®s)) ret = IRQ_HANDLED; + /* Write out cached context to HW */ + err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL); + this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags; + if (err) { + WARN_ONCE(1, "%s failed hypercall, err: %d\n", __func__, err); + return IRQ_NONE; + } + return ret; } bool is_xen_pmu(int cpu) { - return (per_cpu(xenpmu_shared, cpu) != NULL); + return (get_xenpmu_data() != NULL); } void xen_pmu_init(int cpu) @@ -381,7 +536,8 @@ void xen_pmu_init(int cpu) if (err) goto fail; - per_cpu(xenpmu_shared, cpu) = xenpmu_data; + per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; + per_cpu(xenpmu_shared, cpu).flags = 0; if (cpu == 0) { perf_register_guest_info_callbacks(&xen_guest_cbs); @@ -411,6 +567,6 @@ void xen_pmu_finish(int cpu) (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp); - free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0); - per_cpu(xenpmu_shared, cpu) = NULL; + free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0); + per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL; } -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/