Each intel processor trace table of physical addresses (ToPA) entry
has an INT bit. If this bit is set, the processor will signal a
performance-monitoring interrupt (PMI) when the corresponding trace
output region is filled. This patch set will inject a PMI for Intel
Processor Trace when ToPA buffer is filled.
Luwei Kang (3):
perf/x86/intel/pt: Move pt structure to global header
perf/x86/intel/pt: Inject PMI for KVM guest
KVM: x86: Add support of clear Trace_ToPA_PMI status
arch/x86/events/intel/pt.c | 12 +++++++++++-
arch/x86/events/intel/pt.h | 38 -------------------------------------
arch/x86/include/asm/intel_pt.h | 41 ++++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/asm/msr-index.h | 8 ++++++++
arch/x86/kvm/vmx/pmu_intel.c | 8 +++++++-
arch/x86/kvm/x86.h | 6 ++++++
7 files changed, 74 insertions(+), 40 deletions(-)
--
1.8.3.1
Intel PT structure (struct pt) is in a private header.
Move it (and sub structure) to a global header so that
it can be accessible from KVM code.
The definition of perf_output_handle structure included
in "linux/perf_event.h".
Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/events/intel/pt.h | 38 --------------------------------------
arch/x86/include/asm/intel_pt.h | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+), 38 deletions(-)
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 269e15a..964948f 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -93,42 +93,4 @@ struct pt_buffer {
struct topa_entry *topa_index[0];
};
-#define PT_FILTERS_NUM 4
-
-/**
- * struct pt_filter - IP range filter configuration
- * @msr_a: range start, goes to RTIT_ADDRn_A
- * @msr_b: range end, goes to RTIT_ADDRn_B
- * @config: 4-bit field in RTIT_CTL
- */
-struct pt_filter {
- unsigned long msr_a;
- unsigned long msr_b;
- unsigned long config;
-};
-
-/**
- * struct pt_filters - IP range filtering context
- * @filter: filters defined for this context
- * @nr_filters: number of defined filters in the @filter array
- */
-struct pt_filters {
- struct pt_filter filter[PT_FILTERS_NUM];
- unsigned int nr_filters;
-};
-
-/**
- * struct pt - per-cpu pt context
- * @handle: perf output handle
- * @filters: last configured filters
- * @handle_nmi: do handle PT PMI on this cpu, there's an active event
- * @vmx_on: 1 if VMX is ON on this cpu
- */
-struct pt {
- struct perf_output_handle handle;
- struct pt_filters filters;
- int handle_nmi;
- int vmx_on;
-};
-
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 634f99b..ee960fb 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_INTEL_PT_H
#define _ASM_X86_INTEL_PT_H
+#include <linux/perf_event.h>
+
#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
@@ -24,6 +26,44 @@ enum pt_capabilities {
PT_CAP_psb_periods,
};
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a: range start, goes to RTIT_ADDRn_A
+ * @msr_b: range end, goes to RTIT_ADDRn_B
+ * @config: 4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned long config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter: filters defined for this context
+ * @nr_filters: number of defined filters in the @filter array
+ */
+struct pt_filters {
+ struct pt_filter filter[PT_FILTERS_NUM];
+ unsigned int nr_filters;
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle: perf output handle
+ * @filters: last configured filters
+ * @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
+ */
+struct pt {
+ struct perf_output_handle handle;
+ struct pt_filters filters;
+ int handle_nmi;
+ int vmx_on;
+};
+
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
--
1.8.3.1
Inject a PMI for KVM guest when Intel PT working
in Host-Guest mode and Guest ToPA entry memory buffer
was completely filled.
The definition of ‘kvm_make_request’ and ‘KVM_REQ_PMI’
depend on "linux/kvm_host.h" header.
Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/events/intel/pt.c | 12 +++++++++++-
arch/x86/include/asm/intel_pt.h | 1 +
arch/x86/include/asm/msr-index.h | 4 ++++
arch/x86/kvm/x86.h | 6 ++++++
4 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 9494ca6..09375bd 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/device.h>
+#include <linux/kvm_host.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
@@ -33,7 +34,8 @@
#include "../perf_event.h"
#include "pt.h"
-static DEFINE_PER_CPU(struct pt, pt_ctx);
+DEFINE_PER_CPU(struct pt, pt_ctx);
+EXPORT_PER_CPU_SYMBOL_GPL(pt_ctx);
static struct pt_pmu pt_pmu;
@@ -1260,6 +1262,14 @@ void intel_pt_interrupt(void)
struct pt_buffer *buf;
struct perf_event *event = pt->handle.event;
+ if (pt->vcpu) {
+ /* Inject PMI to Guest */
+ kvm_make_request(KVM_REQ_PMI, pt->vcpu);
+ __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+ (unsigned long *)&pt->vcpu->arch.pmu.global_status);
+ return;
+ }
+
/*
* There may be a dangling PT bit in the interrupt status register
* after PT has been disabled by pt_event_stop(). Make sure we don't
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index ee960fb..32da2e9 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -62,6 +62,7 @@ struct pt {
struct pt_filters filters;
int handle_nmi;
int vmx_on;
+ struct kvm_vcpu *vcpu;
};
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8e40c24..ae01fb0 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -775,6 +775,10 @@
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+/* PERF_GLOBAL_OVF_CTL bits */
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 224cd0a..a9ee498 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
+#include <asm/intel_pt.h>
#include "kvm_cache_regs.h"
#define KVM_DEFAULT_PLE_GAP 128
@@ -331,15 +332,20 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm)
}
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+DECLARE_PER_CPU(struct pt, pt_ctx);
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, vcpu);
+ if (kvm_x86_ops->pt_supported())
+ this_cpu_ptr(&pt_ctx)->vcpu = vcpu;
}
static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, NULL);
+ if (kvm_x86_ops->pt_supported())
+ this_cpu_ptr(&pt_ctx)->vcpu = NULL;
}
#endif
--
1.8.3.1
Add support of clear Intel PT ToPA PMI status for
KVM guest.
Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/asm/msr-index.h | 4 ++++
arch/x86/kvm/vmx/pmu_intel.c | 8 +++++++-
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4660ce9..de95704 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -468,6 +468,7 @@ struct kvm_pmu {
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ae01fb0..c0ea4aa 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -778,6 +778,10 @@
/* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5ab4a36..6dee7cf 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -227,7 +227,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+ if (!(data & pmu->global_ovf_ctrl_mask)) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
@@ -297,6 +297,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
+ pmu->global_ovf_ctrl_mask = ~(pmu->global_ctrl |
+ MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
+ MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
+ if (kvm_x86_ops->pt_supported())
+ pmu->global_ovf_ctrl_mask &=
+ ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
if (entry &&
--
1.8.3.1
On 19/01/19 21:04, Luwei Kang wrote:
> static struct pt_pmu pt_pmu;
>
> @@ -1260,6 +1262,14 @@ void intel_pt_interrupt(void)
> struct pt_buffer *buf;
> struct perf_event *event = pt->handle.event;
>
> + if (pt->vcpu) {
> + /* Inject PMI to Guest */
> + kvm_make_request(KVM_REQ_PMI, pt->vcpu);
> + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
> + (unsigned long *)&pt->vcpu->arch.pmu.global_status);
> + return;
> + }
> +
There is no need to touch struct pt and to know details of KVM in
arch/x86/events. Please add a function pointer
void (*kvm_handle_pt_interrupt)(void);
to some header, and in handle_pmi_common do
if (unlikely(kvm_handle_intel_pt_interrupt))
kvm_handle_intel_pt_interrupt();
else
intel_pt_interrupt();
The function pointer can be assigned in
kvm_before_interrupt/kvm_after_interrupt just like you do now.
This should be a simpler patch too.
Paolo
On Wed, Jan 30, 2019 at 06:02:27PM +0100, Paolo Bonzini wrote:
> On 19/01/19 21:04, Luwei Kang wrote:
> > static struct pt_pmu pt_pmu;
> >
> > @@ -1260,6 +1262,14 @@ void intel_pt_interrupt(void)
> > struct pt_buffer *buf;
> > struct perf_event *event = pt->handle.event;
> >
> > + if (pt->vcpu) {
> > + /* Inject PMI to Guest */
> > + kvm_make_request(KVM_REQ_PMI, pt->vcpu);
> > + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
> > + (unsigned long *)&pt->vcpu->arch.pmu.global_status);
> > + return;
> > + }
> > +
>
> There is no need to touch struct pt and to know details of KVM in
> arch/x86/events. Please add a function pointer
>
> void (*kvm_handle_pt_interrupt)(void);
>
> to some header, and in handle_pmi_common do
>
> if (unlikely(kvm_handle_intel_pt_interrupt))
> kvm_handle_intel_pt_interrupt();
> else
> intel_pt_interrupt();
>
> The function pointer can be assigned in
> kvm_before_interrupt/kvm_after_interrupt just like you do now.
>
> This should be a simpler patch too.
I know we do this in other places too; but it really is a very bad
pattern.
Exported function pointers are a fscking disaster waiting to happen.
There is nothing that limits access to kvm.o, any random module can try
and poke at it.
How about we extend perf_guest_info_callback with an arch section and
add:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5aeb4c74fb99..76ce804e72c1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5835,6 +5835,9 @@ struct perf_guest_info_callbacks *perf_guest_cbs;
int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
+ if (WARN_ON_ONCE(perf_guest_cbs))
+ return -EBUSY;
+
perf_guest_cbs = cbs;
return 0;
}
@@ -5842,6 +5845,9 @@ EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
+ if (WARN_ON_ONCE(perf_guest_cbs != cbs))
+ return -EINVAL;
+
perf_guest_cbs = NULL;
return 0;
}
On 06/02/19 17:34, Peter Zijlstra wrote:
>
> How about we extend perf_guest_info_callback with an arch section and
> add:
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 5aeb4c74fb99..76ce804e72c1 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5835,6 +5835,9 @@ struct perf_guest_info_callbacks *perf_guest_cbs;
>
> int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
> {
> + if (WARN_ON_ONCE(perf_guest_cbs))
> + return -EBUSY;
> +
> perf_guest_cbs = cbs;
> return 0;
> }
> @@ -5842,6 +5845,9 @@ EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
>
> int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
> {
> + if (WARN_ON_ONCE(perf_guest_cbs != cbs))
> + return -EINVAL;
> +
> perf_guest_cbs = NULL;
> return 0;
> }
Makes total sense.
Paolo