2020-03-19 06:36:08

by Luwei Kang

Subject: [PATCH v2 0/5] PEBS virtualization enabling via Intel PT

Intel's new hardware (Atom processors based on the Tremont
microarchitecture) introduces Processor Event-Based Sampling (PEBS)
extensions that output the PEBS records to the Intel PT stream instead
of the DS area. The PEBS records are packaged in a specific format when
output to Intel PT.

This patch set enables PEBS functionality in the KVM guest with PEBS
output directed to Intel PT, and is based on the PEBS virtualization
enabling via DS patch set[1].

Compared to v1, the code common to both PEBS virtualization modes (PEBS
via DS and PEBS via Intel PT) has been moved to the PEBS via DS patch
set. This patch set only includes the PEBS via PT specific changes.

Patch 1 extends get_fixed_pmc() so that a fixed-function counter can
also be looked up by its counter reload MSR;
Patches 2 and 3 implement the CPUID and MSR emulation;
Patch 4 adds the counter reload MSRs to the MSR-load list for
VM-entry/exit;
Patch 5 switches the PEBS records output to the Intel PT buffer if PEBS
via PT is enabled in the KVM guest.

[1]: https://lore.kernel.org/kvm/[email protected]/

Luwei Kang (5):
KVM: x86/pmu: Add base address parameter for get_fixed_pmc function
KVM: x86/pmu: Expose PDCM feature when PEBS output to PT
KVM: x86/pmu: PEBS output Intel PT MSRs emulation
KVM: x86/pmu: Add counter reload register to MSR list
KVM: VMX: Switch PEBS records output to Intel PT buffer

arch/x86/events/perf_event.h | 5 --
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/include/asm/msr-index.h | 6 +++
arch/x86/kvm/pmu.h | 6 +--
arch/x86/kvm/vmx/capabilities.h | 9 +++-
arch/x86/kvm/vmx/pmu_intel.c | 112 ++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/vmx/vmx.c | 3 ++
arch/x86/kvm/vmx/vmx.h | 2 +-
arch/x86/kvm/x86.c | 32 +++++++++++
9 files changed, 154 insertions(+), 23 deletions(-)

--
1.8.3.1


2020-03-19 06:36:22

by Luwei Kang

Subject: [PATCH v2 1/5] KVM: x86/pmu: Add base address parameter for get_fixed_pmc function

The PEBS output to Intel PT feature introduces new MSRs
(MSR_RELOAD_FIXED_CTRx) for the fixed-function counters, which are used
to automatically reload the counter value after a PEBS record is
written out.

Introduce a base MSR address parameter so that this function can also
look up the KVM performance monitor counter structure via the
MSR_RELOAD_FIXED_CTRx registers.
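
For illustration, here is a minimal, self-contained sketch of the lookup
that the extra base parameter enables, using simplified stand-in types
and an assumed MSR_RELOAD_FIXED_CTR0 value (not the kernel code itself):

#include <stdio.h>

/* Simplified stand-ins for the KVM structures and MSR numbers. */
#define MSR_CORE_PERF_FIXED_CTR0	0x309
#define MSR_RELOAD_FIXED_CTR0		0x1309	/* assumed value, illustration only */

struct kvm_pmc { unsigned int idx; };
struct kvm_pmu {
	unsigned int nr_arch_fixed_counters;
	struct kvm_pmc fixed_counters[4];
};

/* Same idea as get_fixed_pmc(): the caller chooses which MSR range to index. */
static struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, unsigned int msr,
				     unsigned int base)
{
	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
		return &pmu->fixed_counters[msr - base];
	return NULL;
}

int main(void)
{
	struct kvm_pmu pmu = { .nr_arch_fixed_counters = 3 };

	/* Lookup by the counter MSR itself ... */
	struct kvm_pmc *a = get_fixed_pmc(&pmu, MSR_CORE_PERF_FIXED_CTR0 + 1,
					  MSR_CORE_PERF_FIXED_CTR0);
	/* ... and by the reload MSR; both resolve to the same counter. */
	struct kvm_pmc *b = get_fixed_pmc(&pmu, MSR_RELOAD_FIXED_CTR0 + 1,
					  MSR_RELOAD_FIXED_CTR0);

	printf("same pmc: %d\n", a == b);
	return 0;
}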

Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/kvm/pmu.h | 5 ++---
arch/x86/kvm/vmx/pmu_intel.c | 16 ++++++++++------
2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 9de6ef1..d640628 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -117,10 +117,9 @@ static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
}

/* returns fixed PMC with the specified MSR */
-static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
+static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu,
+ u32 msr, u32 base)
{
- int base = MSR_CORE_PERF_FIXED_CTR0;
-
if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
u32 index = array_index_nospec(msr - base,
pmu->nr_arch_fixed_counters);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6a0eef3..2db9b9e 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -43,7 +43,8 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);
struct kvm_pmc *pmc;

- pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
+ pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_FIXED_CTR0);

if (old_ctrl == new_ctrl)
continue;
@@ -135,7 +136,8 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
else {
u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

- return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
+ return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0,
+ MSR_CORE_PERF_FIXED_CTR0);
}
}

@@ -196,7 +198,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
default:
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
- get_fixed_pmc(pmu, msr);
+ get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0);
break;
}

@@ -236,7 +238,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;

- pmc = get_fixed_pmc(pmu, msr);
+ pmc = get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0);
pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

@@ -278,7 +280,8 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
u64 val = pmc_read_counter(pmc);
*data = val & pmu->counter_bitmask[KVM_PMC_GP];
return 0;
- } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ } else if ((pmc = get_fixed_pmc(pmu, msr,
+ MSR_CORE_PERF_FIXED_CTR0))) {
u64 val = pmc_read_counter(pmc);
*data = val & pmu->counter_bitmask[KVM_PMC_FIXED];
return 0;
@@ -354,7 +357,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
return 0;
- } else if ((pmc = get_fixed_pmc(pmu, msr))) {
+ } else if ((pmc = get_fixed_pmc(pmu, msr,
+ MSR_CORE_PERF_FIXED_CTR0))) {
pmc->counter += data - pmc_read_counter(pmc);
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
--
1.8.3.1

2020-03-19 06:36:33

by Luwei Kang

Subject: [PATCH v2 3/5] KVM: x86/pmu: PEBS output Intel PT MSRs emulation

PEBS output to PT introduces a mechanism to direct PEBS output into
the Intel PT output stream, together with new performance monitoring
counter reload MSRs, which PEBS uses in place of the counter reload
values stored in the DS management area when PEBS output is directed
into the Intel PT output stream.

This patch implements read/write emulation of the reload MSRs and
updates the mask value of the MSR_IA32_PEBS_ENABLE register.
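
For illustration, here is a minimal, self-contained sketch of the two
pieces this emulation adds: the reload-MSR write check (the value must
fit within the counter width) and the IA32_PEBS_ENABLE mask update. The
counter width and mask values are toy assumptions, not the kernel code:

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)			(1ULL << (n))
#define PEBS_PMI_AFTER_EACH_RECORD	BIT_ULL(60)
#define PEBS_OUTPUT_OFFSET		61
#define PEBS_OUTPUT_PT			(1ULL << PEBS_OUTPUT_OFFSET)
#define PEBS_VIA_PT_MASK		(PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)

/* Stand-in for pmc_bitmask(): all bits a counter of the given width can hold. */
static uint64_t pmc_bitmask(unsigned int width)
{
	return (width >= 64) ? ~0ULL : (1ULL << width) - 1;
}

int main(void)
{
	uint64_t reload_cnt = 0;
	unsigned int counter_width = 48;	/* toy value */
	uint64_t data = 0xfffffffff000ULL;	/* guest write to a reload MSR */

	/* Write emulation: accept the reload value only if it fits the counter. */
	if (!(data & ~pmc_bitmask(counter_width)))
		reload_cnt = data;
	printf("reload_cnt = 0x%llx\n", (unsigned long long)reload_cnt);

	/*
	 * Mask update: with PEBS-via-PT exposed, the PMI-after-each-record and
	 * PT output-selection bits of IA32_PEBS_ENABLE are no longer reserved.
	 */
	uint64_t pebs_enable_mask = ~0x1ULL;	/* toy reserved-bit mask */
	pebs_enable_mask &= ~PEBS_VIA_PT_MASK;
	printf("PT output bits reserved: %s\n",
	       (pebs_enable_mask & PEBS_VIA_PT_MASK) ? "yes" : "no");
	return 0;
}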

Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/events/perf_event.h | 5 -----
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/include/asm/msr-index.h | 5 +++++
arch/x86/kvm/vmx/pmu_intel.c | 42 +++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/x86.c | 32 ++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 621529c..957adbb 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -87,11 +87,6 @@ struct amd_nb {
};

#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
-#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
-#define PEBS_OUTPUT_OFFSET 61
-#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET)
-#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET)
-#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)

/*
* Flags PEBS can handle without an PMI.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7b0a023..bba7270 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -456,6 +456,7 @@ struct kvm_pmc {
enum pmc_type type;
u8 idx;
u64 counter;
+ u64 reload_cnt;
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
@@ -500,6 +501,7 @@ struct kvm_pmu {
bool need_cleanup;

bool has_pebs_via_ds;
+ bool has_pebs_via_pt;
bool has_pebs_adaptive;

/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 750a2d5..768e61c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -148,6 +148,11 @@
#define LBR_INFO_CYCLES 0xffff

#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
+#define PEBS_OUTPUT_OFFSET 61
+#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f04e5eb..a8b0a8d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -188,9 +188,11 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
ret = pmu->version > 1;
break;
case MSR_IA32_DS_AREA:
+ ret = pmu->has_pebs_via_ds;
+ break;
case MSR_IA32_PEBS_ENABLE:
case MSR_IA32_PERF_CAPABILITIES:
- ret = pmu->has_pebs_via_ds;
+ ret = pmu->has_pebs_via_ds || pmu->has_pebs_via_pt;
break;
case MSR_PEBS_DATA_CFG:
ret = pmu->has_pebs_adaptive;
@@ -199,6 +201,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0);
+ if (!ret && pmu->has_pebs_via_pt)
+ ret = get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0) ||
+ get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0);
break;
}

@@ -253,6 +258,11 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
pmc = get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0);
pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+ if (!pmc && pmu->has_pebs_via_pt) {
+ pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0);
+ pmc = pmc ? pmc :
+ get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0);
+ }

return pmc;
}
@@ -300,6 +310,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
*data = pmc->eventsel;
return 0;
+ } else if ((pmc = get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0)) ||
+ (pmc = get_fixed_pmc(pmu, msr,
+ MSR_RELOAD_FIXED_CTR0))) {
+ *data = pmc->reload_cnt;
+ return 0;
}
}

@@ -380,6 +395,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
reprogram_gp_counter(pmc, data);
return 0;
}
+ } else if ((pmc = get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0)) ||
+ (pmc = get_fixed_pmc(pmu, msr,
+ MSR_RELOAD_FIXED_CTR0))) {
+ if (!(data & ~pmc_bitmask(pmc))) {
+ pmc->reload_cnt = data;
+ return 0;
+ }
}
}

@@ -449,14 +471,17 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
if (entry && (entry->ecx & X86_FEATURE_DTES64) &&
(entry->ecx & X86_FEATURE_PDCM) &&
(entry->edx & X86_FEATURE_DS) &&
- intel_is_pebs_via_ds_supported()) {
+ intel_is_pebs_via_ds_supported())
pmu->has_pebs_via_ds = 1;
- pmu->pebs_enable_mask = ~pmu->global_ctrl;
- }

- if (pmu->has_pebs_via_ds) {
+ if (intel_is_pebs_via_pt_supported())
+ pmu->has_pebs_via_pt = 1;
+
+ if (pmu->has_pebs_via_ds || pmu->has_pebs_via_pt) {
u64 perf_cap;

+ pmu->pebs_enable_mask = ~pmu->global_ctrl;
+
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
pmu->perf_cap = (perf_cap & (PERF_CAP_PEBS_TRAP |
PERF_CAP_ARCH_REG |
@@ -471,6 +496,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->fixed_ctr_ctrl_mask &= ~(1ULL <<
(INTEL_PMC_IDX_FIXED + i * 4));
}
+
+ if (pmu->has_pebs_via_pt) {
+ pmu->pebs_enable_mask &= ~PEBS_VIA_PT_MASK;
+ pmu->perf_cap |= PERF_CAP_PEBS_OUTPUT_PT;
+ }
}

entry = kvm_find_cpuid_entry(vcpu, 7, 0);
@@ -497,6 +527,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
pmu->gp_counters[i].current_config = 0;
+ pmu->gp_counters[i].reload_cnt = 0;
}

for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
@@ -504,6 +535,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
pmu->fixed_counters[i].vcpu = vcpu;
pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
pmu->fixed_counters[i].current_config = 0;
+ pmu->fixed_counters[i].reload_cnt = 0;
}
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aa1344b..5031f50 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1193,6 +1193,17 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+ MSR_RELOAD_FIXED_CTR0, MSR_RELOAD_FIXED_CTR0 + 1,
+ MSR_RELOAD_FIXED_CTR0 + 2, MSR_RELOAD_FIXED_CTR0 + 3,
+ MSR_RELOAD_PMC0, MSR_RELOAD_PMC0 + 1,
+ MSR_RELOAD_PMC0 + 2, MSR_RELOAD_PMC0 + 3,
+ MSR_RELOAD_PMC0 + 4, MSR_RELOAD_PMC0 + 5,
+ MSR_RELOAD_PMC0 + 6, MSR_RELOAD_PMC0 + 7,
+ MSR_RELOAD_PMC0 + 8, MSR_RELOAD_PMC0 + 9,
+ MSR_RELOAD_PMC0 + 10, MSR_RELOAD_PMC0 + 11,
+ MSR_RELOAD_PMC0 + 12, MSR_RELOAD_PMC0 + 13,
+ MSR_RELOAD_PMC0 + 14, MSR_RELOAD_PMC0 + 15,
+ MSR_RELOAD_PMC0 + 16, MSR_RELOAD_PMC0 + 17,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA,
MSR_PEBS_DATA_CFG,
};
@@ -5270,11 +5281,32 @@ static void kvm_init_msr_list(void)
continue;
break;
case MSR_IA32_PEBS_ENABLE:
+ if (!kvm_x86_ops->pmu_ops ||
+ (!kvm_x86_ops->pmu_ops->is_pebs_via_ds_supported() &&
+ !kvm_x86_ops->pmu_ops->is_pebs_via_pt_supported()))
+ continue;
+ break;
case MSR_IA32_DS_AREA:
if (!kvm_x86_ops->pmu_ops ||
!kvm_x86_ops->pmu_ops->is_pebs_via_ds_supported())
continue;
break;
+ case MSR_RELOAD_FIXED_CTR0 ... MSR_RELOAD_FIXED_CTR0 + INTEL_PMC_MAX_FIXED:
+ if (!kvm_x86_ops->pmu_ops ||
+ !kvm_x86_ops->pmu_ops->is_pebs_via_pt_supported())
+ continue;
+ if (msrs_to_save_all[i] - MSR_RELOAD_FIXED_CTR0 >=
+ min(INTEL_PMC_MAX_FIXED, x86_pmu.num_counters_fixed))
+ continue;
+ break;
+ case MSR_RELOAD_PMC0 ... MSR_RELOAD_PMC0 + 17:
+ if (!kvm_x86_ops->pmu_ops ||
+ !kvm_x86_ops->pmu_ops->is_pebs_via_pt_supported())
+ continue;
+ if (msrs_to_save_all[i] - MSR_RELOAD_PMC0 >=
+ min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+ continue;
+ break;
case MSR_PEBS_DATA_CFG:
if (!kvm_x86_ops->pmu_ops ||
!kvm_x86_ops->pmu_ops->is_pebs_baseline_supported())
--
1.8.3.1

2020-03-19 06:37:09

by Luwei Kang

Subject: [PATCH v2 5/5] KVM: VMX: Switch PEBS records output to Intel PT buffer

Switch the PEBS records output to the Intel PT buffer when PEBS via
Intel PT is enabled in the KVM guest.
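
For illustration, a minimal sketch of the guest/host value pair that
ends up on the atomic MSR switch list for IA32_PEBS_ENABLE when the
guest has selected PT output (simplified, not the kernel code itself):

#include <stdio.h>
#include <stdint.h>

#define PEBS_OUTPUT_OFFSET	61
#define PEBS_OUTPUT_MASK	(3ULL << PEBS_OUTPUT_OFFSET)
#define PEBS_OUTPUT_PT		(1ULL << PEBS_OUTPUT_OFFSET)

int main(void)
{
	/* Guest view: counter 0 enabled for PEBS, records directed to PT. */
	uint64_t pebs_enable = 0x1ULL | PEBS_OUTPUT_PT;

	uint64_t guest_val = 0x1ULL;	/* value loaded on VM-entry */
	uint64_t host_val = 0;		/* PEBS kept disabled on the host side */

	/* Preserve the guest's PT output selection across VM-entry. */
	if ((pebs_enable & PEBS_OUTPUT_MASK) == PEBS_OUTPUT_PT)
		guest_val |= PEBS_OUTPUT_PT;

	printf("guest IA32_PEBS_ENABLE = 0x%llx, host IA32_PEBS_ENABLE = 0x%llx\n",
	       (unsigned long long)guest_val, (unsigned long long)host_val);
	return 0;
}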

Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c6d9a87..ec74656 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -921,6 +921,7 @@ void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
{
int i, j = 0;
struct msr_autoload *m = &vmx->msr_autoload;
+ struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);

switch (msr) {
case MSR_EFER:
@@ -952,6 +953,8 @@ void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
* guest's memory.
*/
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+ if ((pmu->pebs_enable & PEBS_OUTPUT_MASK) == PEBS_OUTPUT_PT)
+ guest_val |= PEBS_OUTPUT_PT;
}

i = vmx_find_msr_index(&m->guest, msr);
--
1.8.3.1

2020-03-19 06:37:46

by Luwei Kang

Subject: [PATCH v2 2/5] KVM: x86/pmu: Expose PDCM feature when PEBS output to PT

PDCM (Perfmon and Debug Capability) indicates that the processor
supports the performance and debug feature indication MSR
IA32_PERF_CAPABILITIES.

Expose the PDCM feature when PEBS virtualization via Intel PT is
supported in the KVM guest.
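
For illustration, a minimal, self-contained sketch of the decision this
patch adds: PDCM is exposed if either PEBS-via-DS or PEBS-via-PT can be
virtualized. The host state is faked here with toy variables in place
of the rdmsrl() reads and the real pt_mode:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PERF_CAP_PEBS_OUTPUT_PT			(1ULL << 16)
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL	(1ULL << 12)

enum pt_mode_t { PT_MODE_SYSTEM, PT_MODE_HOST_GUEST };

/* Toy "host state"; the real code reads these via rdmsrl(). */
static uint64_t misc_enable;				/* PEBS not disabled */
static uint64_t perf_cap = PERF_CAP_PEBS_OUTPUT_PT;
static enum pt_mode_t pt_mode = PT_MODE_HOST_GUEST;

static bool is_pebs_via_ds_supported(void)
{
	return false;	/* toy value */
}

static bool is_pebs_via_pt_supported(void)
{
	return !(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) &&
	       (perf_cap & PERF_CAP_PEBS_OUTPUT_PT) &&
	       pt_mode == PT_MODE_HOST_GUEST;
}

/* PDCM is advertised when either PEBS flavour can be virtualized. */
static bool pdcm_supported(void)
{
	return is_pebs_via_ds_supported() || is_pebs_via_pt_supported();
}

int main(void)
{
	printf("expose PDCM: %s\n", pdcm_supported() ? "yes" : "no");
	return 0;
}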

Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kvm/pmu.h | 1 +
arch/x86/kvm/vmx/capabilities.h | 9 +++++++--
arch/x86/kvm/vmx/pmu_intel.c | 13 +++++++++++++
4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d3d6e48..750a2d5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -155,6 +155,7 @@
#define PERF_CAP_ARCH_REG BIT_ULL(7)
#define PERF_CAP_PEBS_FORMAT 0xf00
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_OUTPUT_PT BIT_ULL(16)
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6

#define MSR_IA32_RTIT_CTL 0x00000570
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index d640628..ba8c68d 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -33,6 +33,7 @@ struct kvm_pmu_ops {
int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
bool (*is_pebs_via_ds_supported)(void);
+ bool (*is_pebs_via_pt_supported)(void);
bool (*is_pebs_baseline_supported)(void);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 9e352b5..dc480c9 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -154,10 +154,15 @@ static inline bool vmx_pku_supported(void)

static inline bool vmx_pdcm_supported(void)
{
+ bool ret = 0;
+
if (kvm_x86_ops->pmu_ops->is_pebs_via_ds_supported)
- return kvm_x86_ops->pmu_ops->is_pebs_via_ds_supported();
+ ret |= kvm_x86_ops->pmu_ops->is_pebs_via_ds_supported();

- return false;
+ if (kvm_x86_ops->pmu_ops->is_pebs_via_pt_supported)
+ ret |= kvm_x86_ops->pmu_ops->is_pebs_via_pt_supported();
+
+ return ret;
}

static inline bool vmx_dtes64_supported(void)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 2db9b9e..f04e5eb 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -221,6 +221,18 @@ static bool intel_is_pebs_via_ds_supported(void)
return true;
}

+static bool intel_is_pebs_via_pt_supported(void)
+{
+ u64 misc, perf_cap;
+
+ rdmsrl(MSR_IA32_MISC_ENABLE, misc);
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
+
+ return (!(misc & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL) &&
+ (perf_cap & PERF_CAP_PEBS_OUTPUT_PT) &&
+ (pt_mode == PT_MODE_HOST_GUEST));
+}
+
static bool intel_is_pebs_baseline_supported(void)
{
u64 perf_cap;
@@ -529,6 +541,7 @@ struct kvm_pmu_ops intel_pmu_ops = {
.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
.is_valid_msr = intel_is_valid_msr,
.is_pebs_via_ds_supported = intel_is_pebs_via_ds_supported,
+ .is_pebs_via_pt_supported = intel_is_pebs_via_pt_supported,
.is_pebs_baseline_supported = intel_is_pebs_baseline_supported,
.get_msr = intel_pmu_get_msr,
.set_msr = intel_pmu_set_msr,
--
1.8.3.1

2020-03-19 06:37:58

by Luwei Kang

Subject: [PATCH v2 4/5] KVM: x86/pmu: Add counter reload register to MSR list

The guest counter reload registers need to be loaded into the real
hardware before VM-entry. This patch adds the counter reload registers
to the MSR-load list when the corresponding counter is enabled, and
removes them when the counter is disabled.

Following the description in the SDM, the Tremont microarchitecture has
3 fixed counters and 4 general-purpose counters per core. This patch
extends NR_LOADSTORE_MSRS from 8 to 16 because up to 7 counter reload
registers need to be added to the MSR-load list when all the counters
are enabled.
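
For illustration, a minimal sketch of the counter-bit to reload-MSR
mapping used when populating the MSR-load list. Fixed counters sit at
bit 32 upward, as in the global control layout; the MSR base values are
assumptions for the sketch only:

#include <stdio.h>
#include <stdint.h>

#define INTEL_PMC_IDX_FIXED	32
#define MSR_RELOAD_PMC0		0x14c1	/* assumed value, illustration only */
#define MSR_RELOAD_FIXED_CTR0	0x1309	/* assumed value, illustration only */

int main(void)
{
	/* Toy enable mask: GP counters 0-3 and fixed counters 0-2 enabled. */
	uint64_t set = 0xfULL | (0x7ULL << INTEL_PMC_IDX_FIXED);
	int entries = 0;

	for (int bit = 0; bit < 64; bit++) {
		if (!(set & (1ULL << bit)))
			continue;

		unsigned int msr = (bit < INTEL_PMC_IDX_FIXED) ?
			MSR_RELOAD_PMC0 + bit :
			MSR_RELOAD_FIXED_CTR0 + bit - INTEL_PMC_IDX_FIXED;

		printf("counter bit %2d -> reload MSR 0x%x\n", bit, msr);
		entries++;
	}

	/* 4 GP + 3 fixed = 7 reload MSRs, hence NR_LOADSTORE_MSRS grows to 16. */
	printf("MSR-load entries added: %d\n", entries);
	return 0;
}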

Signed-off-by: Luwei Kang <[email protected]>
---
arch/x86/kvm/vmx/pmu_intel.c | 41 ++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/vmx/vmx.h | 2 +-
2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a8b0a8d..75e1d2c 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,12 +68,42 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
reprogram_counter(pmu, bit);
}

+static void intel_pmu_set_reload_counter(struct kvm_vcpu *vcpu, u64 data,
+ bool add)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ unsigned long bit;
+ u64 set, host_reload_ctr;
+ u32 msr;
+
+ set = data & ~pmu->global_ctrl_mask;
+
+ for_each_set_bit(bit, (unsigned long *)&set, X86_PMC_IDX_MAX) {
+ if (bit < INTEL_PMC_IDX_FIXED) {
+ msr = MSR_RELOAD_PMC0 + bit;
+ pmc = &pmu->gp_counters[bit];
+ } else {
+ msr = MSR_RELOAD_FIXED_CTR0 + bit - INTEL_PMC_IDX_FIXED;
+ pmc = &pmu->fixed_counters[bit - INTEL_PMC_IDX_FIXED];
+ }
+
+ rdmsrl_safe(msr, &host_reload_ctr);
+ if (add)
+ add_atomic_switch_msr(vmx, msr,
+ pmc->reload_cnt, host_reload_ctr, false);
+ else
+ clear_atomic_switch_msr(vmx, msr);
+ }
+}
+
static void pebs_enable_changed(struct kvm_pmu *pmu, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(pmu_to_vcpu(pmu));
u64 host_ds_area, host_pebs_data_cfg;

- if (data) {
+ if (data && ((data & PEBS_OUTPUT_MASK) == 0)) {
rdmsrl_safe(MSR_IA32_DS_AREA, &host_ds_area);
add_atomic_switch_msr(vmx, MSR_IA32_DS_AREA,
pmu->ds_area, host_ds_area, false);
@@ -81,10 +111,19 @@ static void pebs_enable_changed(struct kvm_pmu *pmu, u64 data)
rdmsrl_safe(MSR_PEBS_DATA_CFG, &host_pebs_data_cfg);
add_atomic_switch_msr(vmx, MSR_PEBS_DATA_CFG,
pmu->pebs_data_cfg, host_pebs_data_cfg, false);
+ } else if (data && ((data & PEBS_OUTPUT_MASK) == PEBS_OUTPUT_PT)) {
+ intel_pmu_set_reload_counter(pmu_to_vcpu(pmu), data, true);

+ rdmsrl_safe(MSR_PEBS_DATA_CFG, &host_pebs_data_cfg);
+ add_atomic_switch_msr(vmx, MSR_PEBS_DATA_CFG,
+ pmu->pebs_data_cfg, host_pebs_data_cfg, false);
} else {
clear_atomic_switch_msr(vmx, MSR_IA32_DS_AREA);
clear_atomic_switch_msr(vmx, MSR_PEBS_DATA_CFG);
+
+ if (pmu->has_pebs_via_pt)
+ intel_pmu_set_reload_counter(pmu_to_vcpu(pmu),
+ data, false);
}

pmu->pebs_enable = data;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index ea899e7..f185144 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -28,7 +28,7 @@
#define NR_SHARED_MSRS 4
#endif

-#define NR_LOADSTORE_MSRS 8
+#define NR_LOADSTORE_MSRS 16

struct vmx_msrs {
unsigned int nr;
--
1.8.3.1