Starting with Zen4, the core PMU on AMD platforms such as Genoa and
Ryzen-7000 supports PerfMonV2, while remaining compatible with the legacy
PERFCTR_CORE behavior and MSR addresses.
If you don't have access to the hardware specification, the host perf
commits d6d0c7f681fd..7685665c390d also give a quick overview. The main
change is the addition of three MSRs equivalent to their Intel v2
counterparts, namely global_ctrl, global_status and global_status_clear.
It is worth noting that this feature is very attractive for reducing the
overhead of PMU virtualization: multiple MSR accesses to multiple counters
are replaced by a single access to the global register, with an additional
accuracy gain when multiple guest counters are used.
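As a minimal guest-side sketch of the effect (using the MSR addresses and
the kvm-unit-test wrmsr()/rdmsr() helpers that appear later in this series;
the function names here are illustrative only):

	/* Enable PMC0 and PMC1 with a single MSR write instead of two. */
	static void global_enable_two_counters(void)
	{
		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_CTL,	/* 0xc0000301 */
		      (1ull << 0) | (1ull << 1));
	}

	/* Read all overflow bits once and clear them with a single write. */
	static void global_ack_overflows(void)
	{
		uint64_t status = rdmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS);

		wrmsr(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
	}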
The KVM part is based on the latest vPMU fixes patch set [1], while the
kvm-unit-tests patch relies on the prerequisite test case work [2] for
testing the legacy AMD vPMU, which did not exist before.
All related test cases pass on a Genoa box.
Please feel free to run more tests, add more cases or share comments.
[1] https://lore.kernel.org/kvm/[email protected]/
[2] https://lore.kernel.org/kvm/[email protected]/
Like Xu (3):
KVM: x86/svm/pmu: Limit the maximum number of supported GP counters
KVM: x86/pmu: Make part of the Intel v2 PMU MSRs handling x86 generic
KVM: x86/svm/pmu: Add AMD PerfMonV2 support
Sandipan Das (1):
KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022
arch/x86/include/asm/kvm-x86-pmu-ops.h | 1 -
arch/x86/include/asm/perf_event.h | 8 ++++
arch/x86/kvm/cpuid.c | 21 +++++++-
arch/x86/kvm/pmu.c | 61 ++++++++++++++++++++++--
arch/x86/kvm/pmu.h | 32 ++++++++++++-
arch/x86/kvm/svm/pmu.c | 66 +++++++++++++++++---------
arch/x86/kvm/vmx/pmu_intel.c | 58 +---------------------
arch/x86/kvm/x86.c | 13 +++++
8 files changed, 173 insertions(+), 87 deletions(-)
--
2.37.3
From: Like Xu <[email protected]>
The AMD PerfMonV2 specification defines three registers similar to part
of the Intel v2 PMU registers, namely the GLOBAL_CTRL, GLOBAL_STATUS and
GLOBAL_OVF_CTRL MSRs. For better code reuse, this common part of the
handling is extracted and made generic for x86.
The new non-prefixed pmc_is_enabled() works correctly because the legacy
AMD vPMU version is indexed as 1. Note that the vendor-specific
*_is_valid_msr callbacks will continue to be used to avoid cross-vendor
MSR access.
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/include/asm/kvm-x86-pmu-ops.h | 1 -
arch/x86/kvm/pmu.c | 55 +++++++++++++++++++++---
arch/x86/kvm/pmu.h | 30 ++++++++++++-
arch/x86/kvm/svm/pmu.c | 9 ----
arch/x86/kvm/vmx/pmu_intel.c | 58 +-------------------------
5 files changed, 80 insertions(+), 73 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index c17e3e96fc1d..6c98f4bb4228 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -13,7 +13,6 @@ BUILD_BUG_ON(1)
* at the call sites.
*/
KVM_X86_PMU_OP(hw_event_available)
-KVM_X86_PMU_OP(pmc_is_enabled)
KVM_X86_PMU_OP(pmc_idx_to_pmc)
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
KVM_X86_PMU_OP(msr_idx_to_pmc)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3c42df3a55ff..7002e1b74108 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -83,11 +83,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
-{
- return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
-}
-
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
@@ -455,11 +450,61 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ u32 msr = msr_info->index;
+
+ switch (msr) {
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ msr_info->data = pmu->global_status;
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ msr_info->data = pmu->global_ctrl;
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ msr_info->data = 0;
+ return 0;
+ default:
+ break;
+ }
+
return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
}
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ u32 msr = msr_info->index;
+ u64 data = msr_info->data;
+ u64 diff;
+
+ switch (msr) {
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ if (msr_info->host_initiated) {
+ pmu->global_status = data;
+ return 0;
+ }
+ break; /* RO MSR */
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ if (pmu->global_ctrl == data)
+ return 0;
+ if (kvm_valid_perf_global_ctrl(pmu, data)) {
+ diff = pmu->global_ctrl ^ data;
+ pmu->global_ctrl = data;
+ reprogram_counters(pmu, diff);
+ return 0;
+ }
+ break;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if (!(data & pmu->global_ovf_ctrl_mask)) {
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index e3a3813b6a38..3f9823b503fb 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -28,7 +28,6 @@ struct kvm_event_hw_type_mapping {
struct kvm_pmu_ops {
bool (*hw_event_available)(struct kvm_pmc *pmc);
- bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
@@ -191,6 +190,35 @@ static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
+/*
+ * Check if a PMC is enabled by comparing it against global_ctrl bits.
+ *
+ * If the current version of vPMU doesn't have global_ctrl MSR,
+ * all vPMCs are enabled (return TRUE).
+ */
+static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+ if (pmu->version < 2)
+ return true;
+
+ return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+}
+
+static inline void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
+{
+ int bit;
+
+ if (!diff)
+ return;
+
+ for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ __set_bit(bit, pmu->reprogram_pmi);
+
+ kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
+}
+
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index f99f2c869664..3a20972e9f1a 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -76,14 +76,6 @@ static bool amd_hw_event_available(struct kvm_pmc *pmc)
return true;
}
-/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
- * AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE).
- */
-static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
-{
- return true;
-}
-
static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -218,7 +210,6 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu_ops amd_pmu_ops __initdata = {
.hw_event_available = amd_hw_event_available,
- .pmc_is_enabled = amd_pmc_is_enabled,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = amd_msr_idx_to_pmc,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 1d3d0bd3e0e7..cfc6de706bf4 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,18 +68,6 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
}
-static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
-{
- int bit;
- struct kvm_pmc *pmc;
-
- for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
- pmc = intel_pmc_idx_to_pmc(pmu, bit);
- if (pmc)
- kvm_pmu_request_counter_reprogam(pmc);
- }
-}
-
static bool intel_hw_event_available(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -102,17 +90,6 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc)
return true;
}
-/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
-static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (!intel_pmu_has_perf_global_ctrl(pmu))
- return true;
-
- return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
-}
-
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -347,15 +324,6 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
return 0;
- case MSR_CORE_PERF_GLOBAL_STATUS:
- msr_info->data = pmu->global_status;
- return 0;
- case MSR_CORE_PERF_GLOBAL_CTRL:
- msr_info->data = pmu->global_ctrl;
- return 0;
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- msr_info->data = 0;
- return 0;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
return 0;
@@ -404,29 +372,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
break;
- case MSR_CORE_PERF_GLOBAL_STATUS:
- if (msr_info->host_initiated) {
- pmu->global_status = data;
- return 0;
- }
- break; /* RO MSR */
- case MSR_CORE_PERF_GLOBAL_CTRL:
- if (pmu->global_ctrl == data)
- return 0;
- if (kvm_valid_perf_global_ctrl(pmu, data)) {
- diff = pmu->global_ctrl ^ data;
- pmu->global_ctrl = data;
- reprogram_counters(pmu, diff);
- return 0;
- }
- break;
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & pmu->global_ovf_ctrl_mask)) {
- if (!msr_info->host_initiated)
- pmu->global_status &= ~data;
- return 0;
- }
- break;
case MSR_IA32_PEBS_ENABLE:
if (pmu->pebs_enable == data)
return 0;
@@ -783,7 +728,7 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
pmc = intel_pmc_idx_to_pmc(pmu, bit);
if (!pmc || !pmc_speculative_in_use(pmc) ||
- !intel_pmc_is_enabled(pmc) || !pmc->perf_event)
+ !pmc_is_enabled(pmc) || !pmc->perf_event)
continue;
hw_idx = pmc->perf_event->hw.idx;
@@ -795,7 +740,6 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
struct kvm_pmu_ops intel_pmu_ops __initdata = {
.hw_event_available = intel_hw_event_available,
- .pmc_is_enabled = intel_pmc_is_enabled,
.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
.msr_idx_to_pmc = intel_msr_idx_to_pmc,
--
2.37.3
From: Like Xu <[email protected]>
Specifying an unsupported PMC encoding will cause a #GP(0).
All test cases should pass when the KVM_FEP prefix is added.
Signed-off-by: Like Xu <[email protected]>
---
lib/x86/processor.h | 5 ++++-
x86/pmu.c | 13 +++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 10bca27..9c490d9 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -441,7 +441,10 @@ static inline int wrmsr_safe(u32 index, u64 val)
static inline uint64_t rdpmc(uint32_t index)
{
uint32_t a, d;
- asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
+ if (is_fep_available())
+ asm volatile (KVM_FEP "rdpmc" : "=a"(a), "=d"(d) : "c"(index));
+ else
+ asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
return a | ((uint64_t)d << 32);
}
diff --git a/x86/pmu.c b/x86/pmu.c
index 203a9d4..11607c0 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -758,12 +758,25 @@ static bool pmu_is_detected(void)
return detect_intel_pmu();
}
+static void rdpmc_unsupported_counter(void *data)
+{
+ rdpmc(64);
+}
+
+static void check_rdpmc_cause_gp(void)
+{
+ report(test_for_exception(GP_VECTOR, rdpmc_unsupported_counter, NULL),
+ "rdpmc with invalid PMC index raises #GP");
+}
+
int main(int ac, char **av)
{
setup_vm();
handle_irq(PC_VECTOR, cnt_overflow);
buf = malloc(N*64);
+ check_rdpmc_cause_gp();
+
if (!pmu_is_detected())
return report_summary();
--
2.37.3
From: Like Xu <[email protected]>
If AMD Performance Monitoring Version 2 (PerfMonV2) is detected by the
guest, it can use a new scheme to manage the core PMCs via the new
global control and status registers.
In addition to benefiting from the PerfMonV2 functionality in the same
way as the host (higher precision), the guest can also reduce the number
of vm-exits by lowering the total number of MSR accesses.
In terms of implementation details, amd_is_valid_msr() is resurrected
since the three newly added MSRs cannot be mapped to a single vPMC.
Emulating PerfMonV2 on hosts without hardware support has also been
ruled out for reasons of precision.
Co-developed-by: Sandipan Das <[email protected]>
Signed-off-by: Sandipan Das <[email protected]>
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/pmu.c | 6 +++++
arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
arch/x86/kvm/x86.c | 11 ++++++++++
3 files changed, 57 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7002e1b74108..56b4f898a246 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
msr_info->data = pmu->global_status;
return 0;
case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
msr_info->data = pmu->global_ctrl;
return 0;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
msr_info->data = 0;
return 0;
default:
@@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
if (msr_info->host_initiated) {
pmu->global_status = data;
return 0;
}
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
if (pmu->global_ctrl == data)
return 0;
if (kvm_valid_perf_global_ctrl(pmu, data)) {
@@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
if (!(data & pmu->global_ovf_ctrl_mask)) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 3a20972e9f1a..4c7d408e3caa 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
}
-static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
-{
- /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
- return false;
-}
-
static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
+static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ switch (msr) {
+ case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
+ return pmu->version > 0;
+ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
+ return pmu->version > 1;
+ default:
+ if (msr > MSR_F15H_PERF_CTR5 &&
+ msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
+ return pmu->version > 1;
+ break;
+ }
+
+ return amd_msr_idx_to_pmc(vcpu, msr);
+}
+
static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -162,20 +179,31 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_cpuid_entry2 *entry;
+ union cpuid_0x80000022_ebx ebx;
- if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+ pmu->version = 1;
+ entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
+ if (kvm_pmu_cap.version > 1 && entry && (entry->eax & BIT(0))) {
+ pmu->version = 2;
+ ebx.full = entry->ebx;
+ pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
+ (unsigned int)kvm_pmu_cap.num_counters_gp,
+ (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
+ pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
+ pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
+ } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
- else
+ } else {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+ }
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xfffffff000280000ull;
pmu->raw_event_mask = AMD64_RAW_EVENT_MASK;
- pmu->version = 1;
/* not applicable to AMD; but clean them to prevent any fall out */
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->nr_arch_fixed_counters = 0;
- pmu->global_status = 0;
bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
}
@@ -206,6 +234,8 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
pmc_stop_counter(pmc);
pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
}
+
+ pmu->global_ctrl = pmu->global_status = 0;
}
struct kvm_pmu_ops amd_pmu_ops __initdata = {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9738efd8425..96bb01c5eab8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1424,6 +1424,11 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+
+ MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
+ MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
@@ -3856,6 +3861,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_DS_AREA:
case MSR_PEBS_DATA_CFG:
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
/*
@@ -3959,6 +3967,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_DS_AREA:
case MSR_PEBS_DATA_CFG:
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
+ case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
/*
--
2.37.3
From: Like Xu <[email protected]>
The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
which the current KVM cannot support without further code changes.
A local macro (named analogously to INTEL_PMC_MAX_GENERIC) is introduced
to take back control of this virt capability, which also makes it easier
to statically partition all available counters between hosts and guests.
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/pmu.h | 2 ++
arch/x86/kvm/svm/pmu.c | 7 ++++---
arch/x86/kvm/x86.c | 2 ++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 847e7112a5d3..e3a3813b6a38 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -18,6 +18,8 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
+#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
+
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 2ec420b85d6a..f99f2c869664 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+ for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43a6a7efc6ec..b9738efd8425 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
+ /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
--
2.37.3
From: Sandipan Das <[email protected]>
CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
new performance monitoring features for AMD processors.
Bit 0 of EAX indicates support for Performance Monitoring
Version 2 (PerfMonV2) features. If found to be set during
PMU initialization, the EBX bits of the same CPUID function
can be used to determine the number of available PMCs for
different PMU types.
Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
that guests can make use of the PerfMonV2 features.
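For reference, a guest can decode the new leaf roughly as follows (a sketch
reusing the kvm-unit-test cpuid() helper and the bit layout used by the test
patch in this series; the variable names are illustrative):

	/* EAX[0]: Performance Monitoring Version 2 supported. */
	bool perfmon_v2 = cpuid(0x80000022).a & 0x1;
	/* EBX[3:0]: number of core performance counters. */
	unsigned int num_core_pmc = cpuid(0x80000022).b & 0xf;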
Co-developed-by: Like Xu <[email protected]>
Signed-off-by: Like Xu <[email protected]>
Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/include/asm/perf_event.h | 8 ++++++++
arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6fc8dd51ef4..c848f504e467 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
unsigned int full;
};
+union cpuid_0x80000022_eax {
+ struct {
+ /* Performance Monitoring Version 2 Supported */
+ unsigned int perfmon_v2:1;
+ } split;
+ unsigned int full;
+};
+
struct x86_pmu_capability {
int version;
int num_counters_gp;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 75dcf7a72605..08a29ab096d2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = 0;
break;
case 0x80000000:
- entry->eax = min(entry->eax, 0x80000021);
+ entry->eax = min(entry->eax, 0x80000022);
/*
* Serializing LFENCE is reported in a multitude of ways, and
* NullSegClearsBase is not reported in CPUID on Zen2; help
@@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
entry->eax |= BIT(6);
break;
+ /* AMD Extended Performance Monitoring and Debug */
+ case 0x80000022: {
+ union cpuid_0x80000022_eax eax;
+ union cpuid_0x80000022_ebx ebx;
+
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ if (!enable_pmu)
+ break;
+
+ if (kvm_pmu_cap.version > 1) {
+ /* AMD PerfMon is only supported up to V2 in the KVM. */
+ eax.split.perfmon_v2 = 1;
+ ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
+ KVM_AMD_PMC_MAX_GENERIC);
+ }
+ entry->eax = eax.full;
+ entry->ebx = ebx.full;
+ break;
+ }
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
/*Just support up to 0xC0000004 now*/
--
2.37.3
From: Like Xu <[email protected]>
Update the test cases to cover the KVM enabling code for AMD guest
PerfMonV2. The Intel-specific PMU helpers are extended to check for the
AMD CPUID, and MSRs with the same semantics are assigned during the
initialization phase. The vast majority of PMU test cases are reused
seamlessly.
On some x86 machines (AMD only), even with retired events, repeatedly
measuring the same workload yields an erratic number of collected events.
This essentially reflects details of the hardware implementation: from a
software perspective the event is imprecise, which is why a tolerance
check is added to the counter overflow test cases.
Signed-off-by: Like Xu <[email protected]>
---
lib/x86/msr.h | 5 ++++
lib/x86/processor.h | 9 ++++++-
x86/pmu.c | 61 ++++++++++++++++++++++++++++++++-------------
3 files changed, 56 insertions(+), 19 deletions(-)
diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 5f16a58..6f31155 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -419,6 +419,11 @@
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 9c490d9..b9592c4 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -796,8 +796,12 @@ static inline void flush_tlb(void)
static inline u8 pmu_version(void)
{
- if (!is_intel())
+ if (!is_intel()) {
+ /* Performance Monitoring Version 2 Supported */
+ if (cpuid(0x80000022).a & 0x1)
+ return 2;
return 0;
+ }
return cpuid(10).a & 0xff;
}
@@ -824,6 +828,9 @@ static inline u8 pmu_nr_gp_counters(void)
{
if (is_intel()) {
return (cpuid(10).a >> 8) & 0xff;
+ } else if (this_cpu_has_perf_global_ctrl()) {
+ /* Number of Core Performance Counters. */
+ return cpuid(0x80000022).b & 0xf;
} else if (!has_amd_perfctr_core()) {
return AMD64_NUM_COUNTERS;
}
diff --git a/x86/pmu.c b/x86/pmu.c
index 11607c0..6d5363b 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -72,6 +72,9 @@ struct pmu_event {
#define PMU_CAP_FW_WRITES (1ULL << 13)
static u32 gp_counter_base;
static u32 gp_select_base;
+static u32 global_status_msr;
+static u32 global_ctl_msr;
+static u32 global_status_clr_msr;
static unsigned int gp_events_size;
static unsigned int nr_gp_counters;
@@ -150,8 +153,7 @@ static void global_enable(pmu_counter_t *cnt)
return;
cnt->idx = event_to_global_idx(cnt);
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
- (1ull << cnt->idx));
+ wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) | (1ull << cnt->idx));
}
static void global_disable(pmu_counter_t *cnt)
@@ -159,8 +161,7 @@ static void global_disable(pmu_counter_t *cnt)
if (pmu_version() < 2)
return;
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
- ~(1ull << cnt->idx));
+ wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) & ~(1ull << cnt->idx));
}
static inline uint32_t get_gp_counter_msr(unsigned int i)
@@ -326,6 +327,23 @@ static void check_counters_many(void)
report(i == n, "all counters");
}
+static bool is_the_count_reproducible(pmu_counter_t *cnt)
+{
+ unsigned int i;
+ uint64_t count;
+
+ __measure(cnt, 0);
+ count = cnt->count;
+
+ for (i = 0; i < 10; i++) {
+ __measure(cnt, 0);
+ if (count != cnt->count)
+ return false;
+ }
+
+ return true;
+}
+
static void check_counter_overflow(void)
{
uint64_t count;
@@ -334,13 +352,14 @@ static void check_counter_overflow(void)
.ctr = gp_counter_base,
.config = EVNTSEL_OS | EVNTSEL_USR | (*gp_events)[1].unit_sel /* instructions */,
};
+ bool precise_event = is_the_count_reproducible(&cnt);
+
__measure(&cnt, 0);
count = cnt.count;
/* clear status before test */
if (pmu_version() > 1) {
- wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
- rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
+ wrmsr(global_status_clr_msr, rdmsr(global_status_msr));
}
report_prefix_push("overflow");
@@ -373,7 +392,7 @@ static void check_counter_overflow(void)
__measure(&cnt, cnt.count);
report(check_irq() == (i % 2), "irq-%d", i);
- if (pmu_version() > 1)
+ if (precise_event)
report(cnt.count == 1, "cntr-%d", i);
else
report(cnt.count < 4, "cntr-%d", i);
@@ -381,10 +400,10 @@ static void check_counter_overflow(void)
if (pmu_version() < 2)
continue;
- status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+ status = rdmsr(global_status_msr);
report(status & (1ull << idx), "status-%d", i);
- wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
- status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+ wrmsr(global_status_clr_msr, status);
+ status = rdmsr(global_status_msr);
report(!(status & (1ull << idx)), "status clear-%d", i);
}
@@ -492,8 +511,7 @@ static void check_running_counter_wrmsr(void)
/* clear status before overflow test */
if (pmu_version() > 1) {
- wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
- rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
+ wrmsr(global_status_clr_msr, rdmsr(global_status_msr));
}
start_event(&evt);
@@ -508,7 +526,7 @@ static void check_running_counter_wrmsr(void)
stop_event(&evt);
if (pmu_version() > 1) {
- status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+ status = rdmsr(global_status_msr);
report(status & 1, "status");
}
@@ -532,8 +550,7 @@ static void check_emulated_instr(void)
report_prefix_push("emulated instruction");
if (pmu_version() > 1) {
- wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
- rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
+ wrmsr(global_status_clr_msr, rdmsr(global_status_msr));
}
start_event(&brnch_cnt);
@@ -576,7 +593,7 @@ static void check_emulated_instr(void)
: "eax", "ebx", "ecx", "edx");
if (pmu_version() > 1)
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(global_ctl_msr, 0);
stop_event(&brnch_cnt);
stop_event(&instr_cnt);
@@ -590,7 +607,7 @@ static void check_emulated_instr(void)
if (pmu_version() > 1) {
// Additionally check that those counters overflowed properly.
- status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+ status = rdmsr(global_status_msr);
report(status & 1, "instruction counter overflow");
report(status & 2, "branch counter overflow");
}
@@ -679,7 +696,7 @@ static void set_ref_cycle_expectations(void)
if (!nr_gp_counters || !pmu_gp_counter_is_available(2))
return;
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(global_ctl_msr, 0);
t0 = fenced_rdtsc();
start_event(&cnt);
@@ -722,6 +739,10 @@ static bool detect_intel_pmu(void)
gp_counter_base = MSR_IA32_PERFCTR0;
gp_select_base = MSR_P6_EVNTSEL0;
+ global_status_msr = MSR_CORE_PERF_GLOBAL_STATUS;
+ global_ctl_msr = MSR_CORE_PERF_GLOBAL_CTRL;
+ global_status_clr_msr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
+
report_prefix_push("Intel");
return true;
}
@@ -746,6 +767,10 @@ static bool detect_amd_pmu(void)
gp_counter_base = MSR_F15H_PERF_CTR0;
gp_select_base = MSR_F15H_PERF_CTL0;
+ global_status_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS;
+ global_ctl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_CTL;
+ global_status_clr_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR;
+
report_prefix_push("AMD");
return true;
}
--
2.37.3
On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>
> From: Like Xu <[email protected]>
>
> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
> which is clearly not supported with zero code effort in the current KVM.
>
> A local macro (named like INTEL_PMC_MAX_GENERIC) is introduced to
> take back control of this virt capability, which also makes it easier to
> statically partition all available counters between hosts and guests.
>
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/pmu.h | 2 ++
> arch/x86/kvm/svm/pmu.c | 7 ++++---
> arch/x86/kvm/x86.c | 2 ++
> 3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 847e7112a5d3..e3a3813b6a38 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -18,6 +18,8 @@
> #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
> #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
>
> +#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
> +
> struct kvm_event_hw_type_mapping {
> u8 eventsel;
> u8 unit_mask;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 2ec420b85d6a..f99f2c869664 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> int i;
>
> - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
> + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
> + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>
> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
> pmu->gp_counters[i].type = KVM_PMC_GP;
> pmu->gp_counters[i].vcpu = vcpu;
> pmu->gp_counters[i].idx = i;
> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> int i;
>
> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
> struct kvm_pmc *pmc = &pmu->gp_counters[i];
>
> pmc_stop_counter(pmc);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 43a6a7efc6ec..b9738efd8425 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
> MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
IIRC, the effective maximum on the Intel side is 18, despite what
INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR
indices. That's why the Intel list stops here. Should we introduce a
KVM_INTEL_PMC_MAX_GENERIC as well?
> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>
> + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
Perhaps the comment above should be moved down two lines, since the
next two lines deal with the legacy counters.
> MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
> MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
> MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
> MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
> MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
> MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
At some point, we may want to consider populating the PMU MSR list
dynamically, rather than statically enumerating all of them (for both
AMD and Intel).
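A rough, purely illustrative sketch of what dynamic population could look
like for the AMD counters (the helper name and its caller are hypothetical;
only the MSR defines and kvm_pmu_cap come from existing code / this series):

	static void kvm_probe_amd_pmu_msrs(u32 *list, unsigned int *num)
	{
		unsigned int i;

		/* CTLn/CTRn pairs are interleaved at MSR_F15H_PERF_CTL0 + 2 * i. */
		for (i = 0; i < kvm_pmu_cap.num_counters_gp; i++) {
			list[(*num)++] = MSR_F15H_PERF_CTL0 + 2 * i;
			list[(*num)++] = MSR_F15H_PERF_CTR0 + 2 * i;
		}
	}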
Reviewed-by: Jim Mattson <[email protected]>
On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>
> From: Sandipan Das <[email protected]>
>
> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> new performance monitoring features for AMD processors.
>
> Bit 0 of EAX indicates support for Performance Monitoring
> Version 2 (PerfMonV2) features. If found to be set during
> PMU initialization, the EBX bits of the same CPUID function
> can be used to determine the number of available PMCs for
> different PMU types.
>
> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> that guests can make use of the PerfMonV2 features.
>
> Co-developed-by: Like Xu <[email protected]>
> Signed-off-by: Like Xu <[email protected]>
> Signed-off-by: Sandipan Das <[email protected]>
> ---
> arch/x86/include/asm/perf_event.h | 8 ++++++++
> arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
> 2 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index f6fc8dd51ef4..c848f504e467 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
> unsigned int full;
> };
>
> +union cpuid_0x80000022_eax {
> + struct {
> + /* Performance Monitoring Version 2 Supported */
> + unsigned int perfmon_v2:1;
> + } split;
> + unsigned int full;
> +};
> +
> struct x86_pmu_capability {
> int version;
> int num_counters_gp;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 75dcf7a72605..08a29ab096d2 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> entry->edx = 0;
> break;
> case 0x80000000:
> - entry->eax = min(entry->eax, 0x80000021);
> + entry->eax = min(entry->eax, 0x80000022);
> /*
> * Serializing LFENCE is reported in a multitude of ways, and
> * NullSegClearsBase is not reported in CPUID on Zen2; help
> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
> entry->eax |= BIT(6);
> break;
> + /* AMD Extended Performance Monitoring and Debug */
> + case 0x80000022: {
> + union cpuid_0x80000022_eax eax;
> + union cpuid_0x80000022_ebx ebx;
> +
> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> + if (!enable_pmu)
> + break;
> +
> + if (kvm_pmu_cap.version > 1) {
> + /* AMD PerfMon is only supported up to V2 in the KVM. */
> + eax.split.perfmon_v2 = 1;
> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> + KVM_AMD_PMC_MAX_GENERIC);
Note that the number of core PMCs has to be at least 6 if
guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
could claim fewer, but the first 6 PMCs must work, per the v1 PMU
spec. That is, software that knows about PERFCTR_CORE, but not about
PMU v2, can rightfully expect 6 PMCs.
> + }
> + entry->eax = eax.full;
> + entry->ebx = ebx.full;
> + break;
> + }
> /*Add support for Centaur's CPUID instruction*/
> case 0xC0000000:
> /*Just support up to 0xC0000004 now*/
> --
> 2.37.3
>
On Mon, Sep 5, 2022 at 5:44 AM Like Xu <[email protected]> wrote:
>
> From: Like Xu <[email protected]>
>
> If AMD Performance Monitoring Version 2 (PerfMonV2) is detected
> by the guest, it can use a new scheme to manage the Core PMCs using
> the new global control and status registers.
>
> In addition to benefiting from the PerfMonV2 functionality in the same
> way as the host (higher precision), the guest also can reduce the number
> of vm-exits by lowering the total number of MSRs accesses.
>
> In terms of implementation details, amd_is_valid_msr() is resurrected
> since three newly added MSRs could not be mapped to one vPMC.
> The possibility of emulating PerfMonV2 on the mainframe has also
> been eliminated for reasons of precision.
>
> Co-developed-by: Sandipan Das <[email protected]>
> Signed-off-by: Sandipan Das <[email protected]>
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/pmu.c | 6 +++++
> arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
> arch/x86/kvm/x86.c | 11 ++++++++++
> 3 files changed, 57 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 7002e1b74108..56b4f898a246 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>
> switch (msr) {
> case MSR_CORE_PERF_GLOBAL_STATUS:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> msr_info->data = pmu->global_status;
> return 0;
> case MSR_CORE_PERF_GLOBAL_CTRL:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> msr_info->data = pmu->global_ctrl;
> return 0;
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> msr_info->data = 0;
> return 0;
> default:
> @@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>
> switch (msr) {
> case MSR_CORE_PERF_GLOBAL_STATUS:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> if (msr_info->host_initiated) {
> pmu->global_status = data;
> return 0;
> }
> break; /* RO MSR */
> case MSR_CORE_PERF_GLOBAL_CTRL:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> if (pmu->global_ctrl == data)
> return 0;
> if (kvm_valid_perf_global_ctrl(pmu, data)) {
> @@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> }
> break;
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> if (!(data & pmu->global_ovf_ctrl_mask)) {
> if (!msr_info->host_initiated)
> pmu->global_status &= ~data;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 3a20972e9f1a..4c7d408e3caa 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
> return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
> }
>
> -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> -{
> - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
> - return false;
> -}
> -
> static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> return pmc;
> }
>
> +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> + switch (msr) {
> + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
> + return pmu->version > 0;
> + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
> + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> + return pmu->version > 1;
> + default:
> + if (msr > MSR_F15H_PERF_CTR5 &&
> + msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
> + return pmu->version > 1;
Should this be bounded by guest CPUID.80000022H:EBX[NumCorePmc]
(unless host-initiated)?
> + break;
> + }
> +
> + return amd_msr_idx_to_pmc(vcpu, msr);
> +}
> +
> static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -162,20 +179,31 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> + struct kvm_cpuid_entry2 *entry;
> + union cpuid_0x80000022_ebx ebx;
>
> - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
> + pmu->version = 1;
> + entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
> + if (kvm_pmu_cap.version > 1 && entry && (entry->eax & BIT(0))) {
> + pmu->version = 2;
> + ebx.full = entry->ebx;
> + pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
> + (unsigned int)kvm_pmu_cap.num_counters_gp,
> + (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
> + pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
> + pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
> + } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
The logic above doesn't seem quite right, since guest_cpuid_has(vcpu,
X86_FEATURE_PERFCTR_CORE) promises 6 PMCs, regardless of what
CPUID.80000022 says.
On 6/9/2022 2:00 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:44 AM Like Xu <[email protected]> wrote:
>>
>> From: Like Xu <[email protected]>
>>
>> If AMD Performance Monitoring Version 2 (PerfMonV2) is detected
>> by the guest, it can use a new scheme to manage the Core PMCs using
>> the new global control and status registers.
>>
>> In addition to benefiting from the PerfMonV2 functionality in the same
>> way as the host (higher precision), the guest also can reduce the number
>> of vm-exits by lowering the total number of MSRs accesses.
>>
>> In terms of implementation details, amd_is_valid_msr() is resurrected
>> since three newly added MSRs could not be mapped to one vPMC.
>> The possibility of emulating PerfMonV2 on the mainframe has also
>> been eliminated for reasons of precision.
>>
>> Co-developed-by: Sandipan Das <[email protected]>
>> Signed-off-by: Sandipan Das <[email protected]>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> arch/x86/kvm/pmu.c | 6 +++++
>> arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
>> arch/x86/kvm/x86.c | 11 ++++++++++
>> 3 files changed, 57 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index 7002e1b74108..56b4f898a246 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>
>> switch (msr) {
>> case MSR_CORE_PERF_GLOBAL_STATUS:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>> msr_info->data = pmu->global_status;
>> return 0;
>> case MSR_CORE_PERF_GLOBAL_CTRL:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>> msr_info->data = pmu->global_ctrl;
>> return 0;
>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>> msr_info->data = 0;
>> return 0;
>> default:
>> @@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>
>> switch (msr) {
>> case MSR_CORE_PERF_GLOBAL_STATUS:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>> if (msr_info->host_initiated) {
>> pmu->global_status = data;
>> return 0;
>> }
>> break; /* RO MSR */
>> case MSR_CORE_PERF_GLOBAL_CTRL:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>> if (pmu->global_ctrl == data)
>> return 0;
>> if (kvm_valid_perf_global_ctrl(pmu, data)) {
>> @@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> }
>> break;
>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>> if (!(data & pmu->global_ovf_ctrl_mask)) {
>> if (!msr_info->host_initiated)
>> pmu->global_status &= ~data;
>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
>> index 3a20972e9f1a..4c7d408e3caa 100644
>> --- a/arch/x86/kvm/svm/pmu.c
>> +++ b/arch/x86/kvm/svm/pmu.c
>> @@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
>> return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
>> }
>>
>> -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>> -{
>> - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
>> - return false;
>> -}
>> -
>> static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
>> {
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> @@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
>> return pmc;
>> }
>>
>> +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>> +{
>> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> +
>> + switch (msr) {
>> + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
>> + return pmu->version > 0;
>> + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
>> + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>> + return pmu->version > 1;
>> + default:
>> + if (msr > MSR_F15H_PERF_CTR5 &&
>> + msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
>> + return pmu->version > 1;
>
> Should this be bounded by guest CPUID.80000022H:EBX[NumCorePmc]
> (unless host-initiated)?
Indeed, how about:

	default:
		if (msr > MSR_F15H_PERF_CTR5 &&
		    msr < MSR_F15H_PERF_CTL0 + 2 * pmu->nr_arch_gp_counters)
			return pmu->version > 1;

and for host-initiated:

	#define MSR_F15H_PERF_MSR_MAX \
		(MSR_F15H_PERF_CTR0 + 2 * (KVM_AMD_PMC_MAX_GENERIC - 1))

	kvm_{set|get}_msr_common():

	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_MSR_MAX:
		if (kvm_pmu_is_valid_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);

?
>
>> + break;
>> + }
>> +
>> + return amd_msr_idx_to_pmc(vcpu, msr);
>> +}
>> +
>> static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> {
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> @@ -162,20 +179,31 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
>> {
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> + struct kvm_cpuid_entry2 *entry;
>> + union cpuid_0x80000022_ebx ebx;
>>
>> - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
>> + pmu->version = 1;
>> + entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
>> + if (kvm_pmu_cap.version > 1 && entry && (entry->eax & BIT(0))) {
>> + pmu->version = 2;
>> + ebx.full = entry->ebx;
>> + pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
>> + (unsigned int)kvm_pmu_cap.num_counters_gp,
>> + (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
>> + pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
>> + pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
>> + } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
>> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
>
> The logic above doesn't seem quite right, since guest_cpuid_has(vcpu,
> X86_FEATURE_PERFCTR_CORE) promises 6 PMCs, regardless of what
> CPUID.80000022 says.
I would have expected the appearance of CPUID.80000022 to override
PERFCTR_CORE, but now I agree with you that this is not a good idea, so
how about:
amd_pmu_refresh():

	bool perfctr_core = guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);

	pmu->version = 1;
	if (kvm_pmu_cap.version > 1)
		entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);

	if (!perfctr_core)
		pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
	if (entry && (entry->eax & BIT(0))) {
		pmu->version = 2;
		ebx.full = entry->ebx;
		pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
						(unsigned int)kvm_pmu_cap.num_counters_gp,
						(unsigned int)KVM_AMD_PMC_MAX_GENERIC);
	}

	/* PERFCTR_CORE promises 6 PMCs, regardless of CPUID.80000022 */
	if (perfctr_core) {
		pmu->nr_arch_gp_counters = max(pmu->nr_arch_gp_counters,
					       AMD64_NUM_COUNTERS_CORE);
	}

	if (pmu->version > 1) {
		pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
		pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
	}

?
On 6/9/2022 1:26 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>>
>> From: Like Xu <[email protected]>
>>
>> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
>> which is clearly not supported with zero code effort in the current KVM.
>>
>> A local macro (named like INTEL_PMC_MAX_GENERIC) is introduced to
>> take back control of this virt capability, which also makes it easier to
>> statically partition all available counters between hosts and guests.
>>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> arch/x86/kvm/pmu.h | 2 ++
>> arch/x86/kvm/svm/pmu.c | 7 ++++---
>> arch/x86/kvm/x86.c | 2 ++
>> 3 files changed, 8 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
>> index 847e7112a5d3..e3a3813b6a38 100644
>> --- a/arch/x86/kvm/pmu.h
>> +++ b/arch/x86/kvm/pmu.h
>> @@ -18,6 +18,8 @@
>> #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
>> #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
>>
>> +#define KVM_AMD_PMC_MAX_GENERIC AMD64_NUM_COUNTERS_CORE
>> +
>> struct kvm_event_hw_type_mapping {
>> u8 eventsel;
>> u8 unit_mask;
>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
>> index 2ec420b85d6a..f99f2c869664 100644
>> --- a/arch/x86/kvm/svm/pmu.c
>> +++ b/arch/x86/kvm/svm/pmu.c
>> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> int i;
>>
>> - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
>> + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
>> + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>>
>> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
>> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
>> pmu->gp_counters[i].type = KVM_PMC_GP;
>> pmu->gp_counters[i].vcpu = vcpu;
>> pmu->gp_counters[i].idx = i;
>> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> int i;
>>
>> - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
>> + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>> struct kvm_pmc *pmc = &pmu->gp_counters[i];
>>
>> pmc_stop_counter(pmc);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 43a6a7efc6ec..b9738efd8425 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
>> MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
>
> IIRC, the effective maximum on the Intel side is 18, despite what
> INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR
Emm, check https://lore.kernel.org/kvm/[email protected]/
> indices. That's why the Intel list stops here. Should we introduce a
> KVM_INTEL_PMC_MAX_GENERIC as well?
Yes, this suggestion will be applied in the next version.
>
>> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>>
>> + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
>
> Perhaps the comment above should be moved down two lines, since the
> next two lines deal with the legacy counters.
Applied, thanks.
>
>> MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
>> MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
>> MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
>> MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
>> MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
>> MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
>
> At some point, we may want to consider populating the PMU MSR list
> dynamically, rather than statically enumerating all of them (for both
> AMD and Intel).
The uncertainty of msrs_to_save_all[] may cause trouble for legacy user
spaces. I have drafted patches to rewrite the PMU MSR accesses for the
host-initiated case as a first step.
>
> Reviewed-by: Jim Mattson <[email protected]>
On 6/9/2022 1:36 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>>
>> From: Sandipan Das <[email protected]>
>>
>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
>> new performance monitoring features for AMD processors.
>>
>> Bit 0 of EAX indicates support for Performance Monitoring
>> Version 2 (PerfMonV2) features. If found to be set during
>> PMU initialization, the EBX bits of the same CPUID function
>> can be used to determine the number of available PMCs for
>> different PMU types.
>>
>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
>> that guests can make use of the PerfMonV2 features.
>>
>> Co-developed-by: Like Xu <[email protected]>
>> Signed-off-by: Like Xu <[email protected]>
>> Signed-off-by: Sandipan Das <[email protected]>
>> ---
>> arch/x86/include/asm/perf_event.h | 8 ++++++++
>> arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
>> 2 files changed, 28 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index f6fc8dd51ef4..c848f504e467 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>> unsigned int full;
>> };
>>
>> +union cpuid_0x80000022_eax {
>> + struct {
>> + /* Performance Monitoring Version 2 Supported */
>> + unsigned int perfmon_v2:1;
>> + } split;
>> + unsigned int full;
>> +};
>> +
>> struct x86_pmu_capability {
>> int version;
>> int num_counters_gp;
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 75dcf7a72605..08a29ab096d2 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>> entry->edx = 0;
>> break;
>> case 0x80000000:
>> - entry->eax = min(entry->eax, 0x80000021);
>> + entry->eax = min(entry->eax, 0x80000022);
>> /*
>> * Serializing LFENCE is reported in a multitude of ways, and
>> * NullSegClearsBase is not reported in CPUID on Zen2; help
>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>> entry->eax |= BIT(6);
>> break;
>> + /* AMD Extended Performance Monitoring and Debug */
>> + case 0x80000022: {
>> + union cpuid_0x80000022_eax eax;
>> + union cpuid_0x80000022_ebx ebx;
>> +
>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>> + if (!enable_pmu)
>> + break;
>> +
>> + if (kvm_pmu_cap.version > 1) {
>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
>> + eax.split.perfmon_v2 = 1;
>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>> + KVM_AMD_PMC_MAX_GENERIC);
>
> Note that the number of core PMCs has to be at least 6 if
> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> spec. That is, software that knows about PERFCTR_CORE, but not about
> PMU v2, can rightfully expect 6 PMCs.
I thought the NumCorePmc number would only make sense if the
CPUID.80000022.eax.perfmon_v2 bit was present, but considering that user
space is perfectly fine with configuring the NumCorePmc number without
setting the perfmon_v2 bit at all, how about:
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
union cpuid_0x80000022_eax eax;
union cpuid_0x80000022_ebx ebx;
bool perfctr_core;
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
if (!enable_pmu)
break;
perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
if (!perfctr_core)
ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
if (kvm_pmu_cap.version > 1) {
/* AMD PerfMon is only supported up to V2 in the KVM. */
eax.split.perfmon_v2 = 1;
ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
KVM_AMD_PMC_MAX_GENERIC);
}
if (perfctr_core) {
ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
AMD64_NUM_COUNTERS_CORE);
}
entry->eax = eax.full;
entry->ebx = ebx.full;
break;
}
?
Once 0x80000022 appears, ebx.split.num_core_pmc will report only
the real "Number of Core Performance Counters" regardless of perfmon_v2.
>
>
>> + }
>> + entry->eax = eax.full;
>> + entry->ebx = ebx.full;
>> + break;
>> + }
>> /*Add support for Centaur's CPUID instruction*/
>> case 0xC0000000:
>> /*Just support up to 0xC0000004 now*/
>> --
>> 2.37.3
>>
On Tue, Sep 6, 2022 at 5:53 AM Like Xu <[email protected]> wrote:
>
> On 6/9/2022 1:36 am, Jim Mattson wrote:
> > On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
> >>
> >> From: Sandipan Das <[email protected]>
> >>
> >> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> >> new performance monitoring features for AMD processors.
> >>
> >> Bit 0 of EAX indicates support for Performance Monitoring
> >> Version 2 (PerfMonV2) features. If found to be set during
> >> PMU initialization, the EBX bits of the same CPUID function
> >> can be used to determine the number of available PMCs for
> >> different PMU types.
> >>
> >> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> >> that guests can make use of the PerfMonV2 features.
> >>
> >> Co-developed-by: Like Xu <[email protected]>
> >> Signed-off-by: Like Xu <[email protected]>
> >> Signed-off-by: Sandipan Das <[email protected]>
> >> ---
> >> arch/x86/include/asm/perf_event.h | 8 ++++++++
> >> arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
> >> 2 files changed, 28 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >> index f6fc8dd51ef4..c848f504e467 100644
> >> --- a/arch/x86/include/asm/perf_event.h
> >> +++ b/arch/x86/include/asm/perf_event.h
> >> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
> >> unsigned int full;
> >> };
> >>
> >> +union cpuid_0x80000022_eax {
> >> + struct {
> >> + /* Performance Monitoring Version 2 Supported */
> >> + unsigned int perfmon_v2:1;
> >> + } split;
> >> + unsigned int full;
> >> +};
> >> +
> >> struct x86_pmu_capability {
> >> int version;
> >> int num_counters_gp;
> >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >> index 75dcf7a72605..08a29ab096d2 100644
> >> --- a/arch/x86/kvm/cpuid.c
> >> +++ b/arch/x86/kvm/cpuid.c
> >> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >> entry->edx = 0;
> >> break;
> >> case 0x80000000:
> >> - entry->eax = min(entry->eax, 0x80000021);
> >> + entry->eax = min(entry->eax, 0x80000022);
> >> /*
> >> * Serializing LFENCE is reported in a multitude of ways, and
> >> * NullSegClearsBase is not reported in CPUID on Zen2; help
> >> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
> >> entry->eax |= BIT(6);
> >> break;
> >> + /* AMD Extended Performance Monitoring and Debug */
> >> + case 0x80000022: {
> >> + union cpuid_0x80000022_eax eax;
> >> + union cpuid_0x80000022_ebx ebx;
> >> +
> >> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >> + if (!enable_pmu)
> >> + break;
> >> +
> >> + if (kvm_pmu_cap.version > 1) {
> >> + /* AMD PerfMon is only supported up to V2 in the KVM. */
> >> + eax.split.perfmon_v2 = 1;
> >> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >> + KVM_AMD_PMC_MAX_GENERIC);
> >
> > Note that the number of core PMCs has to be at least 6 if
> > guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> > could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> > spec. That is, software that knows about PERFCTR_CORE, but not about
> > PMU v2, can rightfully expect 6 PMCs.
>
> I thought the NumCorePmc number would only make sense if
> CPUID.80000022.eax.perfmon_v2
> bit was present, but considering that the user space is perfectly fine with just
> configuring the
> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
present. It's hard to know in the absence of documentation.
> /* AMD Extended Performance Monitoring and Debug */
> case 0x80000022: {
> union cpuid_0x80000022_eax eax;
> union cpuid_0x80000022_ebx ebx;
> bool perfctr_core;
>
> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> if (!enable_pmu)
> break;
>
> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
> if (!perfctr_core)
> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
> if (kvm_pmu_cap.version > 1) {
> /* AMD PerfMon is only supported up to V2 in the KVM. */
> eax.split.perfmon_v2 = 1;
> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> KVM_AMD_PMC_MAX_GENERIC);
> }
> if (perfctr_core) {
> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
> AMD64_NUM_COUNTERS_CORE);
> }
This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>
> entry->eax = eax.full;
> entry->ebx = ebx.full;
> break;
> }
>
> ?
>
> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
> the real "Number of Core Performance Counters" regardless of perfmon_v2.
>
> >
> >
> >> + }
> >> + entry->eax = eax.full;
> >> + entry->ebx = ebx.full;
> >> + break;
> >> + }
> >> /*Add support for Centaur's CPUID instruction*/
> >> case 0xC0000000:
> >> /*Just support up to 0xC0000004 now*/
> >> --
> >> 2.37.3
> >>
On Tue, Sep 6, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>
> On 6/9/2022 2:00 am, Jim Mattson wrote:
> > On Mon, Sep 5, 2022 at 5:44 AM Like Xu <[email protected]> wrote:
> >>
> >> From: Like Xu <[email protected]>
> >>
> >> If AMD Performance Monitoring Version 2 (PerfMonV2) is detected
> >> by the guest, it can use a new scheme to manage the Core PMCs using
> >> the new global control and status registers.
> >>
> >> In addition to benefiting from the PerfMonV2 functionality in the same
> >> way as the host (higher precision), the guest also can reduce the number
> >> of vm-exits by lowering the total number of MSRs accesses.
> >>
> >> In terms of implementation details, amd_is_valid_msr() is resurrected
> >> since three newly added MSRs could not be mapped to one vPMC.
> >> The possibility of emulating PerfMonV2 on the mainframe has also
> >> been eliminated for reasons of precision.
> >>
> >> Co-developed-by: Sandipan Das <[email protected]>
> >> Signed-off-by: Sandipan Das <[email protected]>
> >> Signed-off-by: Like Xu <[email protected]>
> >> ---
> >> arch/x86/kvm/pmu.c | 6 +++++
> >> arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
> >> arch/x86/kvm/x86.c | 11 ++++++++++
> >> 3 files changed, 57 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> >> index 7002e1b74108..56b4f898a246 100644
> >> --- a/arch/x86/kvm/pmu.c
> >> +++ b/arch/x86/kvm/pmu.c
> >> @@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >>
> >> switch (msr) {
> >> case MSR_CORE_PERF_GLOBAL_STATUS:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >> msr_info->data = pmu->global_status;
> >> return 0;
> >> case MSR_CORE_PERF_GLOBAL_CTRL:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >> msr_info->data = pmu->global_ctrl;
> >> return 0;
> >> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >> msr_info->data = 0;
> >> return 0;
> >> default:
> >> @@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >>
> >> switch (msr) {
> >> case MSR_CORE_PERF_GLOBAL_STATUS:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >> if (msr_info->host_initiated) {
> >> pmu->global_status = data;
> >> return 0;
> >> }
> >> break; /* RO MSR */
> >> case MSR_CORE_PERF_GLOBAL_CTRL:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >> if (pmu->global_ctrl == data)
> >> return 0;
> >> if (kvm_valid_perf_global_ctrl(pmu, data)) {
> >> @@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >> }
> >> break;
> >> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >> if (!(data & pmu->global_ovf_ctrl_mask)) {
> >> if (!msr_info->host_initiated)
> >> pmu->global_status &= ~data;
> >> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> >> index 3a20972e9f1a..4c7d408e3caa 100644
> >> --- a/arch/x86/kvm/svm/pmu.c
> >> +++ b/arch/x86/kvm/svm/pmu.c
> >> @@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
> >> return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
> >> }
> >>
> >> -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> >> -{
> >> - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
> >> - return false;
> >> -}
> >> -
> >> static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> >> {
> >> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >> @@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> >> return pmc;
> >> }
> >>
> >> +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> >> +{
> >> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >> +
> >> + switch (msr) {
> >> + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
> >> + return pmu->version > 0;
> >> + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
> >> + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >> + return pmu->version > 1;
> >> + default:
> >> + if (msr > MSR_F15H_PERF_CTR5 &&
> >> + msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
> >> + return pmu->version > 1;
> >
> > Should this be bounded by guest CPUID.80000022H:EBX[NumCorePmc]
> > (unless host-initiated)?
>
> Indeed, how about:
>
> default:
> if (msr > MSR_F15H_PERF_CTR5 &&
> msr < MSR_F15H_PERF_CTL0 + 2 * pmu->nr_arch_gp_counters)
> return pmu->version > 1;
>
> and for host-initiated:
>
> #define MSR_F15H_PERF_MSR_MAX \
> (MSR_F15H_PERF_CTR0 + 2 * (KVM_AMD_PMC_MAX_GENERIC - 1))
I think there may be an off-by-one error here.
>
> kvm_{set|get}_msr_common()
> case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_MSR_MAX:
> if (kvm_pmu_is_valid_msr(vcpu, msr))
> return kvm_pmu_set_msr(vcpu, msr_info);
> ?
>
> >
> >> + break;
> >> + }
> >> +
> >> + return amd_msr_idx_to_pmc(vcpu, msr);
> >> +}
> >> +
> >> static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >> {
> >> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >> @@ -162,20 +179,31 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >> static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> >> {
> >> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >> + struct kvm_cpuid_entry2 *entry;
> >> + union cpuid_0x80000022_ebx ebx;
> >>
> >> - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
> >> + pmu->version = 1;
> >> + entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
> >> + if (kvm_pmu_cap.version > 1 && entry && (entry->eax & BIT(0))) {
> >> + pmu->version = 2;
> >> + ebx.full = entry->ebx;
> >> + pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
> >> + (unsigned int)kvm_pmu_cap.num_counters_gp,
> >> + (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
> >> + pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
> >> + pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
> >> + } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
> >> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
> >
> > The logic above doesn't seem quite right, since guest_cpuid_has(vcpu,
> > X86_FEATURE_PERFCTR_CORE) promises 6 PMCs, regardless of what
> > CPUID.80000022 says.
>
> I would have expected the appearance of CPUID.80000022 to override PERFCTR_CORE,
> now I don't think it's a good idea as you do, so how about:
>
> amd_pmu_refresh():
>
> bool perfctr_core = guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
>
> pmu->version = 1;
> if (kvm_pmu_cap.version > 1)
> entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
>
> if (!perfctr_core)
> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
> if (entry && (entry->eax & BIT(0))) {
> pmu->version = 2;
> ebx.full = entry->ebx;
> pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
> (unsigned int)kvm_pmu_cap.num_counters_gp,
> (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
> }
> /* PERFCTR_CORE promises 6 PMCs, regardless of CPUID.80000022 */
> if (perfctr_core) {
> pmu->nr_arch_gp_counters = max(pmu->nr_arch_gp_counters,
> AMD64_NUM_COUNTERS_CORE);
> }
Even if X86_FEATURE_PERFCTR_CORE is clear, all AMD CPUs promise 4 PMCs.
>
> if (pmu->version > 1) {
> pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
> pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
> }
>
> ?
>
>
On 7/9/2022 4:08 am, Jim Mattson wrote:
> On Tue, Sep 6, 2022 at 5:53 AM Like Xu <[email protected]> wrote:
>>
>> On 6/9/2022 1:36 am, Jim Mattson wrote:
>>> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>>>>
>>>> From: Sandipan Das <[email protected]>
>>>>
>>>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
>>>> new performance monitoring features for AMD processors.
>>>>
>>>> Bit 0 of EAX indicates support for Performance Monitoring
>>>> Version 2 (PerfMonV2) features. If found to be set during
>>>> PMU initialization, the EBX bits of the same CPUID function
>>>> can be used to determine the number of available PMCs for
>>>> different PMU types.
>>>>
>>>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
>>>> that guests can make use of the PerfMonV2 features.
>>>>
>>>> Co-developed-by: Like Xu <[email protected]>
>>>> Signed-off-by: Like Xu <[email protected]>
>>>> Signed-off-by: Sandipan Das <[email protected]>
>>>> ---
>>>> arch/x86/include/asm/perf_event.h | 8 ++++++++
>>>> arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
>>>> 2 files changed, 28 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>>>> index f6fc8dd51ef4..c848f504e467 100644
>>>> --- a/arch/x86/include/asm/perf_event.h
>>>> +++ b/arch/x86/include/asm/perf_event.h
>>>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>>>> unsigned int full;
>>>> };
>>>>
>>>> +union cpuid_0x80000022_eax {
>>>> + struct {
>>>> + /* Performance Monitoring Version 2 Supported */
>>>> + unsigned int perfmon_v2:1;
>>>> + } split;
>>>> + unsigned int full;
>>>> +};
>>>> +
>>>> struct x86_pmu_capability {
>>>> int version;
>>>> int num_counters_gp;
>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>> --- a/arch/x86/kvm/cpuid.c
>>>> +++ b/arch/x86/kvm/cpuid.c
>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>> entry->edx = 0;
>>>> break;
>>>> case 0x80000000:
>>>> - entry->eax = min(entry->eax, 0x80000021);
>>>> + entry->eax = min(entry->eax, 0x80000022);
>>>> /*
>>>> * Serializing LFENCE is reported in a multitude of ways, and
>>>> * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>> entry->eax |= BIT(6);
>>>> break;
>>>> + /* AMD Extended Performance Monitoring and Debug */
>>>> + case 0x80000022: {
>>>> + union cpuid_0x80000022_eax eax;
>>>> + union cpuid_0x80000022_ebx ebx;
>>>> +
>>>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>> + if (!enable_pmu)
>>>> + break;
>>>> +
>>>> + if (kvm_pmu_cap.version > 1) {
>>>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>> + eax.split.perfmon_v2 = 1;
>>>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>> + KVM_AMD_PMC_MAX_GENERIC);
>>>
>>> Note that the number of core PMCs has to be at least 6 if
>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>> PMU v2, can rightfully expect 6 PMCs.
>>
>> I thought the NumCorePmc number would only make sense if
>> CPUID.80000022.eax.perfmon_v2
>> bit was present, but considering that the user space is perfectly fine with just
>> configuring the
>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>
> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
> present. It's hard to know in the absence of documentation.
Whenever this happens, we can always leave the definition of the behavior to
the hypervisor.
>
>> /* AMD Extended Performance Monitoring and Debug */
>> case 0x80000022: {
>> union cpuid_0x80000022_eax eax;
>> union cpuid_0x80000022_ebx ebx;
>> bool perfctr_core;
>>
>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>> if (!enable_pmu)
>> break;
>>
>> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>> if (!perfctr_core)
>> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>> if (kvm_pmu_cap.version > 1) {
>> /* AMD PerfMon is only supported up to V2 in the KVM. */
>> eax.split.perfmon_v2 = 1;
>> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>> KVM_AMD_PMC_MAX_GENERIC);
>> }
>> if (perfctr_core) {
>> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>> AMD64_NUM_COUNTERS_CORE);
>> }
>
> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
perfctr_core.
>
>>
>> entry->eax = eax.full;
>> entry->ebx = ebx.full;
>> break;
>> }
>>
>> ?
>>
>> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
>> the real "Number of Core Performance Counters" regardless of perfmon_v2.
>>
>>>
>>>
>>>> + }
>>>> + entry->eax = eax.full;
>>>> + entry->ebx = ebx.full;
>>>> + break;
>>>> + }
>>>> /*Add support for Centaur's CPUID instruction*/
>>>> case 0xC0000000:
>>>> /*Just support up to 0xC0000004 now*/
>>>> --
>>>> 2.37.3
>>>>
On 7/9/2022 4:19 am, Jim Mattson wrote:
> On Tue, Sep 6, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
>>
>> On 6/9/2022 2:00 am, Jim Mattson wrote:
>>> On Mon, Sep 5, 2022 at 5:44 AM Like Xu <[email protected]> wrote:
>>>>
>>>> From: Like Xu <[email protected]>
>>>>
>>>> If AMD Performance Monitoring Version 2 (PerfMonV2) is detected
>>>> by the guest, it can use a new scheme to manage the Core PMCs using
>>>> the new global control and status registers.
>>>>
>>>> In addition to benefiting from the PerfMonV2 functionality in the same
>>>> way as the host (higher precision), the guest also can reduce the number
>>>> of vm-exits by lowering the total number of MSRs accesses.
>>>>
>>>> In terms of implementation details, amd_is_valid_msr() is resurrected
>>>> since three newly added MSRs could not be mapped to one vPMC.
>>>> The possibility of emulating PerfMonV2 on the mainframe has also
>>>> been eliminated for reasons of precision.
>>>>
>>>> Co-developed-by: Sandipan Das <[email protected]>
>>>> Signed-off-by: Sandipan Das <[email protected]>
>>>> Signed-off-by: Like Xu <[email protected]>
>>>> ---
>>>> arch/x86/kvm/pmu.c | 6 +++++
>>>> arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
>>>> arch/x86/kvm/x86.c | 11 ++++++++++
>>>> 3 files changed, 57 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>>>> index 7002e1b74108..56b4f898a246 100644
>>>> --- a/arch/x86/kvm/pmu.c
>>>> +++ b/arch/x86/kvm/pmu.c
>>>> @@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>>
>>>> switch (msr) {
>>>> case MSR_CORE_PERF_GLOBAL_STATUS:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>>>> msr_info->data = pmu->global_status;
>>>> return 0;
>>>> case MSR_CORE_PERF_GLOBAL_CTRL:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>>>> msr_info->data = pmu->global_ctrl;
>>>> return 0;
>>>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>>>> msr_info->data = 0;
>>>> return 0;
>>>> default:
>>>> @@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>>
>>>> switch (msr) {
>>>> case MSR_CORE_PERF_GLOBAL_STATUS:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>>>> if (msr_info->host_initiated) {
>>>> pmu->global_status = data;
>>>> return 0;
>>>> }
>>>> break; /* RO MSR */
>>>> case MSR_CORE_PERF_GLOBAL_CTRL:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>>>> if (pmu->global_ctrl == data)
>>>> return 0;
>>>> if (kvm_valid_perf_global_ctrl(pmu, data)) {
>>>> @@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>> }
>>>> break;
>>>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>>>> if (!(data & pmu->global_ovf_ctrl_mask)) {
>>>> if (!msr_info->host_initiated)
>>>> pmu->global_status &= ~data;
>>>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
>>>> index 3a20972e9f1a..4c7d408e3caa 100644
>>>> --- a/arch/x86/kvm/svm/pmu.c
>>>> +++ b/arch/x86/kvm/svm/pmu.c
>>>> @@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
>>>> return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
>>>> }
>>>>
>>>> -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>>>> -{
>>>> - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
>>>> - return false;
>>>> -}
>>>> -
>>>> static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
>>>> {
>>>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> @@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
>>>> return pmc;
>>>> }
>>>>
>>>> +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>>>> +{
>>>> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> +
>>>> + switch (msr) {
>>>> + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
>>>> + return pmu->version > 0;
>>>> + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
>>>> + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
>>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
>>>> + return pmu->version > 1;
>>>> + default:
>>>> + if (msr > MSR_F15H_PERF_CTR5 &&
>>>> + msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
>>>> + return pmu->version > 1;
>>>
>>> Should this be bounded by guest CPUID.80000022H:EBX[NumCorePmc]
>>> (unless host-initiated)?
>>
>> Indeed, how about:
>>
>> default:
>> if (msr > MSR_F15H_PERF_CTR5 &&
>> msr < MSR_F15H_PERF_CTL0 + 2 * pmu->nr_arch_gp_counters)
>> return pmu->version > 1;
>>
>> and for host-initiated:
>>
>> #define MSR_F15H_PERF_MSR_MAX \
>> (MSR_F15H_PERF_CTR0 + 2 * (KVM_AMD_PMC_MAX_GENERIC - 1))
>
> I think there may be an off-by-one error here.
If KVM_AMD_PMC_MAX_GENERIC is 6:
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10)
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR
#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10)
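Working through those values: MSR_F15H_PERF_MSR_MAX = MSR_F15H_PERF_CTR0 +
2 * (6 - 1) = 0xc0010201 + 10 = 0xc001020b, which is exactly
MSR_F15H_PERF_CTR5, so "case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_MSR_MAX"
ends on the last implemented counter rather than one past it.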
>
>>
>> kvm_{set|get}_msr_common()
>> case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_MSR_MAX:
The original code is "case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:";
in that case, MSR_F15H_PERF_MSR_MAX makes sense, right?
>> if (kvm_pmu_is_valid_msr(vcpu, msr))
>> return kvm_pmu_set_msr(vcpu, msr_info);
>> ?
>>
>>>
>>>> + break;
>>>> + }
>>>> +
>>>> + return amd_msr_idx_to_pmc(vcpu, msr);
>>>> +}
>>>> +
>>>> static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>> {
>>>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> @@ -162,20 +179,31 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>> static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
>>>> {
>>>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> + struct kvm_cpuid_entry2 *entry;
>>>> + union cpuid_0x80000022_ebx ebx;
>>>>
>>>> - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
>>>> + pmu->version = 1;
>>>> + entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
>>>> + if (kvm_pmu_cap.version > 1 && entry && (entry->eax & BIT(0))) {
>>>> + pmu->version = 2;
>>>> + ebx.full = entry->ebx;
>>>> + pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
>>>> + (unsigned int)kvm_pmu_cap.num_counters_gp,
>>>> + (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
>>>> + pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
>>>> + pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
>>>> + } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
>>>> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
>>>
>>> The logic above doesn't seem quite right, since guest_cpuid_has(vcpu,
>>> X86_FEATURE_PERFCTR_CORE) promises 6 PMCs, regardless of what
>>> CPUID.80000022 says.
>>
>> I would have expected the appearance of CPUID.80000022 to override PERFCTR_CORE,
>> now I don't think it's a good idea as you do, so how about:
>>
>> amd_pmu_refresh():
>>
>> bool perfctr_core = guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
>>
>> pmu->version = 1;
>> if (kvm_pmu_cap.version > 1)
>> entry = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0);
>>
>> if (!perfctr_core)
>> pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
>> if (entry && (entry->eax & BIT(0))) {
>> pmu->version = 2;
>> ebx.full = entry->ebx;
>> pmu->nr_arch_gp_counters = min3((unsigned int)ebx.split.num_core_pmc,
>> (unsigned int)kvm_pmu_cap.num_counters_gp,
>> (unsigned int)KVM_AMD_PMC_MAX_GENERIC);
>> }
>> /* PERFCTR_CORE promises 6 PMCs, regardless of CPUID.80000022 */
>> if (perfctr_core) {
>> pmu->nr_arch_gp_counters = max(pmu->nr_arch_gp_counters,
>> AMD64_NUM_COUNTERS_CORE);
>> }
>
> Even if X86_FEATURE_PERFCTR_CORE is clear, all AMD CPUs promise 4 PMCs.
>
>>
>> if (pmu->version > 1) {
>> pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
>> pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask;
>> }
>>
>> ?
>>
>>
On Tue, Sep 6, 2022 at 8:59 PM Like Xu <[email protected]> wrote:
>
> On 7/9/2022 4:08 am, Jim Mattson wrote:
> > On Tue, Sep 6, 2022 at 5:53 AM Like Xu <[email protected]> wrote:
> >>
> >> On 6/9/2022 1:36 am, Jim Mattson wrote:
> >>> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
> >>>>
> >>>> From: Sandipan Das <[email protected]>
> >>>>
> >>>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> >>>> new performance monitoring features for AMD processors.
> >>>>
> >>>> Bit 0 of EAX indicates support for Performance Monitoring
> >>>> Version 2 (PerfMonV2) features. If found to be set during
> >>>> PMU initialization, the EBX bits of the same CPUID function
> >>>> can be used to determine the number of available PMCs for
> >>>> different PMU types.
> >>>>
> >>>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> >>>> that guests can make use of the PerfMonV2 features.
> >>>>
> >>>> Co-developed-by: Like Xu <[email protected]>
> >>>> Signed-off-by: Like Xu <[email protected]>
> >>>> Signed-off-by: Sandipan Das <[email protected]>
> >>>> ---
> >>>> arch/x86/include/asm/perf_event.h | 8 ++++++++
> >>>> arch/x86/kvm/cpuid.c | 21 ++++++++++++++++++++-
> >>>> 2 files changed, 28 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >>>> index f6fc8dd51ef4..c848f504e467 100644
> >>>> --- a/arch/x86/include/asm/perf_event.h
> >>>> +++ b/arch/x86/include/asm/perf_event.h
> >>>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
> >>>> unsigned int full;
> >>>> };
> >>>>
> >>>> +union cpuid_0x80000022_eax {
> >>>> + struct {
> >>>> + /* Performance Monitoring Version 2 Supported */
> >>>> + unsigned int perfmon_v2:1;
> >>>> + } split;
> >>>> + unsigned int full;
> >>>> +};
> >>>> +
> >>>> struct x86_pmu_capability {
> >>>> int version;
> >>>> int num_counters_gp;
> >>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >>>> index 75dcf7a72605..08a29ab096d2 100644
> >>>> --- a/arch/x86/kvm/cpuid.c
> >>>> +++ b/arch/x86/kvm/cpuid.c
> >>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>>> entry->edx = 0;
> >>>> break;
> >>>> case 0x80000000:
> >>>> - entry->eax = min(entry->eax, 0x80000021);
> >>>> + entry->eax = min(entry->eax, 0x80000022);
> >>>> /*
> >>>> * Serializing LFENCE is reported in a multitude of ways, and
> >>>> * NullSegClearsBase is not reported in CPUID on Zen2; help
> >>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
> >>>> entry->eax |= BIT(6);
> >>>> break;
> >>>> + /* AMD Extended Performance Monitoring and Debug */
> >>>> + case 0x80000022: {
> >>>> + union cpuid_0x80000022_eax eax;
> >>>> + union cpuid_0x80000022_ebx ebx;
> >>>> +
> >>>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >>>> + if (!enable_pmu)
> >>>> + break;
> >>>> +
> >>>> + if (kvm_pmu_cap.version > 1) {
> >>>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
> >>>> + eax.split.perfmon_v2 = 1;
> >>>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >>>> + KVM_AMD_PMC_MAX_GENERIC);
> >>>
> >>> Note that the number of core PMCs has to be at least 6 if
> >>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> >>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> >>> spec. That is, software that knows about PERFCTR_CORE, but not about
> >>> PMU v2, can rightfully expect 6 PMCs.
> >>
> >> I thought the NumCorePmc number would only make sense if
> >> CPUID.80000022.eax.perfmon_v2
> >> bit was present, but considering that the user space is perfectly fine with just
> >> configuring the
> >> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
> >
> > CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
> > present. It's hard to know in the absence of documentation.
>
> Whenever this happens, we may always leave the definition of behavior to the
> hypervisor.
I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
authority on behavior.
> >
> >> /* AMD Extended Performance Monitoring and Debug */
> >> case 0x80000022: {
> >> union cpuid_0x80000022_eax eax;
> >> union cpuid_0x80000022_ebx ebx;
> >> bool perfctr_core;
> >>
> >> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >> if (!enable_pmu)
> >> break;
> >>
> >> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
> >> if (!perfctr_core)
> >> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
> >> if (kvm_pmu_cap.version > 1) {
> >> /* AMD PerfMon is only supported up to V2 in the KVM. */
> >> eax.split.perfmon_v2 = 1;
> >> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >> KVM_AMD_PMC_MAX_GENERIC);
> >> }
> >> if (perfctr_core) {
> >> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
> >> AMD64_NUM_COUNTERS_CORE);
> >> }
> >
> > This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>
> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
> perfctr_core.
The APM says, "All implementations support the base set of four
performance counter / event-select pairs." That is unequivocal.
> >
> >>
> >> entry->eax = eax.full;
> >> entry->ebx = ebx.full;
> >> break;
> >> }
> >>
> >> ?
> >>
> >> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
> >> the real "Number of Core Performance Counters" regardless of perfmon_v2.
> >>
> >>>
> >>>
> >>>> + }
> >>>> + entry->eax = eax.full;
> >>>> + entry->ebx = ebx.full;
> >>>> + break;
> >>>> + }
> >>>> /*Add support for Centaur's CPUID instruction*/
> >>>> case 0xC0000000:
> >>>> /*Just support up to 0xC0000004 now*/
> >>>> --
> >>>> 2.37.3
> >>>>
On Tue, Sep 6, 2022 at 8:50 PM Like Xu <[email protected]> wrote:
>
> On 7/9/2022 4:19 am, Jim Mattson wrote:
> > On Tue, Sep 6, 2022 at 5:45 AM Like Xu <[email protected]> wrote:
> >>
> >> On 6/9/2022 2:00 am, Jim Mattson wrote:
> >>> On Mon, Sep 5, 2022 at 5:44 AM Like Xu <[email protected]> wrote:
> >>>>
> >>>> From: Like Xu <[email protected]>
> >>>>
> >>>> If AMD Performance Monitoring Version 2 (PerfMonV2) is detected
> >>>> by the guest, it can use a new scheme to manage the Core PMCs using
> >>>> the new global control and status registers.
> >>>>
> >>>> In addition to benefiting from the PerfMonV2 functionality in the same
> >>>> way as the host (higher precision), the guest also can reduce the number
> >>>> of vm-exits by lowering the total number of MSRs accesses.
> >>>>
> >>>> In terms of implementation details, amd_is_valid_msr() is resurrected
> >>>> since three newly added MSRs could not be mapped to one vPMC.
> >>>> The possibility of emulating PerfMonV2 on the mainframe has also
> >>>> been eliminated for reasons of precision.
> >>>>
> >>>> Co-developed-by: Sandipan Das <[email protected]>
> >>>> Signed-off-by: Sandipan Das <[email protected]>
> >>>> Signed-off-by: Like Xu <[email protected]>
> >>>> ---
> >>>> arch/x86/kvm/pmu.c | 6 +++++
> >>>> arch/x86/kvm/svm/pmu.c | 50 +++++++++++++++++++++++++++++++++---------
> >>>> arch/x86/kvm/x86.c | 11 ++++++++++
> >>>> 3 files changed, 57 insertions(+), 10 deletions(-)
> >>>>
> >>>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> >>>> index 7002e1b74108..56b4f898a246 100644
> >>>> --- a/arch/x86/kvm/pmu.c
> >>>> +++ b/arch/x86/kvm/pmu.c
> >>>> @@ -455,12 +455,15 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >>>>
> >>>> switch (msr) {
> >>>> case MSR_CORE_PERF_GLOBAL_STATUS:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >>>> msr_info->data = pmu->global_status;
> >>>> return 0;
> >>>> case MSR_CORE_PERF_GLOBAL_CTRL:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >>>> msr_info->data = pmu->global_ctrl;
> >>>> return 0;
> >>>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >>>> msr_info->data = 0;
> >>>> return 0;
> >>>> default:
> >>>> @@ -479,12 +482,14 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >>>>
> >>>> switch (msr) {
> >>>> case MSR_CORE_PERF_GLOBAL_STATUS:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >>>> if (msr_info->host_initiated) {
> >>>> pmu->global_status = data;
> >>>> return 0;
> >>>> }
> >>>> break; /* RO MSR */
> >>>> case MSR_CORE_PERF_GLOBAL_CTRL:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >>>> if (pmu->global_ctrl == data)
> >>>> return 0;
> >>>> if (kvm_valid_perf_global_ctrl(pmu, data)) {
> >>>> @@ -495,6 +500,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >>>> }
> >>>> break;
> >>>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >>>> if (!(data & pmu->global_ovf_ctrl_mask)) {
> >>>> if (!msr_info->host_initiated)
> >>>> pmu->global_status &= ~data;
> >>>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> >>>> index 3a20972e9f1a..4c7d408e3caa 100644
> >>>> --- a/arch/x86/kvm/svm/pmu.c
> >>>> +++ b/arch/x86/kvm/svm/pmu.c
> >>>> @@ -92,12 +92,6 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
> >>>> return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
> >>>> }
> >>>>
> >>>> -static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> >>>> -{
> >>>> - /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough. */
> >>>> - return false;
> >>>> -}
> >>>> -
> >>>> static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> >>>> {
> >>>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >>>> @@ -109,6 +103,29 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
> >>>> return pmc;
> >>>> }
> >>>>
> >>>> +static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> >>>> +{
> >>>> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >>>> +
> >>>> + switch (msr) {
> >>>> + case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
> >>>> + return pmu->version > 0;
> >>>> + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
> >>>> + return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
> >>>> + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
> >>>> + return pmu->version > 1;
> >>>> + default:
> >>>> + if (msr > MSR_F15H_PERF_CTR5 &&
> >>>> + msr < MSR_F15H_PERF_CTL0 + 2 * KVM_AMD_PMC_MAX_GENERIC)
> >>>> + return pmu->version > 1;
> >>>
> >>> Should this be bounded by guest CPUID.80000022H:EBX[NumCorePmc]
> >>> (unless host-initiated)?
> >>
> >> Indeed, how about:
> >>
> >> default:
> >> if (msr > MSR_F15H_PERF_CTR5 &&
> >> msr < MSR_F15H_PERF_CTL0 + 2 * pmu->nr_arch_gp_counters)
> >> return pmu->version > 1;
> >>
> >> and for host-initiated:
> >>
> >> #define MSR_F15H_PERF_MSR_MAX \
> >> (MSR_F15H_PERF_CTR0 + 2 * (KVM_AMD_PMC_MAX_GENERIC - 1))
> >
> > I think there may be an off-by-one error here.
>
> If KVM_AMD_PMC_MAX_GENERIC is 6:
>
> #define MSR_F15H_PERF_CTL 0xc0010200
> #define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10)
>
> #define MSR_F15H_PERF_CTR 0xc0010201
> #define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR
> #define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10)
>
> >
> >>
> >> kvm_{set|get}_msr_common()
> >> case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_MSR_MAX:
>
> the original code is "case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:",
>
> in that case, MSR_F15H_PERF_MSR_MAX make sense, right ?
Right. I was misreading the definition.
On 9/7/2022 9:41 AM, Jim Mattson wrote:
> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <[email protected]> wrote:
> [...]
>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>> entry->edx = 0;
>>>>>> break;
>>>>>> case 0x80000000:
>>>>>> - entry->eax = min(entry->eax, 0x80000021);
>>>>>> + entry->eax = min(entry->eax, 0x80000022);
>>>>>> /*
>>>>>> * Serializing LFENCE is reported in a multitude of ways, and
>>>>>> * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>> entry->eax |= BIT(6);
>>>>>> break;
>>>>>> + /* AMD Extended Performance Monitoring and Debug */
>>>>>> + case 0x80000022: {
>>>>>> + union cpuid_0x80000022_eax eax;
>>>>>> + union cpuid_0x80000022_ebx ebx;
>>>>>> +
>>>>>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>> + if (!enable_pmu)
>>>>>> + break;
>>>>>> +
>>>>>> + if (kvm_pmu_cap.version > 1) {
>>>>>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>> + eax.split.perfmon_v2 = 1;
>>>>>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>> + KVM_AMD_PMC_MAX_GENERIC);
>>>>>
>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>
>>>> I thought the NumCorePmc number would only make sense if
>>>> CPUID.80000022.eax.perfmon_v2
>>>> bit was present, but considering that the user space is perfectly fine with just
>>>> configuring the
>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>
>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>> present. It's hard to know in the absence of documentation.
>>
>> Whenever this happens, we may always leave the definition of behavior to the
>> hypervisor.
>
> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
> authority on behavior.
>
I understand that official documentation is not out yet. However, for Zen 4
models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.
>>>
>>>> /* AMD Extended Performance Monitoring and Debug */
>>>> case 0x80000022: {
>>>> union cpuid_0x80000022_eax eax;
>>>> union cpuid_0x80000022_ebx ebx;
>>>> bool perfctr_core;
>>>>
>>>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>> if (!enable_pmu)
>>>> break;
>>>>
>>>> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>> if (!perfctr_core)
>>>> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>> if (kvm_pmu_cap.version > 1) {
>>>> /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>> eax.split.perfmon_v2 = 1;
>>>> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>> KVM_AMD_PMC_MAX_GENERIC);
>>>> }
>>>> if (perfctr_core) {
>>>> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>> AMD64_NUM_COUNTERS_CORE);
>>>> }
>>>
>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>
>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>> perfctr_core.
>
> The APM says, "All implementations support the base set of four
> performance counter / event-select pairs." That is unequivocal.
>
That is true. The same can be inferred from amd_core_pmu_init() in
arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
that the four legacy counters are always available.
- Sandipan
On 7/9/2022 1:52 pm, Sandipan Das wrote:
> On 9/7/2022 9:41 AM, Jim Mattson wrote:
>> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <[email protected]> wrote:
>> [...]
>>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>> entry->edx = 0;
>>>>>>> break;
>>>>>>> case 0x80000000:
>>>>>>> - entry->eax = min(entry->eax, 0x80000021);
>>>>>>> + entry->eax = min(entry->eax, 0x80000022);
>>>>>>> /*
>>>>>>> * Serializing LFENCE is reported in a multitude of ways, and
>>>>>>> * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>>> entry->eax |= BIT(6);
>>>>>>> break;
>>>>>>> + /* AMD Extended Performance Monitoring and Debug */
>>>>>>> + case 0x80000022: {
>>>>>>> + union cpuid_0x80000022_eax eax;
>>>>>>> + union cpuid_0x80000022_ebx ebx;
>>>>>>> +
>>>>>>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>>> + if (!enable_pmu)
>>>>>>> + break;
>>>>>>> +
>>>>>>> + if (kvm_pmu_cap.version > 1) {
>>>>>>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>>> + eax.split.perfmon_v2 = 1;
>>>>>>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>>> + KVM_AMD_PMC_MAX_GENERIC);
>>>>>>
>>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>>
>>>>> I thought the NumCorePmc number would only make sense if
>>>>> CPUID.80000022.eax.perfmon_v2
>>>>> bit was present, but considering that the user space is perfectly fine with just
>>>>> configuring the
>>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>>
>>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>>> present. It's hard to know in the absence of documentation.
>>>
>>> Whenever this happens, we may always leave the definition of behavior to the
>>> hypervisor.
>>
>> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
>> authority on behavior.
The real world isn't like that, because even AMD has multiple implementations
in cases where the hardware specs aren't explicitly stated, and sometimes
they're intentionally vague. And the hypervisor can't just do nothing; it
prefers one behavior over the other and maintains maximum compatibility with
legacy user space.
>>
>
> I understand that official documentation is not out yet. However, for Zen 4
> models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
> the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.
Is PerfCtrExtCore a PerfMonV2 or PerfMonV3+ precondition ?
Is PerfCtrExtCore a CPUID.80000022 precondition ?
Should we always expect CPUID_Fn80000022_EBX.NumCorePmc to reflect the real
Number of Core Performance Counters regardless of whether PerfMonV2 is set ?
>
>>>>
>>>>> /* AMD Extended Performance Monitoring and Debug */
>>>>> case 0x80000022: {
>>>>> union cpuid_0x80000022_eax eax;
>>>>> union cpuid_0x80000022_ebx ebx;
>>>>> bool perfctr_core;
>>>>>
>>>>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>> if (!enable_pmu)
>>>>> break;
>>>>>
>>>>> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>>> if (!perfctr_core)
>>>>> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>>> if (kvm_pmu_cap.version > 1) {
>>>>> /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>> eax.split.perfmon_v2 = 1;
>>>>> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>> KVM_AMD_PMC_MAX_GENERIC);
>>>>> }
>>>>> if (perfctr_core) {
>>>>> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>>> AMD64_NUM_COUNTERS_CORE);
>>>>> }
>>>>
>>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>>
>>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>>> perfctr_core.
>>
>> The APM says, "All implementations support the base set of four
>> performance counter / event-select pairs." That is unequivocal.
>>
>
> That is true. The same can be inferred from amd_core_pmu_init() in
> arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
> that the four legacy counters are always available.
>
> - Sandipan
On 9/7/2022 12:09 PM, Like Xu wrote:
> On 7/9/2022 1:52 pm, Sandipan Das wrote:
>> On 9/7/2022 9:41 AM, Jim Mattson wrote:
>>> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <[email protected]> wrote:
>>> [...]
>>>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>> entry->edx = 0;
>>>>>>>> break;
>>>>>>>> case 0x80000000:
>>>>>>>> - entry->eax = min(entry->eax, 0x80000021);
>>>>>>>> + entry->eax = min(entry->eax, 0x80000022);
>>>>>>>> /*
>>>>>>>> * Serializing LFENCE is reported in a multitude of ways, and
>>>>>>>> * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>> if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>>>> entry->eax |= BIT(6);
>>>>>>>> break;
>>>>>>>> + /* AMD Extended Performance Monitoring and Debug */
>>>>>>>> + case 0x80000022: {
>>>>>>>> + union cpuid_0x80000022_eax eax;
>>>>>>>> + union cpuid_0x80000022_ebx ebx;
>>>>>>>> +
>>>>>>>> + entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>>>> + if (!enable_pmu)
>>>>>>>> + break;
>>>>>>>> +
>>>>>>>> + if (kvm_pmu_cap.version > 1) {
>>>>>>>> + /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>>>> + eax.split.perfmon_v2 = 1;
>>>>>>>> + ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>>>> + KVM_AMD_PMC_MAX_GENERIC);
>>>>>>>
>>>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>>>
>>>>>> I thought the NumCorePmc number would only make sense if
>>>>>> CPUID.80000022.eax.perfmon_v2
>>>>>> bit was present, but considering that the user space is perfectly fine with just
>>>>>> configuring the
>>>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>>>
>>>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>>>> present. It's hard to know in the absence of documentation.
>>>>
>>>> Whenever this happens, we may always leave the definition of behavior to the
>>>> hypervisor.
>>>
>>> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
>>> authority on behavior.
>
> The real world isn't like that, because even AMD has multiple implementations in cases
> where the hardware specs aren't explicitly stated, and sometimes they're intentionally vague.
> And the hypervisor can't do nothing, it prefers one over the other and maintains maximum compatibility with the legacy user space.
>
>>>
>>
>> I understand that official documentation is not out yet. However, for Zen 4
>> models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
>> the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.
>
> Is PerfCtrExtCore a PerfMonV2 or PerfMonV3+ precondition ?
> Is PerfCtrExtCore a CPUID.80000022 precondition ?
>
> Should we always expect CPUID_Fn80000022_EBX.NumCorePmc to reflect the real
> Number of Core Performance Counters regardless of whether PerfMonV2 is set ?
>
This is the suggested method for detecting the number of counters:
If CPUID Fn8000_0022_EAX[PerfMonV2] is set, then use the new interface in
CPUID Fn8000_0022_EBX to determine the number of counters.
Else if CPUID Fn8000_0001_ECX[PerfCtrExtCore] is set, then six counters
are available.
Else, four legacy counters are available.
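For completeness, that order maps to something like the sketch below (using
the kvm-unit-tests style cpuid() helper for illustration; the function name
is made up, and real code should first confirm that leaf 0x80000022 does not
exceed the maximum extended leaf):

	static inline unsigned int amd_nr_core_counters(void)
	{
		/* CPUID Fn8000_0022_EAX[0] is PerfMonV2. */
		if (cpuid(0x80000022).a & 0x1)
			return cpuid(0x80000022).b & 0xf; /* NumCorePmc */

		/* CPUID Fn8000_0001_ECX[23] is PerfCtrExtCore. */
		if (cpuid(0x80000001).c & (1u << 23))
			return 6;

		/* Base set of four counter/event-select pairs. */
		return 4;
	}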
There will be an APM update that will have this information in the
"Detecting Hardware Support for Performance Counters" section.
>>
>>>>>
>>>>>> /* AMD Extended Performance Monitoring and Debug */
>>>>>> case 0x80000022: {
>>>>>> union cpuid_0x80000022_eax eax;
>>>>>> union cpuid_0x80000022_ebx ebx;
>>>>>> bool perfctr_core;
>>>>>>
>>>>>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>> if (!enable_pmu)
>>>>>> break;
>>>>>>
>>>>>> perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>>>> if (!perfctr_core)
>>>>>> ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>>>> if (kvm_pmu_cap.version > 1) {
>>>>>> /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>> eax.split.perfmon_v2 = 1;
>>>>>> ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>> KVM_AMD_PMC_MAX_GENERIC);
>>>>>> }
>>>>>> if (perfctr_core) {
>>>>>> ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>>>> AMD64_NUM_COUNTERS_CORE);
>>>>>> }
>>>>>
>>>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>>>
>>>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>>>> perfctr_core.
>>>
>>> The APM says, "All implementations support the base set of four
>>> performance counter / event-select pairs." That is unequivocal.
>>>
>>
>> That is true. The same can be inferred from amd_core_pmu_init() in
>> arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
>> that the four legacy counters are always available.
>>
>> - Sandipan
On Wed, Sep 7, 2022 at 11:00 PM Sandipan Das <[email protected]> wrote:
> This is the suggested method for detecting the number of counters:
>
> If CPUID Fn8000_0022_EAX[PerfMonV2] is set, then use the new interface in
> CPUID Fn8000_0022_EBX to determine the number of counters.
>
> Else if CPUID Fn8000_0001_ECX[PerfCtrExtCore] is set, then six counters
> are available.
>
> Else, four legacy counters are available.
>
> There will be an APM update that will have this information in the
> "Detecting Hardware Support for Performance Counters" section.
Nonetheless, for compatibility with old software, Fn8000_0022_EBX can
never report less than four counters (or six, if
Fn8000_0001_ECX[PerfCtrExtCore] is set).
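As a sketch (not one of the snippets posted above), that floor could be
enforced in the proposed leaf 0x80000022 handling with something like:

	unsigned int min_pmc = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE) ?
			       AMD64_NUM_COUNTERS_CORE : AMD64_NUM_COUNTERS;

	if (ebx.split.num_core_pmc < min_pmc)
		ebx.split.num_core_pmc = min_pmc;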
On Mon, Sep 05, 2022, Like Xu wrote:
> From: Like Xu <[email protected]>
>
> Specifying an unsupported PMC encoding will cause a #GP(0).
> All testcases should be passed when the KVM_FEP prefix is added.
>
> Signed-off-by: Like Xu <[email protected]>
> ---
> lib/x86/processor.h | 5 ++++-
> x86/pmu.c | 13 +++++++++++++
> 2 files changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index 10bca27..9c490d9 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -441,7 +441,10 @@ static inline int wrmsr_safe(u32 index, u64 val)
> static inline uint64_t rdpmc(uint32_t index)
> {
> uint32_t a, d;
> - asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
> + if (is_fep_available())
> + asm volatile (KVM_FEP "rdpmc" : "=a"(a), "=d"(d) : "c"(index));
> + else
> + asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
Hmm, not sure how I feel about the idea of always using FEP in a common helper
when it's available. Part of me likes the idea, but part of me is worried that
it will cause confusion due to not being explicit.
Unless there's a pressing need to force emulation, let's punt the FEP stuff for
now. More below.
> return a | ((uint64_t)d << 32);
> }
>
> diff --git a/x86/pmu.c b/x86/pmu.c
> index 203a9d4..11607c0 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -758,12 +758,25 @@ static bool pmu_is_detected(void)
> return detect_intel_pmu();
> }
>
> +static void rdpmc_unsupported_counter(void *data)
> +{
> + rdpmc(64);
> +}
> +
> +static void check_rdpmc_cause_gp(void)
Maybe check_invalid_rdpmc_gp()? There are multiple reasons RDPMC can #GP; the
one that is being relied on to guarantee #GP is specifically that the PMC is
invalid.
> +{
> + report(test_for_exception(GP_VECTOR, rdpmc_unsupported_counter, NULL),
I'd really like to move away from test_for_exception() and use ASM_TRY(). Ignoring
FEP for the moment, the most extensible solution is to provide a safe variant:
static inline int rdpmc_safe(u32 index, uint64_t *val)
{
uint32_t a, d;
asm volatile (ASM_TRY("1f")
"rdpmc"
: "=a"(a), "=d"(d) : "c"(index));
*val = (uint64_t)a | ((uint64_t)d << 32);
return exception_vector();
}
static inline uint64_t rdpmc(uint32_t index)
{
uint64_t val;
int vector = rdpmc_safe(index, &val);
assert_msg(!vector, "Unexpected %s on RDPMC(%d)",
exception_mnemonic(vector), index);
return val;
}
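With a safe variant like that, the invalid-index check can drop
test_for_exception() entirely, e.g. (a sketch using the helper above, with a
local uint64_t val):

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "RDPMC with invalid PMC index raises #GP");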
For long-term emulation validation, the best idea I have at this point is to
add a config knob to opt in to using FEP in _all_ common helpers (where "all"
means everything KVM actually emulates). It'd take some macro magic, but it'd
be easier to maintain (no need to have two paths in every helper) and would be
controllable.
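One rough shape for that macro magic, purely as a sketch ("force_emulation"
is a hypothetical opt-in knob, not an existing kvm-unit-tests symbol; KVM_FEP,
is_fep_available(), ASM_TRY() and exception_vector() are the pieces already
used or suggested above):

	extern bool force_emulation;

	#define asm_safe_maybe_fep(insn, constraints...)		\
	do {								\
		if (force_emulation && is_fep_available())		\
			asm volatile(ASM_TRY("1f")			\
				     KVM_FEP insn "\n\t"		\
				     "1:" constraints);			\
		else							\
			asm volatile(ASM_TRY("1f")			\
				     insn "\n\t"			\
				     "1:" constraints);			\
	} while (0)

	static inline int rdpmc_safe(u32 index, uint64_t *val)
	{
		uint32_t a, d;

		/* Written once; FEP is applied (or not) inside the macro. */
		asm_safe_maybe_fep("rdpmc", : "=a"(a), "=d"(d) : "c"(index));
		*val = (uint64_t)a | ((uint64_t)d << 32);
		return exception_vector();
	}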
> + "rdpmc with invalid PMC index raises #GP");
> +}
> +
> int main(int ac, char **av)
> {
> setup_vm();
> handle_irq(PC_VECTOR, cnt_overflow);
> buf = malloc(N*64);
>
> + check_rdpmc_cause_gp();
> +
> if (!pmu_is_detected())
> return report_summary();
>
> --
> 2.37.3
>
Can you provide a single series for all of the KVM-Unit-Tests PMU work (separate
from the KVM patches)? Ya, it'll be big and is a blatant violation of "do one
thing", but trying to manually handle the dependencies on the review side is time
consuming.
One thought to help keep track of dependencies between KVM and KUT would be to
add dummy commits between each sub-series, with the dummy commit containing a lore
link to the relevant KVM patches/series. That would allow throwing everything into
one series without losing track of things. Hopefully.
On Mon, Sep 05, 2022, Like Xu wrote:
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index 9c490d9..b9592c4 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -796,8 +796,12 @@ static inline void flush_tlb(void)
>
> static inline u8 pmu_version(void)
> {
> - if (!is_intel())
> + if (!is_intel()) {
> + /* Performance Monitoring Version 2 Supported */
> + if (cpuid(0x80000022).a & 0x1)
Add an X86_FEATURE_*, that way this is self-documenting.
> + return 2;
> return 0;
> + }
>
> return cpuid(10).a & 0xff;
> }
> @@ -824,6 +828,9 @@ static inline u8 pmu_nr_gp_counters(void)
> {
> if (is_intel()) {
> return (cpuid(10).a >> 8) & 0xff;
> + } else if (this_cpu_has_perf_global_ctrl()) {
Eww. Took me too long to connect the dots to understand how this guarantees that
leaf 0x80000022 is available. With an X86_FEATURE_* this can simply be:
} else if (this_cpu_has(X86_FEATURE_AMD_PMU_V2)) {
or whatever name is appropriate.
> + /* Number of Core Performance Counters. */
> + return cpuid(0x80000022).b & 0xf;
> } else if (!has_amd_perfctr_core()) {
> return AMD64_NUM_COUNTERS;
> }
> diff --git a/x86/pmu.c b/x86/pmu.c
> index 11607c0..6d5363b 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -72,6 +72,9 @@ struct pmu_event {
> #define PMU_CAP_FW_WRITES (1ULL << 13)
> static u32 gp_counter_base;
> static u32 gp_select_base;
> +static u32 global_status_msr;
> +static u32 global_ctl_msr;
> +static u32 global_status_clr_msr;
What do you think about naming these like MSR #defines? E.g.
MSR_PERF_GLOBAL_CTRL
MSR_PERF_GLOBAL_STATUS
MSR_PERF_GLOBAL_STATUS_CLR
There's a little risk of causing confusion, but I think it would make the code
easier to read.
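i.e. keep them as variables, but give them the architectural-looking names
(sketch of that reading of the suggestion):

static u32 MSR_PERF_GLOBAL_CTRL;
static u32 MSR_PERF_GLOBAL_STATUS;
static u32 MSR_PERF_GLOBAL_STATUS_CLR;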
> static unsigned int gp_events_size;
> static unsigned int nr_gp_counters;
>
> @@ -150,8 +153,7 @@ static void global_enable(pmu_counter_t *cnt)
> return;
>
> cnt->idx = event_to_global_idx(cnt);
> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
> - (1ull << cnt->idx));
> + wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) | (1ull << cnt->idx));
Opportunistically use BIT_ULL().
> }
>
> static void global_disable(pmu_counter_t *cnt)
> @@ -159,8 +161,7 @@ static void global_disable(pmu_counter_t *cnt)
> if (pmu_version() < 2)
> return;
>
> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
> - ~(1ull << cnt->idx));
> + wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) & ~(1ull << cnt->idx));
BIT_ULL()
> }
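Concretely, the two wrmsr lines above would become (sketch, assuming BIT_ULL()
is available in lib/):

	wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) | BIT_ULL(cnt->idx));
	...
	wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) & ~BIT_ULL(cnt->idx));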
>
> static inline uint32_t get_gp_counter_msr(unsigned int i)
> @@ -326,6 +327,23 @@ static void check_counters_many(void)
> report(i == n, "all counters");
> }
>
> +static bool is_the_count_reproducible(pmu_counter_t *cnt)
> +{
> + unsigned int i;
> + uint64_t count;
> +
> + __measure(cnt, 0);
> + count = cnt->count;
> +
> + for (i = 0; i < 10; i++) {
> + __measure(cnt, 0);
> + if (count != cnt->count)
> + return false;
> + }
> +
> + return true;
> +}
> +
> static void check_counter_overflow(void)
> {
> uint64_t count;
> @@ -334,13 +352,14 @@ static void check_counter_overflow(void)
> .ctr = gp_counter_base,
> .config = EVNTSEL_OS | EVNTSEL_USR | (*gp_events)[1].unit_sel /* instructions */,
> };
> + bool precise_event = is_the_count_reproducible(&cnt);
> +
> __measure(&cnt, 0);
> count = cnt.count;
>
> /* clear status before test */
> if (pmu_version() > 1) {
Please provide helper(s) to replace the myriad open coded "pmu_version() > ???"
checks. E.g. this one appears to be:
if (this_cpu_has_perf_global_status_clr())
I obviously don't care about the verbosity, it's that people like me might not
know what the PMU version has to do with an MSR being accessible.
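A minimal sketch of such a helper (name as suggested; it only wraps the existing
version check in something self-documenting):

static inline bool this_cpu_has_perf_global_status_clr(void)
{
	/* v2+ PMUs expose GLOBAL_STATUS and a way to clear it (OVF_CTRL / STATUS_CLR). */
	return pmu_version() > 1;
}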
On 6/10/2022 5:36 am, Sean Christopherson wrote:
> On Mon, Sep 05, 2022, Like Xu wrote:
>> From: Like Xu <[email protected]>
>>
>> Specifying an unsupported PMC encoding will cause a #GP(0).
>> All testcases should be passed when the KVM_FEP prefix is added.
>>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> lib/x86/processor.h | 5 ++++-
>> x86/pmu.c | 13 +++++++++++++
>> 2 files changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
>> index 10bca27..9c490d9 100644
>> --- a/lib/x86/processor.h
>> +++ b/lib/x86/processor.h
>> @@ -441,7 +441,10 @@ static inline int wrmsr_safe(u32 index, u64 val)
>> static inline uint64_t rdpmc(uint32_t index)
>> {
>> uint32_t a, d;
>> - asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
>> + if (is_fep_available())
>> + asm volatile (KVM_FEP "rdpmc" : "=a"(a), "=d"(d) : "c"(index));
>> + else
>> + asm volatile ("rdpmc" : "=a"(a), "=d"(d) : "c"(index));
>
> Hmm, not sure how I feel about the idea of always using FEP in a common helper when
> it's available. Part of me likes the idea, but part of me is worried that it
> will cause confusion due to not being explicit.
>
> Unless there's a pressing need to force emulation, let's punt the FEP stuff for
> now. More below.
Some security researchers are very interested in these corners.
In my limited testing, most of the KVM emulation code (at least arch/x86/kvm/emulate.c)
is not adequately covered by test cases, and perhaps some of it will be moved to
user space.
>
>> return a | ((uint64_t)d << 32);
>> }
>>
>> diff --git a/x86/pmu.c b/x86/pmu.c
>> index 203a9d4..11607c0 100644
>> --- a/x86/pmu.c
>> +++ b/x86/pmu.c
>> @@ -758,12 +758,25 @@ static bool pmu_is_detected(void)
>> return detect_intel_pmu();
>> }
>>
>> +static void rdpmc_unsupported_counter(void *data)
>> +{
>> + rdpmc(64);
>> +}
>> +
>> +static void check_rdpmc_cause_gp(void)
>
> Maybe check_invalid_rdpmc_gp()? There are multiple reasons RDPMC can #GP, the
> one that is being relied on to guarantee #GP is specifically that the PMC is
> invalid.
Applied.
> dd
p, :D
>
>> +{
>> + report(test_for_exception(GP_VECTOR, rdpmc_unsupported_counter, NULL),
>
> I'd really like to move away from test_for_exception() and use ASM_TRY(). Ignoring
> FEP for the moment, the most extensible solution is to provide a safe variant:
>
> static inline int rdpmc_safe(u32 index, uint64_t *val)
> {
> uint32_t a, d;
>
> asm volatile (ASM_TRY("1f")
> "rdpmc"
> : "=a"(a), "=d"(d) : "c"(index));
asm volatile (ASM_TRY("1f")
"rdpmc\n\t"
"1:"
: "=a"(a), "=d"(d) : "c"(index) : "memory");
Otherwise the build will complain about the missing "1:" label.
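i.e. the helper ends up roughly as:

static inline int rdpmc_safe(u32 index, uint64_t *val)
{
	uint32_t a, d;

	asm volatile (ASM_TRY("1f")
		      "rdpmc\n\t"
		      "1:"
		      : "=a"(a), "=d"(d) : "c"(index) : "memory");
	*val = (uint64_t)a | ((uint64_t)d << 32);
	return exception_vector();
}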
> *val = (uint64_t)a | ((uint64_t)d << 32);
> return exception_vector();
> }
>
> static inline uint64_t rdpmc(uint32_t index)
> {
> uint64_t val;
> int vector = rdpmc_safe(index, &val);
>
> assert_msg(!vector, "Unexpected %s on RDPMC(%d)",
> exception_mnemonic(vector), index);
> return val;
> }
Applied.
>
>
> For long-term emulation validation, the best idea I have at this point is to
> add a config knob to opt in to using FEP in _all_ common helpers (where "all"
> means everything KVM actually emulates). It'd take some macro magic, but it'd
> be easier to maintain (no need to have two paths in every helper) and would be
> controllable.
Both hands up in favour. I'll leave it to you, as this involves a wider change.
>
>> + "rdpmc with invalid PMC index raises #GP");
>> +}
>> +
>> int main(int ac, char **av)
>> {
>> setup_vm();
>> handle_irq(PC_VECTOR, cnt_overflow);
>> buf = malloc(N*64);
>>
>> + check_rdpmc_cause_gp();
>> +
>> if (!pmu_is_detected())
>> return report_summary();
>>
>> --
>> 2.37.3
>>
All applied, thanks.
On 6/10/2022 6:08 am, Sean Christopherson wrote:
> Can you provide a single series for all of the KVM-Unit-Tests PMU work (separate
> from the KVM patches)? Ya, it'll be big and is a blatant violation of "do one
> thing", but trying to manually handle the dependencies on the review side is time
> consuming.
Thanks for your time. The PMU test cases will be combined into a single series if no new ideas emerge.
>
> One thought to help keep track of dependencies between KVM and KUT would be to
> add dummy commits between each sub-series, with the dummy commit containing a lore
> link to the relevant KVM patches/series. That would allow throwing everything into
> one series without losing track of things. Hopefully.
Sure, adding a lore link to a cover letter is always helpful. It seems that the
ageing KVM project has moved to a test-driven approach to development, and any
new developer should be aware of this rule.
>
> On Mon, Sep 05, 2022, Like Xu wrote:
>> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
>> index 9c490d9..b9592c4 100644
>> --- a/lib/x86/processor.h
>> +++ b/lib/x86/processor.h
>> @@ -796,8 +796,12 @@ static inline void flush_tlb(void)
>>
>> static inline u8 pmu_version(void)
>> {
>> - if (!is_intel())
>> + if (!is_intel()) {
>> + /* Performance Monitoring Version 2 Supported */
>> + if (cpuid(0x80000022).a & 0x1)
>
> Add an X86_FEATURE_*, that way this is self-documenting.
>
>> + return 2;
>> return 0;
>> + }
>>
>> return cpuid(10).a & 0xff;
>> }
>> @@ -824,6 +828,9 @@ static inline u8 pmu_nr_gp_counters(void)
>> {
>> if (is_intel()) {
>> return (cpuid(10).a >> 8) & 0xff;
>> + } else if (this_cpu_has_perf_global_ctrl()) {
>
> Eww. Took me too long to connect the dots to understand how this guarantees that
> leaf 0x80000022 is available. With an X86_FEATURE_* this can simply be:
>
> } else if (this_cpu_has(X86_FEATURE_AMD_PMU_V2)) {
>
> or whatever name is appropriate.
>
>> + /* Number of Core Performance Counters. */
>> + return cpuid(0x80000022).b & 0xf;
>> } else if (!has_amd_perfctr_core()) {
>> return AMD64_NUM_COUNTERS;
>> }
>> diff --git a/x86/pmu.c b/x86/pmu.c
>> index 11607c0..6d5363b 100644
>> --- a/x86/pmu.c
>> +++ b/x86/pmu.c
>> @@ -72,6 +72,9 @@ struct pmu_event {
>> #define PMU_CAP_FW_WRITES (1ULL << 13)
>> static u32 gp_counter_base;
>> static u32 gp_select_base;
>> +static u32 global_status_msr;
>> +static u32 global_ctl_msr;
>> +static u32 global_status_clr_msr;
>
> What do you think about naming these like MSR #defines? E.g.
>
> MSR_PERF_GLOBAL_CTRL
> MSR_PERF_GLOBAL_STATUS
> MSR_PERF_GLOBAL_STATUS_CLR
>
> There's a little risk of causing confusion, but I think it would make the code
> easier to read.
Lowercase variable names are applied.
>
>> static unsigned int gp_events_size;
>> static unsigned int nr_gp_counters;
>>
>> @@ -150,8 +153,7 @@ static void global_enable(pmu_counter_t *cnt)
>> return;
>>
>> cnt->idx = event_to_global_idx(cnt);
>> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
>> - (1ull << cnt->idx));
>> + wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) | (1ull << cnt->idx));
>
> Opportunistically use BIT_ULL().
>
>> }
>>
>> static void global_disable(pmu_counter_t *cnt)
>> @@ -159,8 +161,7 @@ static void global_disable(pmu_counter_t *cnt)
>> if (pmu_version() < 2)
>> return;
>>
>> - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
>> - ~(1ull << cnt->idx));
>> + wrmsr(global_ctl_msr, rdmsr(global_ctl_msr) & ~(1ull << cnt->idx));
>
> BIT_ULL()
>
>> }
>>
>> static inline uint32_t get_gp_counter_msr(unsigned int i)
>> @@ -326,6 +327,23 @@ static void check_counters_many(void)
>> report(i == n, "all counters");
>> }
>>
>> +static bool is_the_count_reproducible(pmu_counter_t *cnt)
>> +{
>> + unsigned int i;
>> + uint64_t count;
>> +
>> + __measure(cnt, 0);
>> + count = cnt->count;
>> +
>> + for (i = 0; i < 10; i++) {
>> + __measure(cnt, 0);
>> + if (count != cnt->count)
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> static void check_counter_overflow(void)
>> {
>> uint64_t count;
>> @@ -334,13 +352,14 @@ static void check_counter_overflow(void)
>> .ctr = gp_counter_base,
>> .config = EVNTSEL_OS | EVNTSEL_USR | (*gp_events)[1].unit_sel /* instructions */,
>> };
>> + bool precise_event = is_the_count_reproducible(&cnt);
>> +
>> __measure(&cnt, 0);
>> count = cnt.count;
>>
>> /* clear status before test */
>> if (pmu_version() > 1) {
>
> Please provide helper(s) to replace the myriad open coded "pmu_version() > ???"
> checks. E.g. this one appears to be:
>
> if (this_cpu_has_perf_global_status_clr())
>
> I obviously don't care about the verbosity, it's that people like me might not
> know what the PMU version has to do with an MSR being accessible.