2022-07-12 00:08:59

by Sean Christopherson

Subject: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

Add a second CPUID helper, kvm_find_cpuid_entry_index(), to handle KVM
queries for CPUID leaves whose index _may_ be significant, and drop the
index param from the existing kvm_find_cpuid_entry(). Add a WARN in the
inner helper, cpuid_entry2_find(), to detect attempts to retrieve a CPUID
entry whose index is significant without explicitly providing an index.

Using an explicit magic number and letting callers omit the index avoids
confusion by eliminating the myriad cases where KVM specifies '0' as a
dummy value.

Suggested-by: Paolo Bonzini <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
---

v2: Rebased to kvm/queue.

v1: https://lore.kernel.org/all/[email protected]

arch/x86/kvm/cpuid.c | 71 ++++++++++++++++++++++++++----------
arch/x86/kvm/cpuid.h | 16 ++++----
arch/x86/kvm/hyperv.c | 8 ++--
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/vmx/pmu_intel.c | 4 +-
arch/x86/kvm/vmx/sgx.c | 8 ++--
arch/x86/kvm/vmx/vmx.c | 6 +--
arch/x86/kvm/x86.c | 2 +-
8 files changed, 75 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d47222ab8e6e..10247528b59b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -67,9 +67,17 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)

+/*
+ * Magic value used by KVM when querying userspace-provided CPUID entries for
+ * which it doesn't care about the CPUID index because the index of the function
+ * in question is not significant. Note, this magic value must have at least one
+ * bit set in bits[63:32] and must be consumed as a u64 by cpuid_entry2_find()
+ * to avoid false positives when processing guest CPUID input.
+ */
+#define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull

static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
- struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
+ struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
{
struct kvm_cpuid_entry2 *e;
int i;
@@ -77,9 +85,22 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
for (i = 0; i < nent; i++) {
e = &entries[i];

- if (e->function == function &&
- (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
+ if (e->function != function)
+ continue;
+
+ /*
+ * If the index isn't significant, use the first entry with a
+ * matching function. It's userspace's responsibility to not
+ * provide "duplicate" entries in all cases.
+ */
+ if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
return e;
+
+ /*
+ * Function matches and index is significant; not specifying an
+ * exact index in this case is a KVM bug.
+ */
+ WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}

return NULL;
@@ -96,7 +117,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
+ best = cpuid_entry2_find(entries, nent, 0x80000008,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;

@@ -151,7 +173,7 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
vcpu->arch.kvm_cpuid_base = 0;

for_each_possible_hypervisor_cpuid_base(function) {
- entry = kvm_find_cpuid_entry(vcpu, function, 0);
+ entry = kvm_find_cpuid_entry(vcpu, function);

if (entry) {
u32 signature[3];
@@ -177,7 +199,8 @@ static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *v
if (!base)
return NULL;

- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
+ return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}

static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
@@ -219,7 +242,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
struct kvm_cpuid_entry2 *best;
u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);

- best = cpuid_entry2_find(entries, nent, 1, 0);
+ best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
@@ -250,7 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = cpuid_entry2_find(entries, nent, 0x1, 0);
+ best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best)
cpuid_entry_change(best, X86_FEATURE_MWAIT,
vcpu->arch.ia32_misc_enable_msr &
@@ -285,7 +308,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *best;
u64 guest_supported_xcr0;

- best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
@@ -325,10 +348,10 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000000);
if (!best || best->eax < 0x80000008)
goto not_found;
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008);
if (best)
return best->eax & 0xff;
not_found:
@@ -1302,12 +1325,20 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
return r;
}

-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function, u32 index)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
function, index);
}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function)
+{
+ return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);

/*
@@ -1344,7 +1375,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
struct kvm_cpuid_entry2 *basic, *class;
u32 function = *fn_ptr;

- basic = kvm_find_cpuid_entry(vcpu, 0, 0);
+ basic = kvm_find_cpuid_entry(vcpu, 0);
if (!basic)
return NULL;

@@ -1353,11 +1384,11 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
return NULL;

if (function >= 0x40000000 && function <= 0x4fffffff)
- class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
+ class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00);
else if (function >= 0xc0000000)
- class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
+ class = kvm_find_cpuid_entry(vcpu, 0xc0000000);
else
- class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
+ class = kvm_find_cpuid_entry(vcpu, function & 0x80000000);

if (class && function <= class->eax)
return NULL;
@@ -1375,7 +1406,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
* the effective CPUID entry is the max basic leaf. Note, the index of
* the original requested leaf is observed!
*/
- return kvm_find_cpuid_entry(vcpu, basic->eax, index);
+ return kvm_find_cpuid_entry_index(vcpu, basic->eax, index);
}

bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
@@ -1385,7 +1416,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;

- entry = kvm_find_cpuid_entry(vcpu, function, index);
+ entry = kvm_find_cpuid_entry_index(vcpu, function, index);
exact = !!entry;

if (!entry && !exact_only) {
@@ -1414,7 +1445,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
* exists. EDX can be copied from any existing index.
*/
if (function == 0xb || function == 0x1f) {
- entry = kvm_find_cpuid_entry(vcpu, function, 1);
+ entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
if (entry) {
*ecx = index & 0xff;
*edx = entry->edx;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index ac72aabba981..b1658c0de847 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -13,8 +13,10 @@ void kvm_set_cpu_caps(void);

void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function, u32 index);
+ u32 function);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries,
unsigned int type);
@@ -76,7 +78,7 @@ static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
struct kvm_cpuid_entry2 *entry;

- entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
+ entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
if (!entry)
return NULL;

@@ -109,7 +111,7 @@ static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0);
return best &&
(is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
@@ -119,7 +121,7 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0);
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
}

@@ -127,7 +129,7 @@ static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x1);
if (!best)
return -1;

@@ -138,7 +140,7 @@ static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x1);
if (!best)
return -1;

@@ -154,7 +156,7 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

- best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x1);
if (!best)
return -1;

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index e2e95a6fccfd..ed804447589c 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1992,7 +1992,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *entry;
struct kvm_vcpu_hv *hv_vcpu;

- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);
if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
vcpu->arch.hyperv_enabled = true;
} else {
@@ -2005,7 +2005,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)

hv_vcpu = to_hv_vcpu(vcpu);

- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES, 0);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
if (entry) {
hv_vcpu->cpuid_cache.features_eax = entry->eax;
hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
@@ -2016,7 +2016,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
hv_vcpu->cpuid_cache.features_edx = 0;
}

- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO, 0);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
if (entry) {
hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
@@ -2025,7 +2025,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
hv_vcpu->cpuid_cache.enlightenments_ebx = 0;
}

- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
if (entry)
hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
else
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 37ce061dfc76..d2fa008b04ad 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4193,7 +4193,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)

/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
- best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
+ best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
if (best)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 53ccba896e77..4bc098fbec31 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -531,7 +531,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;

- entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ entry = kvm_find_cpuid_entry(vcpu, 0xa);
if (!entry || !vcpu->kvm->arch.enable_pmu)
return;
eax.full = entry->eax;
@@ -577,7 +577,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_ovf_ctrl_mask &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

- entry = kvm_find_cpuid_entry(vcpu, 7, 0);
+ entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
if (entry &&
(boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
(entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 35e7ec91ae86..d1cc7244bede 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -148,8 +148,8 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
u8 max_size_log2;
int trapnr, ret;

- sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
- sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+ sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
+ sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
if (!sgx_12_0 || !sgx_12_1) {
kvm_prepare_emulation_failure_exit(vcpu);
return 0;
@@ -431,7 +431,7 @@ static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
if (!vcpu->kvm->arch.sgx_provisioning_allowed)
return true;

- guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
+ guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
if (!guest_cpuid)
return true;

@@ -439,7 +439,7 @@ static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
return true;

- guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+ guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
if (!guest_cpuid)
return true;

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c30115b9cb33..74ca64e97643 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7428,7 +7428,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
} while (0)

- entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ entry = kvm_find_cpuid_entry(vcpu, 0x1);
cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME));
cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME));
cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC));
@@ -7444,7 +7444,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID));
cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE));

- entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+ entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 0);
cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE));
cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP));
cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP));
@@ -7479,7 +7479,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
int i;

for (i = 0; i < PT_CPUID_LEAVES; i++) {
- best = kvm_find_cpuid_entry(vcpu, 0x14, i);
+ best = kvm_find_cpuid_entry_index(vcpu, 0x14, i);
if (!best)
return;
vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 567d13405445..329875d2ccf2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11731,7 +11731,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* i.e. it's impossible for kvm_find_cpuid_entry() to find a valid entry
* on RESET. But, go through the motions in case that's ever remedied.
*/
- cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
+ cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1);
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);

static_call(kvm_x86_vcpu_reset)(vcpu, init_event);

base-commit: b9b71f43683ae9d76b0989249607bbe8c9eb6c5c
--
2.37.0.144.g8ac04bfd2-goog


2022-07-12 07:06:24

by Xiaoyao Li

Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On 7/12/2022 8:06 AM, Sean Christopherson wrote:
> Add a second CPUID helper, kvm_find_cpuid_entry_index(), to handle KVM
> queries for CPUID leaves whose index _may_ be significant, and drop the
> index param from the existing kvm_find_cpuid_entry(). Add a WARN in the
> inner helper, cpuid_entry2_find(), to detect attempts to retrieve a CPUID
> entry whose index is significant without explicitly providing an index.
>
> Using an explicit magic number and letting callers omit the index avoids
> confusion by eliminating the myriad cases where KVM specifies '0' as a
> dummy value.
>

Reviewed-by: Xiaoyao Li <[email protected]>

> Suggested-by: Paolo Bonzini <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> [...]

2022-07-12 15:57:33

by Maxim Levitsky

Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
> Add a second CPUID helper, kvm_find_cpuid_entry_index(), to handle KVM
> queries for CPUID leaves whose index _may_ be significant, and drop the
> index param from the existing kvm_find_cpuid_entry().  Add a WARN in the
> inner helper, cpuid_entry2_find(), to detect attempts to retrieve a CPUID
> entry whose index is significant without explicitly providing an index.
>
> Using an explicit magic number and letting callers omit the index avoids
> confusion by eliminating the myriad cases where KVM specifies '0' as a
> dummy value.
>
> Suggested-by: Paolo Bonzini <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
>
> v2: Rebased to kvm/queue.
>
> v1: https://lore.kernel.org/all/[email protected]
>
>  arch/x86/kvm/cpuid.c         | 71 ++++++++++++++++++++++++++----------
>  arch/x86/kvm/cpuid.h         | 16 ++++----
>  arch/x86/kvm/hyperv.c        |  8 ++--
>  arch/x86/kvm/svm/svm.c       |  2 +-
>  arch/x86/kvm/vmx/pmu_intel.c |  4 +-
>  arch/x86/kvm/vmx/sgx.c       |  8 ++--
>  arch/x86/kvm/vmx/vmx.c       |  6 +--
>  arch/x86/kvm/x86.c           |  2 +-
>  8 files changed, 75 insertions(+), 42 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index d47222ab8e6e..10247528b59b 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -67,9 +67,17 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
>  #define F feature_bit
>  #define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
>  
> +/*
> + * Magic value used by KVM when querying userspace-provided CPUID entries for
> + * which it doesn't care about the CPUID index because the index of the function
> + * in question is not significant.  Note, this magic value must have at least one
> + * bit set in bits[63:32] and must be consumed as a u64 by cpuid_entry2_find()
> + * to avoid false positives when processing guest CPUID input.
> + */
> +#define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull
>  
>  static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
> -       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
> +       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
How I wish that this would be just called EAX and ECX... Anyway....

>  {
>         struct kvm_cpuid_entry2 *e;
>         int i;
> @@ -77,9 +85,22 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
>         for (i = 0; i < nent; i++) {
>                 e = &entries[i];
>  
> -               if (e->function == function &&
> -                   (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
> +               if (e->function != function)
> +                       continue;
> +
> +               /*
> +                * If the index isn't significant, use the first entry with a
> +                * matching function.  It's userspace's responsibility to not
> +                * provide "duplicate" entries in all cases.
> +                */
> +               if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
>                         return e;
> +
> +               /*
> +                * Function matches and index is significant; not specifying an
> +                * exact index in this case is a KVM bug.
> +                */
Nitpick: Why a KVM bug? Bad userspace can also provide an index-significant entry for a CPUID
leaf for which the index is not significant in the x86 spec.


We could arrange a table of all known leaves, recording for each leaf whether it has an index
in the x86 spec, and warn about/reject the userspace CPUID info if it doesn't match.
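
A rough, untested sketch of the idea (cpuid_leaf_has_index() being a
hypothetical helper backed by exactly such a table):

static int kvm_check_cpuid_index_flags(struct kvm_cpuid_entry2 *entries,
                                       int nent)
{
        int i;

        for (i = 0; i < nent; i++) {
                bool significant = entries[i].flags &
                                   KVM_CPUID_FLAG_SIGNIFCANT_INDEX;

                /* Warn on / reject an entry whose flag disagrees with the spec. */
                if (significant != cpuid_leaf_has_index(entries[i].function))
                        return -EINVAL;
        }

        return 0;
}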



> +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>         }
>  
>         return NULL;
> @@ -96,7 +117,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
>          * The existing code assumes virtual address is 48-bit or 57-bit in the
>          * canonical address checks; exit if it is ever changed.
>          */
> -       best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
> +       best = cpuid_entry2_find(entries, nent, 0x80000008,
> +                                KVM_CPUID_INDEX_NOT_SIGNIFICANT);
OK.

>         if (best) {
>                 int vaddr_bits = (best->eax & 0xff00) >> 8;
>  
> @@ -151,7 +173,7 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
>         vcpu->arch.kvm_cpuid_base = 0;
>  
>         for_each_possible_hypervisor_cpuid_base(function) {
> -               entry = kvm_find_cpuid_entry(vcpu, function, 0);
> +               entry = kvm_find_cpuid_entry(vcpu, function);
KVM leaves have no index indeed.
TIL: I didn't know that KVM leaves can be offset like that.

>  
>                 if (entry) {
>                         u32 signature[3];
> @@ -177,7 +199,8 @@ static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *v
>         if (!base)
>                 return NULL;
>  
> -       return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0);
> +       return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
> +                                KVM_CPUID_INDEX_NOT_SIGNIFICANT);
Same.
>  }
>  
>  static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
> @@ -219,7 +242,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
>         struct kvm_cpuid_entry2 *best;
>         u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
>  
> -       best = cpuid_entry2_find(entries, nent, 1, 0);
> +       best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);

Leaf 1, no index indeed.

Off-topic: I wonder why we call this 'best'?


>         if (best) {
>                 /* Update OSXSAVE bit */
>                 if (boot_cpu_has(X86_FEATURE_XSAVE))
> @@ -250,7 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
>                 best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
>  
>         if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
> -               best = cpuid_entry2_find(entries, nent, 0x1, 0);
> +               best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>                 if (best)
>                         cpuid_entry_change(best, X86_FEATURE_MWAIT,
>                                            vcpu->arch.ia32_misc_enable_msr &
Leaf 1 again, OK.


> @@ -285,7 +308,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>         struct kvm_cpuid_entry2 *best;
>         u64 guest_supported_xcr0;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 1, 0);
> +       best = kvm_find_cpuid_entry(vcpu, 1);
Leaf 1 again, OK.
>         if (best && apic) {
>                 if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
>                         apic->lapic_timer.timer_mode_mask = 3 << 17;
> @@ -325,10 +348,10 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
> +       best = kvm_find_cpuid_entry(vcpu, 0x80000000);

Leaf 0x80000000, OK.
>         if (!best || best->eax < 0x80000008)
>                 goto not_found;
> -       best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
> +       best = kvm_find_cpuid_entry(vcpu, 0x80000008);
Leaf 0x80000008, OK.

>         if (best)
>                 return best->eax & 0xff;
>  not_found:
> @@ -1302,12 +1325,20 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
>         return r;
>  }
>  
> -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> -                                             u32 function, u32 index)
> +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> +                                                   u32 function, u32 index)
Nitpick: could you fix the indentation while at it?
>  {
>         return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
>                                  function, index);
>  }
> +EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
> +
> +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> +                                             u32 function)
> +{
> +       return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
> +                                function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> +}
>  EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
>  
>  /*
> @@ -1344,7 +1375,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
>         struct kvm_cpuid_entry2 *basic, *class;
>         u32 function = *fn_ptr;
>  
> -       basic = kvm_find_cpuid_entry(vcpu, 0, 0);
> +       basic = kvm_find_cpuid_entry(vcpu, 0);
Leaf 0, OK.
>         if (!basic)
>                 return NULL;
>  
> @@ -1353,11 +1384,11 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
>                 return NULL;
>  
>         if (function >= 0x40000000 && function <= 0x4fffffff)
> -               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
> +               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00);
>         else if (function >= 0xc0000000)
> -               class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
> +               class = kvm_find_cpuid_entry(vcpu, 0xc0000000);
>         else
> -               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
> +               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000);
This assumes that every class has a first entry whose EAX specifies the max leaf
for that class. True for sure for basic and extended features; I don't know
if it's true for hypervisor and Centaur entries. Seems OK.


>  
>         if (class && function <= class->eax)
>                 return NULL;
> @@ -1375,7 +1406,7 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
>          * the effective CPUID entry is the max basic leaf.  Note, the index of
>          * the original requested leaf is observed!
>          */
> -       return kvm_find_cpuid_entry(vcpu, basic->eax, index);
> +       return kvm_find_cpuid_entry_index(vcpu, basic->eax, index);
>  }
>  
>  bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
> @@ -1385,7 +1416,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>         struct kvm_cpuid_entry2 *entry;
>         bool exact, used_max_basic = false;
>  
> -       entry = kvm_find_cpuid_entry(vcpu, function, index);
> +       entry = kvm_find_cpuid_entry_index(vcpu, function, index);
>         exact = !!entry;
>  
>         if (!entry && !exact_only) {
> @@ -1414,7 +1445,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
>                  * exists. EDX can be copied from any existing index.
>                  */
>                 if (function == 0xb || function == 0x1f) {
> -                       entry = kvm_find_cpuid_entry(vcpu, function, 1);
> +                       entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
>                         if (entry) {
>                                 *ecx = index & 0xff;
>                                 *edx = entry->edx;
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index ac72aabba981..b1658c0de847 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -13,8 +13,10 @@ void kvm_set_cpu_caps(void);
>  
>  void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
>  void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
> +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> +                                                   u32 function, u32 index);
>  struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> -                                             u32 function, u32 index);
> +                                             u32 function);
>  int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
>                             struct kvm_cpuid_entry2 __user *entries,
>                             unsigned int type);
> @@ -76,7 +78,7 @@ static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
>         const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
>         struct kvm_cpuid_entry2 *entry;
>  
> -       entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
> +       entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
>         if (!entry)
>                 return NULL;
>  
> @@ -109,7 +111,7 @@ static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0, 0);
> +       best = kvm_find_cpuid_entry(vcpu, 0);
OK
>         return best &&
>                (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
>                 is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
> @@ -119,7 +121,7 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0, 0);
OK
> +       best = kvm_find_cpuid_entry(vcpu, 0);
>         return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
>  }
>  
> @@ -127,7 +129,7 @@ static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
OK
> +       best = kvm_find_cpuid_entry(vcpu, 0x1);
>         if (!best)
>                 return -1;
>  
> @@ -138,7 +140,7 @@ static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
OK
> +       best = kvm_find_cpuid_entry(vcpu, 0x1);
>         if (!best)
>                 return -1;
>  
> @@ -154,7 +156,7 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
>  {
>         struct kvm_cpuid_entry2 *best;
>  
> -       best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
OK
> +       best = kvm_find_cpuid_entry(vcpu, 0x1);
>         if (!best)
>                 return -1;
>  
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index e2e95a6fccfd..ed804447589c 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1992,7 +1992,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
>         struct kvm_cpuid_entry2 *entry;
>         struct kvm_vcpu_hv *hv_vcpu;
>  
> -       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);

None of the HV CPUID leaves have an index, so these all look OK.

>         if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
>                 vcpu->arch.hyperv_enabled = true;
>         } else {
> @@ -2005,7 +2005,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
>  
>         hv_vcpu = to_hv_vcpu(vcpu);
>  
> -       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
>         if (entry) {
>                 hv_vcpu->cpuid_cache.features_eax = entry->eax;
>                 hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
> @@ -2016,7 +2016,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
>                 hv_vcpu->cpuid_cache.features_edx = 0;
>         }
>  
> -       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
>         if (entry) {
>                 hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
>                 hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
> @@ -2025,7 +2025,7 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
>                 hv_vcpu->cpuid_cache.enlightenments_ebx = 0;
>         }
>  
> -       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
>         if (entry)
>                 hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
>         else
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 37ce061dfc76..d2fa008b04ad 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4193,7 +4193,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  
>         /* For sev guests, the memory encryption bit is not reserved in CR3.  */
>         if (sev_guest(vcpu->kvm)) {
> -               best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
> +               best = kvm_find_cpuid_entry(vcpu, 0x8000001F);

>                 if (best)
>                         vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
OK.

>         }
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 53ccba896e77..4bc098fbec31 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -531,7 +531,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>         pmu->pebs_enable_mask = ~0ull;
>         pmu->pebs_data_cfg_mask = ~0ull;
>  
> -       entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, 0xa);
OK
>         if (!entry || !vcpu->kvm->arch.enable_pmu)
>                 return;
>         eax.full = entry->eax;
> @@ -577,7 +577,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>                 pmu->global_ovf_ctrl_mask &=
>                                 ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
>  
> -       entry = kvm_find_cpuid_entry(vcpu, 7, 0);
> +       entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
>         if (entry &&
>             (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
>             (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
> diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
> index 35e7ec91ae86..d1cc7244bede 100644
> --- a/arch/x86/kvm/vmx/sgx.c
> +++ b/arch/x86/kvm/vmx/sgx.c
> @@ -148,8 +148,8 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
>         u8 max_size_log2;
>         int trapnr, ret;
>  
> -       sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> -       sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> +       sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
> +       sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
>         if (!sgx_12_0 || !sgx_12_1) {
>                 kvm_prepare_emulation_failure_exit(vcpu);
>                 return 0;
> @@ -431,7 +431,7 @@ static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
>         if (!vcpu->kvm->arch.sgx_provisioning_allowed)
>                 return true;
>  
> -       guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> +       guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
>         if (!guest_cpuid)
>                 return true;
>  
> @@ -439,7 +439,7 @@ static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
>         if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
>                 return true;
>  
> -       guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> +       guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
>         if (!guest_cpuid)
>                 return true;
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c30115b9cb33..74ca64e97643 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7428,7 +7428,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
>                 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);     \
>  } while (0)
>  
> -       entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> +       entry = kvm_find_cpuid_entry(vcpu, 0x1);
OK.
>         cr4_fixed1_update(X86_CR4_VME,        edx, feature_bit(VME));
>         cr4_fixed1_update(X86_CR4_PVI,        edx, feature_bit(VME));
>         cr4_fixed1_update(X86_CR4_TSD,        edx, feature_bit(TSC));
> @@ -7444,7 +7444,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
>         cr4_fixed1_update(X86_CR4_PCIDE,      ecx, feature_bit(PCID));
>         cr4_fixed1_update(X86_CR4_OSXSAVE,    ecx, feature_bit(XSAVE));
>  
> -       entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> +       entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 0);
>         cr4_fixed1_update(X86_CR4_FSGSBASE,   ebx, feature_bit(FSGSBASE));
>         cr4_fixed1_update(X86_CR4_SMEP,       ebx, feature_bit(SMEP));
>         cr4_fixed1_update(X86_CR4_SMAP,       ebx, feature_bit(SMAP));
> @@ -7479,7 +7479,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
>         int i;
>  
>         for (i = 0; i < PT_CPUID_LEAVES; i++) {
> -               best = kvm_find_cpuid_entry(vcpu, 0x14, i);
> +               best = kvm_find_cpuid_entry_index(vcpu, 0x14, i);
>                 if (!best)
>                         return;
>                 vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 567d13405445..329875d2ccf2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -11731,7 +11731,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>          * i.e. it's impossible for kvm_find_cpuid_entry() to find a valid entry
>          * on RESET.  But, go through the motions in case that's ever remedied.
>          */
> -       cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
> +       cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1);
OK.
>         kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
>  
>         static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
>
> base-commit: b9b71f43683ae9d76b0989249607bbe8c9eb6c5c


Best regards,
Maxim Levitsky

2022-07-12 16:40:22

by Sean Christopherson

Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On Tue, Jul 12, 2022, Maxim Levitsky wrote:
> On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
> >  static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
> > -       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
> > +       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
> How I wish that this would be just called EAX and ECX... Anyway....

Heh, I strongly disagree. EAX and ECX are how the CPUID instruction specifies
the function and index, but the CPUID lookup itself operates on function+index,
e.g. there are plenty of situations where KVM queries CPUID info without the
inputs coming from EAX/ECX.
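
For example, guest_cpuid_get_register() in this very patch builds its query
from the feature-bit reverse map, not from anything the guest put in EAX/ECX:

        const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
        struct kvm_cpuid_entry2 *entry;

        entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);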

> >  {
> >         struct kvm_cpuid_entry2 *e;
> >         int i;
> > @@ -77,9 +85,22 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
> >         for (i = 0; i < nent; i++) {
> >                 e = &entries[i];
> >
> > -               if (e->function == function &&
> > -                   (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
> > +               if (e->function != function)
> > +                       continue;
> > +
> > +               /*
> > +                * If the index isn't significant, use the first entry with a
> > +                * matching function.  It's userspace's responsibility to not
> > +                * provide "duplicate" entries in all cases.
> > +                */
> > +               if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
> >                         return e;
> > +
> > +               /*
> > +                * Function matches and index is significant; not specifying an
> > +                * exact index in this case is a KVM bug.
> > +                */
> Nitpick: why a KVM bug? Bad userspace can also provide an index-significant
> entry for a CPUID leaf whose index is not significant in the x86 spec.

Ugh, you're right.

> We could arrange a table of all known leaves, recording for each leaf whether
> it has an index in the x86 spec, and warn/reject the userspace CPUID info if
> it doesn't match.

We have such a table, cpuid_function_is_indexed(). The alternative would be to
do:

                WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT &&
                             cpuid_function_is_indexed(function));
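
For reference, cpuid_function_is_indexed() is just a switch over the leaves
that architecturally take an index, roughly the following (a from-memory
sketch of the helper in arch/x86/kvm/cpuid.h; the exact leaf list may differ
by kernel version):

        static __always_inline bool cpuid_function_is_indexed(u32 function)
        {
                switch (function) {
                case 4:
                case 7:
                case 0xb:
                case 0xd:
                case 0xf:
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x17:
                case 0x18:
                case 0x1d:
                case 0x1e:
                case 0x1f:
                case 0x8000001d:
                        /* Output of these leaves depends on the index in ECX. */
                        return true;
                }

                return false;
        }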

The problem with rejecting userspace CPUID on mismatch is that it could break
userspace :-/ Of course, this entire patch would also break userspace to some
extent, e.g. if userspace is relying on an exact match on index==0. The only
difference being that guest lookups with an exact index would still work.

I think the restriction we could put in place would be that userspace can make
a leaf more relaxed, e.g. to play nice if userspace forgets to set the SIGNIFICANT
flag, but KVM rejects attempts to make guest CPUID more restrictive, i.e. disallows
setting the SIGNIFICANT flag on leaves that KVM doesn't enumerate as significant.
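
If we went that route, the check could be a snippet along these lines in the
userspace CPUID validation path (hypothetical, never implemented):

        /*
         * Hypothetical: allow userspace to relax a leaf, but reject marking
         * a leaf as indexed when KVM doesn't consider it indexed.
         */
        if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) &&
            !cpuid_function_is_indexed(e->function))
                return -EINVAL;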

> > +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> >         }
> >  
> >         return NULL;
> > @@ -96,7 +117,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
> >          * The existing code assumes virtual address is 48-bit or 57-bit in the
> >          * canonical address checks; exit if it is ever changed.
> >          */
> > -       best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
> > +       best = cpuid_entry2_find(entries, nent, 0x80000008,
> > +                                KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> OK.

Thanks for looking through all these!

> >  static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
> > @@ -219,7 +242,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
> >         struct kvm_cpuid_entry2 *best;
> >         u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
> >  
> > -       best = cpuid_entry2_find(entries, nent, 1, 0);
> > +       best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>
> Leaf 1, no index indeed.
>
> Offtopic: I wonder why we call this 'best'?

Awful, awful historic code. IIRC, for functions whose index is not significant,
KVM would iterate over all entries and look for an exact function+index match
anyways. If there was at least one partial match (function match only) but no
full match, KVM would use the first partial match, which it called the "best" match.

We've been slowly/opportunistically killing off the "best" terminology.
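
For the curious, the historic behavior amounted to something like this (a
rough reconstruction purely for illustration, with a made-up helper name, not
the actual old code):

        static struct kvm_cpuid_entry2 *find_best(struct kvm_cpuid_entry2 *entries,
                                                  int nent, u32 function, u32 index)
        {
                struct kvm_cpuid_entry2 *best = NULL;
                int i;

                for (i = 0; i < nent; i++) {
                        if (entries[i].function != function)
                                continue;
                        if (entries[i].index == index)
                                return &entries[i];     /* exact match wins */
                        if (!best)
                                best = &entries[i];     /* first partial match, the "best" */
                }

                return best;
        }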

> > -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> > -                                             u32 function, u32 index)
> > +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> > +                                                   u32 function, u32 index)
> Nitpick: could you fix the indentation while at it?

The indentation is correct, it's only the diff that appears misaligned.

> > @@ -1353,11 +1384,11 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
> >                 return NULL;
> >  
> >         if (function >= 0x40000000 && function <= 0x4fffffff)
> > -               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
> > +               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00);
> >         else if (function >= 0xc0000000)
> > -               class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
> > +               class = kvm_find_cpuid_entry(vcpu, 0xc0000000);
> >         else
> > -               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
> > +               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000);
> This assumes that all the classes have a first entry whose EAX specifies the
> max leaf for that class. True for sure for basic and extended features; don't
> know if it's true for hypervisor and Centaur entries. Seems OK.

It holds true for all known hypervisors. There's no formal definition for using
0x400000yy as the hypervisor range, but the de facto standard is to use EBX, ECX,
and EDX for the signature, and EAX for the max leaf.
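
E.g. a guest probing the hypervisor range does something like this (a sketch
fragment; cpuid_count() is the kernel's CPUID wrapper):

        u32 eax, ebx, ecx, edx;
        char sig[13];

        /* De facto standard: EAX = max hypervisor leaf, EBX/ECX/EDX = signature. */
        cpuid_count(0x40000000, 0, &eax, &ebx, &ecx, &edx);
        memcpy(&sig[0], &ebx, 4);
        memcpy(&sig[4], &ecx, 4);
        memcpy(&sig[8], &edx, 4);
        sig[12] = '\0';         /* reads "KVMKVMKVM" when running on KVM */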

The Centaur behavior is very much a guess, but odds are it's a correct guess. When
I added the Centaur code, I spent far too much time trying (and failing) to hunt
down documentation.

2022-07-12 17:12:44

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On Tue, Jul 12, 2022, Sean Christopherson wrote:
> On Tue, Jul 12, 2022, Maxim Levitsky wrote:
> > On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
> > > +               /*
> > > +                * Function matches and index is significant; not specifying an
> > > +                * exact index in this case is a KVM bug.
> > > +                */
> > Nitpick: why a KVM bug? Bad userspace can also provide an index-significant
> > entry for a CPUID leaf whose index is not significant in the x86 spec.
>
> Ugh, you're right.
>
> > We could arrange a table of all known leaves, recording for each leaf whether
> > it has an index in the x86 spec, and warn/reject the userspace CPUID info if
> > it doesn't match.
>
> We have such a table, cpuid_function_is_indexed(). The alternative would be to
> do:
>
>                 WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT &&
>                              cpuid_function_is_indexed(function));
>
> The problem with rejecting userspace CPUID on mismatch is that it could break
> userspace :-/ Of course, this entire patch would also break userspace to some
> extent, e.g. if userspace is relying on an exact match on index==0. The only
> difference being that guest lookups with an exact index would still work.
>
> I think the restriction we could put in place would be that userspace can make
> a leaf more relaxed, e.g. to play nice if userspace forgets to set the SIGNIFICANT
> flag, but KVM rejects attempts to make guest CPUID more restrictive, i.e. disallows
> setting the SIGNIFICANT flag on leaves that KVM doesn't enumerate as significant.
>
> > > +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);

Actually, better idea. Let userspace do whatever, and have direct KVM lookups
for functions that architecturally don't have a significant index use the first
entry even if userspace set the SIGNIFICANT flag. That will mostly maintain
backwards compatibility; the only thing that would break is if userspace set the
SIGNIFICANT flag _and_ provided a non-zero index _and_ relied on KVM to not match
the entry.

We could still enforce matching in the future, but it wouldn't be a prerequisite
for this cleanup.

                /*
                 * Similarly, use the first matching entry if KVM is doing a
                 * lookup (as opposed to emulating CPUID) for a function that's
                 * architecturally defined as not having a significant index.
                 */
                if (index == KVM_CPUID_INDEX_NOT_SIGNIFICANT) {
                        /*
                         * Direct lookups from KVM should not diverge from what
                         * KVM defines internally (the architectural behavior).
                         */
                        WARN_ON_ONCE(cpuid_function_is_indexed(function));
                        return e;
                }

2022-07-14 11:03:16

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On Tue, 2022-07-12 at 17:09 +0000, Sean Christopherson wrote:
> On Tue, Jul 12, 2022, Sean Christopherson wrote:
> > On Tue, Jul 12, 2022, Maxim Levitsky wrote:
> > > On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
> > > > +               /*
> > > > +                * Function matches and index is significant; not specifying an
> > > > +                * exact index in this case is a KVM bug.
> > > > +                */
> > > Nitpick: why a KVM bug? Bad userspace can also provide an index-significant
> > > entry for a CPUID leaf whose index is not significant in the x86 spec.
> >
> > Ugh, you're right.
> >
> > > We could arrange a table of all known leaves, recording for each leaf whether
> > > it has an index in the x86 spec, and warn/reject the userspace CPUID info if
> > > it doesn't match.
> >
> > We have such a table, cpuid_function_is_indexed().  The alternative would be to
> > do:
> >
> >                 WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT &&
> >                              cpuid_function_is_indexed(function));
> >
> > The problem with rejecting userspace CPUID on mismatch is that it could break
> > userspace :-/  Of course, this entire patch would also break userspace to some
> > extent, e.g. if userspace is relying on an exact match on index==0.  The only
> > difference being that guest lookups with an exact index would still work.
> >
> > I think the restriction we could put in place would be that userspace can make
> > a leaf more relaxed, e.g. to play nice if userspace forgets to set the SIGNIFICANT
> > flag, but KVM rejects attempts to make guest CPUID more restrictive, i.e. disallows
> > setting the SIGNIFICANT flag on leaves that KVM doesn't enumerate as significant.
> >
> > > > +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>
> Actually, better idea.  Let userspace do whatever, and have direct KVM lookups
> for functions that architecturally don't have a significant index use the first
> entry even if userspace set the SIGNIFICANT flag.  That will mostly maintain
> backwards compatibility; the only thing that would break is if userspace set the
> SIGNIFICANT flag _and_ provided a non-zero index _and_ relied on KVM to not match
> the entry.

Makes sense as well.

Best regards,
Maxim Levitsky

>
> We could still enforce matching in the future, but it wouldn't be a prerequisite
> for this cleanup.
>
>                 /*
>                  * Similarly, use the first matching entry if KVM is doing a
>                  * lookup (as opposed to emulating CPUID) for a function that's
>                  * architecturally defined as not having a significant index.
>                  */
>                 if (index == KVM_CPUID_INDEX_NOT_SIGNIFICANT) {
>                         /*
>                          * Direct lookups from KVM should not diverge from what
>                          * KVM defines internally (the architectural behavior).
>                          */
>                         WARN_ON_ONCE(cpuid_function_is_indexed(function));
>                         return e;
>                 }
>


2022-07-14 11:39:26

by Maxim Levitsky

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On Tue, 2022-07-12 at 16:35 +0000, Sean Christopherson wrote:
> On Tue, Jul 12, 2022, Maxim Levitsky wrote:
> > On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
> > >  static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
> > > -       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
> > > +       struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
> > How I wish that this would be just called EAX and ECX... Anyway....
>
> Heh, I strongly disagree.  EAX and ECX are how the CPUID instruction specifies
> the function and index, but the CPUID lookup itself operates on function+index,
> e.g. there are plenty of situations where KVM queries CPUID info without the
> inputs coming from EAX/ECX.

Just a matter of taste, I guess - note that the outputs of the CPUID instruction
are always referred to by register (EAX/EBX/ECX/EDX). But anyway, it doesn't
really matter to me.

>
> > >  {
> > >         struct kvm_cpuid_entry2 *e;
> > >         int i;
> > > @@ -77,9 +85,22 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
> > >         for (i = 0; i < nent; i++) {
> > >                 e = &entries[i];
> > >  
> > > -               if (e->function == function &&
> > > -                   (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index))
> > > +               if (e->function != function)
> > > +                       continue;
> > > +
> > > +               /*
> > > +                * If the index isn't significant, use the first entry with a
> > > +                * matching function.  It's userspace's responsibilty to not
> > > +                * provide "duplicate" entries in all cases.
> > > +                */
> > > +               if (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) || e->index == index)
> > >                         return e;
> > > +
> > > +               /*
> > > +                * Function matches and index is significant; not specifying an
> > > +                * exact index in this case is a KVM bug.
> > > +                */
> > Nitpick: why a KVM bug? Bad userspace can also provide an index-significant
> > entry for a CPUID leaf whose index is not significant in the x86 spec.
>
> Ugh, you're right.
>
> > We could arrange a table of all known leaves, recording for each leaf whether
> > it has an index in the x86 spec, and warn/reject the userspace CPUID info if
> > it doesn't match.
>
> We have such a table, cpuid_function_is_indexed().  The alternative would be to
> do:
>
>                 WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT &&
>                              cpuid_function_is_indexed(function));
>
> The problem with rejecting userspace CPUID on mismatch is that it could break
> userspace :-/  Of course, this entire patch would also break userspace to some
> extent, e.g. if userspace is relying on an exact match on index==0.  The only
> difference being that guest lookups with an exact index would still work.
>
> I think the restriction we could put in place would be that userspace can make
> a leaf more relaxed, e.g. to play nice if userspace forgets to set the SIGNIFICANT
> flag, but KVM rejects attempts to make guest CPUID more restrictive, i.e. disallows
> setting the SIGNIFICANT flag on leaves that KVM doesn't enumerate as significant.

Makes sense.

>
> > > +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > >         }
> > >  
> > >         return NULL;
> > > @@ -96,7 +117,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
> > >          * The existing code assumes virtual address is 48-bit or 57-bit in the
> > >          * canonical address checks; exit if it is ever changed.
> > >          */
> > > -       best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
> > > +       best = cpuid_entry2_find(entries, nent, 0x80000008,
> > > +                                KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > OK.
>
> Thanks for looking through all these!
No problem!
>
> > >  static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
> > > @@ -219,7 +242,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
> > >         struct kvm_cpuid_entry2 *best;
> > >         u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
> > >  
> > > -       best = cpuid_entry2_find(entries, nent, 1, 0);
> > > +       best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> >
> > Leaf 1, no index indeed.
> >
> > Offtopic: I wonder why we call this 'best'?
>
> Awful, awful historic code.  IIRC, for functions whose index is not significant,
> KVM would iterate over all entries and look for an exact function+index match
> anyways.  If there was at least one partial match (function match only) but no
> full match, KVM would use the first partial match, which it called the "best" match.
>
> We've been slowly/opportunistically killing off the "best" terminology.

Thanks for the explanation. I also noticed that you recently removed the 'stateful'
CPUID code. Good riddance!

>
> > > -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> > > -                                             u32 function, u32 index)
> > > +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> > > +                                                   u32 function, u32 index)
> > Nitpick: could you fix the indentation while at it?
>
> The indentation is correct, it's only the diff that appears misaligned.

I think I already heard about this once; I will try not to forget and will keep
it in mind next time I notice this. Sorry!

>
> > > @@ -1353,11 +1384,11 @@ get_out_of_range_cpuid_entry(struct kvm_vcpu *vcpu, u32 *fn_ptr, u32 index)
> > >                 return NULL;
> > >  
> > >         if (function >= 0x40000000 && function <= 0x4fffffff)
> > > -               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00, 0);
> > > +               class = kvm_find_cpuid_entry(vcpu, function & 0xffffff00);
> > >         else if (function >= 0xc0000000)
> > > -               class = kvm_find_cpuid_entry(vcpu, 0xc0000000, 0);
> > > +               class = kvm_find_cpuid_entry(vcpu, 0xc0000000);
> > >         else
> > > -               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
> > > +               class = kvm_find_cpuid_entry(vcpu, function & 0x80000000);
> > This assumes that all the classes have a first entry whose EAX specifies the
> > max leaf for that class. True for sure for basic and extended features; don't
> > know if it's true for hypervisor and Centaur entries. Seems OK.
>
> It holds true for all known hypervisors.  There's no formal definition for using
> 0x400000yy as the hypervisor range, but the de facto standard is to use EBX, ECX,
> and EDX for the signature, and EAX for the max leaf.
>
> The Centaur behavior is very much a guess, but odds are it's a correct guess.  When
> I added the Centaur code, I spent far too much time trying (and failing) to hunt
> down documentation. 

I understand very well what you mean.

Best regards,
Maxim Levitsky

>


2022-07-14 16:14:29

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86: Add dedicated helper to get CPUID entry with significant index

On 7/14/22 12:58, Maxim Levitsky wrote:
> On Tue, 2022-07-12 at 17:09 +0000, Sean Christopherson wrote:
>> On Tue, Jul 12, 2022, Sean Christopherson wrote:
>>> On Tue, Jul 12, 2022, Maxim Levitsky wrote:
>>>> On Tue, 2022-07-12 at 00:06 +0000, Sean Christopherson wrote:
>>>>> +               /*
>>>>> +                * Function matches and index is significant; not specifying an
>>>>> +                * exact index in this case is a KVM bug.
>>>>> +                */
>>>> Nitpick: why a KVM bug? Bad userspace can also provide an index-significant
>>>> entry for a CPUID leaf whose index is not significant in the x86 spec.
>>>
>>> Ugh, you're right.
>>>
>>>> We could arrange a table of all known leaves, recording for each leaf whether
>>>> it has an index in the x86 spec, and warn/reject the userspace CPUID info if
>>>> it doesn't match.
>>>
>>> We have such a table, cpuid_function_is_indexed().  The alternative would be to
>>> do:
>>>
>>>                 WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT &&
>>>                              cpuid_function_is_indexed(function));
>>>
>>> The problem with rejecting userspace CPUID on mismatch is that it could break
>>> userspace :-/  Of course, this entire patch would also break userspace to some
>>> extent, e.g. if userspace is relying on an exact match on index==0.  The only
>>> difference being that guest lookups with an exact index would still work.
>>>
>>> I think the restriction we could put in place would be that userspace can make
>>> a leaf more relaxed, e.g. to play nice if userspace forgets to set the SIGNIFICANT
>>> flag, but KVM rejects attempts to make guest CPUID more restrictive, i.e. disallows
>>> setting the SIGNIFICANT flag on leaves that KVM doesn't enumerate as significant.
>>>
>>>>> +               WARN_ON_ONCE(index == KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>>
>> Actually, better idea.  Let userspace do whatever, and have direct KVM lookups
>> for functions that architecturally don't have a significant index use the first
>> entry even if userspace set the SIGNIFICANT flag.  That will mostly maintain
>> backwards compatibility; the only thing that would break is if userspace set the
>> SIGNIFICANT flag _and_ provided a non-zero index _and_ relied on KVM to not match
>> the entry.
>
> Makes sense as well.

Squashed and queued, thanks.

Regarding function and index, I never remember the "function" name, but
it's pretty obvious that the index is ecx so there's no ambiguity. And
using different names for inputs and outputs makes it clear that there
are no games with in/out parameters.

Paolo