When the LBR feature is reported by the vmx_get_perf_capabilities(),
the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
The debugctl msr is handled separately in vmx/svm and they're not
completely identical, hence remove the common msr handling code.
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
arch/x86/kvm/vmx/pmu_intel.c | 19 +++++++++++++++++++
arch/x86/kvm/x86.c | 13 -------------
3 files changed, 31 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index b633a90320ee..f6fcfabb1026 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
+#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
+
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
return perf_cap;
}
+static inline u64 vmx_get_supported_debugctl(void)
+{
+ u64 val = 0;
+
+ if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
+ val |= DEBUGCTLMSR_LBR_MASK;
+
+ return val;
+}
+
#endif /* __KVM_X86_VMX_CAPS_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a953c7d633f6..d92e95b64c74 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
ret = pmu->version > 1;
break;
+ case MSR_IA32_DEBUGCTLMSR:
case MSR_IA32_PERF_CAPABILITIES:
ret = 1;
break;
@@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.perf_capabilities;
return 0;
+ case MSR_IA32_DEBUGCTLMSR:
+ msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
return true;
}
+static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
+{
+ u64 debugctlmsr = vmx_get_supported_debugctl();
+
+ if (!lbr_is_enabled(vcpu))
+ debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
+
+ return debugctlmsr;
+}
+
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
vcpu->arch.perf_capabilities = data;
return 0;
+ case MSR_IA32_DEBUGCTLMSR:
+ if (data & ~vcpu_get_supported_debugctl(vcpu))
+ return 1;
+ vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c88c2f34e4..56f275eb4554 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
break;
- case MSR_IA32_DEBUGCTLMSR:
- if (!data) {
- /* We support the non-activated case already */
- break;
- } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
- /* Values other than LBR and BTF are vendor-specific,
- thus reserved and should throw a #GP */
- return 1;
- }
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
- break;
case 0x200 ... 0x2ff:
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
@@ -3120,7 +3108,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_EBL_CR_POWERON:
- case MSR_IA32_DEBUGCTLMSR:
case MSR_IA32_LASTBRANCHFROMIP:
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
--
2.21.3
On 6/13/2020 4:09 PM, Like Xu wrote:
> When the LBR feature is reported by the vmx_get_perf_capabilities(),
> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
>
> The debugctl msr is handled separately in vmx/svm and they're not
> completely identical, hence remove the common msr handling code.
>
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
> arch/x86/kvm/vmx/pmu_intel.c | 19 +++++++++++++++++++
> arch/x86/kvm/x86.c | 13 -------------
> 3 files changed, 31 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index b633a90320ee..f6fcfabb1026 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
> #define PMU_CAP_FW_WRITES (1ULL << 13)
> #define PMU_CAP_LBR_FMT 0x3f
>
> +#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
> +
> struct nested_vmx_msrs {
> /*
> * We only store the "true" versions of the VMX capability MSRs. We
> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
> return perf_cap;
> }
>
> +static inline u64 vmx_get_supported_debugctl(void)
> +{
> + u64 val = 0;
> +
> + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
> + val |= DEBUGCTLMSR_LBR_MASK;
> +
> + return val;
> +}
> +
> #endif /* __KVM_X86_VMX_CAPS_H */
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index a953c7d633f6..d92e95b64c74 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> ret = pmu->version > 1;
> break;
> + case MSR_IA32_DEBUGCTLMSR:
> case MSR_IA32_PERF_CAPABILITIES:
> ret = 1;
> break;
> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> return 1;
> msr_info->data = vcpu->arch.perf_capabilities;
> return 0;
> + case MSR_IA32_DEBUGCTLMSR:
> + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
Can we put the emulation of MSR_IA32_DEBUGCTLMSR in
vmx_{get/set})_msr(). AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU
related MSR that there is bit 2 to enable #DB for bus lock.
> + return 0;
> default:
> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
> return true;
> }
>
> +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
> +{
> + u64 debugctlmsr = vmx_get_supported_debugctl();
> +
> + if (!lbr_is_enabled(vcpu))
> + debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
> +
> + return debugctlmsr;
> +}
> +
> static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> }
> vcpu->arch.perf_capabilities = data;
> return 0;
> + case MSR_IA32_DEBUGCTLMSR:
> + if (data & ~vcpu_get_supported_debugctl(vcpu))
> + return 1;
> + vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> + return 0;
> default:
> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 00c88c2f34e4..56f275eb4554 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> return 1;
> }
> break;
> - case MSR_IA32_DEBUGCTLMSR:
> - if (!data) {
> - /* We support the non-activated case already */
> - break;
> - } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
#GP instead of being ignored and printing a log in kernel.
These codes were introduced ~12 years ago in commit b5e2fec0ebc3 ("KVM:
Ignore DEBUGCTL MSRs with no effect"), just to make Netware happy. Maybe
I'm overthinking for that too old thing.
> - /* Values other than LBR and BTF are vendor-specific,
> - thus reserved and should throw a #GP */
> - return 1;
> - }
> - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
> - __func__, data);
> - break;
> case 0x200 ... 0x2ff:
> return kvm_mtrr_set_msr(vcpu, msr, data);
> case MSR_IA32_APICBASE:
> @@ -3120,7 +3108,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> switch (msr_info->index) {
> case MSR_IA32_PLATFORM_ID:
> case MSR_IA32_EBL_CR_POWERON:
> - case MSR_IA32_DEBUGCTLMSR:
> case MSR_IA32_LASTBRANCHFROMIP:
> case MSR_IA32_LASTBRANCHTOIP:
> case MSR_IA32_LASTINTFROMIP:
>
On 2020/6/13 17:14, Xiaoyao Li wrote:
> On 6/13/2020 4:09 PM, Like Xu wrote:
>> When the LBR feature is reported by the vmx_get_perf_capabilities(),
>> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
>>
>> The debugctl msr is handled separately in vmx/svm and they're not
>> completely identical, hence remove the common msr handling code.
>>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
>> arch/x86/kvm/vmx/pmu_intel.c | 19 +++++++++++++++++++
>> arch/x86/kvm/x86.c | 13 -------------
>> 3 files changed, 31 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/capabilities.h
>> b/arch/x86/kvm/vmx/capabilities.h
>> index b633a90320ee..f6fcfabb1026 100644
>> --- a/arch/x86/kvm/vmx/capabilities.h
>> +++ b/arch/x86/kvm/vmx/capabilities.h
>> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
>> #define PMU_CAP_FW_WRITES (1ULL << 13)
>> #define PMU_CAP_LBR_FMT 0x3f
>> +#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR |
>> DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
>> +
>> struct nested_vmx_msrs {
>> /*
>> * We only store the "true" versions of the VMX capability MSRs. We
>> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
>> return perf_cap;
>> }
>> +static inline u64 vmx_get_supported_debugctl(void)
>> +{
>> + u64 val = 0;
>> +
>> + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
>> + val |= DEBUGCTLMSR_LBR_MASK;
>> +
>> + return val;
>> +}
>> +
>> #endif /* __KVM_X86_VMX_CAPS_H */
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index a953c7d633f6..d92e95b64c74 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu
>> *vcpu, u32 msr)
>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>> ret = pmu->version > 1;
>> break;
>> + case MSR_IA32_DEBUGCTLMSR:
>> case MSR_IA32_PERF_CAPABILITIES:
>> ret = 1;
>> break;
>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
>> struct msr_data *msr_info)
>> return 1;
>> msr_info->data = vcpu->arch.perf_capabilities;
>> return 0;
>> + case MSR_IA32_DEBUGCTLMSR:
>> + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>
> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
> bit 2 to enable #DB for bus lock.
We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
and you may apply you bus lock changes in that handler.
>> + return 0;
>> default:
>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct
>> kvm_vcpu *vcpu)
>> return true;
>> }
>> +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
>> +{
>> + u64 debugctlmsr = vmx_get_supported_debugctl();
>> +
>> + if (!lbr_is_enabled(vcpu))
>> + debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
>> +
>> + return debugctlmsr;
>> +}
>> +
>> static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data
>> *msr_info)
>> {
>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
>> struct msr_data *msr_info)
>> }
>> vcpu->arch.perf_capabilities = data;
>> return 0;
>> + case MSR_IA32_DEBUGCTLMSR:
>> + if (data & ~vcpu_get_supported_debugctl(vcpu))
>> + return 1;
>> + vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> + return 0;
>> default:
>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 00c88c2f34e4..56f275eb4554 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
>> struct msr_data *msr_info)
>> return 1;
>> }
>> break;
>> - case MSR_IA32_DEBUGCTLMSR:
>> - if (!data) {
>> - /* We support the non-activated case already */
>> - break;
>> - } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>
> So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
> #GP instead of being ignored and printing a log in kernel.
>
Since the BTF is not implemented on the KVM at all,
I do propose not left this kind of dummy thing in the future KVM code.
Let's see if Netware or any BTF user will complain about this change.
> These codes were introduced ~12 years ago in commit b5e2fec0ebc3 ("KVM:
> Ignore DEBUGCTL MSRs with no effect"), just to make Netware happy. Maybe
> I'm overthinking for that too old thing.
>
>> - /* Values other than LBR and BTF are vendor-specific,
>> - thus reserved and should throw a #GP */
>> - return 1;
>> - }
>> - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
>> - __func__, data);
>> - break;
>> case 0x200 ... 0x2ff:
>> return kvm_mtrr_set_msr(vcpu, msr, data);
>> case MSR_IA32_APICBASE:
>> @@ -3120,7 +3108,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu,
>> struct msr_data *msr_info)
>> switch (msr_info->index) {
>> case MSR_IA32_PLATFORM_ID:
>> case MSR_IA32_EBL_CR_POWERON:
>> - case MSR_IA32_DEBUGCTLMSR:
>> case MSR_IA32_LASTBRANCHFROMIP:
>> case MSR_IA32_LASTBRANCHTOIP:
>> case MSR_IA32_LASTINTFROMIP:
>>
>
On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
> On 2020/6/13 17:14, Xiaoyao Li wrote:
> >On 6/13/2020 4:09 PM, Like Xu wrote:
> >>When the LBR feature is reported by the vmx_get_perf_capabilities(),
> >>the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
> >>
> >>The debugctl msr is handled separately in vmx/svm and they're not
> >>completely identical, hence remove the common msr handling code.
I would prefer to put the "remove DEBUGCTRL handling from common x86" in a
separate patch. Without digging into SVM, it's not obvious that dropping
MSR_IA32_DEBUGCTLMSR from kvm_set_msr_common() is a nop for SVM.
> >>Signed-off-by: Like Xu <[email protected]>
> >>---
> >>? arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
> >>? arch/x86/kvm/vmx/pmu_intel.c??? | 19 +++++++++++++++++++
> >>? arch/x86/kvm/x86.c????????????? | 13 -------------
> >>? 3 files changed, 31 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/arch/x86/kvm/vmx/capabilities.h
> >>b/arch/x86/kvm/vmx/capabilities.h
> >>index b633a90320ee..f6fcfabb1026 100644
> >>--- a/arch/x86/kvm/vmx/capabilities.h
> >>+++ b/arch/x86/kvm/vmx/capabilities.h
> >>@@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
> >>? #define PMU_CAP_FW_WRITES??? (1ULL << 13)
> >>? #define PMU_CAP_LBR_FMT??????? 0x3f
> >>? +#define DEBUGCTLMSR_LBR_MASK??????? (DEBUGCTLMSR_LBR |
> >>DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
> >>+
> >>? struct nested_vmx_msrs {
> >>????? /*
> >>?????? * We only store the "true" versions of the VMX capability MSRs. We
> >>@@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
> >>????? return perf_cap;
> >>? }
> >>? +static inline u64 vmx_get_supported_debugctl(void)
> >>+{
> >>+??? u64 val = 0;
> >>+
> >>+??? if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
> >>+??????? val |= DEBUGCTLMSR_LBR_MASK;
> >>+
> >>+??? return val;
> >>+}
> >>+
> >>? #endif /* __KVM_X86_VMX_CAPS_H */
> >>diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> >>index a953c7d633f6..d92e95b64c74 100644
> >>--- a/arch/x86/kvm/vmx/pmu_intel.c
> >>+++ b/arch/x86/kvm/vmx/pmu_intel.c
> >>@@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu
> >>*vcpu, u32 msr)
> >>????? case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
> >>????????? ret = pmu->version > 1;
> >>????????? break;
> >>+??? case MSR_IA32_DEBUGCTLMSR:
> >>????? case MSR_IA32_PERF_CAPABILITIES:
> >>????????? ret = 1;
> >>????????? break;
> >>@@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>????????????? return 1;
> >>????????? msr_info->data = vcpu->arch.perf_capabilities;
> >>????????? return 0;
> >>+??? case MSR_IA32_DEBUGCTLMSR:
> >>+??????? msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> >
> >Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
> >AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
> >bit 2 to enable #DB for bus lock.
> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
> and you may apply you bus lock changes in that handler.
Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
#DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
up writing it twice when both bus lock and LBR are supported.
I don't see anything in the series that takes action on writes to
MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.
A question for both LBR and bus lock: would it make sense to cache the
guest's value in vcpu_vmx so that querying the guest value doesn't require
a VMREAD? I don't have a good feel for how frequently it would be accessed.
> >>+??????? return 0;
> >>????? default:
> >>????????? if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >>????????????? (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> >>@@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct
> >>kvm_vcpu *vcpu)
> >>????? return true;
> >>? }
> >>? +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
> >>+{
> >>+??? u64 debugctlmsr = vmx_get_supported_debugctl();
> >>+
> >>+??? if (!lbr_is_enabled(vcpu))
> >>+??????? debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
> >>+
> >>+??? return debugctlmsr;
> >>+}
> >>+
> >>? static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data
> >>*msr_info)
> >>? {
> >>????? struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> >>@@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>????????? }
> >>????????? vcpu->arch.perf_capabilities = data;
> >>????????? return 0;
> >>+??? case MSR_IA32_DEBUGCTLMSR:
> >>+??????? if (data & ~vcpu_get_supported_debugctl(vcpu))
> >>+??????????? return 1;
> >>+??????? vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> >>+??????? return 0;
> >>????? default:
> >>????????? if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> >>????????????? (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> >>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>index 00c88c2f34e4..56f275eb4554 100644
> >>--- a/arch/x86/kvm/x86.c
> >>+++ b/arch/x86/kvm/x86.c
> >>@@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
> >>struct msr_data *msr_info)
> >>????????????? return 1;
> >>????????? }
> >>????????? break;
> >>-??? case MSR_IA32_DEBUGCTLMSR:
> >>-??????? if (!data) {
> >>-??????????? /* We support the non-activated case already */
> >>-??????????? break;
> >>-??????? } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
> >
> >So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
> >#GP instead of being ignored and printing a log in kernel.
> >
>
> Since the BTF is not implemented on the KVM at all,
> I do propose not left this kind of dummy thing in the future KVM code.
>
> Let's see if Netware or any BTF user will complain about this change.
If you want to drop that behavior it needs be done in a separate patch.
Personally I don't see the point in doing so, it's a trivial amount of code
in KVM and there's no harm in dropping the bits on write.
On 7/8/2020 4:21 AM, Sean Christopherson wrote:
> On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
>> On 2020/6/13 17:14, Xiaoyao Li wrote:
>>> On 6/13/2020 4:09 PM, Like Xu wrote:
[...]
>>>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>> return 1;
>>>> msr_info->data = vcpu->arch.perf_capabilities;
>>>> return 0;
>>>> + case MSR_IA32_DEBUGCTLMSR:
>>>> + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>>>
>>> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
>>> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
>>> bit 2 to enable #DB for bus lock.
>> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
>> and you may apply you bus lock changes in that handler.
>
> Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
> #DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
> up writing it twice when both bus lock and LBR are supported.
Yeah. That's what I concerned as well.
> I don't see anything in the series that takes action on writes to
> MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
> reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
> to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.
>
> A question for both LBR and bus lock: would it make sense to cache the
> guest's value in vcpu_vmx so that querying the guest value doesn't require
> a VMREAD? I don't have a good feel for how frequently it would be accessed.
Cache the guest's value is OK, even though #DB bus lock bit wouldn't be
toggled frequently in a normal OS.
Hi Sean,
First of all, are you going to queue the LBR patch series in your tree
considering the host perf patches have already queued in Peter's tree ?
On 2020/7/8 4:21, Sean Christopherson wrote:
> On Sat, Jun 13, 2020 at 05:42:50PM +0800, Xu, Like wrote:
>> On 2020/6/13 17:14, Xiaoyao Li wrote:
>>> On 6/13/2020 4:09 PM, Like Xu wrote:
>>>> When the LBR feature is reported by the vmx_get_perf_capabilities(),
>>>> the LBR fields in the [vmx|vcpu]_supported debugctl should be unmasked.
>>>>
>>>> The debugctl msr is handled separately in vmx/svm and they're not
>>>> completely identical, hence remove the common msr handling code.
> I would prefer to put the "remove DEBUGCTRL handling from common x86" in a
> separate patch. Without digging into SVM, it's not obvious that dropping
> MSR_IA32_DEBUGCTLMSR from kvm_set_msr_common() is a nop for SVM.
Sure, I'll do it in a separate patch.
>
>>>> Signed-off-by: Like Xu <[email protected]>
>>>> ---
>>>> arch/x86/kvm/vmx/capabilities.h | 12 ++++++++++++
>>>> arch/x86/kvm/vmx/pmu_intel.c | 19 +++++++++++++++++++
>>>> arch/x86/kvm/x86.c | 13 -------------
>>>> 3 files changed, 31 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx/capabilities.h
>>>> b/arch/x86/kvm/vmx/capabilities.h
>>>> index b633a90320ee..f6fcfabb1026 100644
>>>> --- a/arch/x86/kvm/vmx/capabilities.h
>>>> +++ b/arch/x86/kvm/vmx/capabilities.h
>>>> @@ -21,6 +21,8 @@ extern int __read_mostly pt_mode;
>>>> #define PMU_CAP_FW_WRITES (1ULL << 13)
>>>> #define PMU_CAP_LBR_FMT 0x3f
>>>> +#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR |
>>>> DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
>>>> +
>>>> struct nested_vmx_msrs {
>>>> /*
>>>> * We only store the "true" versions of the VMX capability MSRs. We
>>>> @@ -387,4 +389,14 @@ static inline u64 vmx_get_perf_capabilities(void)
>>>> return perf_cap;
>>>> }
>>>> +static inline u64 vmx_get_supported_debugctl(void)
>>>> +{
>>>> + u64 val = 0;
>>>> +
>>>> + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
>>>> + val |= DEBUGCTLMSR_LBR_MASK;
>>>> +
>>>> + return val;
>>>> +}
>>>> +
>>>> #endif /* __KVM_X86_VMX_CAPS_H */
>>>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>>>> index a953c7d633f6..d92e95b64c74 100644
>>>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>>>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>>>> @@ -187,6 +187,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu
>>>> *vcpu, u32 msr)
>>>> case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
>>>> ret = pmu->version > 1;
>>>> break;
>>>> + case MSR_IA32_DEBUGCTLMSR:
>>>> case MSR_IA32_PERF_CAPABILITIES:
>>>> ret = 1;
>>>> break;
>>>> @@ -237,6 +238,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>> return 1;
>>>> msr_info->data = vcpu->arch.perf_capabilities;
>>>> return 0;
>>>> + case MSR_IA32_DEBUGCTLMSR:
>>>> + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>>> Can we put the emulation of MSR_IA32_DEBUGCTLMSR in vmx_{get/set})_msr().
>>> AFAIK, MSR_IA32_DEBUGCTLMSR is not a pure PMU related MSR that there is
>>> bit 2 to enable #DB for bus lock.
>> We already have "case MSR_IA32_DEBUGCTLMSR" handler in the vmx_set_msr()
>> and you may apply you bus lock changes in that handler.
> Hrm, but that'd be weird dependency as vmx_set_msr() would need to check for
> #DB bus lock support but not actually write GUEST_IA32_DEBUGCTL, or we'd end
> up writing it twice when both bus lock and LBR are supported.
Yes, you're right about the multiple writes on GUEST_IA32_DEBUGCTL.
I'll move the handler to vmx_set/get_msr() for other DEBUGCTL users.
>
> I don't see anything in the series that takes action on writes to
> MSR_IA32_DEBUGCTLMSR beyond updating the VMCS, i.e. AFAICT there isn't any
> reason to call into the PMU, VMX can simply query vmx_get_perf_capabilities()
> to check if it's legal to enable DEBUGCTLMSR_LBR_MASK.
There's a gap to enable DEBUGCTLMSR_LBR_MASK.
The vmx_get_perf_capabilities() is queried per-KVM while
the vmx_get_supported_debugctl() is queried per-guest.
>
> A question for both LBR and bus lock: would it make sense to cache the
> guest's value in vcpu_vmx so that querying the guest value doesn't require
> a VMREAD? I don't have a good feel for how frequently it would be accessed.
I'm OK with the cached value for this field and AFAIK,
it will benefit the legacy_freezing_lbrs_on_pmi emulation
if the VMREAD is heavier than normal cache/mem touch.
>
>>>> + return 0;
>>>> default:
>>>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>>>> @@ -282,6 +286,16 @@ static inline bool lbr_is_compatible(struct
>>>> kvm_vcpu *vcpu)
>>>> return true;
>>>> }
>>>> +static inline u64 vcpu_get_supported_debugctl(struct kvm_vcpu *vcpu)
>>>> +{
>>>> + u64 debugctlmsr = vmx_get_supported_debugctl();
>>>> +
>>>> + if (!lbr_is_enabled(vcpu))
>>>> + debugctlmsr &= ~DEBUGCTLMSR_LBR_MASK;
>>>> +
>>>> + return debugctlmsr;
>>>> +}
>>>> +
>>>> static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data
>>>> *msr_info)
>>>> {
>>>> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>>> @@ -336,6 +350,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>> }
>>>> vcpu->arch.perf_capabilities = data;
>>>> return 0;
>>>> + case MSR_IA32_DEBUGCTLMSR:
>>>> + if (data & ~vcpu_get_supported_debugctl(vcpu))
>>>> + return 1;
>>>> + vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>>>> + return 0;
>>>> default:
>>>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 00c88c2f34e4..56f275eb4554 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -2840,18 +2840,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
>>>> struct msr_data *msr_info)
>>>> return 1;
>>>> }
>>>> break;
>>>> - case MSR_IA32_DEBUGCTLMSR:
>>>> - if (!data) {
>>>> - /* We support the non-activated case already */
>>>> - break;
>>>> - } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
>>> So after this patch, guest trying to set bit DEBUGCTLMSR_BTF will get a
>>> #GP instead of being ignored and printing a log in kernel.
>>>
>> Since the BTF is not implemented on the KVM at all,
>> I do propose not left this kind of dummy thing in the future KVM code.
>>
>> Let's see if Netware or any BTF user will complain about this change.
> If you want to drop that behavior it needs be done in a separate patch.
> Personally I don't see the point in doing so, it's a trivial amount of code
> in KVM and there's no harm in dropping the bits on write.
No harm in dropping the bits on write ? Interesting.
I may keep the semantics unchanged for LBR patches and make it as separate
proposal.
Thanks,
Like Xu
On Wed, Jul 08, 2020 at 03:06:57PM +0800, Xu, Like wrote:
> Hi Sean,
>
> First of all, are you going to queue the LBR patch series in your tree
> considering the host perf patches have already queued in Peter's tree ?
No, I'll let Paolo take 'em directly, I'm nowhere near knowledgeable enough
with respect to the PMU to feel comfortable taking them.