If lbr_desc->event is successfully created,
intel_pmu_create_guest_lbr_event() will return 0; otherwise it will
return -ENOENT, and the caller then jumps to the LBR msrs dummy handling.
Fixes: 1b5ac3226a1a ("KVM: vmx/pmu: Pass-through LBR msrs when the guest LBR event is ACTIVE")
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/vmx/pmu_intel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d1df618cb7de..d6a5fe19ff09 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
return false;
- if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
+ if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu))
goto dummy;
/*
--
2.29.2
When a processor that supports model-specific LBR generates a debug
breakpoint event, it automatically clears the LBR flag. This action
does not clear previously stored LBR stack MSRs. (Intel SDM 17.4.2)
Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/vmx/vmx.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e0a3a9be654b..4951b535eb7f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4795,6 +4795,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
u32 intr_info, ex_no, error_code;
unsigned long cr2, rip, dr6;
u32 vect_info;
+ u64 lbr_ctl;
vect_info = vmx->idt_vectoring_info;
intr_info = vmx_get_intr_info(vcpu);
@@ -4886,6 +4887,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
rip = kvm_rip_read(vcpu);
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
+ /* On the debug breakpoint event, the LBREn bit is cleared. */
+ lbr_ctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ if (lbr_ctl & DEBUGCTLMSR_LBR)
+ vmcs_write64(GUEST_IA32_DEBUGCTL, lbr_ctl & ~DEBUGCTLMSR_LBR);
break;
case AC_VECTOR:
if (guest_inject_ac(vcpu)) {
--
2.29.2
On 23/02/21 17:38, Sean Christopherson wrote:
> On Tue, Feb 23, 2021, Like Xu wrote:
>> When the processor that support model-specific LBR generates a debug
>> breakpoint event, it automatically clears the LBR flag. This action
>> does not clear previously stored LBR stack MSRs. (Intel SDM 17.4.2)
>>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> arch/x86/kvm/vmx/vmx.c | 5 +++++
>> 1 file changed, 5 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index e0a3a9be654b..4951b535eb7f 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -4795,6 +4795,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>> u32 intr_info, ex_no, error_code;
>> unsigned long cr2, rip, dr6;
>> u32 vect_info;
>> + u64 lbr_ctl;
>>
>> vect_info = vmx->idt_vectoring_info;
>> intr_info = vmx_get_intr_info(vcpu);
>> @@ -4886,6 +4887,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
>> rip = kvm_rip_read(vcpu);
>> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
>> kvm_run->debug.arch.exception = ex_no;
>> + /* On the debug breakpoint event, the LBREn bit is cleared. */
>
> Except this code is in BP_VECTOR, not DB_VECTOR as it should be.
>
> When the processor generates a debug exception (#DB), it automatically clears
> the LBR flag before executing the exception handler. This action does not
> clear previously stored LBR stack MSRs.
Also, this should come with a testcase.
Paolo
On Tue, Feb 23, 2021, Like Xu wrote:
> If lbr_desc->event is successfully created, the intel_pmu_create_
> guest_lbr_event() will return 0, otherwise it will return -ENOENT,
> and then jump to LBR msrs dummy handling.
>
> Fixes: 1b5ac3226a1a ("KVM: vmx/pmu: Pass-through LBR msrs when the guest LBR event is ACTIVE")
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/vmx/pmu_intel.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index d1df618cb7de..d6a5fe19ff09 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
> if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
> return false;
>
> - if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
> + if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu))
> goto dummy;
Wouldn't it be better to create an event only on write? And really, why create
the event in this flow in the first place? In normal operation, can't event
creation be deferred until GUEST_IA32_DEBUGCTL.DEBUGCTLMSR_LBR=1? If event
creation fails in that flow, I would think KVM would do its best to create an
event in future runs without waiting for additional actions from the guest.
Also, this bug suggests there's a big gaping hole in the test coverage. AFAICT,
event contention would lead to a #GP crash in the host due to lbr_desc->event
being dereferenced, no?
>
> /*
> --
> 2.29.2
>
On Tue, Feb 23, 2021, Like Xu wrote:
> When the processor that support model-specific LBR generates a debug
> breakpoint event, it automatically clears the LBR flag. This action
> does not clear previously stored LBR stack MSRs. (Intel SDM 17.4.2)
>
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/vmx/vmx.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e0a3a9be654b..4951b535eb7f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4795,6 +4795,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> u32 intr_info, ex_no, error_code;
> unsigned long cr2, rip, dr6;
> u32 vect_info;
> + u64 lbr_ctl;
>
> vect_info = vmx->idt_vectoring_info;
> intr_info = vmx_get_intr_info(vcpu);
> @@ -4886,6 +4887,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> rip = kvm_rip_read(vcpu);
> kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
> kvm_run->debug.arch.exception = ex_no;
> + /* On the debug breakpoint event, the LBREn bit is cleared. */
Except this code is in BP_VECTOR, not DB_VECTOR as it should be.
When the processor generates a debug exception (#DB), it automatically clears
the LBR flag before executing the exception handler. This action does not
clear previously stored LBR stack MSRs.
> + lbr_ctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + if (lbr_ctl & DEBUGCTLMSR_LBR)
> + vmcs_write64(GUEST_IA32_DEBUGCTL, lbr_ctl & ~DEBUGCTLMSR_LBR);
> break;
> case AC_VECTOR:
> if (guest_inject_ac(vcpu)) {
> --
> 2.29.2
>
On 23/02/21 18:15, Sean Christopherson wrote:
> If event
> creation fails in that flow, I would think KVM would do its best to create an
> event in future runs without waiting for additional actions from the guest.
>
> Also, this bug suggests there's a big gaping hole in the test coverage. AFAICT,
> event contention would lead to a #GP crash in the host due to lbr_desc->event
> being dereferenced, no?
Yes, testing contention would use the tools/testing/selftests/kvm
framework rather than just kvm-unit-tests.
Paolo
On 23/02/21 02:39, Like Xu wrote:
> If lbr_desc->event is successfully created, the intel_pmu_create_
> guest_lbr_event() will return 0, otherwise it will return -ENOENT,
> and then jump to LBR msrs dummy handling.
>
> Fixes: 1b5ac3226a1a ("KVM: vmx/pmu: Pass-through LBR msrs when the guest LBR event is ACTIVE")
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/vmx/pmu_intel.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index d1df618cb7de..d6a5fe19ff09 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
> if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
> return false;
>
> - if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
> + if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu))
> goto dummy;
Queued, adding a "< 0" to clarify the semantics of the function.
Paolo
On 2021/2/24 1:15, Sean Christopherson wrote:
> On Tue, Feb 23, 2021, Like Xu wrote:
>> If lbr_desc->event is successfully created, the intel_pmu_create_
>> guest_lbr_event() will return 0, otherwise it will return -ENOENT,
>> and then jump to LBR msrs dummy handling.
>>
>> Fixes: 1b5ac3226a1a ("KVM: vmx/pmu: Pass-through LBR msrs when the guest LBR event is ACTIVE")
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>> arch/x86/kvm/vmx/pmu_intel.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index d1df618cb7de..d6a5fe19ff09 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
>> if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
>> return false;
>>
>> - if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
>> + if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu))
>> goto dummy;
> Wouldn't it be better to create an event only on write? And really, why create
> the event in this flow in the first place? In normal operation, can't event
> creation be deferred until GUEST_IA32_DEBUGCTL.DEBUGCTLMSR_LBR=1?
We need event creation and pass-through for both reads and writes.
The LBR driver first accesses MSR_LBR_SELECT to configure the branch types,
and we also create the LBR event on the GUEST_IA32_DEBUGCTL.DEBUGCTLMSR_LBR=1 trap.
A lazy approach would require more cached values and more traps.
> If event
> creation fails in that flow, I would think KVM would do its best to create an
> event in future runs without waiting for additional actions from the guest.
We already do this by releasing the LBR event so that creation and
pass-through can be retried.
>
> Also, this bug suggests there's a big gaping hole in the test coverage.
Not a big one, but a concerning one. To hit that, we need to run an LBR agent
on the host and grab LBR from the guest. It's not covered by the current test
cases since we do not recommend this kind of usage in the comment.
> AFAICT,
> event contention would lead to a #GP crash in the host due to lbr_desc->event
> being dereferenced, no?
A #GP crash in the host? Can you share more details about it?
>
>>
>> /*
>> --
>> 2.29.2
>>
On Wed, Feb 24, 2021, Xu, Like wrote:
> On 2021/2/24 1:15, Sean Christopherson wrote:
> > On Tue, Feb 23, 2021, Like Xu wrote:
> > > If lbr_desc->event is successfully created, the intel_pmu_create_
> > > guest_lbr_event() will return 0, otherwise it will return -ENOENT,
> > > and then jump to LBR msrs dummy handling.
> > >
> > > Fixes: 1b5ac3226a1a ("KVM: vmx/pmu: Pass-through LBR msrs when the guest LBR event is ACTIVE")
> > > Signed-off-by: Like Xu <[email protected]>
> > > ---
> > > arch/x86/kvm/vmx/pmu_intel.c | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> > > index d1df618cb7de..d6a5fe19ff09 100644
> > > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > > @@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
> > > if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
> > > return false;
> > > - if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
> > > + if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu))
> > > goto dummy;
...
> > AFAICT, event contention would lead to a #GP crash in the host due to
> > lbr_desc->event being dereferenced, no?
>
> a #GP crash in the host ?Can you share more understanding about it ?
The original code will dereference a null lbr_desc->event if
intel_pmu_create_guest_lbr_event() fails.
if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu)) <- falls through
goto dummy;
/*
* Disable irq to ensure the LBR feature doesn't get reclaimed by the
* host at the time the value is read from the msr, and this avoids the
* host LBR value to be leaked to the guest. If LBR has been reclaimed,
* return 0 on guest reads.
*/
local_irq_disable();
if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) { <--------- kaboom
if (read)
rdmsrl(index, msr_info->data);
else
wrmsrl(index, msr_info->data);
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
local_irq_enable();
return true;
}