2020-03-17 07:56:42

by Like Xu

[permalink] [raw]
Subject: [PATCH] kvm/x86: Reduce counter period change overhead and delay the effective time

The cost of perf_event_period() is unstable, and when the guest samples
multiple events, the overhead increases dramatically (5378 ns on E5-2699).

For a non-running counter, the effective time of the new period is when
its corresponding enable bit is enabled. Calling perf_event_period()
in advance is superfluous. For a running counter, it's safe to delay the
effective time until the KVM_REQ_PMU event is handled. If there are
multiple perf_event_period() calls before handling KVM_REQ_PMU,
it helps to reduce the total cost.

Signed-off-by: Like Xu <[email protected]>

---
arch/x86/kvm/pmu.c | 11 -----------
arch/x86/kvm/pmu.h | 11 +++++++++++
arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d1f8ca57d354..527a8bb85080 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}

-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index d7da2b9e0755..cd112e825d2c 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}

+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c857737b438..4e689273eb05 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated)
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ if (pmc_speculative_in_use(pmc)) {
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ if (pmc_speculative_in_use(pmc)) {
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
--
2.21.1


2020-03-17 08:01:20

by Like Xu

[permalink] [raw]
Subject: Re: [PATCH] kvm/x86: Reduce counter period change overhead and delay the effective time

On 2020/3/17 15:53, Like Xu wrote:
> The cost of perf_event_period() is unstable, and when the guest samples
> multiple events, the overhead increases dramatically (5378 ns on E5-2699).
>
> For a non-running counter, the effective time of the new period is when
> its corresponding enable bit is enabled. Calling perf_event_period()
> in advance is superfluous. For a running counter, it's safe to delay the
> effective time until the KVM_REQ_PMU event is handled. If there are
> multiple perf_event_period() calls before handling KVM_REQ_PMU,
> it helps to reduce the total cost.
>
> Signed-off-by: Like Xu <[email protected]>
>
> ---
> arch/x86/kvm/pmu.c | 11 -----------
> arch/x86/kvm/pmu.h | 11 +++++++++++
> arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
> 3 files changed, 15 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index d1f8ca57d354..527a8bb85080 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
> kvm_pmu_refresh(vcpu);
> }
>
> -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
> -{
> - struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> -
> - if (pmc_is_fixed(pmc))
> - return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> - pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
> -
> - return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
> -}
> -
> /* Release perf_events for vPMCs that have been unused for a full time slice. */
> void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
> {
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index d7da2b9e0755..cd112e825d2c 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
> return sample_period;
> }
>
> +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
> +{
> + struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> +
> + if (pmc_is_fixed(pmc))
> + return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> + pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
> +
> + return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
> +}
> +
> void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
> void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
> void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 7c857737b438..4e689273eb05 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if (!msr_info->host_initiated)
> data = (s64)(s32)data;
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> - perf_event_period(pmc->perf_event,
> - get_sample_period(pmc, data));
> + if (pmc_speculative_in_use(pmc)) {

Oops, the trailing "{" here (and in the fixed-counter branch below) is a mistake: the block it opens is never closed.

> + kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
> return 0;
> } else if ((pmc = get_fixed_pmc(pmu, msr))) {
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> - perf_event_period(pmc->perf_event,
> - get_sample_period(pmc, data));
> + if (pmc_speculative_in_use(pmc)) {

> + kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
> return 0;
> } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
> if (data == pmc->eventsel)
>

2020-03-17 08:18:23

by Like Xu

[permalink] [raw]
Subject: [PATCH v2] KVM: x86/pmu: Reduce counter period change overhead and delay the effective time

The cost of perf_event_period() is unstable, and when the guest samples
multiple events, the overhead increases dramatically (5378 ns on E5-2699).

For a non-running counter, the effective time of the new period is when
its corresponding enable bit is enabled. Calling perf_event_period()
in advance is superfluous. For a running counter, it's safe to delay the
effective time until the KVM_REQ_PMU event is handled. If there are
multiple perf_event_period() calls before handling KVM_REQ_PMU,
it helps to reduce the total cost.

Signed-off-by: Like Xu <[email protected]>
---
arch/x86/kvm/pmu.c | 11 -----------
arch/x86/kvm/pmu.h | 11 +++++++++++
arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d1f8ca57d354..527a8bb85080 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}

-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index d7da2b9e0755..cd112e825d2c 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}

+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+
+ if (pmc_is_fixed(pmc))
+ return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+ pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+
+ return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c857737b438..20f654a0c09b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated)
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ if (pmc_speculative_in_use(pmc))
+ kvm_make_request(KVM_REQ_PMU, vcpu);
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ if (pmc_speculative_in_use(pmc))
+ kvm_make_request(KVM_REQ_PMU, vcpu);
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
--
2.21.1

2020-03-26 12:48:37

by Like Xu

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86/pmu: Reduce counter period change overhead and delay the effective time

Could anyone help review this change?

Thanks,
Like Xu

On 2020/3/17 16:14, Like Xu wrote:
> The cost of perf_event_period() is unstable, and when the guest samples
> multiple events, the overhead increases dramatically (5378 ns on E5-2699).
>
> For a non-running counter, the effective time of the new period is when
> its corresponding enable bit is enabled. Calling perf_event_period()
> in advance is superfluous. For a running counter, it's safe to delay the
> effective time until the KVM_REQ_PMU event is handled. If there are
> multiple perf_event_period() calls before handling KVM_REQ_PMU,
> it helps to reduce the total cost.
>
> Signed-off-by: Like Xu <[email protected]>
> ---
> arch/x86/kvm/pmu.c | 11 -----------
> arch/x86/kvm/pmu.h | 11 +++++++++++
> arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
> 3 files changed, 15 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index d1f8ca57d354..527a8bb85080 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
> kvm_pmu_refresh(vcpu);
> }
>
> -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
> -{
> - struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> -
> - if (pmc_is_fixed(pmc))
> - return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> - pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
> -
> - return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
> -}
> -
> /* Release perf_events for vPMCs that have been unused for a full time slice. */
> void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
> {
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index d7da2b9e0755..cd112e825d2c 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
> return sample_period;
> }
>
> +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
> +{
> + struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> +
> + if (pmc_is_fixed(pmc))
> + return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> + pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
> +
> + return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
> +}
> +
> void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
> void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
> void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 7c857737b438..20f654a0c09b 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if (!msr_info->host_initiated)
> data = (s64)(s32)data;
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> - perf_event_period(pmc->perf_event,
> - get_sample_period(pmc, data));
> + if (pmc_speculative_in_use(pmc))
> + kvm_make_request(KVM_REQ_PMU, vcpu);
> return 0;
> } else if ((pmc = get_fixed_pmc(pmu, msr))) {
> pmc->counter += data - pmc_read_counter(pmc);
> - if (pmc->perf_event)
> - perf_event_period(pmc->perf_event,
> - get_sample_period(pmc, data));
> + if (pmc_speculative_in_use(pmc))
> + kvm_make_request(KVM_REQ_PMU, vcpu);
> return 0;
> } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
> if (data == pmc->eventsel)
>

2020-04-08 15:13:26

by Like Xu

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86/pmu: Reduce counter period change overhead and delay the effective time

Hi Paolo,

Could you please take a look at this patch?
If there is anything that needs to be improved, please let me know.

Thanks,
Like Xu

On 2020/3/26 20:47, Like Xu wrote:
> Could anyone help review this change?
>
> Thanks,
> Like Xu
>
> On 2020/3/17 16:14, Like Xu wrote:
>> The cost of perf_event_period() is unstable, and when the guest samples
>> multiple events, the overhead increases dramatically (5378 ns on E5-2699).
>>
>> For a non-running counter, the effective time of the new period is when
>> its corresponding enable bit is enabled. Calling perf_event_period()
>> in advance is superfluous. For a running counter, it's safe to delay the
>> effective time until the KVM_REQ_PMU event is handled. If there are
>> multiple perf_event_period() calls before handling KVM_REQ_PMU,
>> it helps to reduce the total cost.
>>
>> Signed-off-by: Like Xu <[email protected]>
>> ---
>>   arch/x86/kvm/pmu.c           | 11 -----------
>>   arch/x86/kvm/pmu.h           | 11 +++++++++++
>>   arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
>>   3 files changed, 15 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index d1f8ca57d354..527a8bb85080 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
>>       kvm_pmu_refresh(vcpu);
>>   }
>> -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
>> -{
>> -    struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>> -
>> -    if (pmc_is_fixed(pmc))
>> -        return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
>> -            pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
>> -
>> -    return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
>> -}
>> -
>>   /* Release perf_events for vPMCs that have been unused for a full time
>> slice.  */
>>   void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
>>   {
>> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
>> index d7da2b9e0755..cd112e825d2c 100644
>> --- a/arch/x86/kvm/pmu.h
>> +++ b/arch/x86/kvm/pmu.h
>> @@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc
>> *pmc, u64 counter_value)
>>       return sample_period;
>>   }
>> +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
>> +{
>> +    struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>> +
>> +    if (pmc_is_fixed(pmc))
>> +        return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
>> +            pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
>> +
>> +    return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
>> +}
>> +
>>   void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
>>   void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
>>   void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index 7c857737b438..20f654a0c09b 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu,
>> struct msr_data *msr_info)
>>               if (!msr_info->host_initiated)
>>                   data = (s64)(s32)data;
>>               pmc->counter += data - pmc_read_counter(pmc);
>> -            if (pmc->perf_event)
>> -                perf_event_period(pmc->perf_event,
>> -                          get_sample_period(pmc, data));
>> +            if (pmc_speculative_in_use(pmc))
>> +                kvm_make_request(KVM_REQ_PMU, vcpu);
>>               return 0;
>>           } else if ((pmc = get_fixed_pmc(pmu, msr))) {
>>               pmc->counter += data - pmc_read_counter(pmc);
>> -            if (pmc->perf_event)
>> -                perf_event_period(pmc->perf_event,
>> -                          get_sample_period(pmc, data));
>> +            if (pmc_speculative_in_use(pmc))
>> +                kvm_make_request(KVM_REQ_PMU, vcpu);
>>               return 0;
>>           } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
>>               if (data == pmc->eventsel)
>>
>

2020-04-16 20:28:34

by Like Xu

[permalink] [raw]
Subject: Re: [PATCH v2] KVM: x86/pmu: Reduce counter period change overhead and delay the effective time

Ping.

On 2020/4/8 22:04, Like Xu wrote:
> Hi Paolo,
>
> Could you please take a look at this patch?
> If there is anything that needs to be improved, please let me know.
>
> Thanks,
> Like Xu
>
> On 2020/3/26 20:47, Like Xu wrote:
>> Could anyone help review this change?
>>
>> Thanks,
>> Like Xu
>>
>> On 2020/3/17 16:14, Like Xu wrote:
>>> The cost of perf_event_period() is unstable, and when the guest samples
>>> multiple events, the overhead increases dramatically (5378 ns on E5-2699).
>>>
>>> For a non-running counter, the effective time of the new period is when
>>> its corresponding enable bit is enabled. Calling perf_event_period()
>>> in advance is superfluous. For a running counter, it's safe to delay the
>>> effective time until the KVM_REQ_PMU event is handled. If there are
>>> multiple perf_event_period() calls before handling KVM_REQ_PMU,
>>> it helps to reduce the total cost.
>>>
>>> Signed-off-by: Like Xu <[email protected]>
>>> ---
>>>   arch/x86/kvm/pmu.c           | 11 -----------
>>>   arch/x86/kvm/pmu.h           | 11 +++++++++++
>>>   arch/x86/kvm/vmx/pmu_intel.c | 10 ++++------
>>>   3 files changed, 15 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>>> index d1f8ca57d354..527a8bb85080 100644
>>> --- a/arch/x86/kvm/pmu.c
>>> +++ b/arch/x86/kvm/pmu.c
>>> @@ -437,17 +437,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
>>>       kvm_pmu_refresh(vcpu);
>>>   }
>>> -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
>>> -{
>>> -    struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>>> -
>>> -    if (pmc_is_fixed(pmc))
>>> -        return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
>>> -            pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
>>> -
>>> -    return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
>>> -}
>>> -
>>>   /* Release perf_events for vPMCs that have been unused for a full
>>> time slice.  */
>>>   void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
>>>   {
>>> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
>>> index d7da2b9e0755..cd112e825d2c 100644
>>> --- a/arch/x86/kvm/pmu.h
>>> +++ b/arch/x86/kvm/pmu.h
>>> @@ -138,6 +138,17 @@ static inline u64 get_sample_period(struct kvm_pmc
>>> *pmc, u64 counter_value)
>>>       return sample_period;
>>>   }
>>> +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
>>> +{
>>> +    struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>>> +
>>> +    if (pmc_is_fixed(pmc))
>>> +        return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
>>> +            pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
>>> +
>>> +    return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
>>> +}
>>> +
>>>   void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
>>>   void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int
>>> fixed_idx);
>>>   void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
>>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>>> index 7c857737b438..20f654a0c09b 100644
>>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>>> @@ -263,15 +263,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu
>>> *vcpu, struct msr_data *msr_info)
>>>               if (!msr_info->host_initiated)
>>>                   data = (s64)(s32)data;
>>>               pmc->counter += data - pmc_read_counter(pmc);
>>> -            if (pmc->perf_event)
>>> -                perf_event_period(pmc->perf_event,
>>> -                          get_sample_period(pmc, data));
>>> +            if (pmc_speculative_in_use(pmc))
>>> +                kvm_make_request(KVM_REQ_PMU, vcpu);
>>>               return 0;
>>>           } else if ((pmc = get_fixed_pmc(pmu, msr))) {
>>>               pmc->counter += data - pmc_read_counter(pmc);
>>> -            if (pmc->perf_event)
>>> -                perf_event_period(pmc->perf_event,
>>> -                          get_sample_period(pmc, data));
>>> +            if (pmc_speculative_in_use(pmc))
>>> +                kvm_make_request(KVM_REQ_PMU, vcpu);
>>>               return 0;
>>>           } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
>>>               if (data == pmc->eventsel)
>>>
>>
>