2023-01-21 02:20:58

by Kechen Lu

Subject: [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power management flags to vCPU scope

Make the runtime-disabled mwait/hlt/pause/cstate exit flags vCPU-scoped
to allow finer-grained, per-vCPU control. The VM-scoped control is only
allowed before vCPUs are created, so preserving the existing behavior
is a simple matter of snapshotting the flags at vCPU creation.

Signed-off-by: Kechen Lu <[email protected]>
Suggested-by: Sean Christopherson <[email protected]>
Reviewed-by: Sean Christopherson <[email protected]>
---
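For reference (illustrative sketch only, not part of this patch): the
VM-scoped control is enabled from userspace via KVM_ENABLE_CAP on the
VM fd before any vCPU is created, roughly like so (KVM_CHECK_EXTENSION
validation and error handling elided; the function name is just for
illustration):

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Disable MWAIT and HLT exits VM-wide.  This must be issued on the
   * VM fd before KVM_CREATE_VCPU so that each vCPU snapshots the
   * flags at creation. */
  static int disable_exits(int vm_fd)
  {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_X86_DISABLE_EXITS,
                  .args[0] = KVM_X86_DISABLE_EXITS_MWAIT |
                             KVM_X86_DISABLE_EXITS_HLT,
          };

          return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }
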
arch/x86/include/asm/kvm_host.h | 5 +++++
arch/x86/kvm/cpuid.c | 4 ++--
arch/x86/kvm/lapic.c | 7 +++----
arch/x86/kvm/svm/nested.c | 4 ++--
arch/x86/kvm/svm/svm.c | 12 ++++++------
arch/x86/kvm/vmx/vmx.c | 16 ++++++++--------
arch/x86/kvm/x86.c | 6 +++++-
arch/x86/kvm/x86.h | 16 ++++++++--------
8 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6aaae18f1854..41b998234a04 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1009,6 +1009,11 @@ struct kvm_vcpu_arch {
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
#endif
+
+ bool mwait_in_guest;
+ bool hlt_in_guest;
+ bool pause_in_guest;
+ bool cstate_in_guest;
};

struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 596061c1610e..20e427dc608c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -283,8 +283,8 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
- if (kvm_hlt_in_guest(vcpu->kvm) && best &&
- (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
+ if (kvm_hlt_in_guest(vcpu) &&
+ best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4efdb4a4d72c..f0f49d0c6e69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -151,14 +151,13 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
- (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
+ (kvm_mwait_in_guest(vcpu) || kvm_hlt_in_guest(vcpu));
}

bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.set_hv_timer
- && !(kvm_mwait_in_guest(vcpu->kvm) ||
- kvm_can_post_timer_interrupt(vcpu));
+ return kvm_x86_ops.set_hv_timer &&
+ !(kvm_mwait_in_guest(vcpu) || kvm_can_post_timer_interrupt(vcpu));
}

static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index add65dd59756..ed26b6de3007 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -721,7 +721,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,

pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
- if (kvm_pause_in_guest(svm->vcpu.kvm)) {
+ if (kvm_pause_in_guest(&svm->vcpu)) {
/* use guest values since host doesn't intercept PAUSE */
vmcb02->control.pause_filter_count = pause_count12;
vmcb02->control.pause_filter_thresh = pause_thresh12;
@@ -1012,7 +1012,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;

- if (!kvm_pause_in_guest(vcpu->kvm)) {
+ if (!kvm_pause_in_guest(vcpu)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a194aa1a75a..dc7176605e01 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1014,7 +1014,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;

- if (kvm_pause_in_guest(vcpu->kvm))
+ if (kvm_pause_in_guest(vcpu))
return;

control->pause_filter_count = __grow_ple_window(old,
@@ -1035,7 +1035,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;

- if (kvm_pause_in_guest(vcpu->kvm))
+ if (kvm_pause_in_guest(vcpu))
return;

control->pause_filter_count =
@@ -1229,12 +1229,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm_set_intercept(svm, INTERCEPT_RDPRU);
svm_set_intercept(svm, INTERCEPT_RSM);

- if (!kvm_mwait_in_guest(vcpu->kvm)) {
+ if (!kvm_mwait_in_guest(vcpu)) {
svm_set_intercept(svm, INTERCEPT_MONITOR);
svm_set_intercept(svm, INTERCEPT_MWAIT);
}

- if (!kvm_hlt_in_guest(vcpu->kvm))
+ if (!kvm_hlt_in_guest(vcpu))
svm_set_intercept(svm, INTERCEPT_HLT);

control->iopm_base_pa = __sme_set(iopm_base);
@@ -1278,7 +1278,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
svm->nested.vmcb12_gpa = INVALID_GPA;
svm->nested.last_vmcb12_gpa = INVALID_GPA;

- if (!kvm_pause_in_guest(vcpu->kvm)) {
+ if (!kvm_pause_in_guest(vcpu)) {
control->pause_filter_count = pause_filter_count;
if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
@@ -4362,7 +4362,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (!kvm_pause_in_guest(vcpu->kvm))
+ if (!kvm_pause_in_guest(vcpu))
shrink_ple_window(vcpu);
}

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc9008dbed33..019a20029878 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1689,7 +1689,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
* then the instruction is already executing and RIP has already been
* advanced.
*/
- if (kvm_hlt_in_guest(vcpu->kvm) &&
+ if (kvm_hlt_in_guest(vcpu) &&
vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
@@ -4412,10 +4412,10 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_INVLPG_EXITING);
- if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+ if (kvm_mwait_in_guest(&vmx->vcpu))
exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING);
- if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+ if (kvm_hlt_in_guest(&vmx->vcpu))
exec_control &= ~CPU_BASED_HLT_EXITING;
return exec_control;
}
@@ -4515,7 +4515,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
- if (kvm_pause_in_guest(vmx->vcpu.kvm))
+ if (kvm_pause_in_guest(&vmx->vcpu))
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!kvm_vcpu_apicv_active(vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -4661,7 +4661,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
}

- if (!kvm_pause_in_guest(kvm)) {
+ if (!kvm_pause_in_guest(&vmx->vcpu)) {
vmcs_write32(PLE_GAP, ple_gap);
vmx->ple_window = ple_window;
vmx->ple_window_dirty = true;
@@ -5833,7 +5833,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
- if (!kvm_pause_in_guest(vcpu->kvm))
+ if (!kvm_pause_in_guest(vcpu))
grow_ple_window(vcpu);

/*
@@ -7379,7 +7379,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
- if (kvm_cstate_in_guest(vcpu->kvm)) {
+ if (kvm_cstate_in_guest(vcpu)) {
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
@@ -7935,7 +7935,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)

static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (!kvm_pause_in_guest(vcpu->kvm))
+ if (!kvm_pause_in_guest(vcpu))
shrink_ple_window(vcpu);
}

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8ae9c4f9f08..9a77b55142c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11634,6 +11634,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
#if IS_ENABLED(CONFIG_HYPERV)
vcpu->arch.hv_root_tdp = INVALID_PAGE;
#endif
+ vcpu->arch.mwait_in_guest = vcpu->kvm->arch.mwait_in_guest;
+ vcpu->arch.hlt_in_guest = vcpu->kvm->arch.hlt_in_guest;
+ vcpu->arch.pause_in_guest = vcpu->kvm->arch.pause_in_guest;
+ vcpu->arch.cstate_in_guest = vcpu->kvm->arch.cstate_in_guest;

r = static_call(kvm_x86_vcpu_create)(vcpu);
if (r)
@@ -12885,7 +12889,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
kvm_is_exception_pending(vcpu)))
return false;

- if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
+ if (kvm_hlt_in_guest(vcpu) && !kvm_can_deliver_async_pf(vcpu))
return false;

/*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..b8e49a9d353d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -351,24 +351,24 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})

-static inline bool kvm_mwait_in_guest(struct kvm *kvm)
+static inline bool kvm_mwait_in_guest(struct kvm_vcpu *vcpu)
{
- return kvm->arch.mwait_in_guest;
+ return vcpu->arch.mwait_in_guest;
}

-static inline bool kvm_hlt_in_guest(struct kvm *kvm)
+static inline bool kvm_hlt_in_guest(struct kvm_vcpu *vcpu)
{
- return kvm->arch.hlt_in_guest;
+ return vcpu->arch.hlt_in_guest;
}

-static inline bool kvm_pause_in_guest(struct kvm *kvm)
+static inline bool kvm_pause_in_guest(struct kvm_vcpu *vcpu)
{
- return kvm->arch.pause_in_guest;
+ return vcpu->arch.pause_in_guest;
}

-static inline bool kvm_cstate_in_guest(struct kvm *kvm)
+static inline bool kvm_cstate_in_guest(struct kvm_vcpu *vcpu)
{
- return kvm->arch.cstate_in_guest;
+ return vcpu->arch.cstate_in_guest;
}

static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
--
2.34.1


2023-02-02 14:57:16

by Zhi Wang

Subject: Re: [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power management flags to vCPU scope

On Sat, 21 Jan 2023 02:07:34 +0000
Kechen Lu <[email protected]> wrote:

> Make the runtime-disabled mwait/hlt/pause/cstate exit flags vCPU-scoped
> to allow finer-grained, per-vCPU control. The VM-scoped control is only
> allowed before vCPUs are created, so preserving the existing behavior
> is a simple matter of snapshotting the flags at vCPU creation.
>
> Signed-off-by: Kechen Lu <[email protected]>
> Suggested-by: Sean Christopherson <[email protected]>
> Reviewed-by: Sean Christopherson <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 5 +++++
> arch/x86/kvm/cpuid.c | 4 ++--
> arch/x86/kvm/lapic.c | 7 +++----
> arch/x86/kvm/svm/nested.c | 4 ++--
> arch/x86/kvm/svm/svm.c | 12 ++++++------
> arch/x86/kvm/vmx/vmx.c | 16 ++++++++--------
> arch/x86/kvm/x86.c | 6 +++++-
> arch/x86/kvm/x86.h | 16 ++++++++--------
> 8 files changed, 39 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6aaae18f1854..41b998234a04 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1009,6 +1009,11 @@ struct kvm_vcpu_arch {
> #if IS_ENABLED(CONFIG_HYPERV)
> hpa_t hv_root_tdp;
> #endif
> +
> + bool mwait_in_guest;
> + bool hlt_in_guest;
> + bool pause_in_guest;
> + bool cstate_in_guest;

Better to add some comments here. When xxx_in_guest sits alongside
KVM_X86_DISABLE_EXITS_XXX, it can be quite confusing. Or maybe align
the naming, e.g. bool disable_exit_mwait <-> KVM_X86_DISABLE_EXITS_MWAIT.
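
E.g. something along these lines (just a sketch) would make the
relationship explicit:

	/*
	 * Per-vCPU snapshots of the VM-wide KVM_X86_DISABLE_EXITS_*
	 * flags, taken at vCPU creation.  true == the exit is disabled,
	 * i.e. the instruction is *not* intercepted and executes in
	 * the guest.
	 */
	bool mwait_in_guest;
	bool hlt_in_guest;
	bool pause_in_guest;
	bool cstate_in_guest;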



2023-02-02 19:42:28

by Kechen Lu

Subject: RE: [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power management flags to vCPU scope

Hi Zhi,

> -----Original Message-----
> From: Zhi Wang <[email protected]>
> Sent: Thursday, February 2, 2023 6:57 AM
> To: Kechen Lu <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]
> Subject: Re: [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power
> management flags to vCPU scope
>
> On Sat, 21 Jan 2023 02:07:34 +0000
> Kechen Lu <[email protected]> wrote:
>
> > Make the runtime-disabled mwait/hlt/pause/cstate exit flags
> > vCPU-scoped to allow finer-grained, per-vCPU control. The VM-scoped
> > control is only allowed before vCPUs are created, so preserving the
> > existing behavior is a simple matter of snapshotting the flags at
> > vCPU creation.
> >
> > Signed-off-by: Kechen Lu <[email protected]>
> > Suggested-by: Sean Christopherson <[email protected]>
> > Reviewed-by: Sean Christopherson <[email protected]>
> > ---
> > arch/x86/include/asm/kvm_host.h | 5 +++++
> > arch/x86/kvm/cpuid.c | 4 ++--
> > arch/x86/kvm/lapic.c | 7 +++----
> > arch/x86/kvm/svm/nested.c | 4 ++--
> > arch/x86/kvm/svm/svm.c | 12 ++++++------
> > arch/x86/kvm/vmx/vmx.c | 16 ++++++++--------
> > arch/x86/kvm/x86.c | 6 +++++-
> > arch/x86/kvm/x86.h | 16 ++++++++--------
> > 8 files changed, 39 insertions(+), 31 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index 6aaae18f1854..41b998234a04
> > 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1009,6 +1009,11 @@ struct kvm_vcpu_arch { #if
> > IS_ENABLED(CONFIG_HYPERV)
> > hpa_t hv_root_tdp;
> > #endif
> > +
> > + bool mwait_in_guest;
> > + bool hlt_in_guest;
> > + bool pause_in_guest;
> > + bool cstate_in_guest;
>
> Better to add some comments here. When xxx_in_guest sits alongside
> KVM_X86_DISABLE_EXITS_XXX, it can be quite confusing. Or maybe align
> the naming, e.g. bool disable_exit_mwait <-> KVM_X86_DISABLE_EXITS_MWAIT.
>
This *_in_guest naming aligns with what struct kvm_arch already has and
has been around for a long time, so I'm not sure comments are still
needed here. I'd like to hear Sean's opinion on this.
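
For reference, the VM-scoped fields in struct kvm_arch
(arch/x86/include/asm/kvm_host.h) are already named this way, and they
are exactly what kvm_arch_vcpu_create() snapshots in this patch:

	bool mwait_in_guest;
	bool hlt_in_guest;
	bool pause_in_guest;
	bool cstate_in_guest;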

BR,
Kechen