Fold the guts of kvm_arch_sched_in() into kvm_arch_vcpu_load(), keying
off the recently added kvm_vcpu.scheduled_out as appropriate.
Note, there is a very slight functional change, as PLE shrink updates will
now happen after blasting WBINVD, but that is quite uninteresting as the
two operations do not interact in any way.
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 -
arch/x86/include/asm/kvm_host.h | 2 --
arch/x86/kvm/svm/svm.c | 11 +++--------
arch/x86/kvm/vmx/main.c | 2 --
arch/x86/kvm/vmx/vmx.c | 9 +++------
arch/x86/kvm/vmx/x86_ops.h | 1 -
arch/x86/kvm/x86.c | 17 ++++++++++-------
7 files changed, 16 insertions(+), 27 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 566d19b02483..5a8b74c2e6c4 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -103,7 +103,6 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aabf1648a56a..0df4d14db896 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1750,8 +1750,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
-
/*
* Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
* value indicates CPU dirty logging is unsupported or disabled.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3d0549ca246f..51a5eb31aee5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1548,6 +1548,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+ if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
+ shrink_ple_window(vcpu);
+
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
@@ -4572,12 +4575,6 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
vcpu->arch.at_instruction_boundary = true;
}
-static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
- if (!kvm_pause_in_guest(vcpu->kvm))
- shrink_ple_window(vcpu);
-}
-
static void svm_setup_mce(struct kvm_vcpu *vcpu)
{
/* [63:9] are reserved. */
@@ -5046,8 +5043,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_intercept = svm_check_intercept,
.handle_exit_irqoff = svm_handle_exit_irqoff,
- .sched_in = svm_sched_in,
-
.nested_ops = &svm_nested_ops,
.deliver_interrupt = svm_deliver_interrupt,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 7c546ad3e4c9..4fee9a8cc5a1 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -121,8 +121,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.check_intercept = vmx_check_intercept,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
- .sched_in = vmx_sched_in,
-
.cpu_dirty_log_size = PML_ENTITY_NUM,
.update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 07a4d6a3a43e..da2f95385a12 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1517,6 +1517,9 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
+ shrink_ple_window(vcpu);
+
vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
vmx_vcpu_pi_load(vcpu, cpu);
@@ -8171,12 +8174,6 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
}
#endif
-void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
- if (!kvm_pause_in_guest(vcpu->kvm))
- shrink_ple_window(vcpu);
-}
-
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 502704596c83..3cb0be94e779 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -112,7 +112,6 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_request_immediate_exit(struct kvm_vcpu *vcpu);
-void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu);
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
#ifdef CONFIG_X86_64
int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d750546ec934..e924d1c51e31 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5004,6 +5004,16 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ if (vcpu->scheduled_out) {
+ vcpu->arch.l1tf_flush_l1d = true;
+ if (pmu->version && unlikely(pmu->event_count)) {
+ pmu->need_cleanup = true;
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ }
+ }
+
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
if (static_call(kvm_x86_has_wbinvd_exit)())
@@ -12578,14 +12588,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- vcpu->arch.l1tf_flush_l1d = true;
- if (pmu->version && unlikely(pmu->event_count)) {
- pmu->need_cleanup = true;
- kvm_make_request(KVM_REQ_PMU, vcpu);
- }
- static_call(kvm_x86_sched_in)(vcpu, cpu);
}
void kvm_arch_free_vm(struct kvm *kvm)
--
2.45.0.215.g3402c0e53f-goog
On 22/05/2024 1:40 pm, Sean Christopherson wrote:
> Fold the guts of kvm_arch_sched_in() into kvm_arch_vcpu_load(), keying
> off the recently added kvm_vcpu.scheduled_out as appropriate.
>
> Note, there is a very slight functional change, as PLE shrink updates will
> now happen after blasting WBINVD, but that is quite uninteresting as the
> two operations do not interact in any way.
>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
Acked-by: Kai Huang <[email protected]>
[...]
> @@ -1548,6 +1548,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> struct vcpu_svm *svm = to_svm(vcpu);
> struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
>
> + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> + shrink_ple_window(vcpu);
> +
[...]
> @@ -1517,6 +1517,9 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> + shrink_ple_window(vcpu);
> +
Nit: Perhaps we need a kvm_x86_ops::shrink_ple_window()? :-)
On Fri, May 24, 2024, Kai Huang wrote:
> > @@ -1548,6 +1548,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > struct vcpu_svm *svm = to_svm(vcpu);
> > struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> > + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> > + shrink_ple_window(vcpu);
> > +
>
> [...]
>
> > @@ -1517,6 +1517,9 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > {
> > struct vcpu_vmx *vmx = to_vmx(vcpu);
> > + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> > + shrink_ple_window(vcpu);
> > +
>
> Nit: Perhaps we need a kvm_x86_ops::shrink_ple_window()? :-)
Heh, that duplicate code annoys me too. The problem is the "old" window value
comes from the VMCS/VMCB, so either we'd end up with multiple kvm_x86_ops, or
we'd only be able to consolidate the scheduled_out + kvm_pause_in_guest() code,
which isn't all that interesting.
Aha! Actually, VMX already open codes the functionality provided by VCPU_EXREG_*,
e.g. has vmx->ple_window_dirty. If we add VCPU_EXREG_PLE_WINDOW, then the info
get be made available to common x86 code without having to add new hooks. And
that would also allow moving the guts of handle_pause()/pause_interception() to
common code, i.e. will also allow deduplicating the "grow" side of things.
On Tue, 2024-05-28 at 12:16 -0700, Sean Christopherson wrote:
> On Fri, May 24, 2024, Kai Huang wrote:
> > > @@ -1548,6 +1548,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > > struct vcpu_svm *svm = to_svm(vcpu);
> > > struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> > > + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> > > + shrink_ple_window(vcpu);
> > > +
> >
> > [...]
> >
> > > @@ -1517,6 +1517,9 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > > {
> > > struct vcpu_vmx *vmx = to_vmx(vcpu);
> > > + if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
> > > + shrink_ple_window(vcpu);
> > > +
> >
> > Nit: Perhaps we need a kvm_x86_ops::shrink_ple_window()? :-)
>
> Heh, that duplicate code annoys me too. The problem is the "old" window value
> comes from the VMCS/VMCB, so either we'd end up with multiple kvm_x86_ops, or
> we'd only be able to consolidate the scheduled_out + kvm_pause_in_guest() code,
> which isn't all that interesting.
Agreed only consolidating scheduled_out + kvm_pause_in_guest() isn't quite
interesting.
>
> Aha! Actually, VMX already open codes the functionality provided by VCPU_EXREG_*,
> e.g. has vmx->ple_window_dirty. If we add VCPU_EXREG_PLE_WINDOW, then the info
> get be made available to common x86 code without having to add new hooks. And
> that would also allow moving the guts of handle_pause()/pause_interception() to
> common code, i.e. will also allow deduplicating the "grow" side of things.
Sounds feasible. I am not sure whether we should use
VCPU_EXREG_PLE_WINDOW, though. We can just have "ple_window" +
"ple_window_dirty" concept in the vcpu:
vcpu->ple_window;
vcpu->ple_window_dirty;
I.e., kinda make current VMX's version of {grow|shrink}_ple_window() as
common code.
I am not familiar with SVM, but it seems the relevant parts are:
control->pause_filter_count;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
And it seems they are directly related to programming the hardware, i.e.,
they got automatically loaded to hardware during VMRUN.
They need to be updated in the SVM specific code when @ple_window_dirty is
true in the relevant code path.
Anyway, even it is feasible and worth to do, we should do in a separate
patchset.
On Wed, May 29, 2024, Kai Huang wrote:
> I am not familiar with SVM, but it seems the relevant parts are:
>
> control->pause_filter_count;
> vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
>
> And it seems they are directly related to programming the hardware, i.e.,
> they got automatically loaded to hardware during VMRUN.
"control" is the control area of the VMCB, i.e. the above pause_filter_count is
equivalent to a VMCS field.
> They need to be updated in the SVM specific code when @ple_window_dirty is
> true in the relevant code path.
>
> Anyway, even it is feasible and worth to do, we should do in a separate
> patchset.
Ya.