The control state changes on every L2->L0 vmexit, and we will have to
serialize it in the nested state. So keep it up to date in svm->nested.ctl
and just copy them back to the nested VMCB in nested_svm_vmexit.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++-----------------
arch/x86/kvm/svm/svm.c | 5 +++-
arch/x86/kvm/svm/svm.h | 1 +
3 files changed, 38 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1e5f460b5540..921466eba556 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
}
+/*
+ * Synchronize fields that are written by the processor, so that
+ * they can be copied back into the nested_vmcb.
+ */
+void sync_nested_vmcb_control(struct vcpu_svm *svm)
+{
+ u32 mask;
+ svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
+ svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
+
+ /* Only a few fields of int_ctl are written by the processor. */
+ mask = V_IRQ_MASK | V_TPR_MASK;
+ if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
+ is_intercept(svm, SVM_EXIT_VINTR)) {
+ /*
+ * In order to request an interrupt window, L0 is usurping
+ * svm->vmcb->control.int_ctl and possibly setting V_IRQ
+ * even if it was clear in L1's VMCB. Restoring it would be
+ * wrong. However, in this case V_IRQ will remain true until
+ * interrupt_window_interception calls svm_clear_vintr and
+ * restores int_ctl. We can just leave it aside.
+ */
+ mask &= ~V_IRQ_MASK;
+ }
+ svm->nested.ctl.int_ctl &= ~mask;
+ svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
+}
+
static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
/* Load the nested guest state */
@@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
svm->nested.vmcb = 0;
+ WARN_ON_ONCE(svm->nested.nested_run_pending);
/* in case we halted in L2 */
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
nested_vmcb->save.cpl = vmcb->save.cpl;
- nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
- nested_vmcb->control.int_vector = vmcb->control.int_vector;
nested_vmcb->control.int_state = vmcb->control.int_state;
nested_vmcb->control.exit_code = vmcb->control.exit_code;
nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
@@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (svm->nrips_enabled)
nested_vmcb->control.next_rip = vmcb->control.next_rip;
- /*
- * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
- * to make sure that we do not lose injected events. So check event_inj
- * here and copy it to exit_int_info if it is valid.
- * Exit_int_info and event_inj can't be both valid because the case
- * below only happens on a VMRUN instruction intercept which has
- * no valid exit_int_info set.
- */
- if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
- struct vmcb_control_area *nc = &nested_vmcb->control;
-
- nc->exit_int_info = vmcb->control.event_inj;
- nc->exit_int_info_err = vmcb->control.event_inj_err;
- }
-
- nested_vmcb->control.tlb_ctl = 0;
- nested_vmcb->control.event_inj = 0;
- nested_vmcb->control.event_inj_err = 0;
+ nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl;
+ nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
+ nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
+ nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
nested_vmcb->control.pause_filter_count =
svm->vmcb->control.pause_filter_count;
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
/* Restore the original control entries */
copy_vmcb_control_area(&vmcb->control, &hsave->control);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4122ba86bac2..b710e62ace16 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3427,7 +3427,10 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0;
- svm->nested.nested_run_pending = 0;
+ if (is_guest_mode(&svm->vcpu)) {
+ sync_nested_vmcb_control(svm);
+ svm->nested.nested_run_pending = 0;
+ }
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index dd5418f20256..7e79f0af1204 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -394,6 +394,7 @@ int nested_svm_check_permissions(struct vcpu_svm *svm);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
+void sync_nested_vmcb_control(struct vcpu_svm *svm);
extern struct kvm_x86_nested_ops svm_nested_ops;
--
2.26.2
On 5/29/20 8:39 AM, Paolo Bonzini wrote:
> The control state changes on every L2->L0 vmexit, and we will have to
> serialize it in the nested state. So keep it up to date in svm->nested.ctl
> and just copy them back to the nested VMCB in nested_svm_vmexit.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 57 ++++++++++++++++++++++-----------------
> arch/x86/kvm/svm/svm.c | 5 +++-
> arch/x86/kvm/svm/svm.h | 1 +
> 3 files changed, 38 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 1e5f460b5540..921466eba556 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -234,6 +234,34 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
> svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
> }
>
> +/*
> + * Synchronize fields that are written by the processor, so that
> + * they can be copied back into the nested_vmcb.
> + */
> +void sync_nested_vmcb_control(struct vcpu_svm *svm)
> +{
> + u32 mask;
> + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
> + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
> +
> + /* Only a few fields of int_ctl are written by the processor. */
> + mask = V_IRQ_MASK | V_TPR_MASK;
> + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
> + is_intercept(svm, SVM_EXIT_VINTR)) {
> + /*
> + * In order to request an interrupt window, L0 is usurping
> + * svm->vmcb->control.int_ctl and possibly setting V_IRQ
> + * even if it was clear in L1's VMCB. Restoring it would be
> + * wrong. However, in this case V_IRQ will remain true until
> + * interrupt_window_interception calls svm_clear_vintr and
> + * restores int_ctl. We can just leave it aside.
> + */
> + mask &= ~V_IRQ_MASK;
> + }
> + svm->nested.ctl.int_ctl &= ~mask;
> + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
> +}
> +
> static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
> {
> /* Load the nested guest state */
> @@ -471,6 +499,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> /* Exit Guest-Mode */
> leave_guest_mode(&svm->vcpu);
> svm->nested.vmcb = 0;
> + WARN_ON_ONCE(svm->nested.nested_run_pending);
>
> /* in case we halted in L2 */
> svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
> @@ -497,8 +526,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
> nested_vmcb->save.cpl = vmcb->save.cpl;
>
> - nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
> - nested_vmcb->control.int_vector = vmcb->control.int_vector;
While it's not related to this patch, I am wondering if we need the
following line in enter_svm_guest_mode():
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
If int_vector is used only to trigger a #VMEXIT after we have decided to
inject a virtual interrupt, we probably don't the vector value from the
nested vmcb.
> nested_vmcb->control.int_state = vmcb->control.int_state;
> nested_vmcb->control.exit_code = vmcb->control.exit_code;
> nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
> @@ -510,34 +537,16 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> if (svm->nrips_enabled)
> nested_vmcb->control.next_rip = vmcb->control.next_rip;
>
> - /*
> - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
> - * to make sure that we do not lose injected events. So check event_inj
> - * here and copy it to exit_int_info if it is valid.
> - * Exit_int_info and event_inj can't be both valid because the case
> - * below only happens on a VMRUN instruction intercept which has
> - * no valid exit_int_info set.
> - */
> - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
> - struct vmcb_control_area *nc = &nested_vmcb->control;
> -
> - nc->exit_int_info = vmcb->control.event_inj;
> - nc->exit_int_info_err = vmcb->control.event_inj_err;
> - }
> -
> - nested_vmcb->control.tlb_ctl = 0;
> - nested_vmcb->control.event_inj = 0;
> - nested_vmcb->control.event_inj_err = 0;
> + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl;
> + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
> + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
> + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
>
> nested_vmcb->control.pause_filter_count =
> svm->vmcb->control.pause_filter_count;
> nested_vmcb->control.pause_filter_thresh =
> svm->vmcb->control.pause_filter_thresh;
>
> - /* We always set V_INTR_MASKING and remember the old value in hflags */
> - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
> - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
> -
> /* Restore the original control entries */
> copy_vmcb_control_area(&vmcb->control, &hsave->control);
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 4122ba86bac2..b710e62ace16 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3427,7 +3427,10 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
> sync_cr8_to_lapic(vcpu);
>
> svm->next_rip = 0;
> - svm->nested.nested_run_pending = 0;
> + if (is_guest_mode(&svm->vcpu)) {
> + sync_nested_vmcb_control(svm);
> + svm->nested.nested_run_pending = 0;
I don't see any existence of nested_run_pending for nSVM either in Linus
tree or KVM tree. Is there a patch that has this added but not pushed yet ?
> + }
>
> svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
>
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index dd5418f20256..7e79f0af1204 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -394,6 +394,7 @@ int nested_svm_check_permissions(struct vcpu_svm *svm);
> int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> bool has_error_code, u32 error_code);
> int nested_svm_exit_special(struct vcpu_svm *svm);
> +void sync_nested_vmcb_control(struct vcpu_svm *svm);
>
> extern struct kvm_x86_nested_ops svm_nested_ops;
>
On 30/05/20 04:06, Krish Sadhukhan wrote:
>>
>> - nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
>> - nested_vmcb->control.int_vector = vmcb->control.int_vector;
>
>
> While it's not related to this patch, I am wondering if we need the
> following line in enter_svm_guest_mode():
>
> svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
>
> If int_vector is used only to trigger a #VMEXIT after we have decided to
> inject a virtual interrupt, we probably don't the vector value from the
> nested vmcb.
KVM does not use int_vector, but other hypervisor sometimes trigger
virtual interrupts using int_ctl+int_vector instead of eventinj.
(Unlike KVM they don't intercept EXIT_VINTR). Hyper-V operates like that.
Paolo