Here are some refactorings to prepare for an SVM implementation of
KVM_SET_NESTED_STATE. Eliminating exit_required and moving exceptions
to svm_check_nested_events is a prerequisite for that; however:
- I might work on that soon, because it's needed to handle RSM when
  the L1 hypervisor wants to intercept it via #UD rather than via the
  specific RSM intercept
- this should be enough for a quick prototype, which I need in order to
  debug a particularly crazy bug and figure out its reproducibility.
So, I am getting these patches out of my todo list for now.
Thanks,
Paolo
Paolo Bonzini (7):
KVM: SVM: move map argument out of enter_svm_guest_mode
KVM: SVM: extract load_nested_vmcb_control
KVM: SVM: extract preparation of VMCB for nested run
KVM: SVM: save all control fields in svm->nested
KVM: nSVM: remove HF_VINTR_MASK
KVM: nSVM: do not reload pause filter fields from VMCB
KVM: SVM: introduce data structures for nested virt state
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/include/uapi/asm/kvm.h | 26 +++++++-
arch/x86/kvm/svm/nested.c | 115 +++++++++++++++++---------------
arch/x86/kvm/svm/svm.c | 11 ++-
arch/x86/kvm/svm/svm.h | 28 +++++---
5 files changed, 116 insertions(+), 65 deletions(-)
--
2.18.2
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/uapi/asm/kvm.h | 26 +++++++++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..cdca0fd1b107 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -385,7 +385,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
-#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM 1
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
@@ -395,8 +395,14 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
+#define KVM_STATE_NESTED_SVM_VMENTRY_IF 0x00000001
+#define KVM_STATE_NESTED_SVM_GIF 0x00000002
+
#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000
+
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -411,6 +417,17 @@ struct kvm_vmx_nested_state_hdr {
} smm;
};
+struct kvm_svm_nested_state_data {
+ /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */
+ __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+ __u64 vmcb_pa;
+
+ __u16 interrupt_flags;
+};
+
/* for KVM_CAP_NESTED_STATE */
struct kvm_nested_state {
__u16 flags;
@@ -419,6 +441,7 @@ struct kvm_nested_state {
union {
struct kvm_vmx_nested_state_hdr vmx;
+ struct kvm_svm_nested_state_hdr svm;
/* Pad the header to 128 bytes. */
__u8 pad[120];
@@ -431,6 +454,7 @@ struct kvm_nested_state {
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
+ struct kvm_svm_nested_state_data svm[0];
} data;
};
--
2.18.2
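For illustration, here is a sketch of how userspace could eventually feed
these structures to KVM_SET_NESTED_STATE once the SVM side is implemented.
Only the ioctl, the constants and the uapi structures above are real; the
wrapper struct, the helper name and the choice of interrupt_flags are
assumptions, and error handling is omitted.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: restore SVM nested state into a vCPU file descriptor. */
static int restore_svm_nested_state(int vcpu_fd, __u64 vmcb12_gpa,
                                    const __u8 *vmcb12, int run_pending)
{
        struct {
                struct kvm_nested_state ns;
                struct kvm_svm_nested_state_data data;
        } state;

        memset(&state, 0, sizeof(state));
        state.ns.format = KVM_STATE_NESTED_FORMAT_SVM;
        state.ns.size = sizeof(state);          /* header + data */
        state.ns.flags = KVM_STATE_NESTED_GUEST_MODE;
        state.ns.hdr.svm.vmcb_pa = vmcb12_gpa;
        /* Assumption about the semantics: GIF is set while L2 runs. */
        state.ns.hdr.svm.interrupt_flags = KVM_STATE_NESTED_SVM_GIF;
        if (run_pending) {
                state.ns.flags |= KVM_STATE_NESTED_RUN_PENDING;
                /* Save area is only consumed when a VMRUN is pending. */
                memcpy(state.data.vmcb12, vmcb12,
                       KVM_STATE_NESTED_SVM_VMCB_SIZE);
        }
        return ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &state);
}

The vmcb12 save area travels in data[] rather than in the header because,
as the comment in kvm_svm_nested_state_data says, it is only used when
KVM_STATE_NESTED_RUN_PENDING is set.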
These fields do not change from VMRUN to VMEXIT; there is no need
to reload them on nested VMEXIT.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e3338aa8b0a3..ba7dedbcc985 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -544,11 +544,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.event_inj = 0;
nested_vmcb->control.event_inj_err = 0;
- nested_vmcb->control.pause_filter_count =
- svm->vmcb->control.pause_filter_count;
- nested_vmcb->control.pause_filter_thresh =
- svm->vmcb->control.pause_filter_thresh;
-
/* We always set V_INTR_MASKING and remember the old value in svm->nested */
if (!(svm->nested.int_ctl & V_INTR_MASKING_MASK))
nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
--
2.18.2
In preparation for nested SVM save/restore, store all data that matters
from the VMCB control area into svm->nested. It will then become part
of the nested SVM state that is saved by KVM_GET_NESTED_STATE and
restored by KVM_SET_NESTED_STATE, just like the cached vmcs12 for nVMX.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 37 ++++++++++++++++++++++---------------
arch/x86/kvm/svm/svm.c | 6 ++++++
arch/x86/kvm/svm/svm.h | 22 ++++++++++++++++------
3 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 7807f6cc01fc..54be341322d8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -237,7 +237,16 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_v
svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
- /* cache intercepts */
+ svm->nested.nested_ctl = nested_vmcb->control.nested_ctl;
+ svm->nested.int_ctl = nested_vmcb->control.int_ctl;
+ svm->nested.virt_ext = nested_vmcb->control.virt_ext;
+ svm->nested.int_vector = nested_vmcb->control.int_vector;
+ svm->nested.int_state = nested_vmcb->control.int_state;
+ svm->nested.event_inj = nested_vmcb->control.event_inj;
+ svm->nested.event_inj_err = nested_vmcb->control.event_inj_err;
+ svm->nested.pause_filter_count = nested_vmcb->control.pause_filter_count;
+ svm->nested.pause_filter_thresh = nested_vmcb->control.pause_filter_thresh;
+
svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
@@ -279,33 +288,31 @@ static void load_nested_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
}
-static void nested_prepare_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
{
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
+ if (svm->nested.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
nested_svm_init_mmu_context(&svm->vcpu);
/* Guest paging mode is active - reset mmu */
kvm_mmu_reset_context(&svm->vcpu);
svm_flush_tlb(&svm->vcpu);
- if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+ if (svm->nested.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
- svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
- svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
- svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+ svm->vmcb->control.int_ctl = svm->nested.int_ctl | V_INTR_MASKING_MASK;
+ svm->vmcb->control.virt_ext = svm->nested.virt_ext;
+ svm->vmcb->control.int_vector = svm->nested.int_vector;
+ svm->vmcb->control.int_state = svm->nested.int_state;
+ svm->vmcb->control.event_inj = svm->nested.event_inj;
+ svm->vmcb->control.event_inj_err = svm->nested.event_inj_err;
- svm->vmcb->control.pause_filter_count =
- nested_vmcb->control.pause_filter_count;
- svm->vmcb->control.pause_filter_thresh =
- nested_vmcb->control.pause_filter_thresh;
+ svm->vmcb->control.pause_filter_count = svm->nested.pause_filter_count;
+ svm->vmcb->control.pause_filter_thresh = svm->nested.pause_filter_thresh;
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -329,7 +336,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->nested.vmcb = vmcb_gpa;
load_nested_vmcb_control(svm, nested_vmcb);
load_nested_vmcb_save(svm, nested_vmcb);
- nested_prepare_vmcb_control(svm, nested_vmcb);
+ nested_prepare_vmcb_control(svm);
/*
* If L1 had a pending IRQ/NMI before executing VMRUN,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dc12a03d16f6..2b63d15328ba 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3343,6 +3343,12 @@ static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->nested.exit_required))
return EXIT_FASTPATH_NONE;
+ if (unlikely(svm->nested.nested_run_pending)) {
+ /* After this vmentry, these fields will be used up. */
+ svm->nested.event_inj = 0;
+ svm->nested.event_inj_err = 0;
+ }
+
/*
* Disable singlestep if we're injecting an interrupt/exception.
* We don't want our modified rflags to be pushed on the stack where
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 730eb7242930..5cabed9c733a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -90,10 +90,6 @@ struct nested_state {
/* These are the merged vectors */
u32 *msrpm;
- /* gpa pointers to the real vectors */
- u64 vmcb_msrpm;
- u64 vmcb_iopm;
-
/* A VMEXIT is required but not yet emulated */
bool exit_required;
@@ -101,13 +97,27 @@ struct nested_state {
* we cannot inject a nested vmexit yet. */
bool nested_run_pending;
- /* cache for intercepts of the guest */
+ /* cache for control fields of the guest */
+ u64 vmcb_msrpm;
+ u64 vmcb_iopm;
+
u32 intercept_cr;
u32 intercept_dr;
u32 intercept_exceptions;
u64 intercept;
- /* Nested Paging related state */
+ u32 event_inj;
+ u32 event_inj_err;
+
+ u64 virt_ext;
+ u32 int_ctl;
+ u32 int_vector;
+ u32 int_state;
+
+ u16 pause_filter_thresh;
+ u16 pause_filter_count;
+
+ u64 nested_ctl;
u64 nested_cr3;
};
--
2.18.2
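As a side effect, once all of these fields live in svm->nested, a future
KVM_GET_NESTED_STATE could rebuild a vmcb12 control area purely from the
cache, without touching guest memory. A minimal sketch (the helper below
is hypothetical and not part of this series):

static void nested_svm_cache_to_control(struct vcpu_svm *svm,
                                        struct vmcb_control_area *c)
{
        /* Everything below is cached by load_nested_vmcb_control(). */
        c->intercept_cr          = svm->nested.intercept_cr;
        c->intercept_dr          = svm->nested.intercept_dr;
        c->intercept_exceptions  = svm->nested.intercept_exceptions;
        c->intercept             = svm->nested.intercept;
        /* msrpm/iopm were cached with the low 12 bits already masked. */
        c->msrpm_base_pa         = svm->nested.vmcb_msrpm;
        c->iopm_base_pa          = svm->nested.vmcb_iopm;
        c->nested_ctl            = svm->nested.nested_ctl;
        c->nested_cr3            = svm->nested.nested_cr3;
        c->int_ctl               = svm->nested.int_ctl;
        c->int_vector            = svm->nested.int_vector;
        c->int_state             = svm->nested.int_state;
        c->event_inj             = svm->nested.event_inj;
        c->event_inj_err         = svm->nested.event_inj_err;
        c->virt_ext              = svm->nested.virt_ext;
        c->pause_filter_count    = svm->nested.pause_filter_count;
        c->pause_filter_thresh   = svm->nested.pause_filter_thresh;
}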
Unmapping the nested VMCB in enter_svm_guest_mode is a bit of a wart,
since the map is not used elsewhere in the function. There are just
two callers, so move the unmapping there.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 14 ++++++--------
arch/x86/kvm/svm/svm.c | 3 ++-
arch/x86/kvm/svm/svm.h | 2 +-
3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a89a166d1cb8..22f75f66084f 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -226,7 +226,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map)
+ struct vmcb *nested_vmcb)
{
bool evaluate_pending_interrupts =
is_intercept(svm, INTERCEPT_VINTR) ||
@@ -305,8 +305,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->control.pause_filter_thresh =
nested_vmcb->control.pause_filter_thresh;
- kvm_vcpu_unmap(&svm->vcpu, map, true);
-
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -369,10 +367,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
-
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
- return ret;
+ goto out;
}
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -415,7 +410,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
svm->nested.nested_run_pending = 1;
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -426,6 +421,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_svm_vmexit(svm);
}
+out:
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
return ret;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4e9cd2a73ad0..dc12a03d16f6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3843,7 +3843,8 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
nested_vmcb = map.hva;
- enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5cc559ab862d..730eb7242930 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -397,7 +397,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map);
+ struct vmcb *nested_vmcb);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
--
2.18.2
Split out filling svm->vmcb.save and svm->vmcb.control before VMRUN.
Only the latter will be useful when restoring nested SVM state.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 42 +++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e79acc852000..7807f6cc01fc 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -246,19 +246,8 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_v
svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
}
-void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb)
+static void load_nested_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
- bool evaluate_pending_interrupts =
- is_intercept(svm, INTERCEPT_VINTR) ||
- is_intercept(svm, INTERCEPT_IRET);
-
- svm->nested.vmcb = vmcb_gpa;
- load_nested_vmcb_control(svm, nested_vmcb);
-
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
- nested_svm_init_mmu_context(&svm->vcpu);
-
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
svm->vmcb->save.cs = nested_vmcb->save.cs;
@@ -276,9 +265,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
} else
(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
- /* Guest paging mode is active - reset mmu */
- kvm_mmu_reset_context(&svm->vcpu);
-
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
@@ -291,6 +277,15 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+}
+
+static void nested_prepare_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
+ nested_svm_init_mmu_context(&svm->vcpu);
+
+ /* Guest paging mode is active - reset mmu */
+ kvm_mmu_reset_context(&svm->vcpu);
svm_flush_tlb(&svm->vcpu);
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
@@ -321,6 +316,21 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
*/
recalc_intercepts(svm);
+ mark_all_dirty(svm->vmcb);
+}
+
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb)
+{
+ bool evaluate_pending_interrupts =
+ is_intercept(svm, INTERCEPT_VINTR) ||
+ is_intercept(svm, INTERCEPT_IRET);
+
+ svm->nested.vmcb = vmcb_gpa;
+ load_nested_vmcb_control(svm, nested_vmcb);
+ load_nested_vmcb_save(svm, nested_vmcb);
+ nested_prepare_vmcb_control(svm, nested_vmcb);
+
/*
* If L1 had a pending IRQ/NMI before executing VMRUN,
* which wasn't delivered because it was disallowed (e.g.
@@ -336,8 +346,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
enable_gif(svm);
if (unlikely(evaluate_pending_interrupts))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
- mark_all_dirty(svm->vmcb);
}
int nested_svm_vmrun(struct vcpu_svm *svm)
--
2.18.2
When restoring SVM nested state, the control state will already be stored
in svm->nested by KVM_SET_NESTED_STATE, so we will not need to fish it out
of L1's VMCB. Pull everything into a separate function so that it is
documented which fields are needed.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm/nested.c | 45 ++++++++++++++++++++++-----------------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 22f75f66084f..e79acc852000 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -225,6 +225,27 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
return true;
}
+static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+{
+ if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+
+ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
+ svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
+
+ /* cache intercepts */
+ svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
+ svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
+ svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
+ svm->nested.intercept = nested_vmcb->control.intercept;
+
+ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+}
+
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb)
{
@@ -232,15 +253,11 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
is_intercept(svm, INTERCEPT_VINTR) ||
is_intercept(svm, INTERCEPT_IRET);
- if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
- svm->vcpu.arch.hflags |= HF_HIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+ svm->nested.vmcb = vmcb_gpa;
+ load_nested_vmcb_control(svm, nested_vmcb);
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
- svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
nested_svm_init_mmu_context(&svm->vcpu);
- }
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
@@ -275,25 +292,15 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
- svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
- svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
-
- /* cache intercepts */
- svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
- svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
- svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
- svm->nested.intercept = nested_vmcb->control.intercept;
-
svm_flush_tlb(&svm->vcpu);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
@@ -314,8 +321,6 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
*/
recalc_intercepts(svm);
- svm->nested.vmcb = vmcb_gpa;
-
/*
* If L1 had a pending IRQ/NMI before executing VMRUN,
* which wasn't delivered because it was disallowed (e.g.
--
2.18.2
Now that the int_ctl field is stored in svm->nested.int_ctl, we can
use it instead of the HF_VINTR_MASK bit in vcpu->arch.hflags.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/kvm/svm/nested.c | 12 ++++--------
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/svm/svm.h | 4 +++-
4 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fd78bd44b2d6..6c8417d01bf9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1594,7 +1594,6 @@ enum {
#define HF_GIF_MASK (1 << 0)
#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 54be341322d8..e3338aa8b0a3 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -116,13 +116,13 @@ void recalc_intercepts(struct vcpu_svm *svm)
c->intercept_exceptions = h->intercept_exceptions;
c->intercept = h->intercept;
- if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+ if (svm->nested.int_ctl & V_INTR_MASKING_MASK) {
/* We only want the cr8 intercept bits of L1 */
c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
/*
- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
+ * Once running L2 with V_INTR_MASKING set, EFLAGS.IF does not
* affect any interrupt we may want to inject; therefore,
* interrupt window vmexits are irrelevant to L0.
*/
@@ -297,10 +297,6 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
kvm_mmu_reset_context(&svm->vcpu);
svm_flush_tlb(&svm->vcpu);
- if (svm->nested.int_ctl & V_INTR_MASKING_MASK)
- svm->vcpu.arch.hflags |= HF_VINTR_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
@@ -553,8 +549,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ /* We always set V_INTR_MASKING and remember the old value in svm->nested */
+ if (!(svm->nested.int_ctl & V_INTR_MASKING_MASK))
nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
/* Restore the original control entries */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2b63d15328ba..95d16aa76ebb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3096,7 +3096,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu)) {
/* As long as interrupts are being delivered... */
- if ((svm->vcpu.arch.hflags & HF_VINTR_MASK)
+ if ((svm->nested.int_ctl & V_INTR_MASKING_MASK)
? !(svm->vcpu.arch.hflags & HF_HIF_MASK)
: !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
return true;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5cabed9c733a..39706aa845f2 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -388,7 +388,9 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
{
- return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return is_guest_mode(vcpu) && (svm->nested.int_ctl & V_INTR_MASKING_MASK);
}
static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
--
2.18.2
On 5/15/20 10:41 AM, Paolo Bonzini wrote:
> When restoring SVM nested state, the control state will already be stored
> in svm->nested by KVM_SET_NESTED_STATE, so we will not need to fish it out
> of L1's VMCB. Pull everything into a separate function so that it is
> documented which fields are needed.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 45 ++++++++++++++++++++++-----------------
> 1 file changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 22f75f66084f..e79acc852000 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -225,6 +225,27 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
> return true;
> }
>
> +static void load_nested_vmcb_control(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
This function only separates out a subset of the controls. If its purpose
is to separate only the controls that are related to migration, should it
be called something like load_nested_state_vmcb_control?
> +{
> + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
> + svm->vcpu.arch.hflags |= HF_HIF_MASK;
> + else
> + svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
> +
> + svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
> +
> + svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
> + svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
> +
> + /* cache intercepts */
> + svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
> + svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
> + svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
> + svm->nested.intercept = nested_vmcb->control.intercept;
> +
> + svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
> +}
On 15/05/20 19:41, Paolo Bonzini wrote:
> Split out filling svm->vmcb.save and svm->vmcb.control before VMRUN.
> Only the latter will be useful when restoring nested SVM state.
More precisely: when restoring nested SVM state, svm->vmcb.save will
only have to be filled if the nested_run_pending flag is true.
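Roughly like this (just a sketch: the function below and its arguments are
made up, and it uses the single-argument nested_prepare_vmcb_control from
later in the series):

static void nested_svm_restore_sketch(struct vcpu_svm *svm, u64 vmcb12_gpa,
                                      struct vmcb *vmcb12, bool run_pending)
{
        /* Controls are assumed to be in svm->nested already, copied from
         * the userspace nested state rather than from guest memory. */
        svm->nested.vmcb = vmcb12_gpa;

        /* The save area only matters while the nested VMRUN is pending. */
        if (run_pending)
                load_nested_vmcb_save(svm, vmcb12);

        nested_prepare_vmcb_control(svm);
}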
Paolo
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 42 +++++++++++++++++++++++----------------
> 1 file changed, 25 insertions(+), 17 deletions(-)
On 16/05/20 01:09, Krish Sadhukhan wrote:
>>
>> }
>> +static void load_nested_vmcb_control(struct vcpu_svm *svm, struct
>> vmcb *nested_vmcb)
>
>
> This function only separates out a subset of the controls. If its purpose
> is to separate only the controls that are related to migration, should it
> be called something like load_nested_state_vmcb_control?
This function loads fields into svm->nested; the other controls are loaded
into svm->vmcb. They will be moved to this function later in the series,
when we add fields to svm->nested for all the controls that have to be
serialized by KVM_GET/SET_NESTED_STATE.
Paolo
On 5/15/20 10:41 AM, Paolo Bonzini wrote:
> Here are some refactorings to prepare for an SVM implementation of
> KVM_SET_NESTED_STATE. Eliminating exit_required and moving exceptions
> to svm_check_nested_events is a prerequisite for that; however:
>
> - I might work on that soon, because it's needed to handle RSM when
>   the L1 hypervisor wants to intercept it via #UD rather than via the
>   specific RSM intercept
>
> - this should be enough for a quick prototype, which I need in order to
>   debug a particularly crazy bug and figure out its reproducibility.
>
> So, I am getting these patches out of my todo list for now.
>
> Thanks,
>
> Paolo
>
> Paolo Bonzini (7):
> KVM: SVM: move map argument out of enter_svm_guest_mode
> KVM: SVM: extract load_nested_vmcb_control
> KVM: SVM: extract preparation of VMCB for nested run
> KVM: SVM: save all control fields in svm->nested
> KVM: nSVM: remove HF_VINTR_MASK
> KVM: nSVM: do not reload pause filter fields from VMCB
> KVM: SVM: introduce data structures for nested virt state
>
> arch/x86/include/asm/kvm_host.h | 1 -
> arch/x86/include/uapi/asm/kvm.h | 26 +++++++-
> arch/x86/kvm/svm/nested.c | 115 +++++++++++++++++---------------
> arch/x86/kvm/svm/svm.c | 11 ++-
> arch/x86/kvm/svm/svm.h | 28 +++++---
> 5 files changed, 116 insertions(+), 65 deletions(-)
>
Reviewed-by: Krish Sadhukhan <[email protected]>
On 18/05/20 22:07, Krish Sadhukhan wrote:
>>
>>
>> Paolo Bonzini (7):
>> KVM: SVM: move map argument out of enter_svm_guest_mode
>> KVM: SVM: extract load_nested_vmcb_control
>> KVM: SVM: extract preparation of VMCB for nested run
>> KVM: SVM: save all control fields in svm->nested
>> KVM: nSVM: remove HF_VINTR_MASK
>> KVM: nSVM: do not reload pause filter fields from VMCB
>> KVM: SVM: introduce data structures for nested virt state
>>
>> arch/x86/include/asm/kvm_host.h | 1 -
>> arch/x86/include/uapi/asm/kvm.h | 26 +++++++-
>> arch/x86/kvm/svm/nested.c | 115 +++++++++++++++++---------------
>> arch/x86/kvm/svm/svm.c | 11 ++-
>> arch/x86/kvm/svm/svm.h | 28 +++++---
>> 5 files changed, 116 insertions(+), 65 deletions(-)
>>
> Reviewed-by: Krish Sadhukhan <[email protected]>
Thanks! Note that (while these patches should be okay) they are not
really ready to be committed because more cleanups and refactorings will
become evident through the rest of the work.
Paolo