Subject: [PATCH v3 0/8] KVM: nSVM: avoid TOC/TOU race when checking vmcb12

Currently there is a TOC/TOU (time-of-check to time-of-use) race
between the check of vmcb12's efer, cr0 and cr4 registers and the later
use of their values in svm_set_*, because the guest can modify the
values in the meanwhile.

To solve this issue, this series introduces and uses the
svm->nested.save structure to cache the current values of efer, cr0
and cr4, which enter_svm_guest_mode then uses to set the
vcpu->arch.* state.
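
For illustration, the racy pattern looks roughly like this in
nested_svm_vmrun (simplified sketch, not the literal code):

	/* time of check: validate fields read from the guest-owned vmcb12 */
	if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save))
		goto err;	/* SVM_EXIT_ERR in the real code */

	/*
	 * vmcb12 maps guest memory, so another vCPU of the guest can
	 * rewrite save.efer/cr0/cr4 at this point...
	 */

	/* time of use: may consume a value different from the one checked */
	svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);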

Similarly, svm->nested.ctl only ever uses a subset of the fields of
vmcb_control_area, so caching the full struct means carrying around
fields that are never initialized.

Patches 1, 3 and 8 take care of renaming and refactoring code.
Patches 2 and 6 introduce vmcb_save_area_cached and
vmcb_ctrl_area_cached, respectively.
Patches 4 and 5 use vmcb_save_area_cached to avoid the TOC/TOU race,
and patch 7 does the same with vmcb_ctrl_area_cached.
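
For reference, the new cached control struct is expected to mirror only
the fields that nested_copy_vmcb_control_to_cache() (patch 3) actually
copies, roughly (field types assumed from the vmcb definitions):

struct vmcb_ctrl_area_cached {
	u32 intercepts[MAX_INTERCEPT];
	u64 iopm_base_pa;
	u64 msrpm_base_pa;
	u64 tsc_offset;
	u32 asid;
	u8 tlb_ctl;
	/*
	 * ... plus int_ctl, int_vector, int_state, the exit_code and
	 * exit_info fields, nested_ctl, the event_inj fields, nested_cr3,
	 * virt_ext and the pause_filter fields.
	 */
};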

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>

---
v3:
* merge this series with "KVM: nSVM: use vmcb_ctrl_area_cached instead
of vmcb_control_area in nested state"
* rename "nested_load_save_from_vmcb12" in
"nested_copy_vmcb_save_to_cache"
* rename "nested_load_control_from_vmcb12" in
"nested_copy_vmcb_control_to_cache"
* change check functions (nested_vmcb_valid_sregs and nested_vmcb_valid_sregs)
to accept only the vcpu parameter, since we only check
nested state now
* rename "vmcb_is_intercept_cached" in "vmcb12_is_intercept"
and duplicate the implementation instead of calling vmcb_is_intercept

v2:
* svm->nested.save is a separate struct vmcb_save_area_cached,
and not vmcb_save_area.
* also update vmcb02->cr3 with svm->nested.save.cr3

RFC:
* use svm->nested.save instead of local variables.
* no longer depends on "KVM: nSVM: remove useless kvm_clear_*_queue"
* simplified the patches: just use the struct, without moving the
  checks closer to the TOU

Emanuele Giuseppe Esposito (8):
KVM: nSVM: move nested_vmcb_check_cr3_cr4 logic into
  nested_vmcb_valid_sregs
nSVM: introduce svm->nested.save to cache save area fields
nSVM: rename nested_load_control_from_vmcb12 to
nested_copy_vmcb_control_to_cache
nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()
nSVM: use svm->nested.save to load vmcb12 registers and avoid TOC/TOU
races
nSVM: introduce struct vmcb_ctrl_area_cached
nSVM: use vmcb_ctrl_area_cached instead of vmcb_control_area in struct
svm_nested_state
nSVM: remove unnecessary parameter in nested_vmcb_check_controls

arch/x86/kvm/svm/nested.c | 235 ++++++++++++++++++++++----------------
arch/x86/kvm/svm/svm.c | 7 +-
arch/x86/kvm/svm/svm.h | 57 ++++++++-
3 files changed, 192 insertions(+), 107 deletions(-)

--
2.27.0


Subject: [PATCH v3 1/8] KVM: nSVM: move nested_vmcb_check_cr3_cr4 logic into nested_vmcb_valid_sregs

Inline nested_vmcb_check_cr3_cr4 into nested_vmcb_valid_sregs, as it
has no other callers. Doing so simplifies the next patches.

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
Reviewed-by: Maxim Levitsky <[email protected]>
---
arch/x86/kvm/svm/nested.c | 35 +++++++++++++----------------------
1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e5515477c30a..d2fe65e2a7a4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -260,27 +260,6 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
return true;
}

-static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
- struct vmcb_save_area *save)
-{
- /*
- * These checks are also performed by KVM_SET_SREGS,
- * except that EFER.LMA is not checked by SVM against
- * CR0.PG && EFER.LME.
- */
- if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
- if (CC(!(save->cr4 & X86_CR4_PAE)) ||
- CC(!(save->cr0 & X86_CR0_PE)) ||
- CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
- return false;
- }
-
- if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
- return false;
-
- return true;
-}
-
/* Common checks that apply to both L1 and L2 state. */
static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
struct vmcb_save_area *save)
@@ -302,7 +281,19 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
return false;

- if (!nested_vmcb_check_cr3_cr4(vcpu, save))
+ /*
+ * These checks are also performed by KVM_SET_SREGS,
+ * except that EFER.LMA is not checked by SVM against
+ * CR0.PG && EFER.LME.
+ */
+ if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
+ if (CC(!(save->cr4 & X86_CR4_PAE)) ||
+ CC(!(save->cr0 & X86_CR0_PE)) ||
+ CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+ return false;
+ }
+
+ if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
return false;

if (CC(!kvm_valid_efer(vcpu, save->efer)))
--
2.27.0

Subject: [PATCH v3 7/8] nSVM: use vmcb_ctrl_area_cached instead of vmcb_control_area in struct svm_nested_state

This requires replacing all vmcb_is_intercept(&svm->nested.ctl, ...)
calls with vmcb12_is_intercept().
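
vmcb12_is_intercept() is expected to duplicate vmcb_is_intercept() for
the cached struct rather than share code with it (see the cover
letter), presumably along the lines of:

static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	return test_bit(bit, (unsigned long *)&control->intercepts);
}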

In addition, in svm_get_nested_state() user space expects a
vmcb_control_area struct, so we need to copy all cached fields
back into a temporary structure before handing it to user space.
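
The copy-back helper used below is presumably just the inverse of
nested_copy_vmcb_control_to_cache(); a sketch:

static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
					      struct vmcb_ctrl_area_cached *from)
{
	unsigned int i;

	/* zero first, so that no uninitialized bytes reach user space */
	memset(dst, 0, sizeof(struct vmcb_control_area));

	for (i = 0; i < MAX_INTERCEPT; i++)
		dst->intercepts[i] = from->intercepts[i];

	dst->iopm_base_pa = from->iopm_base_pa;
	dst->msrpm_base_pa = from->msrpm_base_pa;
	/* ... and so on for the remaining cached fields ... */
}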

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 41 +++++++++++++++++++++++----------------
arch/x86/kvm/svm/svm.c | 4 ++--
arch/x86/kvm/svm/svm.h | 8 ++++----
3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c84cded1dcf6..13be1002ad1c 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep
struct vcpu_svm *svm = to_svm(vcpu);
WARN_ON(!is_guest_mode(vcpu));

- if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !svm->nested.nested_run_pending) {
+ if (vmcb12_is_intercept(&svm->nested.ctl,
+ INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+ !svm->nested.nested_run_pending) {
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = fault->error_code;
@@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)

void recalc_intercepts(struct vcpu_svm *svm)
{
- struct vmcb_control_area *c, *h, *g;
+ struct vmcb_control_area *c, *h;
+ struct vmcb_ctrl_area_cached *g;
unsigned int i;

vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -172,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
*/
int i;

- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return true;

for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -208,9 +210,9 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
}

static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_control_area *control)
+ struct vmcb_ctrl_area_cached *control)
{
- if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
+ if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;

if (CC(control->asid == 0))
@@ -960,7 +962,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
u32 offset, msr, value;
int write, mask;

- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return NESTED_EXIT_HOST;

msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -987,7 +989,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
u8 start_bit;
u64 gpa;

- if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
+ if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;

port = svm->vmcb->control.exit_info_1 >> 16;
@@ -1018,12 +1020,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
vmexit = nested_svm_intercept_ioio(svm);
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
@@ -1041,7 +1043,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
break;
}
default: {
- if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+ if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
}
}
@@ -1119,7 +1121,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)

static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}

static int svm_check_nested_events(struct kvm_vcpu *vcpu)
@@ -1250,6 +1252,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
u32 user_data_size)
{
struct vcpu_svm *svm;
+ struct vmcb_control_area ctl_temp;
struct kvm_nested_state kvm_state = {
.flags = 0,
.format = KVM_STATE_NESTED_FORMAT_SVM,
@@ -1291,7 +1294,8 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
*/
if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
return -EFAULT;
- if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
+ nested_copy_vmcb_cache_to_control(&ctl_temp, &svm->nested.ctl);
+ if (copy_to_user(&user_vmcb->control, &ctl_temp,
sizeof(user_vmcb->control)))
return -EFAULT;
if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
@@ -1362,8 +1366,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
goto out_free;

ret = -EINVAL;
- if (!nested_vmcb_check_controls(vcpu, ctl))
- goto out_free;
+ nested_copy_vmcb_control_to_cache(svm, ctl);
+ if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
+ goto out_free_ctl;

/*
* Processor state contains L2 state. Check that it is
@@ -1371,7 +1376,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
cr0 = kvm_read_cr0(vcpu);
if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
- goto out_free;
+ goto out_free_ctl;

/*
* Validate host state saved from before VMRUN (see
@@ -1417,7 +1422,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;

svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
- nested_copy_vmcb_control_to_cache(svm, ctl);

svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
@@ -1427,6 +1431,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
out_free_save:
memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));

+out_free_ctl:
+ memset(&svm->nested.ctl, 0, sizeof(struct vmcb_ctrl_area_cached));
+
out_free:
kfree(save);
kfree(ctl);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1b6d25c6e0ae..d866eea39777 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2465,7 +2465,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
bool ret = false;

if (!is_guest_mode(vcpu) ||
- (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
+ (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
return false;

cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -4184,7 +4184,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
info->intercept == x86_intercept_clts)
break;

- if (!(vmcb_is_intercept(&svm->nested.ctl,
+ if (!(vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_SELECTIVE_CR0)))
break;

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 78006245e334..051b7d0a13a1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -156,7 +156,7 @@ struct svm_nested_state {
bool nested_run_pending;

/* cache for control fields of the guest */
- struct vmcb_control_area ctl;
+ struct vmcb_ctrl_area_cached ctl;
struct vmcb_save_area_cached save;

bool initialized;
@@ -491,17 +491,17 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)

static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
}

static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
}

static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
- return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
+ return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}

int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
--
2.27.0

Subject: [PATCH v3 4/8] nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()

Now that struct vmcb_save_area_cached contains the required
vmcb12 field values (filled in by nested_copy_vmcb_save_to_cache()),
validate them in nested_vmcb_valid_sregs().

Since the checks now always operate on the cached struct, it is
enough to take only the vcpu as parameter.

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f6030a202bc5..d07cd4b88acd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -230,9 +230,10 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
}

/* Common checks that apply to both L1 and L2 state. */
-static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
- struct vmcb_save_area *save)
+static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_save_area_cached *save = &svm->nested.save;
/*
* FIXME: these should be done after copying the fields,
* to avoid TOC/TOU races. For these save area checks
@@ -658,7 +659,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

- if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
+ if (!nested_vmcb_valid_sregs(vcpu) ||
!nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
vmcb12->control.exit_code_hi = 0;
@@ -1355,11 +1356,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* Validate host state saved from before VMRUN (see
* nested_svm_check_permissions).
*/
+ nested_copy_vmcb_save_to_cache(svm, save);
if (!(save->cr0 & X86_CR0_PG) ||
!(save->cr0 & X86_CR0_PE) ||
(save->rflags & X86_EFLAGS_VM) ||
- !nested_vmcb_valid_sregs(vcpu, save))
- goto out_free;
+ !nested_vmcb_valid_sregs(vcpu))
+ goto out_free_save;

/*
* While the nested guest CR3 is already checked and set by
@@ -1371,7 +1373,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
nested_npt_enabled(svm), false);
if (WARN_ON_ONCE(ret))
- goto out_free;
+ goto out_free_save;


/*
@@ -1395,12 +1397,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,

svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_copy_vmcb_control_to_cache(svm, ctl);
- nested_copy_vmcb_save_to_cache(svm, save);

svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
+
+out_free_save:
+ memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));
+
out_free:
kfree(save);
kfree(ctl);
--
2.27.0

Subject: [PATCH v3 5/8] nSVM: use svm->nested.save to load vmcb12 registers and avoid TOC/TOU races

Use the already checked svm->nested.save cached fields
(EFER, CR0, CR4, ...) instead of vmcb12's in
nested_vmcb02_prepare_save().
This prevents TOC/TOU races, since the guest could otherwise
modify the vmcb12 fields between the check and their use.

This also removes the need to force-set EFER_SVME in
nested_vmcb02_prepare_save().

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 25 +++++++------------------
1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d07cd4b88acd..e08f2c31beae 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -234,13 +234,7 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_save_area_cached *save = &svm->nested.save;
- /*
- * FIXME: these should be done after copying the fields,
- * to avoid TOC/TOU races. For these save area checks
- * the possible damage is limited since kvm_set_cr0 and
- * kvm_set_cr4 handle failure; EFER_SVME is an exception
- * so it is force-set later in nested_prepare_vmcb_save.
- */
+
if (CC(!(save->efer & EFER_SVME)))
return false;

@@ -476,15 +470,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12

kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);

- /*
- * Force-set EFER_SVME even though it is checked earlier on the
- * VMCB12, because the guest can flip the bit between the check
- * and now. Clearing EFER_SVME would call svm_free_nested.
- */
- svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+ svm_set_efer(&svm->vcpu, svm->nested.save.efer);

- svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
- svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
+ svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
+ svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);

svm->vcpu.arch.cr2 = vmcb12->save.cr2;

@@ -499,8 +488,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12

/* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
- svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
- svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+ svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
+ svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
}
}
@@ -609,7 +598,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
nested_vmcb02_prepare_control(svm);
nested_vmcb02_prepare_save(svm, vmcb12);

- ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
+ ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
nested_npt_enabled(svm), true);
if (ret)
return ret;
--
2.27.0

Subject: [PATCH v3 3/8] nSVM: rename nested_load_control_from_vmcb12 to nested_copy_vmcb_control_to_cache

Following the same naming convention as the previous patch,
rename nested_load_control_from_vmcb12 to
nested_copy_vmcb_control_to_cache.
In addition, inline copy_vmcb_control_area, as it is only called
by this function.

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 67 ++++++++++++++++++---------------------
arch/x86/kvm/svm/svm.c | 2 +-
arch/x86/kvm/svm/svm.h | 2 +-
3 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c4959da8aec0..f6030a202bc5 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -163,37 +163,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}

-static void copy_vmcb_control_area(struct vmcb_control_area *dst,
- struct vmcb_control_area *from)
-{
- unsigned int i;
-
- for (i = 0; i < MAX_INTERCEPT; i++)
- dst->intercepts[i] = from->intercepts[i];
-
- dst->iopm_base_pa = from->iopm_base_pa;
- dst->msrpm_base_pa = from->msrpm_base_pa;
- dst->tsc_offset = from->tsc_offset;
- /* asid not copied, it is handled manually for svm->vmcb. */
- dst->tlb_ctl = from->tlb_ctl;
- dst->int_ctl = from->int_ctl;
- dst->int_vector = from->int_vector;
- dst->int_state = from->int_state;
- dst->exit_code = from->exit_code;
- dst->exit_code_hi = from->exit_code_hi;
- dst->exit_info_1 = from->exit_info_1;
- dst->exit_info_2 = from->exit_info_2;
- dst->exit_int_info = from->exit_int_info;
- dst->exit_int_info_err = from->exit_int_info_err;
- dst->nested_ctl = from->nested_ctl;
- dst->event_inj = from->event_inj;
- dst->event_inj_err = from->event_inj_err;
- dst->nested_cr3 = from->nested_cr3;
- dst->virt_ext = from->virt_ext;
- dst->pause_filter_count = from->pause_filter_count;
- dst->pause_filter_thresh = from->pause_filter_thresh;
-}
-
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
/*
@@ -302,12 +271,36 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
return true;
}

-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
- struct vmcb_control_area *control)
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+ struct vmcb_control_area *control)
{
- copy_vmcb_control_area(&svm->nested.ctl, control);
+ unsigned int i;

- /* Copy it here because nested_svm_check_controls will check it. */
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ svm->nested.ctl.intercepts[i] = control->intercepts[i];
+
+ svm->nested.ctl.iopm_base_pa = control->iopm_base_pa;
+ svm->nested.ctl.msrpm_base_pa = control->msrpm_base_pa;
+ svm->nested.ctl.tsc_offset = control->tsc_offset;
+ svm->nested.ctl.tlb_ctl = control->tlb_ctl;
+ svm->nested.ctl.int_ctl = control->int_ctl;
+ svm->nested.ctl.int_vector = control->int_vector;
+ svm->nested.ctl.int_state = control->int_state;
+ svm->nested.ctl.exit_code = control->exit_code;
+ svm->nested.ctl.exit_code_hi = control->exit_code_hi;
+ svm->nested.ctl.exit_info_1 = control->exit_info_1;
+ svm->nested.ctl.exit_info_2 = control->exit_info_2;
+ svm->nested.ctl.exit_int_info = control->exit_int_info;
+ svm->nested.ctl.exit_int_info_err = control->exit_int_info_err;
+ svm->nested.ctl.nested_ctl = control->nested_ctl;
+ svm->nested.ctl.event_inj = control->event_inj;
+ svm->nested.ctl.event_inj_err = control->event_inj_err;
+ svm->nested.ctl.nested_cr3 = control->nested_cr3;
+ svm->nested.ctl.virt_ext = control->virt_ext;
+ svm->nested.ctl.pause_filter_count = control->pause_filter_count;
+ svm->nested.ctl.pause_filter_thresh = control->pause_filter_thresh;
+
+ /* Copy asid here because nested_vmcb_check_controls will check it. */
svm->nested.ctl.asid = control->asid;
svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
@@ -662,7 +655,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(!svm->nested.initialized))
return -EINVAL;

- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
@@ -1401,7 +1394,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;

svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
- nested_load_control_from_vmcb12(svm, ctl);
+ nested_copy_vmcb_control_to_cache(svm, ctl);
nested_copy_vmcb_save_to_cache(svm, save);

svm_switch_vmcb(svm, &svm->nested.vmcb02);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bf171f5f6158..1b6d25c6e0ae 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4385,7 +4385,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)

vmcb12 = map.hva;

- nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f0195bc263e9..3c950aeca646 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -496,7 +496,7 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
struct vmcb_control_area *control);
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
struct vmcb_save_area *save);
--
2.27.0

Subject: [PATCH v3 2/8] nSVM: introduce svm->nested.save to cache save area fields

This is useful in the next patch, to avoid having temporary
copies of the vmcb12 registers and passing them around manually.

Right now, instead of blindly copying everything,
we just copy EFER, CR0, CR3, CR4, DR6 and DR7. If more fields
need to be cached later, it will be obvious that they must be
added both to struct vmcb_save_area_cached and to
nested_copy_vmcb_save_to_cache().
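
For example, caching rflags as well (purely hypothetical) would only
take two additions:

	/* in struct vmcb_save_area_cached */
	u64 rflags;

	/* in nested_copy_vmcb_save_to_cache() */
	svm->nested.save.rflags = save->rflags;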

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 18 ++++++++++++++++++
arch/x86/kvm/svm/svm.c | 1 +
arch/x86/kvm/svm/svm.h | 16 ++++++++++++++++
3 files changed, 35 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d2fe65e2a7a4..c4959da8aec0 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -313,6 +313,22 @@ void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
}

+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+ struct vmcb_save_area *save)
+{
+ /*
+ * Copy only fields that are validated, as we need them
+ * to avoid TOC/TOU races.
+ */
+ svm->nested.save.efer = save->efer;
+ svm->nested.save.cr0 = save->cr0;
+ svm->nested.save.cr3 = save->cr3;
+ svm->nested.save.cr4 = save->cr4;
+
+ svm->nested.save.dr6 = save->dr6;
+ svm->nested.save.dr7 = save->dr7;
+}
+
/*
* Synchronize fields that are written by the processor, so that
* they can be copied back into the vmcb12.
@@ -647,6 +663,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
return -EINVAL;

nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
!nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
@@ -1385,6 +1402,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,

svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_load_control_from_vmcb12(svm, ctl);
+ nested_copy_vmcb_save_to_cache(svm, save);

svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 69639f9624f5..bf171f5f6158 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4386,6 +4386,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
vmcb12 = map.hva;

nested_load_control_from_vmcb12(svm, &vmcb12->control);
+ nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
kvm_vcpu_unmap(vcpu, &map, true);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index bd0fe94c2920..f0195bc263e9 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -103,6 +103,19 @@ struct kvm_vmcb_info {
uint64_t asid_generation;
};

+/*
+ * This struct is not kept up-to-date, and it is only valid within
+ * svm_set_nested_state and nested_svm_vmrun.
+ */
+struct vmcb_save_area_cached {
+ u64 efer;
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+};
+
struct svm_nested_state {
struct kvm_vmcb_info vmcb02;
u64 hsave_msr;
@@ -119,6 +132,7 @@ struct svm_nested_state {

/* cache for control fields of the guest */
struct vmcb_control_area ctl;
+ struct vmcb_save_area_cached save;

bool initialized;
};
@@ -484,6 +498,8 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
struct vmcb_control_area *control);
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+ struct vmcb_save_area *save);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
--
2.27.0

Subject: [PATCH v3 8/8] nSVM: remove unnecessary parameter in nested_vmcb_check_controls

Just as in nested_vmcb_valid_sregs, only the vcpu parameter is
needed to perform the checks on the vmcb nested state, since we
always look at the cached fields.

Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
---
arch/x86/kvm/svm/nested.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 13be1002ad1c..19bce3819cce 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -209,9 +209,11 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}

-static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
+
if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;

@@ -651,7 +653,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

if (!nested_vmcb_valid_sregs(vcpu) ||
- !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
+ !nested_vmcb_check_controls(vcpu)) {
vmcb12->control.exit_code = SVM_EXIT_ERR;
vmcb12->control.exit_code_hi = 0;
vmcb12->control.exit_info_1 = 0;
@@ -1367,7 +1369,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,

ret = -EINVAL;
nested_copy_vmcb_control_to_cache(svm, ctl);
- if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
+ if (!nested_vmcb_check_controls(vcpu))
goto out_free_ctl;

/*
--
2.27.0

2021-10-22 07:16:39, by Paolo Bonzini
Subject: Re: [PATCH v3 7/8] nSVM: use vmcb_ctrl_area_cached instead of vmcb_control_area in struct svm_nested_state

On 11/10/21 16:37, Emanuele Giuseppe Esposito wrote:
> if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
> return -EFAULT;
> - if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
> + nested_copy_vmcb_cache_to_control(&ctl_temp, &svm->nested.ctl);
> + if (copy_to_user(&user_vmcb->control, &ctl_temp,
> sizeof(user_vmcb->control)))
> return -EFAULT;

This needs a memset of ctl_temp so that kernel memory contents are not
leaked to userspace. However, it's also better to avoid large structs
on the stack, and do a quick kzalloc/kfree instead:

- nested_copy_vmcb_cache_to_control(&ctl_temp, &svm->nested.ctl);
- if (copy_to_user(&user_vmcb->control, &ctl_temp,
- sizeof(user_vmcb->control)))
+
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+ if (!ctl)
+ return -ENOMEM;
+ nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
+ r = copy_to_user(&user_vmcb->control, ctl,
+ sizeof(user_vmcb->control));
+ kfree(ctl);
+ if (r)
return -EFAULT;

I can do this change when committing too.

Paolo

2021-10-22 07:18:23, by Paolo Bonzini
Subject: Re: [PATCH v3 4/8] nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()

On 11/10/21 16:36, Emanuele Giuseppe Esposito wrote:
> +
> +out_free_save:
> + memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));
> +

This memset is not strictly necessary, is it? (Same for out_free_ctl
later on).

Paolo

Subject: Re: [PATCH v3 4/8] nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()



On 22/10/2021 09:14, Paolo Bonzini wrote:
> On 11/10/21 16:36, Emanuele Giuseppe Esposito wrote:
>> +
>> +out_free_save:
>> +    memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));
>> +
>
> This memset is not strictly necessary, is it?  (Same for out_free_ctl
> later on).
>

It was just to keep the struct clean in case of error, but
you are right, it can be removed.

Thank you,
Emanuele

2021-10-22 14:47:41, by Maxim Levitsky
Subject: Re: [PATCH v3 2/8] nSVM: introduce svm->nested.save to cache save area fields

On Mon, 2021-10-11 at 10:36 -0400, Emanuele Giuseppe Esposito wrote:
> This is useful in the next patch, to avoid having temporary
> copies of the vmcb12 registers and passing them around manually.
>
> Right now, instead of blindly copying everything,
> we just copy EFER, CR0, CR3, CR4, DR6 and DR7. If more fields
> need to be cached later, it will be obvious that they must be
> added both to struct vmcb_save_area_cached and to
> nested_copy_vmcb_save_to_cache().
>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 18 ++++++++++++++++++
> arch/x86/kvm/svm/svm.c | 1 +
> arch/x86/kvm/svm/svm.h | 16 ++++++++++++++++
> 3 files changed, 35 insertions(+)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index d2fe65e2a7a4..c4959da8aec0 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -313,6 +313,22 @@ void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
> svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
> }
>
> +void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
> + struct vmcb_save_area *save)
> +{
> + /*
> + * Copy only fields that are validated, as we need them
> + * to avoid TOC/TOU races.
> + */
> + svm->nested.save.efer = save->efer;
> + svm->nested.save.cr0 = save->cr0;
> + svm->nested.save.cr3 = save->cr3;
> + svm->nested.save.cr4 = save->cr4;
> +
> + svm->nested.save.dr6 = save->dr6;
> + svm->nested.save.dr7 = save->dr7;
> +}
> +
> /*
> * Synchronize fields that are written by the processor, so that
> * they can be copied back into the vmcb12.
> @@ -647,6 +663,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> return -EINVAL;
>
> nested_load_control_from_vmcb12(svm, &vmcb12->control);
> + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
>
> if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
> !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
> @@ -1385,6 +1402,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>
> svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
> nested_load_control_from_vmcb12(svm, ctl);
> + nested_copy_vmcb_save_to_cache(svm, save);

This is wrong (and this is due to the very non-obvious way SVM restores
the nested state, which I would like to rewrite).

On SVM we migrate L2's control area and *L1's save* area inside a single vmcb,
while L2's save area is supposed to be migrated with the regular KVM_SET_{S}REGS ioctls.


So I think you want something like this instead:


diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4cd53a56f3ebc..fcb38b220e706 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1409,8 +1409,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,

if (is_guest_mode(vcpu))
svm_leave_nested(svm);
- else
+ else {
svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+ nested_copy_vmcb_save_to_cache(svm, &svm->nested.vmcb02.ptr->save);
+ }

svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));


Note the two branches here. If we are already in guest mode (qemu currently
doesn't set nested state while already in guest mode, but I added this to make
the code more robust), we leave guest mode for a while so that the following
code can correctly assume we are not in guest mode; it later returns us to
guest mode and never touches L2's save area, so its cache shouldn't be
updated either.

If we are in normal non-guest mode, then as you can see we just copy the whole
L1 save area to L2, on the assumption that at least some L2 state is already
there, restored earlier (currently the result of KVM_SET_REGS); after the
nested state is set, qemu then restores the regular L2 registers with
KVM_SET_REGS, along with other state such as MSRs.

So at the point of this copying, we need to update the cache.




>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> nested_vmcb02_prepare_control(svm);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 69639f9624f5..bf171f5f6158 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4386,6 +4386,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
> vmcb12 = map.hva;
>
> nested_load_control_from_vmcb12(svm, &vmcb12->control);
> + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

This looks correct.

>
> ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
> kvm_vcpu_unmap(vcpu, &map, true);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index bd0fe94c2920..f0195bc263e9 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -103,6 +103,19 @@ struct kvm_vmcb_info {
> uint64_t asid_generation;
> };
>
> +/*
> + * This struct is not kept up-to-date, and it is only valid within
> + * svm_set_nested_state and nested_svm_vmrun.
> + */
> +struct vmcb_save_area_cached {
> + u64 efer;
> + u64 cr4;
> + u64 cr3;
> + u64 cr0;
> + u64 dr7;
> + u64 dr6;
> +};
> +
> struct svm_nested_state {
> struct kvm_vmcb_info vmcb02;
> u64 hsave_msr;
> @@ -119,6 +132,7 @@ struct svm_nested_state {
>
> /* cache for control fields of the guest */
> struct vmcb_control_area ctl;
> + struct vmcb_save_area_cached save;
>
> bool initialized;
> };
> @@ -484,6 +498,8 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> int nested_svm_exit_special(struct vcpu_svm *svm);
> void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
> struct vmcb_control_area *control);
> +void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
> + struct vmcb_save_area *save);
> void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
> void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
> void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);

Note that you need to rebase this and most other patches, due to changes (mostly mine, sorry)
in the kvm/queue. Conflicts are mostly trivial though.


Best regards,
Maxim Levitsky


2021-10-22 14:48:20, by Maxim Levitsky
Subject: Re: [PATCH v3 3/8] nSVM: rename nested_load_control_from_vmcb12 to nested_copy_vmcb_control_to_cache

On Mon, 2021-10-11 at 10:36 -0400, Emanuele Giuseppe Esposito wrote:
> Following the same naming convention as the previous patch,
> rename nested_load_control_from_vmcb12 to
> nested_copy_vmcb_control_to_cache.
> In addition, inline copy_vmcb_control_area, as it is only called
> by this function.

>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 67 ++++++++++++++++++---------------------
> arch/x86/kvm/svm/svm.c | 2 +-
> arch/x86/kvm/svm/svm.h | 2 +-
> 3 files changed, 32 insertions(+), 39 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c4959da8aec0..f6030a202bc5 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -163,37 +163,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
> vmcb_set_intercept(c, INTERCEPT_VMSAVE);
> }
>
> -static void copy_vmcb_control_area(struct vmcb_control_area *dst,
> - struct vmcb_control_area *from)
> -{
> - unsigned int i;
> -
> - for (i = 0; i < MAX_INTERCEPT; i++)
> - dst->intercepts[i] = from->intercepts[i];
> -
> - dst->iopm_base_pa = from->iopm_base_pa;
> - dst->msrpm_base_pa = from->msrpm_base_pa;
> - dst->tsc_offset = from->tsc_offset;
> - /* asid not copied, it is handled manually for svm->vmcb. */
> - dst->tlb_ctl = from->tlb_ctl;
> - dst->int_ctl = from->int_ctl;
> - dst->int_vector = from->int_vector;
> - dst->int_state = from->int_state;
> - dst->exit_code = from->exit_code;
> - dst->exit_code_hi = from->exit_code_hi;
> - dst->exit_info_1 = from->exit_info_1;
> - dst->exit_info_2 = from->exit_info_2;
> - dst->exit_int_info = from->exit_int_info;
> - dst->exit_int_info_err = from->exit_int_info_err;
> - dst->nested_ctl = from->nested_ctl;
> - dst->event_inj = from->event_inj;
> - dst->event_inj_err = from->event_inj_err;
> - dst->nested_cr3 = from->nested_cr3;
> - dst->virt_ext = from->virt_ext;
> - dst->pause_filter_count = from->pause_filter_count;
> - dst->pause_filter_thresh = from->pause_filter_thresh;
> -}
> -
> static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
> {
> /*
> @@ -302,12 +271,36 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
> return true;
> }
>
> -void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
> - struct vmcb_control_area *control)
> +void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
> + struct vmcb_control_area *control)
> {
> - copy_vmcb_control_area(&svm->nested.ctl, control);
> + unsigned int i;
>
> - /* Copy it here because nested_svm_check_controls will check it. */
> + for (i = 0; i < MAX_INTERCEPT; i++)
> + svm->nested.ctl.intercepts[i] = control->intercepts[i];
> +
> + svm->nested.ctl.iopm_base_pa = control->iopm_base_pa;
> + svm->nested.ctl.msrpm_base_pa = control->msrpm_base_pa;
> + svm->nested.ctl.tsc_offset = control->tsc_offset;
> + svm->nested.ctl.tlb_ctl = control->tlb_ctl;
> + svm->nested.ctl.int_ctl = control->int_ctl;
> + svm->nested.ctl.int_vector = control->int_vector;
> + svm->nested.ctl.int_state = control->int_state;
> + svm->nested.ctl.exit_code = control->exit_code;
> + svm->nested.ctl.exit_code_hi = control->exit_code_hi;
> + svm->nested.ctl.exit_info_1 = control->exit_info_1;
> + svm->nested.ctl.exit_info_2 = control->exit_info_2;
> + svm->nested.ctl.exit_int_info = control->exit_int_info;
> + svm->nested.ctl.exit_int_info_err = control->exit_int_info_err;
> + svm->nested.ctl.nested_ctl = control->nested_ctl;
> + svm->nested.ctl.event_inj = control->event_inj;
> + svm->nested.ctl.event_inj_err = control->event_inj_err;
> + svm->nested.ctl.nested_cr3 = control->nested_cr3;
> + svm->nested.ctl.virt_ext = control->virt_ext;
> + svm->nested.ctl.pause_filter_count = control->pause_filter_count;
> + svm->nested.ctl.pause_filter_thresh = control->pause_filter_thresh;
> +
> + /* Copy asid here because nested_vmcb_check_controls will check it. */
> svm->nested.ctl.asid = control->asid;
> svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
> svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
> @@ -662,7 +655,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> if (WARN_ON_ONCE(!svm->nested.initialized))
> return -EINVAL;
>
> - nested_load_control_from_vmcb12(svm, &vmcb12->control);
> + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
> nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
>
> if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
> @@ -1401,7 +1394,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
>
> svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
> - nested_load_control_from_vmcb12(svm, ctl);
> + nested_copy_vmcb_control_to_cache(svm, ctl);
> nested_copy_vmcb_save_to_cache(svm, save);
>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index bf171f5f6158..1b6d25c6e0ae 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4385,7 +4385,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
>
> vmcb12 = map.hva;
>
> - nested_load_control_from_vmcb12(svm, &vmcb12->control);
> + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
> nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
>
> ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index f0195bc263e9..3c950aeca646 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -496,7 +496,7 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
> int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
> bool has_error_code, u32 error_code);
> int nested_svm_exit_special(struct vcpu_svm *svm);
> -void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
> +void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
> struct vmcb_control_area *control);
> void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
> struct vmcb_save_area *save);


Looks great!

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky

2021-10-22 14:50:27, by Maxim Levitsky
Subject: Re: [PATCH v3 5/8] nSVM: use svm->nested.save to load vmcb12 registers and avoid TOC/TOU races

On Mon, 2021-10-11 at 10:36 -0400, Emanuele Giuseppe Esposito wrote:
> Use the already checked svm->nested.save cached fields
> (EFER, CR0, CR4, ...) instead of vmcb12's in
> nested_vmcb02_prepare_save().
> This prevents TOC/TOU races, since the guest could otherwise
> modify the vmcb12 fields between the check and their use.
>
> This also removes the need to force-set EFER_SVME in
> nested_vmcb02_prepare_save().
>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 25 +++++++------------------
> 1 file changed, 7 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index d07cd4b88acd..e08f2c31beae 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -234,13 +234,7 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> struct vmcb_save_area_cached *save = &svm->nested.save;
> - /*
> - * FIXME: these should be done after copying the fields,
> - * to avoid TOC/TOU races. For these save area checks
> - * the possible damage is limited since kvm_set_cr0 and
> - * kvm_set_cr4 handle failure; EFER_SVME is an exception
> - * so it is force-set later in nested_prepare_vmcb_save.
> - */
> +
> if (CC(!(save->efer & EFER_SVME)))
> return false;
>
> @@ -476,15 +470,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>
> kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
>
> - /*
> - * Force-set EFER_SVME even though it is checked earlier on the
> - * VMCB12, because the guest can flip the bit between the check
> - * and now. Clearing EFER_SVME would call svm_free_nested.
> - */
> - svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
> + svm_set_efer(&svm->vcpu, svm->nested.save.efer);
>
> - svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
> - svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
> + svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
> + svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
>
> svm->vcpu.arch.cr2 = vmcb12->save.cr2;
>
> @@ -499,8 +488,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>
> /* These bits will be set properly on the first execution when new_vmc12 is true */
> if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
> - svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
> - svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
> + svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
> + svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
> vmcb_mark_dirty(svm->vmcb, VMCB_DR);
> }
> }
> @@ -609,7 +598,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
> nested_vmcb02_prepare_control(svm);
> nested_vmcb02_prepare_save(svm, vmcb12);
>
> - ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
> + ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
> nested_npt_enabled(svm), true);
> if (ret)
> return ret;

Reviewed-by: Maxim Levitsky <[email protected]>

Best regards,
Maxim Levitsky

2021-10-22 14:51:21, by Maxim Levitsky
Subject: Re: [PATCH v3 4/8] nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()

On Mon, 2021-10-11 at 10:36 -0400, Emanuele Giuseppe Esposito wrote:
> Now that struct vmcb_save_area_cached contains the required
> vmcb12 field values (filled in by nested_copy_vmcb_save_to_cache()),
> validate them in nested_vmcb_valid_sregs().
>
> Since the checks now always operate on the cached struct, it is
> enough to take only the vcpu as parameter.
>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 19 ++++++++++++-------
> 1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index f6030a202bc5..d07cd4b88acd 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -230,9 +230,10 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
> }
>
> /* Common checks that apply to both L1 and L2 state. */
> -static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
> - struct vmcb_save_area *save)
> +static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu)
> {
> + struct vcpu_svm *svm = to_svm(vcpu);
> + struct vmcb_save_area_cached *save = &svm->nested.save;
> /*
> * FIXME: these should be done after copying the fields,
> * to avoid TOC/TOU races. For these save area checks
> @@ -658,7 +659,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
> nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
>
> - if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
> + if (!nested_vmcb_valid_sregs(vcpu) ||
> !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
> vmcb12->control.exit_code = SVM_EXIT_ERR;
> vmcb12->control.exit_code_hi = 0;
> @@ -1355,11 +1356,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> * Validate host state saved from before VMRUN (see
> * nested_svm_check_permissions).
> */
> + nested_copy_vmcb_save_to_cache(svm, save);


> if (!(save->cr0 & X86_CR0_PG) ||
> !(save->cr0 & X86_CR0_PE) ||
> (save->rflags & X86_EFLAGS_VM) ||
> - !nested_vmcb_valid_sregs(vcpu, save))
> - goto out_free;
> + !nested_vmcb_valid_sregs(vcpu))
> + goto out_free_save;

The two changes above can't be done like that, sadly.

We cache only vmcb12 because it comes from the guest, which is untrusted.

Here we validate L1's saved host state, which (once again, the SVM nested
state naming is confusing) is the 'save' variable.
That state is not guest controlled, but we validate it here anyway
to avoid trusting the KVM_SET_NESTED_STATE caller.

I guess we can copy this to an extra 'struct vmcb_save_area_cached' on the
stack (the struct is small), and then pass its address to
nested_vmcb_valid_sregs (or better, to __nested_vmcb_valid_sregs, so that
nested_vmcb_valid_sregs can still take one parameter but delegate its work
to __nested_vmcb_valid_sregs with two parameters).

>
> /*
> * While the nested guest CR3 is already checked and set by
> @@ -1371,7 +1373,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
> nested_npt_enabled(svm), false);
> if (WARN_ON_ONCE(ret))
> - goto out_free;
> + goto out_free_save;
>
>
> /*
> @@ -1395,12 +1397,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>
> svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
> nested_copy_vmcb_control_to_cache(svm, ctl);
> - nested_copy_vmcb_save_to_cache(svm, save);
>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> nested_vmcb02_prepare_control(svm);
> kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
> ret = 0;
> +
> +out_free_save:
> + memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));
> +

This won't be needed if we don't touch svm->nested.save.

> out_free:
> kfree(save);
> kfree(ctl);


Best regards,
Maxim Levitsky

2021-10-22 14:52:25, by Maxim Levitsky
Subject: Re: [PATCH v3 7/8] nSVM: use vmcb_ctrl_area_cached instead of vmcb_control_area in struct svm_nested_state

On Mon, 2021-10-11 at 10:37 -0400, Emanuele Giuseppe Esposito wrote:
> This requires replacing all vmcb_is_intercept(&svm->nested.ctl, ...)
> calls with vmcb12_is_intercept().
>
> In addition, in svm_get_nested_state() user space expects a
> vmcb_control_area struct, so we need to copy all cached fields
> back into a temporary structure before handing it to user space.
>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 41 +++++++++++++++++++++++----------------
> arch/x86/kvm/svm/svm.c | 4 ++--
> arch/x86/kvm/svm/svm.h | 8 ++++----
> 3 files changed, 30 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c84cded1dcf6..13be1002ad1c 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep
> struct vcpu_svm *svm = to_svm(vcpu);
> WARN_ON(!is_guest_mode(vcpu));
>
> - if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
> - !svm->nested.nested_run_pending) {
> + if (vmcb12_is_intercept(&svm->nested.ctl,
> + INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
> + !svm->nested.nested_run_pending) {
> svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
> svm->vmcb->control.exit_code_hi = 0;
> svm->vmcb->control.exit_info_1 = fault->error_code;
> @@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
>
> void recalc_intercepts(struct vcpu_svm *svm)
> {
> - struct vmcb_control_area *c, *h, *g;
> + struct vmcb_control_area *c, *h;
> + struct vmcb_ctrl_area_cached *g;
> unsigned int i;
>
> vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
> @@ -172,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
> */
> int i;
>
> - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
> + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
> return true;
>
> for (i = 0; i < MSRPM_OFFSETS; i++) {
> @@ -208,9 +210,9 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
> }
>
> static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
> - struct vmcb_control_area *control)
> + struct vmcb_ctrl_area_cached *control)
> {
> - if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
> + if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
> return false;
>
> if (CC(control->asid == 0))
> @@ -960,7 +962,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
> u32 offset, msr, value;
> int write, mask;
>
> - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
> + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
> return NESTED_EXIT_HOST;
>
> msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
> @@ -987,7 +989,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
> u8 start_bit;
> u64 gpa;
>
> - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
> + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
> return NESTED_EXIT_HOST;
>
> port = svm->vmcb->control.exit_info_1 >> 16;
> @@ -1018,12 +1020,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
> vmexit = nested_svm_intercept_ioio(svm);
> break;
> case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
> - if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
> + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
> vmexit = NESTED_EXIT_DONE;
> break;
> }
> case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
> - if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
> + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
> vmexit = NESTED_EXIT_DONE;
> break;
> }
> @@ -1041,7 +1043,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
> break;
> }
> default: {
> - if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
> + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
> vmexit = NESTED_EXIT_DONE;
> }
> }
> @@ -1119,7 +1121,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
>
> static inline bool nested_exit_on_init(struct vcpu_svm *svm)
> {
> - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
> + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
> }
>
> static int svm_check_nested_events(struct kvm_vcpu *vcpu)
> @@ -1250,6 +1252,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
> u32 user_data_size)
> {
> struct vcpu_svm *svm;
> + struct vmcb_control_area ctl_temp;
> struct kvm_nested_state kvm_state = {
> .flags = 0,
> .format = KVM_STATE_NESTED_FORMAT_SVM,
> @@ -1291,7 +1294,8 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
> */
> if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
> return -EFAULT;
> - if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
> + nested_copy_vmcb_cache_to_control(&ctl_temp, &svm->nested.ctl);
> + if (copy_to_user(&user_vmcb->control, &ctl_temp,
> sizeof(user_vmcb->control)))
> return -EFAULT;
> if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
> @@ -1362,8 +1366,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> goto out_free;
>
> ret = -EINVAL;
> - if (!nested_vmcb_check_controls(vcpu, ctl))
> - goto out_free;
> + nested_copy_vmcb_control_to_cache(svm, ctl);
> + if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
> + goto out_free_ctl;

I also don't like this, just as with the writing and clearing of
svm->nested.save in patch 4. Unlike the former case, this one is functionally
correct, but a failure will still leave partially initialized state (which you
zero at the end to mitigate, but it would be best if a failure left the state
untouched).

Since loading/saving the nested state is anything but performance-critical
code, I would prefer to also copy the control area to a temporary variable,
and then pass it to __nested_vmcb_check_controls.
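
Something like this, as a sketch (assuming a cache-copy variant that
takes an explicit destination):

	struct vmcb_ctrl_area_cached ctl_cached;

	__nested_copy_vmcb_control_to_cache(&ctl_cached, ctl);

	ret = -EINVAL;
	if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
		goto out_free;

	/* ... remaining checks ... */

	/* commit to svm->nested.ctl only once nothing can fail anymore */
	svm->nested.ctl = ctl_cached;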


Other than that, this patch looks good.

Best regards,
Maxim Levitsky



>
> /*
> * Processor state contains L2 state. Check that it is
> @@ -1371,7 +1376,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> */
> cr0 = kvm_read_cr0(vcpu);
> if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
> - goto out_free;
> + goto out_free_ctl;
>
> /*
> * Validate host state saved from before VMRUN (see
> @@ -1417,7 +1422,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
>
> svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
> - nested_copy_vmcb_control_to_cache(svm, ctl);
>
> svm_switch_vmcb(svm, &svm->nested.vmcb02);
> nested_vmcb02_prepare_control(svm);
> @@ -1427,6 +1431,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> out_free_save:
> memset(&svm->nested.save, 0, sizeof(struct vmcb_save_area_cached));
>
> +out_free_ctl:
> + memset(&svm->nested.ctl, 0, sizeof(struct vmcb_ctrl_area_cached));
> +
> out_free:
> kfree(save);
> kfree(ctl);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 1b6d25c6e0ae..d866eea39777 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2465,7 +2465,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
> bool ret = false;
>
> if (!is_guest_mode(vcpu) ||
> - (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
> + (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
> return false;
>
> cr0 &= ~SVM_CR0_SELECTIVE_MASK;
> @@ -4184,7 +4184,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> info->intercept == x86_intercept_clts)
> break;
>
> - if (!(vmcb_is_intercept(&svm->nested.ctl,
> + if (!(vmcb12_is_intercept(&svm->nested.ctl,
> INTERCEPT_SELECTIVE_CR0)))
> break;
>
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 78006245e334..051b7d0a13a1 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -156,7 +156,7 @@ struct svm_nested_state {
> bool nested_run_pending;
>
> /* cache for control fields of the guest */
> - struct vmcb_control_area ctl;
> + struct vmcb_ctrl_area_cached ctl;
> struct vmcb_save_area_cached save;
>
> bool initialized;
> @@ -491,17 +491,17 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
>
> static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
> {
> - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
> + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
> }
>
> static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
> {
> - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
> + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
> }
>
> static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
> {
> - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
> + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
> }
>
> int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);


2021-10-22 14:54:36, by Maxim Levitsky
Subject: Re: [PATCH v3 8/8] nSVM: remove unnecessary parameter in nested_vmcb_check_controls

On Mon, 2021-10-11 at 10:37 -0400, Emanuele Giuseppe Esposito wrote:
> Just as in nested_vmcb_valid_sregs, only the vcpu parameter is
> needed to perform the checks on the vmcb nested state, since we
> always look at the cached fields.
>
> Signed-off-by: Emanuele Giuseppe Esposito <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 13be1002ad1c..19bce3819cce 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -209,9 +209,11 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
> kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
> }
>
> -static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
> - struct vmcb_ctrl_area_cached *control)
> +static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
> {
> + struct vcpu_svm *svm = to_svm(vcpu);
> + struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
> +
> if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
> return false;
>
> @@ -651,7 +653,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
> nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
>
> if (!nested_vmcb_valid_sregs(vcpu) ||
> - !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
> + !nested_vmcb_check_controls(vcpu)) {
> vmcb12->control.exit_code = SVM_EXIT_ERR;
> vmcb12->control.exit_code_hi = 0;
> vmcb12->control.exit_info_1 = 0;
> @@ -1367,7 +1369,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
>
> ret = -EINVAL;
> nested_copy_vmcb_control_to_cache(svm, ctl);
> - if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
> + if (!nested_vmcb_check_controls(vcpu))
> goto out_free_ctl;
>
> /*

Because of the issue I pointed out in patch 7, you probably want:


static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
					 struct vmcb_ctrl_area_cached *control)
{
	....
}


static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return __nested_vmcb_check_controls(vcpu, &svm->nested.ctl);
}



Same for nested_vmcb_valid_sregs
(maybe you even want to rename it to nested_vmcb_check_save while at it?):


static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
				     struct vmcb_save_area_cached *save)
{
	...
}


static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return __nested_vmcb_check_save(vcpu, &svm->nested.save);
}


Best regards,
Maxim Levitsky