2023-06-13 20:51:46

by Sean Christopherson

Subject: [PATCH 1/3] KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid

Reject KVM_SET_SREGS{2} with -EINVAL if the incoming CR0 is invalid,
e.g. due to setting bits 63:32, illegal combinations, or to a value that
isn't allowed in VMX (non-)root mode. The VMX checks in particular are
"fun" as failure to disallow Real Mode for an L2 that is configured with
unrestricted guest disabled, when KVM itself has unrestricted guest
enabled, will result in KVM forcing VM86 mode to virtualize Real Mode for
L2, but then failing to unwind the related metadata when synthesizing a
nested VM-Exit back to L1 (which has unrestricted guest enabled).
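
For illustration, a minimal userspace sketch of a KVM_SET_SREGS call that
is now rejected (hypothetical, not part of the patch; assumes a vCPU fd
obtained via KVM_CREATE_VCPU and the x86 uapi headers):

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <asm/processor-flags.h>
  #include <linux/kvm.h>

  /* Returns 0 on success, -errno on failure. */
  static int stuff_invalid_cr0(int vcpu_fd)
  {
          struct kvm_sregs sregs;

          if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs))
                  return -errno;

          /* CR0.NW=1 with CR0.CD=0 is an illegal combination... */
          sregs.cr0 |= X86_CR0_NW;
          sregs.cr0 &= ~X86_CR0_CD;

          /* ...so with this patch, KVM_SET_SREGS fails with EINVAL. */
          return ioctl(vcpu_fd, KVM_SET_SREGS, &sregs) ? -errno : 0;
  }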

Opportunistically fix a benign typo in the prototype for is_valid_cr4().

Cc: [email protected]
Reported-by: [email protected]
Closes: https://lore.kernel.org/all/[email protected]
Signed-off-by: Sean Christopherson <[email protected]>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/svm/svm.c | 6 ++++++
arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++++++++------
arch/x86/kvm/x86.c | 34 +++++++++++++++++++-----------
5 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 13bc212cd4bc..e3054e3e46d5 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
KVM_X86_OP(set_cr0)
KVM_X86_OP_OPTIONAL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd38303d70..3bc146dfd38d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+ bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e265834fe859..b29d0650582e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
}

+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ return true;
+}
+
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4815,6 +4820,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
+ .is_valid_cr0 = svm_is_valid_cr0,
.set_cr0 = svm_set_cr0,
.post_set_cr3 = sev_post_set_cr3,
.is_valid_cr4 = svm_is_valid_cr4,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0ecf4be2c6af..355b0e8c9b00 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3037,6 +3037,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);

+ /*
+ * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+ * as using VM86 is unnecessary if unrestricted guest is enabled, and
+ * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+ * should VM-Fail and KVM should reject userspace attempts to stuff
+ * CR0.PG=0 when L2 is active.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3226,6 +3235,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
CPU_BASED_CR3_STORE_EXITING)

+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ if (is_guest_mode(vcpu))
+ return nested_guest_cr0_valid(vcpu, cr0);
+
+ if (to_vmx(vcpu)->nested.vmxon)
+ return nested_host_cr0_valid(vcpu, cr0);
+
+ return true;
+}
+
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5364,18 +5384,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);

- if (!nested_guest_cr0_valid(vcpu, val))
- return 1;
-
if (kvm_set_cr0(vcpu, val))
return 1;
vmcs_writel(CR0_READ_SHADOW, orig_val);
return 0;
} else {
- if (to_vmx(vcpu)->nested.vmxon &&
- !nested_host_cr0_valid(vcpu, val))
- return 1;
-
return kvm_set_cr0(vcpu, val);
}
}
@@ -8203,6 +8216,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .is_valid_cr0 = vmx_is_valid_cr0,
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9e7186864542..2703eb734bca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
}
EXPORT_SYMBOL_GPL(load_pdptrs);

+static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+#ifdef CONFIG_X86_64
+ if (cr0 & 0xffffffff00000000UL)
+ return false;
+#endif
+
+ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+ return false;
+
+ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+ return false;
+
+ return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
+}
+
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
/*
@@ -952,21 +968,14 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);

+ if (!kvm_is_valid_cr0(vcpu, cr0))
+ return 1;
+
cr0 |= X86_CR0_ET;

-#ifdef CONFIG_X86_64
- if (cr0 & 0xffffffff00000000UL)
- return 1;
-#endif
-
+ /* Writes to CR0 reserved bits are ignored, even on Intel. */
cr0 &= ~CR0_RESERVED_BITS;

- if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
- return 1;
-
- if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
- return 1;
-
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
(cr0 & X86_CR0_PG)) {
@@ -11459,7 +11468,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return false;
}

- return kvm_is_valid_cr4(vcpu, sregs->cr4);
+ return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
+ kvm_is_valid_cr0(vcpu, sregs->cr0);
}

static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
--
2.41.0.162.gfafddb0af9-goog



2023-06-22 08:31:34

by Yu Zhang

Subject: Re: [PATCH 1/3] KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid

On Tue, Jun 13, 2023 at 01:30:35PM -0700, Sean Christopherson wrote:
[...]
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 0ecf4be2c6af..355b0e8c9b00 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -3037,6 +3037,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
>
> + /*
> + * KVM should never use VM86 to virtualize Real Mode when L2 is active,
> + * as using VM86 is unnecessary if unrestricted guest is enabled, and
> + * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
> + * should VM-Fail and KVM should reject userspace attempts to stuff

VM-Entry shall fail (with CR0.PG=0), because SECONDARY_EXEC_UNRESTRICTED_GUEST
will be cleared in L1's secondary_ctls_high MSR, and hence in its VMCS12?

When will an unrestricted L1 run L2 as a restricted one? Shadow on EPT (L0
uses EPT for L1 and L1 uses shadow paging for L2)?

> + * CR0.PG=0 when L2 is active.
> + */
> + WARN_ON_ONCE(is_guest_mode(vcpu));
> +

B.R.
Yu

2023-06-22 21:56:44

by Sean Christopherson

Subject: Re: [PATCH 1/3] KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid

On Thu, Jun 22, 2023, Yu Zhang wrote:
> On Tue, Jun 13, 2023 at 01:30:35PM -0700, Sean Christopherson wrote:
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 0ecf4be2c6af..355b0e8c9b00 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -3037,6 +3037,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
> > struct vcpu_vmx *vmx = to_vmx(vcpu);
> > struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
> >
> > + /*
> > + * KVM should never use VM86 to virtualize Real Mode when L2 is active,
> > + * as using VM86 is unnecessary if unrestricted guest is enabled, and
> > + * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
> > + * should VM-Fail and KVM should reject userspace attempts to stuff
>
> VM-Entry shall fail (with CR0.PG=0), because SECONDARY_EXEC_UNRESTRICTED_GUEST
> will be cleared in L1's secondary_ctls_high MSR, and hence in its VMCS12?

Yep.

>
> When will an unrestricted L1 run L2 as a restricted one? Shadow on EPT (L0
> uses EPT for L1 and L1 uses shadow paging for L2)?

Ya, the L1 VMM/hypervisor disabling EPT is the most likely scenario, i.e. the only
thing I would expect to encounter outside of testing. Other than testing, e.g. to
ensure compatibility with Nehalem CPUs (the only Intel CPUs with EPT but not URG),
I don't know of any reason to disable URG but not EPT.
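
For completeness, the architectural check at issue is the
IA32_VMX_CR0_FIXED{0,1} rule, with unrestricted guest relaxing PE/PG.
A rough, standalone sketch (illustrative only, not KVM's exact code;
the helper name is made up):

  #include <stdbool.h>
  #include <stdint.h>
  #include <asm/processor-flags.h>

  /*
   * Bits set in fixed0 must be 1 in CR0; bits clear in fixed1 must be 0.
   * Unrestricted guest relaxes the PE/PG requirements, which is why
   * CR0.PG=0 VM-Fails for an L2 that runs without URG.
   */
  static bool guest_cr0_is_valid(uint64_t cr0, uint64_t fixed0,
                                 uint64_t fixed1, bool unrestricted)
  {
          if (unrestricted)
                  fixed0 &= ~(uint64_t)(X86_CR0_PE | X86_CR0_PG);

          return (cr0 & fixed0) == fixed0 && !(cr0 & ~fixed1);
  }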