2019-04-12 07:57:58

by WANG Chao

[permalink] [raw]
Subject: [PATCH] x86/kvm: move kvm_load/put_guest_xcr0 into atomic context

guest xcr0 could leak into host when MCE happens in guest mode, because
do_machine_check() could schedule out at a few places.

For example:

kvm_load_guest_xcr0
...
kvm_x86_ops->run(vcpu) {
vmx_vcpu_run
vmx_complete_atomic_exit
kvm_machine_check
do_machine_check
do_memory_failure
memory_failure
lock_page

In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule
out, host cpu has guest xcr0 loaded (0xff).

In __switch_to {
switch_fpu_finish
copy_kernel_to_fpregs
XRSTORS

If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will
generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in
and tries to reinitialize fpu by restoring init fpu state. Same story as
last #GP, except we get DOUBLE FAULT this time.

Cc: [email protected]
Signed-off-by: WANG Chao <[email protected]>
---
arch/x86/kvm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 4 ++++
arch/x86/kvm/x86.c | 10 ++++------
arch/x86/kvm/x86.h | 2 ++
4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e0a791c3d4fc..2bf73076de7f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5621,6 +5621,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;

clgi();
+ kvm_load_guest_xcr0(vcpu);

/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -5766,6 +5767,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);

+ kvm_put_guest_xcr0(vcpu);
stgi();

/* Any pending NMI will happen here */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab432a930ae8..3157598c52f1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);

+ kvm_load_guest_xcr0(vcpu);
+
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
@@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}

+ kvm_put_guest_xcr0(vcpu);
+
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabaf..22f66e9a7dc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
@@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 1;
}
}
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);

-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);

static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
@@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}

- kvm_load_guest_xcr0(vcpu);
-
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
@@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();

- kvm_put_guest_xcr0(vcpu);
-
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 28406aa1136d..aedc5d0d4989 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}

+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
#endif
--
2.21.0


2019-04-12 08:08:27

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm: move kvm_load/put_guest_xcr0 into atomic context

On 12/04/19 09:55, WANG Chao wrote:
> guest xcr0 could leak into host when MCE happens in guest mode. Because
> do_machine_check() could schedule out at a few places.
>
> For example:
>
> kvm_load_guest_xcr0
> ...
> kvm_x86_ops->run(vcpu) {
> vmx_vcpu_run
> vmx_complete_atomic_exit
> kvm_machine_check
> do_machine_check
> do_memory_failure
> memory_failure
> lock_page
>
> In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule
> out, host cpu has guest xcr0 loaded (0xff).
>
> In __switch_to {
> switch_fpu_finish
> copy_kernel_to_fpregs
> XRSTORS
>
> If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will
> generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in
> and tries to reinitialize fpu by restoring init fpu state. Same story as
> last #GP, except we get DOUBLE FAULT this time.
>
> Cc: [email protected]
> Signed-off-by: WANG Chao <[email protected]>

Thanks for the detailed commit message. Patch queued!

Paolo

> ---
> arch/x86/kvm/svm.c | 2 ++
> arch/x86/kvm/vmx/vmx.c | 4 ++++
> arch/x86/kvm/x86.c | 10 ++++------
> arch/x86/kvm/x86.h | 2 ++
> 4 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e0a791c3d4fc..2bf73076de7f 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -5621,6 +5621,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> svm->vmcb->save.cr2 = vcpu->arch.cr2;
>
> clgi();
> + kvm_load_guest_xcr0(vcpu);
>
> /*
> * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> @@ -5766,6 +5767,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
> kvm_before_interrupt(&svm->vcpu);
>
> + kvm_put_guest_xcr0(vcpu);
> stgi();
>
> /* Any pending NMI will happen here */
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ab432a930ae8..3157598c52f1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> vmx_set_interrupt_shadow(vcpu, 0);
>
> + kvm_load_guest_xcr0(vcpu);
> +
> if (static_cpu_has(X86_FEATURE_PKU) &&
> kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> vcpu->arch.pkru != vmx->host_pkru)
> @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> __write_pkru(vmx->host_pkru);
> }
>
> + kvm_put_guest_xcr0(vcpu);
> +
> vmx->nested.nested_run_pending = 0;
> vmx->idt_vectoring_info = 0;
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabaf..22f66e9a7dc5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
> }
> EXPORT_SYMBOL_GPL(kvm_lmsw);
>
> -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> {
> if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
> !vcpu->guest_xcr0_loaded) {
> @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> vcpu->guest_xcr0_loaded = 1;
> }
> }
> +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
>
> -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> {
> if (vcpu->guest_xcr0_loaded) {
> if (vcpu->arch.xcr0 != host_xcr0)
> @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> vcpu->guest_xcr0_loaded = 0;
> }
> }
> +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
>
> static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
> {
> @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> goto cancel_injection;
> }
>
> - kvm_load_guest_xcr0(vcpu);
> -
> if (req_immediate_exit) {
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> kvm_x86_ops->request_immediate_exit(vcpu);
> @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> vcpu->mode = OUTSIDE_GUEST_MODE;
> smp_wmb();
>
> - kvm_put_guest_xcr0(vcpu);
> -
> kvm_before_interrupt(vcpu);
> kvm_x86_ops->handle_external_intr(vcpu);
> kvm_after_interrupt(vcpu);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 28406aa1136d..aedc5d0d4989 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> __this_cpu_write(current_vcpu, NULL);
> }
>
> +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
> +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
> #endif
>

2019-04-28 01:05:25

by Wanpeng Li

[permalink] [raw]
Subject: Re: [PATCH] x86/kvm: move kvm_load/put_guest_xcr0 into atomic context

On Fri, 12 Apr 2019 at 16:09, Paolo Bonzini <[email protected]> wrote:
>
> On 12/04/19 09:55, WANG Chao wrote:
> > guest xcr0 could leak into host when MCE happens in guest mode. Because
> > do_machine_check() could schedule out at a few places.
> >
> > For example:
> >
> > kvm_load_guest_xcr0
> > ...
> > kvm_x86_ops->run(vcpu) {
> > vmx_vcpu_run
> > vmx_complete_atomic_exit
> > kvm_machine_check
> > do_machine_check
> > do_memory_failure
> > memory_failure
> > lock_page

Maybe this should not be counted to guest time in guest_exit_irqoff()?

Regards,
Wanpeng Li

> >
> > In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After schedule
> > out, host cpu has guest xcr0 loaded (0xff).
> >
> > In __switch_to {
> > switch_fpu_finish
> > copy_kernel_to_fpregs
> > XRSTORS
> >
> > If any bit i in XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will
> > generate #GP (In this case, bit 9). Then ex_handler_fprestore kicks in
> > and tries to reinitialize fpu by restoring init fpu state. Same story as
> > last #GP, except we get DOUBLE FAULT this time.
> >
> > Cc: [email protected]
> > Signed-off-by: WANG Chao <[email protected]>
>
> Thanks for the detailed commit message. Patch queued!.
>
> Paolo
>
> > ---
> > arch/x86/kvm/svm.c | 2 ++
> > arch/x86/kvm/vmx/vmx.c | 4 ++++
> > arch/x86/kvm/x86.c | 10 ++++------
> > arch/x86/kvm/x86.h | 2 ++
> > 4 files changed, 12 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index e0a791c3d4fc..2bf73076de7f 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -5621,6 +5621,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> > svm->vmcb->save.cr2 = vcpu->arch.cr2;
> >
> > clgi();
> > + kvm_load_guest_xcr0(vcpu);
> >
> > /*
> > * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> > @@ -5766,6 +5767,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> > if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
> > kvm_before_interrupt(&svm->vcpu);
> >
> > + kvm_put_guest_xcr0(vcpu);
> > stgi();
> >
> > /* Any pending NMI will happen here */
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index ab432a930ae8..3157598c52f1 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> > vmx_set_interrupt_shadow(vcpu, 0);
> >
> > + kvm_load_guest_xcr0(vcpu);
> > +
> > if (static_cpu_has(X86_FEATURE_PKU) &&
> > kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
> > vcpu->arch.pkru != vmx->host_pkru)
> > @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > __write_pkru(vmx->host_pkru);
> > }
> >
> > + kvm_put_guest_xcr0(vcpu);
> > +
> > vmx->nested.nested_run_pending = 0;
> > vmx->idt_vectoring_info = 0;
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 099b851dabaf..22f66e9a7dc5 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
> > }
> > EXPORT_SYMBOL_GPL(kvm_lmsw);
> >
> > -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> > +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> > {
> > if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
> > !vcpu->guest_xcr0_loaded) {
> > @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
> > vcpu->guest_xcr0_loaded = 1;
> > }
> > }
> > +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
> >
> > -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> > +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> > {
> > if (vcpu->guest_xcr0_loaded) {
> > if (vcpu->arch.xcr0 != host_xcr0)
> > @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> > vcpu->guest_xcr0_loaded = 0;
> > }
> > }
> > +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
> >
> > static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
> > {
> > @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > goto cancel_injection;
> > }
> >
> > - kvm_load_guest_xcr0(vcpu);
> > -
> > if (req_immediate_exit) {
> > kvm_make_request(KVM_REQ_EVENT, vcpu);
> > kvm_x86_ops->request_immediate_exit(vcpu);
> > @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > vcpu->mode = OUTSIDE_GUEST_MODE;
> > smp_wmb();
> >
> > - kvm_put_guest_xcr0(vcpu);
> > -
> > kvm_before_interrupt(vcpu);
> > kvm_x86_ops->handle_external_intr(vcpu);
> > kvm_after_interrupt(vcpu);
> > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> > index 28406aa1136d..aedc5d0d4989 100644
> > --- a/arch/x86/kvm/x86.h
> > +++ b/arch/x86/kvm/x86.h
> > @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> > __this_cpu_write(current_vcpu, NULL);
> > }
> >
> > +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
> > +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
> > #endif
> >
>