Date: 2019-07-22 04:27:03
From: Wanpeng Li

Subject: [PATCH 1/2] KVM: X86: Fix fpu state crash in kvm guest

From: Wanpeng Li <[email protected]>

The idea before commit 240c35a37 ("kvm: x86: Use task structs fpu field for user") was that we have the following FPU states:

            userspace (QEMU)             guest
---------------------------------------------------------------------------
            processor                    vcpu->arch.guest_fpu
>>> KVM_RUN: kvm_load_guest_fpu
            vcpu->arch.user_fpu          processor
>>> preempt out
            vcpu->arch.user_fpu          current->thread.fpu
>>> preempt in
            vcpu->arch.user_fpu          processor
>>> back to userspace
>>> kvm_put_guest_fpu
            processor                    vcpu->arch.guest_fpu
---------------------------------------------------------------------------

With the new lazy-FPU model we want to get the state back into the processor,
from current->thread.fpu, when the vCPU thread is scheduled back in. This
patch therefore performs the restore in kvm_arch_vcpu_load(), i.e. at sched-in
time, and demotes the check at guest entry to a WARN_ON_ONCE().
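
To make the invariant concrete, here is a minimal sketch of the check being
moved (illustrative only: the wrapper below does not exist in the tree, but
fpregs_assert_state_consistent(), test_thread_flag() and switch_fpu_return()
are the real lazy-FPU primitives used in the diff):

        /*
         * Run at sched-in (kvm_arch_vcpu_load) rather than only at guest
         * entry: if the scheduler deferred our FPU reload (TIF_NEED_FPU_LOAD),
         * put current->thread.fpu (which holds the guest state saved at
         * preemption, see the table above) back into the registers before
         * anything else can clobber or consume it.
         */
        static void restore_deferred_fpu_state(void)    /* hypothetical name */
        {
                fpregs_assert_state_consistent();
                if (test_thread_flag(TIF_NEED_FPU_LOAD))
                        switch_fpu_return();
        }

With that done at sched-in time, vcpu_enter_guest() can simply assert that no
reload is still pending, which is what the second hunk's WARN_ON_ONCE() does.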

Reported-by: Thomas Lambertz <[email protected]>
Reported-by: anthony <[email protected]>
Tested-by: anthony <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Thomas Lambertz <[email protected]>
Cc: anthony <[email protected]>
Cc: [email protected]
Fixes: 5f409e20b ("x86/fpu: Defer FPU state load until return to userspace")
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/x86.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf2afdf..bdcd250 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3306,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

         kvm_x86_ops->vcpu_load(vcpu, cpu);
 
+        fpregs_assert_state_consistent();
+        if (test_thread_flag(TIF_NEED_FPU_LOAD))
+                switch_fpu_return();
+
         /* Apply any externally detected TSC adjustments (due to suspend) */
         if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
                 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -7990,9 +7994,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
         trace_kvm_entry(vcpu->vcpu_id);
         guest_enter_irqoff();
 
-        fpregs_assert_state_consistent();
-        if (test_thread_flag(TIF_NEED_FPU_LOAD))
-                switch_fpu_return();
+        WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
 
         if (unlikely(vcpu->arch.switch_db_regs)) {
                 set_debugreg(0, 7);
--
2.7.4


Date: 2019-07-22 04:28:47
From: Wanpeng Li

Subject: [PATCH 2/2] KVM: X86: Dynamically allocate user_fpu

From: Wanpeng Li <[email protected]>

After reverting commit 240c35a3783a ("kvm: x86: Use task structs fpu field
for user"), struct kvm_vcpu is 19456 bytes on my server. PAGE_ALLOC_COSTLY_ORDER
(3) is the order at which allocations are deemed costly to service; with 4 KiB
pages, a 19456-byte object no longer fits in an order-2 (16 KiB) slab, so every
kvm_vcpu allocation needs at least order-3 pages. In serverless scenarios one
host can serve hundreds or thousands of firecracker/kata-container instances;
however, new instances fail to launch once memory becomes too fragmented to
allocate the kvm_vcpu struct on the host. This was observed in some cloud
providers' production environments.

This patch dynamically allocates user_fpu instead of embedding it in struct
kvm_vcpu; kvm_vcpu is now 15168 bytes on my Skylake server, small enough to
fit within an order-2 (16 KiB) slab again.
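
Both svm.c and vmx.c below grow the same allocate/free pattern; a condensed
sketch (here vcpu stands for &svm->vcpu or &vmx->vcpu, and the error labels
are abbreviated relative to the actual diffs):

        /*
         * One struct fpu per vCPU from the x86_fpu_cache slab;
         * GFP_KERNEL_ACCOUNT charges the allocation to the caller's memcg.
         * It is freed in the error unwind and in the *_free_vcpu() path.
         */
        vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
                                                GFP_KERNEL_ACCOUNT);
        if (!vcpu->arch.user_fpu) {
                err = -ENOMEM;
                goto free_partial_vcpu;         /* nothing else to unwind yet */
        }
        ...
        kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);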

Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              | 13 ++++++++++++-
 arch/x86/kvm/vmx/vmx.c          | 13 ++++++++++++-
 arch/x86/kvm/x86.c              |  4 ++--
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f938ac..7b0a4ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -616,7 +616,7 @@ struct kvm_vcpu_arch {
* "guest_fpu" state here contains the guest FPU context, with the
* host PRKU bits.
*/
- struct fpu user_fpu;
+ struct fpu *user_fpu;
struct fpu *guest_fpu;

u64 xcr0;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 19f69df..7eafc69 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2143,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
                 goto out;
         }
 
+        svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+                                                    GFP_KERNEL_ACCOUNT);
+        if (!svm->vcpu.arch.user_fpu) {
+                printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+                err = -ENOMEM;
+                goto free_partial_svm;
+        }
+
         svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
                                                      GFP_KERNEL_ACCOUNT);
         if (!svm->vcpu.arch.guest_fpu) {
                 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
                 err = -ENOMEM;
-                goto free_partial_svm;
+                goto free_user_fpu;
         }
 
         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
@@ -2211,6 +2219,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
         kvm_vcpu_uninit(&svm->vcpu);
 free_svm:
         kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
+free_user_fpu:
+        kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
 free_partial_svm:
         kmem_cache_free(kvm_vcpu_cache, svm);
 out:
@@ -2241,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
         __free_page(virt_to_page(svm->nested.hsave));
         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
         kvm_vcpu_uninit(vcpu);
+        kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
         kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
         kmem_cache_free(kvm_vcpu_cache, svm);
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a279447..074385c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6598,6 +6598,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
         free_loaded_vmcs(vmx->loaded_vmcs);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
+        kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
         kmem_cache_free(kvm_vcpu_cache, vmx);
 }
@@ -6613,12 +6614,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         if (!vmx)
                 return ERR_PTR(-ENOMEM);
 
+        vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+                                                    GFP_KERNEL_ACCOUNT);
+        if (!vmx->vcpu.arch.user_fpu) {
+                printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+                err = -ENOMEM;
+                goto free_partial_vcpu;
+        }
+
         vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
                                                      GFP_KERNEL_ACCOUNT);
         if (!vmx->vcpu.arch.guest_fpu) {
                 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
                 err = -ENOMEM;
-                goto free_partial_vcpu;
+                goto free_user_fpu;
         }
 
         vmx->vpid = allocate_vpid();
@@ -6721,6 +6730,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 free_vcpu:
         free_vpid(vmx->vpid);
         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
+free_user_fpu:
+        kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 free_partial_vcpu:
         kmem_cache_free(kvm_vcpu_cache, vmx);
         return ERR_PTR(err);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bdcd250..09dbc93 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8272,7 +8272,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
         fpregs_lock();
 
-        copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+        copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
         /* PKRU is separately restored in kvm_x86_ops->run. */
         __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
                                 ~XFEATURE_MASK_PKRU);
@@ -8289,7 +8289,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
         fpregs_lock();
 
         copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
-        copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+        copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
         fpregs_mark_activate();
         fpregs_unlock();
--
2.7.4