Date: Fri, 24 Apr 2015 06:57:24 +0800
From: Wanpeng Li
To: Liang Li
Cc: Rik van Riel, kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	gleb@kernel.org, pbonzini@redhat.com, mtosatt@redhat.com,
	tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, x86@kernel.org,
	joro@8bytes.org, yang.z.zhang@intel.com, Liang Li, Xudong Hao
Subject: Re: [v6] kvm/fpu: Enable fully eager restore kvm FPU
Message-ID: <20150423225724.GA5763@kernel>
References: <1429823583-3226-1-git-send-email-liang.z.li@intel.com>
In-Reply-To: <1429823583-3226-1-git-send-email-liang.z.li@intel.com>

Cc Rik, who is doing similar work. :)

On Fri, Apr 24, 2015 at 05:13:03AM +0800, Liang Li wrote:
>Remove the lazy FPU logic and use eager FPU entirely. Eager FPU does
>not introduce a performance regression, and it simplifies the code.
>
>When compiling a kernel on Westmere, eager FPU is about 0.4% faster
>than lazy FPU.
>
>Signed-off-by: Liang Li
>Signed-off-by: Xudong Hao
>---
> arch/x86/include/asm/kvm_host.h |  1 -
> arch/x86/kvm/svm.c              | 22 ++----------
> arch/x86/kvm/vmx.c              | 74 +++--------------------------------------
> arch/x86/kvm/x86.c              |  8 +----
> include/linux/kvm_host.h        |  2 --
> 5 files changed, 9 insertions(+), 98 deletions(-)
>
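For readers who have not followed the earlier versions, the background in
one paragraph: under the lazy scheme this patch removes, KVM keeps CR0.TS
set and intercepts #NM, so guest FPU state is loaded only when the guest
actually touches the FPU; with eager FPU the state is loaded on every
guest entry. A standalone toy model of the two schemes (simplified and
hypothetical; struct toy_vcpu and the helpers below are illustrative,
not the real KVM code):

/* Toy model of lazy vs. eager guest-FPU loading; not KVM code. */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	bool fpu_active;		/* lazy only: guest FPU loaded? */
	unsigned long fpu_reloads;
};

static void load_guest_fpu(struct toy_vcpu *v)
{
	v->fpu_reloads++;
	printf("guest FPU state loaded (reload #%lu)\n", v->fpu_reloads);
}

/* Lazy (removed here): keep CR0.TS set so the guest's first FPU
 * instruction faults with #NM, and load the state in the #NM handler. */
static void lazy_on_nm_exit(struct toy_vcpu *v)
{
	if (!v->fpu_active) {
		v->fpu_active = true;
		load_guest_fpu(v);
	}
}

/* Eager (this patch): load the state on every entry; no #NM intercept
 * and no fpu_active bookkeeping needed. */
static void eager_enter_guest(struct toy_vcpu *v)
{
	load_guest_fpu(v);
}

int main(void)
{
	struct toy_vcpu v = { 0 };

	lazy_on_nm_exit(&v);	/* lazy: one extra vmexit on first use */
	eager_enter_guest(&v);	/* eager: one load on every entry */
	return 0;
}

The 0.4% figure in the commit message measures exactly this trade-off:
eager pays an unconditional state load per entry, while lazy pays an
extra #NM vmexit on the first FPU use after each reload.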
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index dea2e7e..5d84cc9 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -743,7 +743,6 @@ struct kvm_x86_ops {
> 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
> 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
> 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
>-	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
> 
> 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
> 
>diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>index ce741b8..1b3b29b 100644
>--- a/arch/x86/kvm/svm.c
>+++ b/arch/x86/kvm/svm.c
>@@ -1087,7 +1087,6 @@ static void init_vmcb(struct vcpu_svm *svm)
> 	struct vmcb_control_area *control = &svm->vmcb->control;
> 	struct vmcb_save_area *save = &svm->vmcb->save;
> 
>-	svm->vcpu.fpu_active = 1;
> 	svm->vcpu.arch.hflags = 0;
> 
> 	set_cr_intercept(svm, INTERCEPT_CR0_READ);
>@@ -1529,15 +1528,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
> 	ulong gcr0 = svm->vcpu.arch.cr0;
> 	u64 *hcr0 = &svm->vmcb->save.cr0;
> 
>-	if (!svm->vcpu.fpu_active)
>-		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
>-	else
>-		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>-			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
>+	*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>+		| (gcr0 & SVM_CR0_SELECTIVE_MASK);
> 
> 	mark_dirty(svm->vmcb, VMCB_CR);
> 
>-	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
>+	if (gcr0 == *hcr0) {
> 		clr_cr_intercept(svm, INTERCEPT_CR0_READ);
> 		clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
> 	} else {
>@@ -1568,8 +1564,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> 	if (!npt_enabled)
> 		cr0 |= X86_CR0_PG | X86_CR0_WP;
> 
>-	if (!vcpu->fpu_active)
>-		cr0 |= X86_CR0_TS;
> 	/*
> 	 * re-enable caching here because the QEMU bios
> 	 * does not do it - this results in some delay at
>@@ -1795,7 +1789,6 @@ static void svm_fpu_activate(struct kvm_vcpu *vcpu)
> 
> 	clr_exception_intercept(svm, NM_VECTOR);
> 
>-	svm->vcpu.fpu_active = 1;
> 	update_cr0_intercept(svm);
> }
> 
>@@ -4139,14 +4132,6 @@ static bool svm_has_wbinvd_exit(void)
> 	return true;
> }
> 
>-static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
>-{
>-	struct vcpu_svm *svm = to_svm(vcpu);
>-
>-	set_exception_intercept(svm, NM_VECTOR);
>-	update_cr0_intercept(svm);
>-}
>-
> #define PRE_EX(exit)  { .exit_code = (exit), \
> 			.stage = X86_ICPT_PRE_EXCEPT, }
> #define POST_EX(exit) { .exit_code = (exit), \
>@@ -4381,7 +4366,6 @@ static struct kvm_x86_ops svm_x86_ops = {
> 	.cache_reg = svm_cache_reg,
> 	.get_rflags = svm_get_rflags,
> 	.set_rflags = svm_set_rflags,
>-	.fpu_deactivate = svm_fpu_deactivate,
> 
> 	.tlb_flush = svm_flush_tlb,
> 
>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>index f5e8dce..811a666 100644
>--- a/arch/x86/kvm/vmx.c
>+++ b/arch/x86/kvm/vmx.c
>@@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
> 	u32 eb;
> 
> 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
>-	     (1u << NM_VECTOR) | (1u << DB_VECTOR);
>+	     (1u << DB_VECTOR);
> 	if ((vcpu->guest_debug &
> 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
> 	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
>@@ -1576,8 +1576,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
> 		eb = ~0;
> 	if (enable_ept)
> 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
>-	if (vcpu->fpu_active)
>-		eb &= ~(1u << NM_VECTOR);
> 
> 	/* When we are running a nested L2 guest and L1 specified for it a
> 	 * certain exception bitmap, we must trap the same exceptions and pass
>@@ -1961,9 +1959,6 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
> {
> 	ulong cr0;
> 
>-	if (vcpu->fpu_active)
>-		return;
>-	vcpu->fpu_active = 1;
> 	cr0 = vmcs_readl(GUEST_CR0);
> 	cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
> 	cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
>@@ -1994,33 +1989,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
> 		(fields->cr4_read_shadow & fields->cr4_guest_host_mask);
> }
> 
>-static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
>-{
>-	/* Note that there is no vcpu->fpu_active = 0 here. The caller must
>-	 * set this *before* calling this function.
>-	 */
>-	vmx_decache_cr0_guest_bits(vcpu);
>-	vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
>-	update_exception_bitmap(vcpu);
>-	vcpu->arch.cr0_guest_owned_bits = 0;
>-	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>-	if (is_guest_mode(vcpu)) {
>-		/*
>-		 * L1's specified read shadow might not contain the TS bit,
>-		 * so now that we turned on shadowing of this bit, we need to
>-		 * set this bit of the shadow. Like in nested_vmx_run we need
>-		 * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
>-		 * up-to-date here because we just decached cr0.TS (and we'll
>-		 * only update vmcs12->guest_cr0 on nested exit).
>-		 */
>-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>-		vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
>-			(vcpu->arch.cr0 & X86_CR0_TS);
>-		vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
>-	} else
>-		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
>-}
>-
> static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
> {
> 	unsigned long rflags, save_rflags;
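The CR0_GUEST_HOST_MASK / CR0_READ_SHADOW pair that the removed
vmx_fpu_deactivate() manipulates deserves a note: bits set in the mask
are host-owned, and a guest read of CR0 sees the shadow value for those
bits instead of the hardware value. The composition has the same shape
as the nested_read_cr4() expression quoted above; a minimal model (the
function name is illustrative, not a real vmcs accessor):

/* CR0 as the guest observes it: host-owned bits (mask set) come from
 * the read shadow, guest-owned bits from the real guest register. */
static unsigned long guest_visible_cr0(unsigned long guest_cr0,
				       unsigned long guest_host_mask,
				       unsigned long read_shadow)
{
	return (guest_cr0 & ~guest_host_mask) |
	       (read_shadow & guest_host_mask);
}

This is why the removed code both set TS in GUEST_CR0 and patched up
CR0_READ_SHADOW: the hardware bit must stay set to force the #NM trap,
while the guest is shown whatever TS value it last wrote.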
>@@ -3575,9 +3543,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> 	if (enable_ept)
> 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
> 
>-	if (!vcpu->fpu_active)
>-		hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
>-
> 	vmcs_writel(CR0_READ_SHADOW, cr0);
> 	vmcs_writel(GUEST_CR0, hw_cr0);
> 	vcpu->arch.cr0 = cr0;
>@@ -5066,11 +5031,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
> 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
> 		return 1; /* already handled by vmx_vcpu_run() */
> 
>-	if (is_no_device(intr_info)) {
>-		vmx_fpu_activate(vcpu);
>-		return 1;
>-	}
>-
> 	if (is_invalid_opcode(intr_info)) {
> 		if (is_guest_mode(vcpu)) {
> 			kvm_queue_exception(vcpu, UD_VECTOR);
>@@ -5263,22 +5223,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
> 	return kvm_set_cr4(vcpu, val);
> }
> 
>-/* called to set cr0 as approriate for clts instruction exit. */
>-static void handle_clts(struct kvm_vcpu *vcpu)
>-{
>-	if (is_guest_mode(vcpu)) {
>-		/*
>-		 * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS
>-		 * but we did (!fpu_active). We need to keep GUEST_CR0.TS on,
>-		 * just pretend it's off (also in arch.cr0 for fpu_activate).
>-		 */
>-		vmcs_writel(CR0_READ_SHADOW,
>-			vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS);
>-		vcpu->arch.cr0 &= ~X86_CR0_TS;
>-	} else
>-		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
>-}
>-
> static int handle_cr(struct kvm_vcpu *vcpu)
> {
> 	unsigned long exit_qualification, val;
>@@ -5321,10 +5265,6 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> 		}
> 		break;
> 	case 2: /* clts */
>-		handle_clts(vcpu);
>-		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
>-		skip_emulated_instruction(vcpu);
>-		vmx_fpu_activate(vcpu);
> 		return 1;
> 	case 1: /*mov from cr*/
> 		switch (cr) {
>@@ -9856,19 +9796,16 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
> 	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
> 	/*
>-	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
>-	 * actually changed, because it depends on the current state of
>-	 * fpu_active (which may have changed).
> 	 * Note that vmx_set_cr0 refers to efer set above.
> 	 */
> 	vmx_set_cr0(vcpu, vmcs12->host_cr0);
> 	/*
>-	 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
>-	 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
>-	 * but we also need to update cr0_guest_host_mask and exception_bitmap.
>+	 * If we did fpu_activate() during L2's run, we need to apply the same
>+	 * changes to L1's vmcs. We just set cr0 correctly, but we also need
>+	 * to update cr0_guest_host_mask and exception_bitmap.
> 	 */
> 	update_exception_bitmap(vcpu);
>-	vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
>+	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
> 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
> 
> 	/*
>@@ -10177,7 +10114,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
> 	.cache_reg = vmx_cache_reg,
> 	.get_rflags = vmx_get_rflags,
> 	.set_rflags = vmx_set_rflags,
>-	.fpu_deactivate = vmx_fpu_deactivate,
> 
> 	.tlb_flush = vmx_flush_tlb,
> 
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index e1a8126..f7597fe 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -6236,10 +6236,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 			r = 0;
> 			goto out;
> 		}
>-		if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
>-			vcpu->fpu_active = 0;
>-			kvm_x86_ops->fpu_deactivate(vcpu);
>-		}
> 		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
> 			/* Page is swapped out. Do synthetic halt */
> 			vcpu->arch.apf.halted = true;
>@@ -6296,8 +6292,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 	preempt_disable();
> 
> 	kvm_x86_ops->prepare_guest_switch(vcpu);
>-	if (vcpu->fpu_active)
>-		kvm_load_guest_fpu(vcpu);
>+	kvm_load_guest_fpu(vcpu);
> 	kvm_load_guest_xcr0(vcpu);
> 
> 	vcpu->mode = IN_GUEST_MODE;
>@@ -7038,7 +7033,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
> 	fpu_save_init(&vcpu->arch.guest_fpu);
> 	__kernel_fpu_end();
> 	++vcpu->stat.fpu_reload;
>-	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
> 	trace_kvm_fpu(0);
> }
> 
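The hunk above retires the only maker of KVM_REQ_DEACTIVATE_FPU (the
#define itself goes away in the next hunk). For reference, the request
mechanism it used works roughly like this sketch of the
kvm_make_request()/kvm_check_request() pattern visible above
(simplified; the real helpers use atomic bitops on a per-vcpu bitmap,
and the toy names are not the real API):

/* Toy model of KVM's per-vcpu request bits: a producer flags deferred
 * work, and the vcpu loop tests-and-clears it before guest entry. */
#include <stdbool.h>

#define TOY_REQ_DEACTIVATE_FPU	10	/* the request this patch removes */

static void toy_make_request(int req, unsigned long *requests)
{
	*requests |= 1UL << req;	/* real code: an atomic set_bit() */
}

static bool toy_check_request(int req, unsigned long *requests)
{
	if (*requests & (1UL << req)) {	/* real code: test + clear_bit() */
		*requests &= ~(1UL << req);
		return true;
	}
	return false;
}

With eager FPU there is no deferred deactivate step left to request:
kvm_put_guest_fpu() no longer schedules work for the next entry, which
is why both the request bit and the fpu_active flag can go.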
>diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>index 82af5d0..c82f798 100644
>--- a/include/linux/kvm_host.h
>+++ b/include/linux/kvm_host.h
>@@ -118,7 +118,6 @@ static inline bool is_error_page(struct page *page)
> #define KVM_REQ_MMU_SYNC 7
> #define KVM_REQ_CLOCK_UPDATE 8
> #define KVM_REQ_KICK 9
>-#define KVM_REQ_DEACTIVATE_FPU 10
> #define KVM_REQ_EVENT 11
> #define KVM_REQ_APF_HALT 12
> #define KVM_REQ_STEAL_UPDATE 13
>@@ -228,7 +227,6 @@ struct kvm_vcpu {
> 	struct mutex mutex;
> 	struct kvm_run *run;
> 
>-	int fpu_active;
> 	int guest_fpu_loaded, guest_xcr0_loaded;
> 	wait_queue_head_t wq;
> 	struct pid *pid;
>-- 
>1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/