2017-11-10 09:52:11

by Wanpeng Li

[permalink] [raw]
Subject: [PATCH v3 0/4] KVM: Paravirt remote TLB flush

Remote flushing api's does a busy wait which is fine in bare-metal
scenario. But with-in the guest, the vcpus might have been pre-empted
or blocked. In this scenario, the initator vcpu would end up
busy-waiting for a long amount of time.

This patch set implements para-virt flush tlbs making sure that it
does not wait for vcpus that are sleeping. And all the sleeping vcpus
flush the tlb on guest enter. Idea was discussed here:
https://lkml.org/lkml/2012/2/20/157

The best result is achieved when we're overcommiting the host by running
multiple vCPUs on each pCPU. In this case PV tlb flush avoids touching
vCPUs which are not scheduled and avoid the wait on the main CPU.

In addition, thanks for commit 9e52fc2b50d ("x86/mm: Enable RCU based
page table freeing (CONFIG_HAVE_RCU_TABLE_FREE=y)")

Test on a Haswell i7 desktop 4 cores (2HT), so 8 pCPUs, running ebizzy in
one linux guest.

ebizzy -M
vanilla optimized boost
8 vCPUs 10152 10083 -0.68%
16 vCPUs 1224 4866 297.5%
24 vCPUs 1109 3871 249%
32 vCPUs 1025 3375 229.3%

Note: The patchset is rebased against "locking/qspinlock/x86: Avoid
test-and-set when PV_DEDICATED is set" v3

v2 -> v3:
* percpu cpumask

v1 -> v2:
* a new CPUID feature bit
* fix cmpxchg check
* use kvm_vcpu_flush_tlb() to get the statistics right
* just OR the KVM_VCPU_PREEMPTED in kvm_steal_time_set_preempted
* add a new bool argument to kvm_x86_ops->tlb_flush
* __cpumask_clear_cpu() instead of cpumask_clear_cpu()
* not put cpumask_t on stack
* rebase the patchset against "locking/qspinlock/x86: Avoid
test-and-set when PV_DEDICATED is set" v3

Wanpeng Li (4):
KVM: Add vCPU running/preempted state
KVM: Add paravirt remote TLB flush
KVM: X86: introduce invalidate_gpa argument to tlb flush
KVM: Add flush_on_enter before guest enter

Documentation/virtual/kvm/cpuid.txt | 10 +++++++++
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/include/uapi/asm/kvm_para.h | 6 +++++
arch/x86/kernel/kvm.c | 43 ++++++++++++++++++++++++++++++++++--
arch/x86/kvm/cpuid.c | 3 ++-
arch/x86/kvm/svm.c | 14 ++++++------
arch/x86/kvm/vmx.c | 21 +++++++++---------
arch/x86/kvm/x86.c | 24 ++++++++++++--------
8 files changed, 93 insertions(+), 30 deletions(-)

--
2.7.4


From 1583662152621415653@xxx Fri Nov 10 07:12:00 +0000 2017
X-GM-THRID: 1583661782195795507
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread


2017-11-10 09:51:02

by Wanpeng Li

[permalink] [raw]
Subject: [PATCH v3 3/4] KVM: X86: introduce invalidate_gpa argument to tlb flush

From: Wanpeng Li <[email protected]>

Introduce a new bool invalidate_gpa argument to kvm_x86_ops->tlb_flush,
it will be used by later patches to just flush guest tlb.

Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/svm.c | 14 +++++++-------
arch/x86/kvm/vmx.c | 21 +++++++++++----------
arch/x86/kvm/x86.c | 6 +++---
4 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c73e493..b4f7bb1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -952,7 +952,7 @@ struct kvm_x86_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);

- void (*tlb_flush)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);

void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0e68f0b..efaf95f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -285,7 +285,7 @@ static int vgif = true;
module_param(vgif, int, 0444);

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@ -2032,7 +2032,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;

if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);

vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -2368,7 +2368,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,

svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -3033,7 +3033,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
svm->nested.intercept = nested_vmcb->control.intercept;

- svm_flush_tlb(&svm->vcpu);
+ svm_flush_tlb(&svm->vcpu, true);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -4755,7 +4755,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}

-static void svm_flush_tlb(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);

@@ -5046,7 +5046,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)

svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}

static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5060,7 +5060,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);

- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}

static int is_disabled(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e5bea5e..17d13d2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4113,9 +4113,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu)

#endif

-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+ bool invalidate_gpa)
{
- if (enable_ept) {
+ if (enable_ept && (invalidate_gpa || !enable_vpid)) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
@@ -4124,15 +4125,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
}
}

-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}

static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
{
if (enable_ept)
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}

static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -4330,7 +4331,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
ept_load_pdptrs(vcpu);
}

- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
vmcs_writel(GUEST_CR3, guest_cr3);
}

@@ -7962,7 +7963,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

- __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
nested_vmx_succeed(vcpu);

return kvm_skip_emulated_instruction(vcpu);
@@ -10614,11 +10615,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
}
} else {
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}

}
@@ -11324,7 +11325,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* L1's vpid. TODO: move to a more elaborate solution, giving
* each L2 its own vpid and exposing the vpid feature to L1.
*/
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
/* Restore posted intr vector. */
if (nested_cpu_has_posted_intr(vmcs12))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46d4158..2b2cc99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6737,10 +6737,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}

-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops->tlb_flush(vcpu);
+ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
}

void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@ -6797,7 +6797,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu);
+ kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
--
2.7.4


From 1583661793844982516@xxx Fri Nov 10 07:06:18 +0000 2017
X-GM-THRID: 1583661793844982516
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread