Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752878AbcCGOQH (ORCPT ); Mon, 7 Mar 2016 09:16:07 -0500 Received: from mail-wm0-f66.google.com ([74.125.82.66]:34480 "EHLO mail-wm0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752825AbcCGOQA (ORCPT ); Mon, 7 Mar 2016 09:16:00 -0500 From: Paolo Bonzini To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org Cc: Takuya Yoshikawa , Xiao Guangrong Subject: [PATCH v2 0/9] cleanup around kvm_sync_page, and a few micro-optimizations Date: Mon, 7 Mar 2016 15:15:45 +0100 Message-Id: <1457360155-9610-1-git-send-email-pbonzini@redhat.com> X-Mailer: git-send-email 1.8.3.1 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 26107 Lines: 811 Having committed the ubsan fixes, this are the cleanups that are left. Compared to v1, I have fixed the patch to coalesce page zapping after mmu_sync_children (as requested by Takuya and Guangrong), and I have rewritten is_last_gpte again in an even simpler way. Paolo Paolo Bonzini (9): KVM: MMU: introduce kvm_mmu_flush_or_zap KVM: MMU: move TLB flush out of __kvm_sync_page KVM: MMU: use kvm_sync_page in kvm_sync_pages KVM: MMU: cleanup __kvm_sync_page and its callers KVM: MMU: invert return value of mmu.sync_page and *kvm_sync_page* KVM: MMU: move zap/flush to kvm_mmu_get_page KVM: MMU: coalesce more page zapping in mmu_sync_children KVM: MMU: simplify is_last_gpte KVM: MMU: micro-optimize gpte_access arch/x86/include/asm/kvm_host.h | 8 +- arch/x86/kvm/mmu.c | 167 ++++++++++++++++++++-------------------- arch/x86/kvm/paging_tmpl.h | 11 ++- 3 files changed, 92 insertions(+), 94 deletions(-) -- 1.8.3.1 >From 2e741e7bb4204c43adbabfa1b32a854cb3935140 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 11:21:55 +0100 Subject: [PATCH 1/9] KVM: MMU: introduce kvm_mmu_flush_or_zap This is a generalization of mmu_pte_write_flush_tlb, that also takes care of calling kvm_mmu_commit_zap_page. The next patches will introduce more uses. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0a4dc9b54181..6dae2356b9f5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4188,11 +4188,14 @@ static bool need_remote_flush(u64 old, u64 new) return (old & ~new & PT64_PERM_MASK) != 0; } -static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, - bool remote_flush, bool local_flush) +static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, + struct list_head *invalid_list, + bool remote_flush, bool local_flush) { - if (zap_page) + if (!list_empty(invalid_list)) { + kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); return; + } if (remote_flush) kvm_flush_remote_tlbs(vcpu->kvm); @@ -4320,7 +4323,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; - bool remote_flush, local_flush, zap_page; + bool remote_flush, local_flush; union kvm_mmu_page_role mask = { }; mask.cr0_wp = 1; @@ -4337,7 +4340,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; - zap_page = remote_flush = local_flush = false; + remote_flush = local_flush = false; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); @@ -4357,8 +4360,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { - zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, - &invalid_list); + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_flooded; continue; } @@ -4380,8 +4382,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ++spte; } } - mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush); kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); spin_unlock(&vcpu->kvm->mmu_lock); } -- 1.8.3.1 >From c3c7e29240d21fd80d28ff26ec544e31beadff54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 10:03:27 +0100 Subject: [PATCH 2/9] KVM: MMU: move TLB flush out of __kvm_sync_page By doing this, kvm_sync_pages can use __kvm_sync_page instead of reinventing it. Because of kvm_mmu_flush_or_zap, the code does not end up being more complex than before, and more cleanups to kvm_sync_pages will come in the next patches. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 53 ++++++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6dae2356b9f5..45a8a0605a09 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1932,10 +1932,24 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } +static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, + struct list_head *invalid_list, + bool remote_flush, bool local_flush) +{ + if (!list_empty(invalid_list)) { + kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); + return; + } + + if (remote_flush) + kvm_flush_remote_tlbs(vcpu->kvm); + else if (local_flush) + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); +} + static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { @@ -1943,8 +1957,7 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, int ret; ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); - if (ret) - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret); return ret; } @@ -1975,17 +1988,11 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); kvm_unlink_unsync_page(vcpu->kvm, s); - if ((s->role.cr4_pae != !!is_pae(vcpu)) || - (vcpu->arch.mmu.sync_page(vcpu, s))) { - kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); - continue; - } - flush = true; + if (!__kvm_sync_page(vcpu, s, &invalid_list, false)) + flush = true; } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - if (flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); } struct mmu_page_path { @@ -2071,6 +2078,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, while (mmu_unsync_walk(parent, &pages)) { bool protected = false; + bool flush = false; for_each_sp(pages, sp, parents, i) protected |= rmap_write_protect(vcpu, sp->gfn); @@ -2079,10 +2087,12 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, kvm_flush_remote_tlbs(vcpu->kvm); for_each_sp(pages, sp, parents, i) { - kvm_sync_page(vcpu, sp, &invalid_list); + if (!kvm_sync_page(vcpu, sp, &invalid_list)) + flush = true; + mmu_pages_clear_parents(&parents); } - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); cond_resched_lock(&vcpu->kvm->mmu_lock); } } @@ -4188,21 +4198,6 @@ static bool need_remote_flush(u64 old, u64 new) return (old & ~new & PT64_PERM_MASK) != 0; } -static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, - struct list_head *invalid_list, - bool remote_flush, bool local_flush) -{ - if (!list_empty(invalid_list)) { - kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); - return; - } - - if (remote_flush) - kvm_flush_remote_tlbs(vcpu->kvm); - else if (local_flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} - static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, const u8 *new, int *bytes) { -- 1.8.3.1 >From b8029b35c6a75c7e388c07be035624394c2199bd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 10:19:30 +0100 Subject: [PATCH 3/9] KVM: MMU: use kvm_sync_page in kvm_sync_pages If the last argument is true, kvm_unlink_unsync_page is called anyway in __kvm_sync_page (either by kvm_mmu_prepare_zap_page or by __kvm_sync_page itself). Therefore, kvm_sync_pages can just call kvm_sync_page, instead of going through kvm_unlink_unsync_page+__kvm_sync_page. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 45a8a0605a09..56be33714036 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1987,8 +1987,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - kvm_unlink_unsync_page(vcpu->kvm, s); - if (!__kvm_sync_page(vcpu, s, &invalid_list, false)) + if (!kvm_sync_page(vcpu, s, &invalid_list)) flush = true; } -- 1.8.3.1 >From dad20250ae48906ca78b318a93f2a8223743dcc8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 10:28:01 +0100 Subject: [PATCH 4/9] KVM: MMU: cleanup __kvm_sync_page and its callers Calling kvm_unlink_unsync_page in the middle of __kvm_sync_page makes things unnecessarily tricky. If kvm_mmu_prepare_zap_page is called, it will call kvm_unlink_unsync_page too. So kvm_unlink_unsync_page can be called just as well at the beginning or the end of __kvm_sync_page... which means that we might do it in kvm_sync_page too and remove the parameter. kvm_sync_page ends up being the same code that kvm_sync_pages used to have before the previous patch. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 56be33714036..88a1a79c869e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1917,16 +1917,13 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, /* @sp->gfn should be write-protected at the call site */ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - struct list_head *invalid_list, bool clear_unsync) + struct list_head *invalid_list) { if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return 1; } - if (clear_unsync) - kvm_unlink_unsync_page(vcpu->kvm, sp); - if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return 1; @@ -1956,7 +1953,7 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, LIST_HEAD(invalid_list); int ret; - ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); + ret = __kvm_sync_page(vcpu, sp, &invalid_list); kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret); return ret; @@ -1972,7 +1969,8 @@ static void mmu_audit_disable(void) { } static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { - return __kvm_sync_page(vcpu, sp, invalid_list, true); + kvm_unlink_unsync_page(vcpu->kvm, sp); + return __kvm_sync_page(vcpu, sp, invalid_list); } /* @gfn should be write-protected at the call site */ -- 1.8.3.1 >From 3283c4c561d7f5eca8144fa5b6a666df2ae00172 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 11:07:14 +0100 Subject: [PATCH 5/9] KVM: MMU: invert return value of mmu.sync_page and *kvm_sync_page* Return true if the page was synced (and the TLB must be flushed) and false if the page was zapped. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 31 ++++++++++++++----------------- arch/x86/kvm/paging_tmpl.h | 4 ++-- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 88a1a79c869e..1c87102efb3d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1784,7 +1784,7 @@ static void mark_unsync(u64 *spte) static int nonpaging_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - return 1; + return 0; } static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) @@ -1916,20 +1916,20 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, if ((_sp)->role.direct || (_sp)->role.invalid) {} else /* @sp->gfn should be write-protected at the call site */ -static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - struct list_head *invalid_list) +static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) { if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return 1; + return false; } - if (vcpu->arch.mmu.sync_page(vcpu, sp)) { + if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return 1; + return false; } - return 0; + return true; } static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, @@ -1947,14 +1947,14 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } -static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) +static bool kvm_sync_page_transient(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp) { LIST_HEAD(invalid_list); int ret; ret = __kvm_sync_page(vcpu, sp, &invalid_list); - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, !ret); + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, ret); return ret; } @@ -1966,7 +1966,7 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } static void mmu_audit_disable(void) { } #endif -static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { kvm_unlink_unsync_page(vcpu->kvm, sp); @@ -1985,8 +1985,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - if (!kvm_sync_page(vcpu, s, &invalid_list)) - flush = true; + flush |= kvm_sync_page(vcpu, s, &invalid_list); } kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); @@ -2084,9 +2083,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, kvm_flush_remote_tlbs(vcpu->kvm); for_each_sp(pages, sp, parents, i) { - if (!kvm_sync_page(vcpu, sp, &invalid_list)) - flush = true; - + flush |= kvm_sync_page(vcpu, sp, &invalid_list); mmu_pages_clear_parents(&parents); } kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); @@ -2145,7 +2142,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->role.word != role.word) continue; - if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) + if (sp->unsync && !kvm_sync_page_transient(vcpu, sp)) break; if (sp->unsync_children) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4174cf290fa3..a1f5459edcec 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -943,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte, sizeof(pt_element_t))) - return -EINVAL; + return 0; if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { vcpu->kvm->tlbs_dirty++; @@ -975,7 +975,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable); } - return !nr_present; + return nr_present; } #undef pt_element_t -- 1.8.3.1 >From 298603ce19f76347c3416fe2e730c4bc577a9675 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Feb 2016 11:26:10 +0100 Subject: [PATCH 6/9] KVM: MMU: move zap/flush to kvm_mmu_get_page kvm_mmu_get_page is the only caller of kvm_sync_page_transient and kvm_sync_pages. Moving the handling of the invalid_list there removes the need for the underdocumented kvm_sync_page_transient function. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1c87102efb3d..fecc9c51d924 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1947,18 +1947,6 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } -static bool kvm_sync_page_transient(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) -{ - LIST_HEAD(invalid_list); - int ret; - - ret = __kvm_sync_page(vcpu, sp, &invalid_list); - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, ret); - - return ret; -} - #ifdef CONFIG_KVM_MMU_AUDIT #include "mmu_audit.c" #else @@ -1974,21 +1962,21 @@ static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, } /* @gfn should be write-protected at the call site */ -static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) +static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, + struct list_head *invalid_list) { struct kvm_mmu_page *s; - LIST_HEAD(invalid_list); - bool flush = false; + bool ret = false; for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!s->unsync) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); - flush |= kvm_sync_page(vcpu, s, &invalid_list); + ret |= kvm_sync_page(vcpu, s, invalid_list); } - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); + return ret; } struct mmu_page_path { @@ -2119,6 +2107,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, unsigned quadrant; struct kvm_mmu_page *sp; bool need_sync = false; + bool flush = false; + LIST_HEAD(invalid_list); role = vcpu->arch.mmu.base_role; role.level = level; @@ -2142,8 +2132,16 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->role.word != role.word) continue; - if (sp->unsync && !kvm_sync_page_transient(vcpu, sp)) - break; + if (sp->unsync) { + /* The page is good, but __kvm_sync_page might still end + * up zapping it. If so, break in order to rebuild it. + */ + if (!__kvm_sync_page(vcpu, sp, &invalid_list)) + break; + + WARN_ON(!list_empty(&invalid_list)); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); @@ -2173,11 +2171,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, kvm_flush_remote_tlbs(vcpu->kvm); if (level > PT_PAGE_TABLE_LEVEL && need_sync) - kvm_sync_pages(vcpu, gfn); + flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); return sp; } -- 1.8.3.1 >From a824edfa6b6d919b8a47c077fe9a3f26e6987041 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 25 Feb 2016 10:47:38 +0100 Subject: [PATCH 7/9] KVM: MMU: coalesce more page zapping in mmu_sync_children mmu_sync_children can only process up to 16 pages at a time. Check if we need to reschedule, and do not bother zapping the pages until that happens. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1dbef19867e4..2463de0b935c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2059,24 +2059,31 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, struct mmu_page_path parents; struct kvm_mmu_pages pages; LIST_HEAD(invalid_list); + bool flush = false; while (mmu_unsync_walk(parent, &pages)) { bool protected = false; - bool flush = false; for_each_sp(pages, sp, parents, i) protected |= rmap_write_protect(vcpu, sp->gfn); - if (protected) + if (protected) { kvm_flush_remote_tlbs(vcpu->kvm); + flush = false; + } for_each_sp(pages, sp, parents, i) { flush |= kvm_sync_page(vcpu, sp, &invalid_list); mmu_pages_clear_parents(&parents); } - kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); - cond_resched_lock(&vcpu->kvm->mmu_lock); + if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); + cond_resched_lock(&vcpu->kvm->mmu_lock); + flush = false; + } } + + kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); } static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) -- 1.8.3.1 >From 3cf42eeae76b93529bd0d768c64a10e8db9817b3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Feb 2016 12:51:19 +0100 Subject: [PATCH 8/9] KVM: MMU: simplify is_last_gpte Branch-free code is fun and everybody knows how much Avi loves it, but is_last_gpte takes it a bit to the extreme. Since the code is simply doing a range check, like (level == 1 || ((gpte & PT_PAGE_SIZE_MASK) && level < N) we can make it branch-free without storing the entire truth table; it is enough to cache N. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 ++----- arch/x86/kvm/mmu.c | 50 +++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1c3e390993a2..d110dc44d6c2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -347,12 +347,8 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* - * Bitmap: bit set = last pte in walk - * index[0:1]: level (zero-based) - * index[2]: pte.ps - */ - u8 last_pte_bitmap; + /* Can have large pages at levels 2..last_nonleaf_level-1. */ + u8 last_nonleaf_level; bool nx; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fecc9c51d924..1dbef19867e4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3625,13 +3625,24 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, return false; } -static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) +static inline bool is_last_gpte(struct kvm_mmu *mmu, + unsigned level, unsigned gpte) { - unsigned index; + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - index = level - 1; - index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); - return mmu->last_pte_bitmap & (1 << index); + /* + * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. + * If it is clear, there are no large pages at this level, so clear + * PT_PAGE_SIZE_MASK in gpte if that is the case. + */ + gpte &= level - mmu->last_nonleaf_level; + + return gpte & PT_PAGE_SIZE_MASK; } #define PTTYPE_EPT 18 /* arbitrary */ @@ -3903,22 +3914,13 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, } } -static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { - u8 map; - unsigned level, root_level = mmu->root_level; - const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ - - if (root_level == PT32E_ROOT_LEVEL) - --root_level; - /* PT_PAGE_TABLE_LEVEL always terminates */ - map = 1 | (1 << ps_set_index); - for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { - if (level <= PT_PDPE_LEVEL - && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) - map |= 1 << (ps_set_index | (level - 1)); - } - mmu->last_pte_bitmap = map; + unsigned root_level = mmu->root_level; + + mmu->last_nonleaf_level = root_level; + if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu)) + mmu->last_nonleaf_level++; } static void paging64_init_context_common(struct kvm_vcpu *vcpu, @@ -3930,7 +3932,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); MMU_WARN_ON(!is_pae(vcpu)); context->page_fault = paging64_page_fault; @@ -3957,7 +3959,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; @@ -4015,7 +4017,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, context, false); - update_last_pte_bitmap(vcpu, context); + update_last_nonleaf_level(vcpu, context); reset_tdp_shadow_zero_bits_mask(vcpu, context); } @@ -4121,7 +4123,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, g_context, false); - update_last_pte_bitmap(vcpu, g_context); + update_last_nonleaf_level(vcpu, g_context); } static void init_kvm_mmu(struct kvm_vcpu *vcpu) -- 1.8.3.1 >From 30fe56cdbd78a25a263d16e88083e6c8f797ce51 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Feb 2016 14:19:20 +0100 Subject: [PATCH 9/9] KVM: MMU: micro-optimize gpte_access Avoid AND-NOT, most x86 processor lack an instruction for it. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/paging_tmpl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a1f5459edcec..6013f3685ef4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -189,8 +189,11 @@ static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | ACC_USER_MASK; #else - access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - access &= ~(gpte >> PT64_NX_SHIFT); + BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK); + BUILD_BUG_ON(ACC_EXEC_MASK != 1); + access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK); + /* Combine NX with P (which is set here) to get ACC_EXEC_MASK. */ + access ^= (gpte >> PT64_NX_SHIFT); #endif return access; -- 1.8.3.1