LinuxLists.cc - [PATCH 1/7] KVM: MMU: rename 'page' and 'shadow

2010-06-11 13:31:50

Subject: [PATCH 1/7] KVM: MMU: rename 'page' and 'shadow_page' to 'sp'

Rename 'page' and 'shadow_page' to 'sp' to better fit the context

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/paging_tmpl.h | 22 +++++++++++-----------
1 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6cd318d..8d00bb2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -253,7 +253,7 @@ err:
return 0;
}

-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte)
{
pt_element_t gpte;
@@ -264,7 +264,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
if (!is_present_gpte(gpte)) {
- if (page->unsync)
+ if (sp->unsync)
new_spte = shadow_trap_nonpresent_pte;
else
new_spte = shadow_notrap_nonpresent_pte;
@@ -273,7 +273,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return;
}
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
- pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
return;
pfn = vcpu->arch.update_pte.pfn;
@@ -286,7 +286,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
* we call mmu_set_spte() with reset_host_protection = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
- mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
+ mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true, true);
}
@@ -300,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
int *ptwrite, pfn_t pfn)
{
unsigned access = gw->pt_access;
- struct kvm_mmu_page *shadow_page;
+ struct kvm_mmu_page *sp;
u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
@@ -341,30 +341,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
access &= ~ACC_WRITE_MASK;
/*
* It is a large guest pages backed by small host pages,
- * So we set @direct(@shadow_page->role.direct)=1, and
- * set @table_gfn(@shadow_page->gfn)=the base page frame
- * for linear translations.
+ * So we set @direct(@sp->role.direct)=1, and set
+ * @table_gfn(@sp->gfn)=the base page frame for linear
+ * translations.
*/
table_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
} else {
direct = 0;
table_gfn = gw->table_gfn[level - 2];
}
- shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
direct, access, sptep);
if (!direct) {
r = kvm_read_guest_atomic(vcpu->kvm,
gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(shadow_page, sptep);
+ kvm_mmu_put_page(sp, sptep);
kvm_release_pfn_clean(pfn);
sptep = NULL;
break;
}
}

- spte = __pa(shadow_page->spt)
+ spte = __pa(sp->spt)
| PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*sptep = spte;
--
1.6.1.2

2010-06-11 13:33:17

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 2/7] KVM: MMU: cleanup for dirty page judgment

Using wrap function to cleanup page dirty judgment

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/paging_tmpl.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 8d00bb2..876e705 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -287,7 +287,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
+ is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true, true);
}

@@ -319,7 +319,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, sptep, access,
gw->pte_access & access,
user_fault, write_fault,
- gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+ is_dirty_gpte(gw->ptes[gw->level-1]),
ptwrite, level,
gw->gfn, pfn, false, true);
break;
--
1.6.1.2

2010-06-11 13:34:12

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 3/7] KVM: MMU: avoid double write protected in sync page path

The sync page is already write protected in mmu_sync_children(), don't
write protected it again

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 21ab85b..2ffd673 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1216,6 +1216,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if ((sp)->gfn != (gfn) || (sp)->role.direct || \
(sp)->role.invalid) {} else

+/* @sp->gfn should be write-protected at the call site */
static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list, bool clear_unsync)
{
@@ -1224,11 +1225,8 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 1;
}

- if (clear_unsync) {
- if (rmap_write_protect(vcpu->kvm, sp->gfn))
- kvm_flush_remote_tlbs(vcpu->kvm);
+ if (clear_unsync)
kvm_unlink_unsync_page(vcpu->kvm, sp);
- }

if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
--
1.6.1.2

2010-06-11 13:35:14

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 4/7] KVM: MMU: don't mark pte notrap if it's just sync transient

If the sync-sp just sync transient, don't mark its pte notrap

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/mmu.c | 11 ++++-------
arch/x86/kvm/paging_tmpl.h | 5 +++--
3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 91631b8..fb72bd7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -241,7 +241,7 @@ struct kvm_mmu {
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp);
+ struct kvm_mmu_page *sp, bool clear_unsync);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
hpa_t root_hpa;
int root_level;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ffd673..18c14c5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1103,7 +1103,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
}

static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
+ struct kvm_mmu_page *sp, bool clear_unsync)
{
return 1;
}
@@ -1228,7 +1228,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (clear_unsync)
kvm_unlink_unsync_page(vcpu->kvm, sp);

- if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+ if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return 1;
}
@@ -1237,7 +1237,6 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 0;
}

-static void mmu_convert_notrap(struct kvm_mmu_page *sp);
static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
@@ -1245,9 +1244,7 @@ static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
int ret;

ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
- if (!ret)
- mmu_convert_notrap(sp);
- else
+ if (ret)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);

return ret;
@@ -1273,7 +1270,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)

WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
- (vcpu->arch.mmu.sync_page(vcpu, s))) {
+ (vcpu->arch.mmu.sync_page(vcpu, s, true))) {
kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
continue;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 876e705..3ebc5a0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -577,7 +577,8 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
* can't change unless all sptes pointing to it are nuked first.
* - Alias changes zap the entire shadow cache.
*/
-static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ bool clear_unsync)
{
int i, offset, nr_present;
bool reset_host_protection;
@@ -614,7 +615,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
u64 nonpresent;

rmap_remove(vcpu->kvm, &sp->spt[i]);
- if (is_present_gpte(gpte))
+ if (is_present_gpte(gpte) || !clear_unsync)
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
--
1.6.1.2

2010-06-11 13:36:08

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 5/7] KVM: MMU: cleanup for __mmu_unsync_walk()

Decrease sp->unsync_children after clear unsync_child_bitmap bit

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 18c14c5..c4b980a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1160,9 +1160,11 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
return -ENOSPC;

ret = __mmu_unsync_walk(child, pvec);
- if (!ret)
+ if (!ret) {
__clear_bit(i, sp->unsync_child_bitmap);
- else if (ret > 0)
+ sp->unsync_children--;
+ WARN_ON((int)sp->unsync_children < 0);
+ } else if (ret > 0)
nr_unsync_leaf += ret;
else
return ret;
@@ -1176,8 +1178,6 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
}
}

- if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
- sp->unsync_children = 0;

return nr_unsync_leaf;
}
--
1.6.1.2

2010-06-11 13:37:38

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 6/7] KVM: MMU: clear unsync_child_bitmap completely

In current code, some page's unsync_child_bitmap is not cleared completely
in mmu_sync_children(), for example, if two PDPEs shard one PDT, one of
PDPE's unsync_child_bitmap is not cleared.

Currently, it not harm anything just little overload, but it's the prepare
work for the later patch

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 55 ++++++++++++++++++++++++++++-----------------------
1 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c4b980a..eb20682 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1149,33 +1149,38 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
int i, ret, nr_unsync_leaf = 0;

for_each_unsync_children(sp->unsync_child_bitmap, i) {
+ struct kvm_mmu_page *child;
u64 ent = sp->spt[i];

- if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
- struct kvm_mmu_page *child;
- child = page_header(ent & PT64_BASE_ADDR_MASK);
-
- if (child->unsync_children) {
- if (mmu_pages_add(pvec, child, i))
- return -ENOSPC;
-
- ret = __mmu_unsync_walk(child, pvec);
- if (!ret) {
- __clear_bit(i, sp->unsync_child_bitmap);
- sp->unsync_children--;
- WARN_ON((int)sp->unsync_children < 0);
- } else if (ret > 0)
- nr_unsync_leaf += ret;
- else
- return ret;
- }
+ if (!is_shadow_present_pte(ent) || is_large_pte(ent))
+ goto clear_child_bitmap;
+
+ child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+ if (child->unsync_children) {
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
+
+ ret = __mmu_unsync_walk(child, pvec);
+ if (!ret)
+ goto clear_child_bitmap;
+ else if (ret > 0)
+ nr_unsync_leaf += ret;
+ else
+ return ret;
+ } else if (child->unsync) {
+ nr_unsync_leaf++;
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
+ } else
+ goto clear_child_bitmap;

- if (child->unsync) {
- nr_unsync_leaf++;
- if (mmu_pages_add(pvec, child, i))
- return -ENOSPC;
- }
- }
+ continue;
+
+clear_child_bitmap:
+ __clear_bit(i, sp->unsync_child_bitmap);
+ sp->unsync_children--;
+ WARN_ON((int)sp->unsync_children < 0);
}

--
1.6.1.2

2010-06-11 13:38:51

by Xiao Guangrong

[permalink] [raw]

Subject: [PATCH 7/7] KVM: MMU: don't walk every parent pages while mark unsync

While we mark the parent's unsync_child_bitmap, if the parent is already
unsynced, it no need walk it's parent, it can reduce some unnecessary
workload

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 61 ++++++++++++++-------------------------------------
1 files changed, 17 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eb20682..a92863f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -175,7 +175,7 @@ struct kvm_shadow_walk_iterator {
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))

-typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp);
+typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
@@ -1024,7 +1024,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
BUG();
}

-
static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
{
struct kvm_pte_chain *pte_chain;
@@ -1034,63 +1033,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)

if (!sp->multimapped && sp->parent_pte) {
parent_sp = page_header(__pa(sp->parent_pte));
- fn(parent_sp);
- mmu_parent_walk(parent_sp, fn);
+ fn(parent_sp, sp->parent_pte);
return;
}
+
hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
- if (!pte_chain->parent_ptes[i])
+ u64 *spte = pte_chain->parent_ptes[i];
+
+ if (!spte)
break;
- parent_sp = page_header(__pa(pte_chain->parent_ptes[i]));
- fn(parent_sp);
- mmu_parent_walk(parent_sp, fn);
+ parent_sp = page_header(__pa(spte));
+ fn(parent_sp, spte);
}
}

-static void kvm_mmu_update_unsync_bitmap(u64 *spte)
+static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte);
+static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
{
- unsigned int index;
- struct kvm_mmu_page *sp = page_header(__pa(spte));
-
- index = spte - sp->spt;
- if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
- sp->unsync_children++;
- WARN_ON(!sp->unsync_children);
+ mmu_parent_walk(sp, mark_unsync);
}

-static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
+static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
{
- struct kvm_pte_chain *pte_chain;
- struct hlist_node *node;
- int i;
+ unsigned int index;

- if (!sp->parent_pte)
+ index = spte - sp->spt;
+ if (__test_and_set_bit(index, sp->unsync_child_bitmap))
return;
-
- if (!sp->multimapped) {
- kvm_mmu_update_unsync_bitmap(sp->parent_pte);
+ if (sp->unsync_children++)
return;
- }
-
- hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
- for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
- if (!pte_chain->parent_ptes[i])
- break;
- kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]);
- }
-}
-
-static int unsync_walk_fn(struct kvm_mmu_page *sp)
-{
- kvm_mmu_update_parents_unsync(sp);
- return 1;
-}
-
-static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
-{
- mmu_parent_walk(sp, unsync_walk_fn);
- kvm_mmu_update_parents_unsync(sp);
+ kvm_mmu_mark_parents_unsync(sp);
}

static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
--
1.6.1.2

2010-06-11 13:46:54

by Xiao Guangrong

[permalink] [raw]

Subject: Re: [PATCH 7/7] KVM: MMU: don't walk every parent pages while mark unsync

Xiao Guangrong wrote:
> While we mark the parent's unsync_child_bitmap, if the parent is already
> unsynced, it no need walk it's parent, it can reduce some unnecessary
> workload
>

Hi Avi, Marcelo,

About two months ago, i have sent this optimization, it just a little performance
improved (~0.5%) at that time, now, we allow more page become unsync, this path is
called higher frequency, during my test, kernelbench show the performance improved
~2%, so, i think we may apply it now :-)

2010-06-11 20:35:21

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH 4/7] KVM: MMU: don't mark pte notrap if it's just sync transient

On Fri, Jun 11, 2010 at 09:31:38PM +0800, Xiao Guangrong wrote:
> If the sync-sp just sync transient, don't mark its pte notrap
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/mmu.c | 11 ++++-------
> arch/x86/kvm/paging_tmpl.h | 5 +++--
> 3 files changed, 8 insertions(+), 10 deletions(-)

Xiao,

Can you explain the reasoning for this?

2010-06-12 02:41:51

by Xiao Guangrong

[permalink] [raw]

Subject: Re: [PATCH 4/7] KVM: MMU: don't mark pte notrap if it's just sync transient

Marcelo Tosatti wrote:
> On Fri, Jun 11, 2010 at 09:31:38PM +0800, Xiao Guangrong wrote:
>> If the sync-sp just sync transient, don't mark its pte notrap
>>
>> Signed-off-by: Xiao Guangrong <[email protected]>
>> ---
>> arch/x86/include/asm/kvm_host.h | 2 +-
>> arch/x86/kvm/mmu.c | 11 ++++-------
>> arch/x86/kvm/paging_tmpl.h | 5 +++--
>> 3 files changed, 8 insertions(+), 10 deletions(-)
>
> Xiao,
>
> Can you explain the reasoning for this?
>

Marcelo,

In the kvm_sync_page_transient() path, the sp is keep unsync and sp->gfn is not
write protected, we can't set 'spte == shadow_notrap_nonpresent_pte' if the
'gpte.p == 0' in this case.

It's because if 'gpte.p == 0', when guest change the gpte's mapping, it's no need
to flush tlb(p == 0 means the mapping is not in tlb), if we set spte to
'shadow_notrap_nonpresent_pte', we will miss the chance to update the mapping, also
a incorrect #PF will pass to guest directly, may cause guest crash.

And, this is the reasoning we do mmu_convert_notrap() in kvm_sync_page_transient()
before, this patch just avoid this unnecessary workload. :-)

2010-06-14 22:20:25

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH 7/7] KVM: MMU: don't walk every parent pages while mark unsync

On Fri, Jun 11, 2010 at 09:35:15PM +0800, Xiao Guangrong wrote:
> While we mark the parent's unsync_child_bitmap, if the parent is already
> unsynced, it no need walk it's parent, it can reduce some unnecessary
> workload
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/kvm/mmu.c | 61 ++++++++++++++-------------------------------------
> 1 files changed, 17 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index eb20682..a92863f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -175,7 +175,7 @@ struct kvm_shadow_walk_iterator {
> shadow_walk_okay(&(_walker)); \
> shadow_walk_next(&(_walker)))
>
> -typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp);
> +typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
>
> static struct kmem_cache *pte_chain_cache;
> static struct kmem_cache *rmap_desc_cache;
> @@ -1024,7 +1024,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
> BUG();
> }
>
> -
> static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
> {
> struct kvm_pte_chain *pte_chain;
> @@ -1034,63 +1033,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
>
> if (!sp->multimapped && sp->parent_pte) {
> parent_sp = page_header(__pa(sp->parent_pte));
> - fn(parent_sp);
> - mmu_parent_walk(parent_sp, fn);
> + fn(parent_sp, sp->parent_pte);
> return;
> }
> +
> hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
> for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
> - if (!pte_chain->parent_ptes[i])
> + u64 *spte = pte_chain->parent_ptes[i];
> +
> + if (!spte)
> break;
> - parent_sp = page_header(__pa(pte_chain->parent_ptes[i]));
> - fn(parent_sp);
> - mmu_parent_walk(parent_sp, fn);
> + parent_sp = page_header(__pa(spte));
> + fn(parent_sp, spte);
> }
> }
>
> -static void kvm_mmu_update_unsync_bitmap(u64 *spte)
> +static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte);
> +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
> {
> - unsigned int index;
> - struct kvm_mmu_page *sp = page_header(__pa(spte));
> -
> - index = spte - sp->spt;
> - if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
> - sp->unsync_children++;
> - WARN_ON(!sp->unsync_children);
> + mmu_parent_walk(sp, mark_unsync);
> }
>
> -static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
> +static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
> {
> - struct kvm_pte_chain *pte_chain;
> - struct hlist_node *node;
> - int i;
> + unsigned int index;
>
> - if (!sp->parent_pte)
> + index = spte - sp->spt;
> + if (__test_and_set_bit(index, sp->unsync_child_bitmap))
> return;
> -
> - if (!sp->multimapped) {
> - kvm_mmu_update_unsync_bitmap(sp->parent_pte);
> + if (sp->unsync_children++)
> return;

This looks wrong. If the sp has an unrelated children marked as
unsync (which increased sp->unsync_children), you stop the walk?

Applied 1-6, thanks.

2010-06-15 01:36:04

by Xiao Guangrong

[permalink] [raw]

Subject: Re: [PATCH 7/7] KVM: MMU: don't walk every parent pages while mark unsync

Marcelo Tosatti wrote:

>> - if (!sp->multimapped) {
>> - kvm_mmu_update_unsync_bitmap(sp->parent_pte);
>> + if (sp->unsync_children++)
>> return;
>
> This looks wrong. If the sp has an unrelated children marked as
> unsync (which increased sp->unsync_children), you stop the walk?
>

Marcelo,

I think it's right :-), we only walk the parents only when
sp->unsync_children is 0, since sp->unsync_children is the number bit
set in sp->unsync_child_bitmap, if sp->unsync_children > 0, we can sure
its parents already have mark unsync-child-exist, assume, for example,
have this mapping:

/ SP1
P1 -> P2
\ SP2

[ P2 = P1.pte[0] SP1 = P2.pte[0] SP2 = P2.pte[1] ]

First, we mark SP1 unsyc, it will set:
P2.unsync_child_bitmap[0] = 1, P2.unsync_children = 1
and
P1.unsync_child_bitmap[0] = 1, P1.unsync_children = 1

Then, we mark SP2 unsync, we only need do:
P2.unsync_child_bitmap[1] = 1, P2.unsync_children = 2
no need touch P1, since the P1 is already mark pte[0] unsync-child-exist.

2010-06-15 23:48:19

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH 7/7] KVM: MMU: don't walk every parent pages while mark unsync

On Tue, Jun 15, 2010 at 09:32:25AM +0800, Xiao Guangrong wrote:
>
>
> Marcelo Tosatti wrote:
>
> >> - if (!sp->multimapped) {
> >> - kvm_mmu_update_unsync_bitmap(sp->parent_pte);
> >> + if (sp->unsync_children++)
> >> return;
> >
> > This looks wrong. If the sp has an unrelated children marked as
> > unsync (which increased sp->unsync_children), you stop the walk?
> >
>
> Marcelo,
>
> I think it's right :-), we only walk the parents only when
> sp->unsync_children is 0, since sp->unsync_children is the number bit
> set in sp->unsync_child_bitmap, if sp->unsync_children > 0, we can sure
> its parents already have mark unsync-child-exist, assume, for example,
> have this mapping:
>
> / SP1
> P1 -> P2
> \ SP2
>
> [ P2 = P1.pte[0] SP1 = P2.pte[0] SP2 = P2.pte[1] ]
>
> First, we mark SP1 unsyc, it will set:
> P2.unsync_child_bitmap[0] = 1, P2.unsync_children = 1
> and
> P1.unsync_child_bitmap[0] = 1, P1.unsync_children = 1
>
> Then, we mark SP2 unsync, we only need do:
> P2.unsync_child_bitmap[1] = 1, P2.unsync_children = 2
> no need touch P1, since the P1 is already mark pte[0] unsync-child-exist.

You're right, applied.

2010-06-15 23:48:27

by Marcelo Tosatti

[permalink] [raw]