2010-06-04 13:55:55

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 1/7] KVM: MMU: skip invalid sp when unprotect page

In kvm_mmu_unprotect_page(), the invalid sp can be skipped

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a62e3ba..e962f26 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1629,7 +1629,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
bucket = &kvm->arch.mmu_page_hash[index];
restart:
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.direct) {
+ if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
--
1.6.1.2


2010-06-04 13:56:45

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 2/7] KVM: MMU: introduce some macros to cleanup hlist traverseing

Introduce for_each_gfn_sp() and for_each_gfn_indirect_valid_sp() to
cleanup hlist traverseing

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 122 ++++++++++++++++++++--------------------------------
1 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e962f26..75bd6df 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1201,6 +1201,17 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)

static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);

+#define for_each_gfn_sp(kvm, sp, gfn, pos, n) \
+ hlist_for_each_entry_safe(sp, pos, n, \
+ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
+ if ((sp)->gfn != (gfn)) {} else
+
+#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos, n) \
+ hlist_for_each_entry_safe(sp, pos, n, \
+ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
+ if ((sp)->gfn != (gfn) || (sp)->role.direct || \
+ (sp)->role.invalid) {} else
+
static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
bool clear_unsync)
{
@@ -1244,16 +1255,12 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
/* @gfn should be write-protected at the call site */
static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- struct hlist_head *bucket;
struct kvm_mmu_page *s;
struct hlist_node *node, *n;
- unsigned index;
bool flush = false;

- index = kvm_page_table_hashfn(gfn);
- bucket = &vcpu->kvm->arch.mmu_page_hash[index];
- hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != gfn || !s->unsync || s->role.invalid)
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
+ if (!s->unsync)
continue;

WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -1365,9 +1372,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
u64 *parent_pte)
{
union kvm_mmu_page_role role;
- unsigned index;
unsigned quadrant;
- struct hlist_head *bucket;
struct kvm_mmu_page *sp;
struct hlist_node *node, *tmp;
bool need_sync = false;
@@ -1383,36 +1388,34 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- index = kvm_page_table_hashfn(gfn);
- bucket = &vcpu->kvm->arch.mmu_page_hash[index];
- hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
- if (sp->gfn == gfn) {
- if (!need_sync && sp->unsync)
- need_sync = true;
+ for_each_gfn_sp(vcpu->kvm, sp, gfn, node, tmp) {
+ if (!need_sync && sp->unsync)
+ need_sync = true;

- if (sp->role.word != role.word)
- continue;
+ if (sp->role.word != role.word)
+ continue;

- if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
- break;
+ if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
+ break;

- mmu_page_add_parent_pte(vcpu, sp, parent_pte);
- if (sp->unsync_children) {
- set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
- kvm_mmu_mark_parents_unsync(sp);
- } else if (sp->unsync)
- kvm_mmu_mark_parents_unsync(sp);
+ mmu_page_add_parent_pte(vcpu, sp, parent_pte);
+ if (sp->unsync_children) {
+ set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+ kvm_mmu_mark_parents_unsync(sp);
+ } else if (sp->unsync)
+ kvm_mmu_mark_parents_unsync(sp);

- trace_kvm_mmu_get_page(sp, false);
- return sp;
- }
+ trace_kvm_mmu_get_page(sp, false);
+ return sp;
+ }
++vcpu->kvm->stat.mmu_cache_miss;
sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
if (!sp)
return sp;
sp->gfn = gfn;
sp->role = role;
- hlist_add_head(&sp->hash_link, bucket);
+ hlist_add_head(&sp->hash_link,
+ &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -1617,46 +1620,34 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)

static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
- unsigned index;
- struct hlist_head *bucket;
struct kvm_mmu_page *sp;
struct hlist_node *node, *n;
int r;

pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
r = 0;
- index = kvm_page_table_hashfn(gfn);
- bucket = &kvm->arch.mmu_page_hash[index];
restart:
- hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) {
- pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
- sp->role.word);
- r = 1;
- if (kvm_mmu_zap_page(kvm, sp))
- goto restart;
- }
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node, n) {
+ pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
+ sp->role.word);
+ r = 1;
+ if (kvm_mmu_zap_page(kvm, sp))
+ goto restart;
+ }
return r;
}

static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
- unsigned index;
- struct hlist_head *bucket;
struct kvm_mmu_page *sp;
struct hlist_node *node, *nn;

- index = kvm_page_table_hashfn(gfn);
- bucket = &kvm->arch.mmu_page_hash[index];
restart:
- hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
- if (sp->gfn == gfn && !sp->role.direct
- && !sp->role.invalid) {
- pgprintk("%s: zap %lx %x\n",
- __func__, gfn, sp->role.word);
- if (kvm_mmu_zap_page(kvm, sp))
- goto restart;
- }
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node, nn) {
+ pgprintk("%s: zap %lx %x\n",
+ __func__, gfn, sp->role.word);
+ if (kvm_mmu_zap_page(kvm, sp))
+ goto restart;
}
}

@@ -1799,17 +1790,11 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)

static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- struct hlist_head *bucket;
struct kvm_mmu_page *s;
struct hlist_node *node, *n;
- unsigned index;
-
- index = kvm_page_table_hashfn(gfn);
- bucket = &vcpu->kvm->arch.mmu_page_hash[index];

- hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != gfn || s->role.direct || s->unsync ||
- s->role.invalid)
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
+ if (s->unsync)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
__kvm_unsync_page(vcpu, s);
@@ -1819,18 +1804,11 @@ static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
bool can_unsync)
{
- unsigned index;
- struct hlist_head *bucket;
struct kvm_mmu_page *s;
struct hlist_node *node, *n;
bool need_unsync = false;

- index = kvm_page_table_hashfn(gfn);
- bucket = &vcpu->kvm->arch.mmu_page_hash[index];
- hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != gfn || s->role.direct || s->role.invalid)
- continue;
-
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
if (s->role.level != PT_PAGE_TABLE_LEVEL)
return 1;

@@ -2701,8 +2679,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
struct hlist_node *node, *n;
- struct hlist_head *bucket;
- unsigned index;
u64 entry, gentry;
u64 *spte;
unsigned offset = offset_in_page(gpa);
@@ -2770,13 +2746,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
vcpu->arch.last_pte_updated = NULL;
}
}
- index = kvm_page_table_hashfn(gfn);
- bucket = &vcpu->kvm->arch.mmu_page_hash[index];

restart:
- hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
- if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
- continue;
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node, n) {
pte_size = sp->role.cr4_pae ? 8 : 4;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
misaligned |= bytes < 4;
--
1.6.1.2

2010-06-04 13:57:30

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 3/7] KVM: MMU: split the operations of kvm_mmu_zap_page()

Using kvm_mmu_prepare_zap_page() and kvm_mmu_commit_zap_page() to
split kvm_mmu_zap_page() function, then we can:

- traverse hlist safely
- easily to gather remote tlb flush which occurs during page zapped

Those feature can be used in the later patches

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 52 ++++++++++++++++++++++++++++++++++++++--------
arch/x86/kvm/mmutrace.h | 2 +-
2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 75bd6df..a64c0e0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -916,6 +916,7 @@ static int is_empty_shadow_page(u64 *spt)
static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
ASSERT(is_empty_shadow_page(sp->spt));
+ hlist_del(&sp->hash_link);
list_del(&sp->link);
__free_page(virt_to_page(sp->spt));
if (!sp->role.direct)
@@ -1200,6 +1201,10 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
}

static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list);
+static void kvm_mmu_commit_zap_page(struct kvm *kvm,
+ struct list_head *invalid_list);

#define for_each_gfn_sp(kvm, sp, gfn, pos, n) \
hlist_for_each_entry_safe(sp, pos, n, \
@@ -1530,7 +1535,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
}

static int mmu_zap_unsync_children(struct kvm *kvm,
- struct kvm_mmu_page *parent)
+ struct kvm_mmu_page *parent,
+ struct list_head *invalid_list)
{
int i, zapped = 0;
struct mmu_page_path parents;
@@ -1544,7 +1550,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
struct kvm_mmu_page *sp;

for_each_sp(pages, sp, parents, i) {
- kvm_mmu_zap_page(kvm, sp);
+ kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
mmu_pages_clear_parents(&parents);
zapped++;
}
@@ -1554,16 +1560,16 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
return zapped;
}

-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
int ret;

- trace_kvm_mmu_zap_page(sp);
+ trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
- ret = mmu_zap_unsync_children(kvm, sp);
+ ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
- kvm_flush_remote_tlbs(kvm);
if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
@@ -1571,17 +1577,45 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
if (!sp->root_count) {
/* Count self */
ret++;
- hlist_del(&sp->hash_link);
- kvm_mmu_free_page(kvm, sp);
+ list_move(&sp->link, invalid_list);
} else {
- sp->role.invalid = 1;
list_move(&sp->link, &kvm->arch.active_mmu_pages);
kvm_reload_remote_mmus(kvm);
}
+
+ sp->role.invalid = 1;
kvm_mmu_reset_last_pte_updated(kvm);
return ret;
}

+static void kvm_mmu_commit_zap_page(struct kvm *kvm,
+ struct list_head *invalid_list)
+{
+ struct kvm_mmu_page *sp;
+
+ if (list_empty(invalid_list))
+ return;
+
+ kvm_flush_remote_tlbs(kvm);
+
+ do {
+ sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
+ WARN_ON(!sp->role.invalid || sp->root_count);
+ kvm_mmu_free_page(kvm, sp);
+ } while (!list_empty(invalid_list));
+
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ LIST_HEAD(invalid_list);
+ int ret;
+
+ ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ return ret;
+}
+
/*
* Changing the number of mmu pages allocated to the vm
* Note: if kvm_nr_mmu_pages is too small, you will get dead lock
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 42f07b1..3aab0f0 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -190,7 +190,7 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page,
TP_ARGS(sp)
);

-DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page,
+DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
TP_PROTO(struct kvm_mmu_page *sp),

TP_ARGS(sp)
--
1.6.1.2

2010-06-04 13:58:11

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 4/7] KVM: MMU: don't get free page number in the loop

In the later patch, we will modify sp's zapping way like below:

kvm_mmu_prepare_zap_page A
kvm_mmu_prepare_zap_page B
kvm_mmu_prepare_zap_page C
....
kvm_mmu_commit_zap_page

[ zaped multiple sps only need to call kvm_mmu_commit_zap_page once ]

In __kvm_mmu_free_some_pages() function, the free page number is
getted form 'vcpu->kvm->arch.n_free_mmu_pages' in loop, it will
hinders us to apply kvm_mmu_prepare_zap_page() and kvm_mmu_commit_zap_page()
since kvm_mmu_prepare_zap_page() not free sp.

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a64c0e0..77bc4ba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2861,13 +2861,16 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);

void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
- while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES &&
+ int free_pages;
+
+ free_pages = vcpu->kvm->arch.n_free_mmu_pages;
+ while (free_pages < KVM_REFILL_PAGES &&
!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *sp;

sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_zap_page(vcpu->kvm, sp);
+ free_pages += kvm_mmu_zap_page(vcpu->kvm, sp);
++vcpu->kvm->stat.mmu_recycled;
}
}
--
1.6.1.2

2010-06-04 13:59:14

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 5/7] KVM: MMU: gather remote tlb flush which occurs during page zapped

Using kvm_mmu_prepare_zap_page() and kvm_mmu_zap_page() instead of
kvm_mmu_zap_page() that can reduce remote tlb flush IPI

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 84 ++++++++++++++++++++++++++++++++-------------------
1 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 77bc4ba..6544d8e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1200,7 +1200,6 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
--kvm->stat.mmu_unsync;
}

-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
struct list_head *invalid_list);
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
@@ -1218,10 +1217,10 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
(sp)->role.invalid) {} else

static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- bool clear_unsync)
+ struct list_head *invalid_list, bool clear_unsync)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
- kvm_mmu_zap_page(vcpu->kvm, sp);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return 1;
}

@@ -1232,7 +1231,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
}

if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
- kvm_mmu_zap_page(vcpu->kvm, sp);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return 1;
}

@@ -1244,17 +1243,22 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp);
static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
+ LIST_HEAD(invalid_list);
int ret;

- ret = __kvm_sync_page(vcpu, sp, false);
+ ret = __kvm_sync_page(vcpu, sp, &invalid_list, false);
if (!ret)
mmu_convert_notrap(sp);
+ else
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
return ret;
}

-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
- return __kvm_sync_page(vcpu, sp, true);
+ return __kvm_sync_page(vcpu, sp, invalid_list, true);
}

/* @gfn should be write-protected at the call site */
@@ -1262,6 +1266,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_mmu_page *s;
struct hlist_node *node, *n;
+ LIST_HEAD(invalid_list);
bool flush = false;

for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
@@ -1271,13 +1276,14 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
(vcpu->arch.mmu.sync_page(vcpu, s))) {
- kvm_mmu_zap_page(vcpu->kvm, s);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
continue;
}
kvm_unlink_unsync_page(vcpu->kvm, s);
flush = true;
}

+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
if (flush)
kvm_mmu_flush_tlb(vcpu);
}
@@ -1348,6 +1354,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp;
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
+ LIST_HEAD(invalid_list);

kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
@@ -1360,9 +1367,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
kvm_flush_remote_tlbs(vcpu->kvm);

for_each_sp(pages, sp, parents, i) {
- kvm_sync_page(vcpu, sp);
+ kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
cond_resched_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_pages_init(parent, &parents, &pages);
}
@@ -1606,16 +1614,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,

}

-static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- LIST_HEAD(invalid_list);
- int ret;
-
- ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
- return ret;
-}
-
/*
* Changing the number of mmu pages allocated to the vm
* Note: if kvm_nr_mmu_pages is too small, you will get dead lock
@@ -1623,6 +1621,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
{
int used_pages;
+ LIST_HEAD(invalid_list);

used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
used_pages = max(0, used_pages);
@@ -1640,8 +1639,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)

page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- used_pages -= kvm_mmu_zap_page(kvm, page);
+ used_pages -= kvm_mmu_prepare_zap_page(kvm, page,
+ &invalid_list);
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
kvm_nr_mmu_pages = used_pages;
kvm->arch.n_free_mmu_pages = 0;
}
@@ -1656,6 +1657,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
struct hlist_node *node, *n;
+ LIST_HEAD(invalid_list);
int r;

pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
@@ -1665,9 +1667,10 @@ restart:
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
- if (kvm_mmu_zap_page(kvm, sp))
+ if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
return r;
}

@@ -1675,14 +1678,16 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
struct hlist_node *node, *nn;
+ LIST_HEAD(invalid_list);

restart:
for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node, nn) {
pgprintk("%s: zap %lx %x\n",
__func__, gfn, sp->role.word);
- if (kvm_mmu_zap_page(kvm, sp))
+ if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
@@ -2121,6 +2126,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_mmu_page *sp;
+ LIST_HEAD(invalid_list);

if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
@@ -2130,8 +2136,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)

sp = page_header(root);
--sp->root_count;
- if (!sp->root_count && sp->role.invalid)
- kvm_mmu_zap_page(vcpu->kvm, sp);
+ if (!sp->root_count && sp->role.invalid) {
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+ }
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
spin_unlock(&vcpu->kvm->mmu_lock);
return;
@@ -2144,10 +2152,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
sp = page_header(root);
--sp->root_count;
if (!sp->root_count && sp->role.invalid)
- kvm_mmu_zap_page(vcpu->kvm, sp);
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ &invalid_list);
}
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
}
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
}
@@ -2713,6 +2723,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
struct hlist_node *node, *n;
+ LIST_HEAD(invalid_list);
u64 entry, gentry;
u64 *spte;
unsigned offset = offset_in_page(gpa);
@@ -2799,7 +2810,8 @@ restart:
*/
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
gpa, bytes, sp->role.word);
- if (kvm_mmu_zap_page(vcpu->kvm, sp))
+ if (kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ &invalid_list))
goto restart;
++vcpu->kvm->stat.mmu_flooded;
continue;
@@ -2834,6 +2846,7 @@ restart:
++spte;
}
}
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
kvm_mmu_audit(vcpu, "post pte write");
spin_unlock(&vcpu->kvm->mmu_lock);
if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
@@ -2862,6 +2875,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
int free_pages;
+ LIST_HEAD(invalid_list);

free_pages = vcpu->kvm->arch.n_free_mmu_pages;
while (free_pages < KVM_REFILL_PAGES &&
@@ -2870,9 +2884,11 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)

sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- free_pages += kvm_mmu_zap_page(vcpu->kvm, sp);
+ free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ &invalid_list);
++vcpu->kvm->stat.mmu_recycled;
}
+ kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}

int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -3007,25 +3023,28 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
void kvm_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
+ LIST_HEAD(invalid_list);

spin_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
- if (kvm_mmu_zap_page(kvm, sp))
+ if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
goto restart;

+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);

kvm_flush_remote_tlbs(kvm);
}

-static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
+static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
+ struct list_head *invalid_list)
{
struct kvm_mmu_page *page;

page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- return kvm_mmu_zap_page(kvm, page);
+ return kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
}

static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
@@ -3038,6 +3057,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)

list_for_each_entry(kvm, &vm_list, vm_list) {
int npages, idx, freed_pages;
+ LIST_HEAD(invalid_list);

idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -3045,12 +3065,14 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
kvm->arch.n_free_mmu_pages;
cache_count += npages;
if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
- freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm);
+ freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm,
+ &invalid_list);
cache_count -= freed_pages;
kvm_freed = kvm;
}
nr_to_scan--;

+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
--
1.6.1.2

2010-06-04 13:59:47

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 6/7] KVM: MMU: traverse sp hlish safely

Now, we can safely to traverse sp hlish

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 51 +++++++++++++++++++++++----------------------------
1 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6544d8e..845cba2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1205,13 +1205,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);

-#define for_each_gfn_sp(kvm, sp, gfn, pos, n) \
- hlist_for_each_entry_safe(sp, pos, n, \
+#define for_each_gfn_sp(kvm, sp, gfn, pos) \
+ hlist_for_each_entry(sp, pos, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn)) {} else

-#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos, n) \
- hlist_for_each_entry_safe(sp, pos, n, \
+#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \
+ hlist_for_each_entry(sp, pos, \
&(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
if ((sp)->gfn != (gfn) || (sp)->role.direct || \
(sp)->role.invalid) {} else
@@ -1265,11 +1265,11 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_mmu_page *s;
- struct hlist_node *node, *n;
+ struct hlist_node *node;
LIST_HEAD(invalid_list);
bool flush = false;

- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
if (!s->unsync)
continue;

@@ -1387,7 +1387,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
union kvm_mmu_page_role role;
unsigned quadrant;
struct kvm_mmu_page *sp;
- struct hlist_node *node, *tmp;
+ struct hlist_node *node;
bool need_sync = false;

role = vcpu->arch.mmu.base_role;
@@ -1401,7 +1401,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- for_each_gfn_sp(vcpu->kvm, sp, gfn, node, tmp) {
+ for_each_gfn_sp(vcpu->kvm, sp, gfn, node) {
if (!need_sync && sp->unsync)
need_sync = true;

@@ -1656,19 +1656,18 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
- struct hlist_node *node, *n;
+ struct hlist_node *node;
LIST_HEAD(invalid_list);
int r;

pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
r = 0;
-restart:
- for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node, n) {
+
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
- if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
- goto restart;
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
return r;
@@ -1677,15 +1676,13 @@ restart:
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
struct kvm_mmu_page *sp;
- struct hlist_node *node, *nn;
+ struct hlist_node *node;
LIST_HEAD(invalid_list);

-restart:
- for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node, nn) {
+ for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
pgprintk("%s: zap %lx %x\n",
__func__, gfn, sp->role.word);
- if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
- goto restart;
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
}
@@ -1830,9 +1827,9 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct kvm_mmu_page *s;
- struct hlist_node *node, *n;
+ struct hlist_node *node;

- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
if (s->unsync)
continue;
WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -1844,10 +1841,10 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
bool can_unsync)
{
struct kvm_mmu_page *s;
- struct hlist_node *node, *n;
+ struct hlist_node *node;
bool need_unsync = false;

- for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node, n) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
if (s->role.level != PT_PAGE_TABLE_LEVEL)
return 1;

@@ -2722,7 +2719,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
- struct hlist_node *node, *n;
+ struct hlist_node *node;
LIST_HEAD(invalid_list);
u64 entry, gentry;
u64 *spte;
@@ -2792,8 +2789,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
}

-restart:
- for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node, n) {
+ for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
pte_size = sp->role.cr4_pae ? 8 : 4;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
misaligned |= bytes < 4;
@@ -2810,9 +2806,8 @@ restart:
*/
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
gpa, bytes, sp->role.word);
- if (kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list))
- goto restart;
+ kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ &invalid_list);
++vcpu->kvm->stat.mmu_flooded;
continue;
}
--
1.6.1.2

2010-06-04 14:00:35

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH v2 7/7] KVM: MMU: reduce remote tlb flush in kvm_mmu_pte_write()

collect remote tlb flush in kvm_mmu_pte_write() path

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 20 +++++++++++++++-----
1 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 845cba2..8528e5b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2664,11 +2664,15 @@ static bool need_remote_flush(u64 old, u64 new)
return (old & ~new & PT64_PERM_MASK) != 0;
}

-static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new)
+static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
+ bool remote_flush, bool local_flush)
{
- if (need_remote_flush(old, new))
+ if (zap_page)
+ return;
+
+ if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
- else
+ else if (local_flush)
kvm_mmu_flush_tlb(vcpu);
}

@@ -2733,6 +2737,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int npte;
int r;
int invlpg_counter;
+ bool remote_flush, local_flush, zap_page;
+
+ zap_page = remote_flush = local_flush = false;

pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

@@ -2806,7 +2813,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
*/
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
gpa, bytes, sp->role.word);
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+ zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
&invalid_list);
++vcpu->kvm->stat.mmu_flooded;
continue;
@@ -2831,16 +2838,19 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (quadrant != sp->role.quadrant)
continue;
}
+ local_flush = true;
spte = &sp->spt[page_offset / sizeof(*spte)];
while (npte--) {
entry = *spte;
mmu_pte_write_zap_pte(vcpu, sp, spte);
if (gentry)
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
- mmu_pte_write_flush_tlb(vcpu, entry, *spte);
+ if (!remote_flush && need_remote_flush(entry, *spte))
+ remote_flush = true;
++spte;
}
}
+ mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush);
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
kvm_mmu_audit(vcpu, "post pte write");
spin_unlock(&vcpu->kvm->mmu_lock);
--
1.6.1.2

2010-06-06 10:52:16

by Avi Kivity

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] KVM: MMU: skip invalid sp when unprotect page

On 06/04/2010 04:52 PM, Xiao Guangrong wrote:
> In kvm_mmu_unprotect_page(), the invalid sp can be skipped
>

Applied all, thanks.

--
error compiling committee.c: too many arguments to function