Simply return from the kvm_mmu_pte_write path if no shadow page is
write-protected, so that we avoid walking all shadow pages and holding
mmu-lock
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 9 +++++++++
2 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2ac8e2..d2e5fb8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -442,6 +442,7 @@ struct kvm_arch {
unsigned int n_requested_mmu_pages;
unsigned int n_max_mmu_pages;
atomic_t invlpg_counter;
+ atomic_t indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2841805..971e2d2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -498,6 +498,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
linfo = lpage_info_slot(gfn, slot, i);
linfo->write_count += 1;
}
+ atomic_inc(&kvm->arch.indirect_shadow_pages);
}
static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -513,6 +514,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
linfo->write_count -= 1;
WARN_ON(linfo->write_count < 0);
}
+ atomic_dec(&kvm->arch.indirect_shadow_pages);
}
static int has_wrprotected_page(struct kvm *kvm,
@@ -3233,6 +3235,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int level, npte, invlpg_counter, r, flooded = 0;
bool remote_flush, local_flush, zap_page;
+ /*
+ * If we don't have indirect shadow pages, it means no page is
+ * write-protected, so we can simply return.
+ */
+ if (!atomic_read(&vcpu->kvm->arch.indirect_shadow_pages))
+ return;
+
zap_page = remote_flush = local_flush = false;
offset = offset_in_page(gpa);
--
1.7.4.4
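The pattern relied on here: account_shadowed()/unaccount_shadowed() update
the counter while holding mmu_lock, but kvm_mmu_pte_write() reads it before
taking the lock, which is why an atomic_t is used rather than a plain int.
A minimal sketch of the resulting fast path (illustrative only, condensed
from the hunks above):

	/* writers: always under mmu_lock */
	spin_lock(&kvm->mmu_lock);
	atomic_inc(&kvm->arch.indirect_shadow_pages);
	spin_unlock(&kvm->mmu_lock);

	/* reader: lockless check before any shadow-page walk */
	if (!atomic_read(&vcpu->kvm->arch.indirect_shadow_pages))
		return;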
Simply use __copy_to_user/__clear_user to write the guest page, since we
have already verified the user address when the memslot was set up
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/x86.c | 4 ++--
virt/kvm/kvm_main.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77c9d86..a419bd1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1388,7 +1388,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return 1;
kvm_x86_ops->patch_hypercall(vcpu, instructions);
((unsigned char *)instructions)[3] = 0xc3; /* ret */
- if (copy_to_user((void __user *)addr, instructions, 4))
+ if (__copy_to_user((void __user *)addr, instructions, 4))
return 1;
kvm->arch.hv_hypercall = data;
break;
@@ -1415,7 +1415,7 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
if (kvm_is_error_hva(addr))
return 1;
- if (clear_user((void __user *)addr, PAGE_SIZE))
+ if (__clear_user((void __user *)addr, PAGE_SIZE))
return 1;
vcpu->arch.hv_vapic = data;
break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 22cdb96..3962899 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1342,7 +1342,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = copy_to_user((void __user *)addr + offset, data, len);
+ r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
mark_page_dirty(kvm, gfn);
@@ -1402,7 +1402,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
- r = copy_to_user((void __user *)ghc->hva, data, len);
+ r = __copy_to_user((void __user *)ghc->hva, data, len);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
--
1.7.4.4
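For reference, the reason dropping the check is safe: the double-underscore
variants differ from copy_to_user()/clear_user() only in skipping the
access_ok() check, and KVM validates the userspace address range when the
memslot is registered. A simplified sketch of that validation (the actual
check lives in __kvm_set_memory_region(); details here are approximate):

	/* at memslot registration, so __xxx_user() is safe later on */
	if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
	    !access_ok(VERIFY_WRITE,
		       (void __user *)(unsigned long)mem->userspace_addr,
		       mem->memory_size))
		return -EINVAL;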
Fix:
warning: ‘cs_sel’ may be used uninitialized in this function
warning: ‘ss_sel’ may be used uninitialized in this function
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/emulate.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 291c872..ea32340 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2015,7 +2015,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
struct desc_struct cs, ss;
u64 msr_data;
int usermode;
- u16 cs_sel, ss_sel;
+ u16 cs_sel = 0, ss_sel = 0;
/* inject #GP if in real mode or Virtual 8086 mode */
if (ctxt->mode == X86EMUL_MODE_REAL ||
--
1.7.4.4
Abstract the rmap operations so that we can reuse them for the reverse
mapping of parent ptes in a later patch
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 142 +++++++++++++++++++++++++++++-----------------------
1 files changed, 79 insertions(+), 63 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 971e2d2..8da77b3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -590,28 +590,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
}
/*
- * Take gfn and return the reverse mapping to it.
- */
-
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
-{
- struct kvm_memory_slot *slot;
- struct kvm_lpage_info *linfo;
-
- slot = gfn_to_memslot(kvm, gfn);
- if (likely(level == PT_PAGE_TABLE_LEVEL))
- return &slot->rmap[gfn - slot->base_gfn];
-
- linfo = lpage_info_slot(gfn, slot, level);
-
- return &linfo->rmap_pde;
-}
-
-/*
* Reverse mapping data structures:
*
- * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
- * that points to page_address(page).
+ * If rmapp bit zero is zero, then rmapp points to the spte.
*
* If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
* containing more mappings.
@@ -620,18 +601,11 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
* the spte was not added.
*
*/
-static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+static int __rmap_add(struct kvm_vcpu *vcpu, u64 *spte, unsigned long *rmapp)
{
- struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
- unsigned long *rmapp;
int i, count = 0;
- if (!is_rmap_spte(*spte))
- return count;
- sp = page_header(__pa(spte));
- kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
- rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
if (!*rmapp) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
*rmapp = (unsigned long)spte;
@@ -660,7 +634,33 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
return count;
}
-static void rmap_desc_remove_entry(unsigned long *rmapp,
+static u64 *__rmap_next(unsigned long *rmapp, u64 *spte)
+{
+ struct kvm_rmap_desc *desc;
+ u64 *prev_spte;
+ int i;
+
+ if (!*rmapp)
+ return NULL;
+ else if (!(*rmapp & 1)) {
+ if (!spte)
+ return (u64 *)*rmapp;
+ return NULL;
+ }
+ desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+ prev_spte = NULL;
+ while (desc) {
+ for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
+ if (prev_spte == spte)
+ return desc->sptes[i];
+ prev_spte = desc->sptes[i];
+ }
+ desc = desc->more;
+ }
+ return NULL;
+}
+
+static void __rmap_desc_remove_entry(unsigned long *rmapp,
struct kvm_rmap_desc *desc,
int i,
struct kvm_rmap_desc *prev_desc)
@@ -683,18 +683,12 @@ static void rmap_desc_remove_entry(unsigned long *rmapp,
mmu_free_rmap_desc(desc);
}
-static void rmap_remove(struct kvm *kvm, u64 *spte)
+static void __rmap_remove(u64 *spte, unsigned long *rmapp)
{
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
- struct kvm_mmu_page *sp;
- gfn_t gfn;
- unsigned long *rmapp;
int i;
- sp = page_header(__pa(spte));
- gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
- rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte);
BUG();
@@ -712,7 +706,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
while (desc) {
for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i)
if (desc->sptes[i] == spte) {
- rmap_desc_remove_entry(rmapp,
+ __rmap_desc_remove_entry(rmapp,
desc, i,
prev_desc);
return;
@@ -725,6 +719,54 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
}
}
+/*
+ * Take gfn and return the reverse mapping to it.
+ */
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
+{
+ struct kvm_memory_slot *slot;
+ struct kvm_lpage_info *linfo;
+
+ slot = gfn_to_memslot(kvm, gfn);
+ if (likely(level == PT_PAGE_TABLE_LEVEL))
+ return &slot->rmap[gfn - slot->base_gfn];
+
+ linfo = lpage_info_slot(gfn, slot, level);
+
+ return &linfo->rmap_pde;
+}
+
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+{
+ struct kvm_mmu_page *sp;
+ unsigned long *rmapp;
+
+ if (!is_rmap_spte(*spte))
+ return 0;
+
+ sp = page_header(__pa(spte));
+ kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+ return __rmap_add(vcpu, spte, rmapp);
+}
+
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
+{
+ return __rmap_next(rmapp, spte);
+}
+
+static void rmap_remove(struct kvm *kvm, u64 *spte)
+{
+ struct kvm_mmu_page *sp;
+ gfn_t gfn;
+ unsigned long *rmapp;
+
+ sp = page_header(__pa(spte));
+ gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
+ rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
+ __rmap_remove(spte, rmapp);
+}
+
static int set_spte_track_bits(u64 *sptep, u64 new_spte)
{
pfn_t pfn;
@@ -752,32 +794,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
rmap_remove(kvm, sptep);
}
-static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
-{
- struct kvm_rmap_desc *desc;
- u64 *prev_spte;
- int i;
-
- if (!*rmapp)
- return NULL;
- else if (!(*rmapp & 1)) {
- if (!spte)
- return (u64 *)*rmapp;
- return NULL;
- }
- desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- prev_spte = NULL;
- while (desc) {
- for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
- if (prev_spte == spte)
- return desc->sptes[i];
- prev_spte = desc->sptes[i];
- }
- desc = desc->more;
- }
- return NULL;
-}
-
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
--
1.7.4.4
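The iterator protocol of __rmap_next()/rmap_next() is unchanged by the
move: pass NULL to fetch the first spte, then pass the previously returned
spte to advance; NULL marks the end. A typical walk looks like this
(sketch, mirroring existing callers such as rmap_write_protect()):

	u64 *spte;

	for (spte = rmap_next(kvm, rmapp, NULL); spte;
	     spte = rmap_next(kvm, rmapp, spte)) {
		/* operate on *spte */
	}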
Parent pte rmap and page rmap are very similar, so use the same logic for
both
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 7 +--
arch/x86/kvm/mmu.c | 178 ++++++++++-----------------------------
2 files changed, 45 insertions(+), 140 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2e5fb8..71c0b86 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -227,14 +227,10 @@ struct kvm_mmu_page {
* in this shadow page.
*/
DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
- bool multimapped; /* More than one parent_pte? */
bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
- union {
- u64 *parent_pte; /* !multimapped */
- struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
- };
+ unsigned long rmap; /* Rmap for parent_pte */
DECLARE_BITMAP(unsync_child_bitmap, 512);
};
@@ -346,7 +342,6 @@ struct kvm_vcpu_arch {
* put it here to avoid allocation */
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
- struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8da77b3..b68f42c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -182,8 +182,6 @@ struct kvm_shadow_walk_iterator {
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))
-typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
-
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;
@@ -397,12 +395,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
int r;
- r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
- pte_chain_cache, 4);
- if (r)
- goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
- rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
+ rmap_desc_cache, 8 + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -416,7 +410,6 @@ out:
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
- mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
@@ -433,17 +426,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
return p;
}
-static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
-{
- return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache,
- sizeof(struct kvm_pte_chain));
-}
-
-static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
-{
- kmem_cache_free(pte_chain_cache, pc);
-}
-
static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
@@ -634,6 +616,26 @@ static int __rmap_add(struct kvm_vcpu *vcpu, u64 *spte, unsigned long *rmapp)
return count;
}
+typedef void (*__rmap_walk_fn) (u64 *spte);
+static void __rmap_walk(unsigned long *rmapp, __rmap_walk_fn fn)
+{
+ struct kvm_rmap_desc *desc;
+ int i;
+
+ if (!*rmapp)
+ return;
+
+ if (!(*rmapp & 1))
+ return fn((u64 *)*rmapp);
+
+ desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+ while (desc) {
+ for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i)
+ fn(desc->sptes[i]);
+ desc = desc->more;
+ }
+}
+
static u64 *__rmap_next(unsigned long *rmapp, u64 *spte)
{
struct kvm_rmap_desc *desc;
@@ -1067,134 +1069,52 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
}
-static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
- u64 *parent_pte, int direct)
-{
- struct kvm_mmu_page *sp;
-
- sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
- sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
- if (!direct)
- sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
- PAGE_SIZE);
- set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
- list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
- bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
- sp->multimapped = 0;
- sp->parent_pte = parent_pte;
- kvm_mod_used_mmu_pages(vcpu->kvm, +1);
- return sp;
-}
-
static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *parent_pte)
{
- struct kvm_pte_chain *pte_chain;
- struct hlist_node *node;
- int i;
-
if (!parent_pte)
return;
- if (!sp->multimapped) {
- u64 *old = sp->parent_pte;
- if (!old) {
- sp->parent_pte = parent_pte;
- return;
- }
- sp->multimapped = 1;
- pte_chain = mmu_alloc_pte_chain(vcpu);
- INIT_HLIST_HEAD(&sp->parent_ptes);
- hlist_add_head(&pte_chain->link, &sp->parent_ptes);
- pte_chain->parent_ptes[0] = old;
- }
- hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) {
- if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
- continue;
- for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
- if (!pte_chain->parent_ptes[i]) {
- pte_chain->parent_ptes[i] = parent_pte;
- return;
- }
- }
- pte_chain = mmu_alloc_pte_chain(vcpu);
- BUG_ON(!pte_chain);
- hlist_add_head(&pte_chain->link, &sp->parent_ptes);
- pte_chain->parent_ptes[0] = parent_pte;
+ __rmap_add(vcpu, parent_pte, &sp->rmap);
}
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- struct kvm_pte_chain *pte_chain;
- struct hlist_node *node;
- int i;
-
- if (!sp->multimapped) {
- BUG_ON(sp->parent_pte != parent_pte);
- sp->parent_pte = NULL;
- return;
- }
- hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
- for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
- if (!pte_chain->parent_ptes[i])
- break;
- if (pte_chain->parent_ptes[i] != parent_pte)
- continue;
- while (i + 1 < NR_PTE_CHAIN_ENTRIES
- && pte_chain->parent_ptes[i + 1]) {
- pte_chain->parent_ptes[i]
- = pte_chain->parent_ptes[i + 1];
- ++i;
- }
- pte_chain->parent_ptes[i] = NULL;
- if (i == 0) {
- hlist_del(&pte_chain->link);
- mmu_free_pte_chain(pte_chain);
- if (hlist_empty(&sp->parent_ptes)) {
- sp->multimapped = 0;
- sp->parent_pte = NULL;
- }
- }
- return;
- }
- BUG();
+ __rmap_remove(parent_pte, &sp->rmap);
}
-static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
+ u64 *parent_pte, int direct)
{
- struct kvm_pte_chain *pte_chain;
- struct hlist_node *node;
- struct kvm_mmu_page *parent_sp;
- int i;
-
- if (!sp->multimapped && sp->parent_pte) {
- parent_sp = page_header(__pa(sp->parent_pte));
- fn(parent_sp, sp->parent_pte);
- return;
- }
-
- hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
- for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
- u64 *spte = pte_chain->parent_ptes[i];
+ struct kvm_mmu_page *sp;
- if (!spte)
- break;
- parent_sp = page_header(__pa(spte));
- fn(parent_sp, spte);
- }
+ sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
+ sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+ if (!direct)
+ sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
+ PAGE_SIZE);
+ set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+ bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+ sp->rmap = 0;
+ mmu_page_add_parent_pte(vcpu, sp, parent_pte);
+ kvm_mod_used_mmu_pages(vcpu->kvm, +1);
+ return sp;
}
-static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte);
+static void mark_unsync(u64 *spte);
static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
{
- mmu_parent_walk(sp, mark_unsync);
+ __rmap_walk(&sp->rmap, mark_unsync);
}
-static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte)
+static void mark_unsync(u64 *spte)
{
+ struct kvm_mmu_page *sp;
unsigned int index;
+ sp = page_header(__pa(spte));
index = spte - sp->spt;
if (__test_and_set_bit(index, sp->unsync_child_bitmap))
return;
@@ -1692,17 +1612,7 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
u64 *parent_pte;
- while (sp->multimapped || sp->parent_pte) {
- if (!sp->multimapped)
- parent_pte = sp->parent_pte;
- else {
- struct kvm_pte_chain *chain;
-
- chain = container_of(sp->parent_ptes.first,
- struct kvm_pte_chain, link);
- parent_pte = chain->parent_ptes[0];
- }
- BUG_ON(!parent_pte);
+ while ((parent_pte = __rmap_next(&sp->rmap, NULL))) {
kvm_mmu_put_page(sp, parent_pte);
__set_spte(parent_pte, shadow_trap_nonpresent_pte);
}
--
1.7.4.4
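With the multimapped union gone, sp->rmap carries the parent pte list using
the same bit-0 tagged encoding as the gfn rmap; the three possible states
are (illustrative sketch only):

	sp->rmap == 0;                        /* no parent ptes */
	sp->rmap == (unsigned long)spte;      /* one parent, bit 0 clear */
	sp->rmap == (unsigned long)desc | 1;  /* several parents, desc is a
						 struct kvm_rmap_desc * */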
Clean up the duplicated logic shared by kvm_mmu_page_unlink_children and
mmu_pte_write_zap_pte
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 66 ++++++++++++++++++---------------------------------
1 files changed, 23 insertions(+), 43 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b68f42c..a8def38 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1566,32 +1566,33 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}
+static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
+ u64 *spte)
+{
+ u64 pte;
+ struct kvm_mmu_page *child;
+
+ pte = *spte;
+ if (is_shadow_present_pte(pte)) {
+ if (is_last_spte(pte, sp->role.level))
+ drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
+ else {
+ child = page_header(pte & PT64_BASE_ADDR_MASK);
+ mmu_page_remove_parent_pte(child, spte);
+ }
+ }
+ __set_spte(spte, shadow_trap_nonpresent_pte);
+ if (is_large_pte(pte))
+ --kvm->stat.lpages;
+}
+
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
struct kvm_mmu_page *sp)
{
unsigned i;
- u64 *pt;
- u64 ent;
-
- pt = sp->spt;
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- ent = pt[i];
-
- if (is_shadow_present_pte(ent)) {
- if (!is_last_spte(ent, sp->role.level)) {
- ent &= PT64_BASE_ADDR_MASK;
- mmu_page_remove_parent_pte(page_header(ent),
- &pt[i]);
- } else {
- if (is_large_pte(ent))
- --kvm->stat.lpages;
- drop_spte(kvm, &pt[i],
- shadow_trap_nonpresent_pte);
- }
- }
- pt[i] = shadow_trap_nonpresent_pte;
- }
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+ mmu_page_zap_pte(kvm, sp, sp->spt + i);
}
static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
@@ -3069,27 +3070,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
-static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp,
- u64 *spte)
-{
- u64 pte;
- struct kvm_mmu_page *child;
-
- pte = *spte;
- if (is_shadow_present_pte(pte)) {
- if (is_last_spte(pte, sp->role.level))
- drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte);
- else {
- child = page_header(pte & PT64_BASE_ADDR_MASK);
- mmu_page_remove_parent_pte(child, spte);
- }
- }
- __set_spte(spte, shadow_trap_nonpresent_pte);
- if (is_large_pte(pte))
- --vcpu->kvm->stat.lpages;
-}
-
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
const void *new)
@@ -3271,7 +3251,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
spte = &sp->spt[page_offset / sizeof(*spte)];
while (npte--) {
entry = *spte;
- mmu_pte_write_zap_pte(vcpu, sp, spte);
+ mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
--
1.7.4.4
Introduce drop_parent_pte() to remove a parent pte from the rmap and
clear the parent pte
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 21 ++++++++++++---------
1 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a8def38..458e9bc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1084,6 +1084,13 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
__rmap_remove(parent_pte, &sp->rmap);
}
+static void drop_parent_pte(struct kvm_mmu_page *sp,
+ u64 *parent_pte)
+{
+ mmu_page_remove_parent_pte(sp, parent_pte);
+ __set_spte(parent_pte, shadow_trap_nonpresent_pte);
+}
+
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
u64 *parent_pte, int direct)
{
@@ -1560,8 +1567,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (child->role.access == direct_access)
return;
- mmu_page_remove_parent_pte(child, sptep);
- __set_spte(sptep, shadow_trap_nonpresent_pte);
+ drop_parent_pte(child, sptep);
kvm_flush_remote_tlbs(vcpu->kvm);
}
}
@@ -1578,7 +1584,7 @@ static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
else {
child = page_header(pte & PT64_BASE_ADDR_MASK);
- mmu_page_remove_parent_pte(child, spte);
+ drop_parent_pte(child, spte);
}
}
__set_spte(spte, shadow_trap_nonpresent_pte);
@@ -1613,10 +1619,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
u64 *parent_pte;
- while ((parent_pte = __rmap_next(&sp->rmap, NULL))) {
- kvm_mmu_put_page(sp, parent_pte);
- __set_spte(parent_pte, shadow_trap_nonpresent_pte);
- }
+ while ((parent_pte = __rmap_next(&sp->rmap, NULL)))
+ drop_parent_pte(sp, parent_pte);
}
static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -2046,8 +2050,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
u64 pte = *sptep;
child = page_header(pte & PT64_BASE_ADDR_MASK);
- mmu_page_remove_parent_pte(child, sptep);
- __set_spte(sptep, shadow_trap_nonpresent_pte);
+ drop_parent_pte(child, sptep);
kvm_flush_remote_tlbs(vcpu->kvm);
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %llx new %llx\n",
--
1.7.4.4
On 05/15/2011 12:35 AM, Xiao Guangrong wrote:
> Simply return from the kvm_mmu_pte_write path if no shadow page is
> write-protected, so that we avoid walking all shadow pages and holding
> mmu-lock
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 2841805..971e2d2 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -498,6 +498,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
> linfo = lpage_info_slot(gfn, slot, i);
> linfo->write_count += 1;
> }
> + atomic_inc(&kvm->arch.indirect_shadow_pages);
> }
>
> static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
> @@ -513,6 +514,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
> linfo->write_count -= 1;
> WARN_ON(linfo->write_count < 0);
> }
> + atomic_dec(&kvm->arch.indirect_shadow_pages);
> }
These atomic ops are always called from within the spinlock, so we don't
need an atomic_t here.
Sorry, I should have noticed this on the first version.
--
error compiling committee.c: too many arguments to function
On 05/15/2011 12:44 AM, Xiao Guangrong wrote:
> Parent pte rmap and page rmap are very similar, so use the same logic for
> both
>
While they're similar, this is going to cause a lot of confusion because
the name 'rmap' already has a deep meaning, at least for me.
Maybe we can call the abstract structure 'spte_list' and define
operations on that, then implement rmap_* and *_parent_pte as wrappers
around spte_list.
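Roughly like this, perhaps (just a sketch, all names hypothetical):

	struct spte_list_desc {
		u64 *sptes[RMAP_EXT];
		struct spte_list_desc *more;
	};

	int spte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
			  unsigned long *spte_list);
	void spte_list_remove(u64 *spte, unsigned long *spte_list);
	u64 *spte_list_next(unsigned long *spte_list, u64 *spte);

	/* and then e.g. the rmap wrapper becomes: */
	static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
	{
		struct kvm_mmu_page *sp;
		unsigned long *rmapp;

		if (!is_rmap_spte(*spte))
			return 0;

		sp = page_header(__pa(spte));
		kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
		rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
		return spte_list_add(vcpu, spte, rmapp);
	}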
--
error compiling committee.c: too many arguments to function
On 05/15/2011 04:20 PM, Avi Kivity wrote:
> On 05/15/2011 12:35 AM, Xiao Guangrong wrote:
>> Simply return from the kvm_mmu_pte_write path if no shadow page is
>> write-protected, so that we avoid walking all shadow pages and holding
>> mmu-lock
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 2841805..971e2d2 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -498,6 +498,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
>> linfo = lpage_info_slot(gfn, slot, i);
>> linfo->write_count += 1;
>> }
>> + atomic_inc(&kvm->arch.indirect_shadow_pages);
>> }
>>
>> static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
>> @@ -513,6 +514,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
>> linfo->write_count -= 1;
>> WARN_ON(linfo->write_count < 0);
>> }
>> + atomic_dec(&kvm->arch.indirect_shadow_pages);
>> }
>
> These atomic ops are always called from within the spinlock, so we don't need an atomic_t here.
>
> Sorry, I should have noticed this on the first version.
We read indirect_shadow_pages atomically on the pte write path; that read is allowed outside mmu_lock
On 05/15/2011 11:33 AM, Xiao Guangrong wrote:
> >
> > These atomic ops are always called from within the spinlock, so we don't need an atomic_t here.
> >
> > Sorry, I should have noticed this on the first version.
>
> We read indirect_shadow_pages atomically on the pte write path; that read is allowed outside mmu_lock
Reading is fine:
#define atomic_read(v) (*(volatile int *)&(v)->counter)
--
error compiling committee.c: too many arguments to function
On 05/15/2011 04:23 PM, Avi Kivity wrote:
> On 05/15/2011 12:44 AM, Xiao Guangrong wrote:
>> Parent pte rmap and page rmap are very similar, so use the same logic for
>> both
>>
>
> While they're similar, this is going to cause a lot of confusion because the name 'rmap' already has a deep meaning, at least for me.
>
> Maybe we can call the abstract structure 'spte_list' and define operations on that, then implement rmap_* and *_parent_pte as wrappers around spte_list.
>
OK, will fix the confusing name in the next version, thanks!