2011-03-09 07:40:11

by Xiao Guangrong

Subject: [PATCH v2 1/4] KVM: fix rcu usage in init_rmode_* functions

fix:
[ 3494.671786] stack backtrace:
[ 3494.671789] Pid: 10527, comm: qemu-system-x86 Not tainted 2.6.38-rc6+ #23
[ 3494.671790] Call Trace:
[ 3494.671796] [] ? lockdep_rcu_dereference+0x9d/0xa5
[ 3494.671826] [] ? kvm_memslots+0x6b/0x73 [kvm]
[ 3494.671834] [] ? gfn_to_memslot+0x16/0x4f [kvm]
[ 3494.671843] [] ? gfn_to_hva+0x16/0x27 [kvm]
[ 3494.671851] [] ? kvm_write_guest_page+0x31/0x83 [kvm]
[ 3494.671861] [] ? kvm_clear_guest_page+0x1a/0x1c [kvm]
[ 3494.671867] [] ? vmx_set_tss_addr+0x83/0x122 [kvm_intel]

and:
[ 8328.789599] stack backtrace:
[ 8328.789601] Pid: 18736, comm: qemu-system-x86 Not tainted 2.6.38-rc6+ #23
[ 8328.789603] Call Trace:
[ 8328.789609] [] ? lockdep_rcu_dereference+0x9d/0xa5
[ 8328.789621] [] ? kvm_memslots+0x6b/0x73 [kvm]
[ 8328.789628] [] ? gfn_to_memslot+0x16/0x4f [kvm]
[ 8328.789635] [] ? gfn_to_hva+0x16/0x27 [kvm]
[ 8328.789643] [] ? kvm_write_guest_page+0x31/0x83 [kvm]
[ 8328.789699] [] ? kvm_clear_guest_page+0x1a/0x1c [kvm]
[ 8328.789713] [] ? vmx_create_vcpu+0x316/0x3c8 [kvm_intel]

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/vmx.c | 12 ++++++++----
1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3febb76..d8475a2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2397,11 +2397,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)

static int init_rmode_tss(struct kvm *kvm)
{
- gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
+ gfn_t fn;
u16 data = 0;
- int ret = 0;
- int r;
+ int r, idx, ret = 0;

+ idx = srcu_read_lock(&kvm->srcu);
+ fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -2425,12 +2426,13 @@ static int init_rmode_tss(struct kvm *kvm)

ret = 1;
out:
+ srcu_read_unlock(&kvm->srcu, idx);
return ret;
}

static int init_rmode_identity_map(struct kvm *kvm)
{
- int i, r, ret;
+ int i, idx, r, ret;
pfn_t identity_map_pfn;
u32 tmp;

@@ -2445,6 +2447,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
return 1;
ret = 0;
identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+ idx = srcu_read_lock(&kvm->srcu);
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -2460,6 +2463,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
kvm->arch.ept_identity_pagetable_done = true;
ret = 1;
out:
+ srcu_read_unlock(&kvm->srcu, idx);
return ret;
}

--
1.7.4
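
For context on the splat: kvm_memslots() fetches the memslot array with an
RCU dereference annotated to require either kvm->srcu or slots_lock, which is
what lockdep_rcu_dereference() is reporting. A sketch of the 2.6.38-era helper
from include/linux/kvm_host.h (reconstructed from memory, so treat the exact
annotation as approximate):

	static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
	{
		/* lockdep_rcu_dereference() fires when neither condition holds */
		return rcu_dereference_check(kvm->memslots,
				srcu_read_lock_held(&kvm->srcu)
				|| lockdep_is_held(&kvm->slots_lock));
	}

init_rmode_tss() and init_rmode_identity_map() reach this through
kvm_clear_guest_page() while holding neither lock, hence the two backtraces;
the patch brackets both paths with srcu_read_lock()/srcu_read_unlock() on
kvm->srcu.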


2011-03-09 07:41:09

by Xiao Guangrong

Subject: [PATCH v2 2/4] KVM: cleanup memslot_id function

We can get the memslot id from memslot->id directly

Signed-off-by: Xiao Guangrong <[email protected]>
---
include/linux/kvm_host.h | 6 +++++-
virt/kvm/kvm_main.c | 17 -----------------
2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ab42855..57d7092 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -365,7 +365,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
-int memslot_id(struct kvm *kvm, gfn_t gfn);
void kvm_release_pfn_dirty(pfn_t);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
@@ -597,6 +596,11 @@ static inline void kvm_guest_exit(void)
current->flags &= ~PF_VCPU;
}

+static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn_to_memslot(kvm, gfn)->id;
+}
+
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1fa0d29..cf90f28 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -997,23 +997,6 @@ out:
return size;
}

-int memslot_id(struct kvm *kvm, gfn_t gfn)
-{
- int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
- struct kvm_memory_slot *memslot = NULL;
-
- for (i = 0; i < slots->nmemslots; ++i) {
- memslot = &slots->memslots[i];
-
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- break;
- }
-
- return memslot - slots->memslots;
-}
-
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
--
1.7.4
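
One subtlety the shorter inline inherits: it dereferences the result of
gfn_to_memslot() without a NULL check, so it is only safe for gfns known to
be covered by a memslot. A hypothetical caller sketch (illustrative only, not
from the patch):

	/* gfn must lie inside an existing memslot, or this oopses */
	int id = memslot_id(vcpu->kvm, gfn);

The removed open-coded loop also misbehaved for unmapped gfns (it returned an
arbitrary index rather than failing), so the callers' contract is effectively
unchanged.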

2011-03-09 07:42:06

by Xiao Guangrong

Subject: [PATCH v2 3/4] KVM: MMU: introduce a common function to get no-dirty-logged slot

Clean up the code of pte_prefetch_gfn_to_memslot and mapping_level_dirty_bitmap

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 37 +++++++++++++++++--------------------
1 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 88d3689..83171fd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -549,13 +549,23 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
return ret;
}

-static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static struct kvm_memory_slot *
+gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool no_dirty_log)
{
struct kvm_memory_slot *slot;
- slot = gfn_to_memslot(vcpu->kvm, large_gfn);
- if (slot && slot->dirty_bitmap)
- return true;
- return false;
+
+ slot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
+ (no_dirty_log && slot->dirty_bitmap))
+ slot = NULL;
+
+ return slot;
+}
+
+static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+{
+ return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
}

static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
@@ -2145,26 +2155,13 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

-static struct kvm_memory_slot *
-pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
-{
- struct kvm_memory_slot *slot;
-
- slot = gfn_to_memslot(vcpu->kvm, gfn);
- if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
- (no_dirty_log && slot->dirty_bitmap))
- slot = NULL;
-
- return slot;
-}
-
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
struct kvm_memory_slot *slot;
unsigned long hva;

- slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log);
+ slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
if (!slot) {
get_page(bad_page);
return page_to_pfn(bad_page);
@@ -2185,7 +2182,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
gfn_t gfn;

gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
- if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK))
+ if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK))
return -1;

ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
--
1.7.4
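
The new helper's polarity is easy to misread: with no_dirty_log == true it
returns the slot only when dirty logging is off, and NULL when the slot is
missing, invalid, or dirty-logged. A hypothetical wrapper (illustrative only,
not part of the patch) spelling out how mapping_level_dirty_bitmap() consumes
it:

	/* true when dirty logging (or a missing/invalid slot) forces 4K pages */
	static bool slot_forces_4k_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
	{
		return !gfn_to_memslot_dirty_bitmap(vcpu, gfn, true);
	}

Hence the negation in mapping_level_dirty_bitmap() above: a non-NULL result
means large-page mappings are still allowed.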

2011-03-09 07:42:56

by Xiao Guangrong

Subject: [PATCH v2 4/4] KVM: MMU: cleanup pte write path

This patch does:
- call vcpu->arch.mmu.update_pte directly
- use gfn_to_pfn_atomic in update_pte path

The suggestion is from Avi.

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 7 +---
arch/x86/kvm/mmu.c | 69 +++++++++++++--------------------------
arch/x86/kvm/paging_tmpl.h | 12 ++++---
3 files changed, 32 insertions(+), 56 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f08314f..c8af099 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -255,6 +255,8 @@ struct kvm_mmu {
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
+ void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ u64 *spte, const void *pte, unsigned long mmu_seq);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@@ -335,11 +337,6 @@ struct kvm_vcpu_arch {
u64 *last_pte_updated;
gfn_t last_pte_gfn;

- struct {
- pfn_t pfn; /* pfn corresponding to that gfn */
- unsigned long mmu_seq;
- } update_pte;
-
struct fpu guest_fpu;
u64 xcr0;

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83171fd..22fae75 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1204,6 +1204,13 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
}

+static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, u64 *spte,
+ const void *pte, unsigned long mmu_seq)
+{
+ WARN_ON(1);
+}
+
#define KVM_PAGE_ARRAY_NR 16

struct kvm_mmu_pages {
@@ -2796,6 +2803,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
context->prefetch_page = nonpaging_prefetch_page;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
+ context->update_pte = nonpaging_update_pte;
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
@@ -2925,6 +2933,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
context->prefetch_page = paging64_prefetch_page;
context->sync_page = paging64_sync_page;
context->invlpg = paging64_invlpg;
+ context->update_pte = paging64_update_pte;
context->free = paging_free;
context->root_level = level;
context->shadow_root_level = level;
@@ -2953,6 +2962,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
context->prefetch_page = paging32_prefetch_page;
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
+ context->update_pte = paging32_update_pte;
context->root_level = PT32_ROOT_LEVEL;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
@@ -2977,6 +2987,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->prefetch_page = nonpaging_prefetch_page;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
+ context->update_pte = nonpaging_update_pte;
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
@@ -3081,8 +3092,6 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)

static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
- vcpu->arch.update_pte.pfn = bad_pfn;
-
if (mmu_is_nested(vcpu))
return init_kvm_nested_mmu(vcpu);
else if (tdp_enabled)
@@ -3156,7 +3165,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *spte,
- const void *new)
+ const void *new, unsigned long mmu_seq)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3164,10 +3173,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}

++vcpu->kvm->stat.mmu_pte_updated;
- if (!sp->role.cr4_pae)
- paging32_update_pte(vcpu, sp, spte, new);
- else
- paging64_update_pte(vcpu, sp, spte, new);
+ vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
}

static bool need_remote_flush(u64 old, u64 new)
@@ -3202,27 +3208,6 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
return !!(spte && (*spte & shadow_accessed_mask));
}

-static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- u64 gpte)
-{
- gfn_t gfn;
- pfn_t pfn;
-
- if (!is_present_gpte(gpte))
- return;
- gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-
- vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
- pfn = gfn_to_pfn(vcpu->kvm, gfn);
-
- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- return;
- }
- vcpu->arch.update_pte.pfn = pfn;
-}
-
static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u64 *spte = vcpu->arch.last_pte_updated;
@@ -3244,21 +3229,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvm_mmu_page *sp;
struct hlist_node *node;
LIST_HEAD(invalid_list);
- u64 entry, gentry;
- u64 *spte;
- unsigned offset = offset_in_page(gpa);
- unsigned pte_size;
- unsigned page_offset;
- unsigned misaligned;
- unsigned quadrant;
- int level;
- int flooded = 0;
- int npte;
- int r;
- int invlpg_counter;
+ unsigned long mmu_seq;
+ u64 entry, gentry, *spte;
+ unsigned pte_size, page_offset, misaligned, quadrant, offset;
+ int level, npte, invlpg_counter, r, flooded = 0;
bool remote_flush, local_flush, zap_page;

zap_page = remote_flush = local_flush = false;
+ offset = offset_in_page(gpa);

pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);

@@ -3293,7 +3271,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}

- mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ smp_rmb();
+
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
@@ -3365,7 +3345,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
- mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
+ mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
+ mmu_seq);
if (!remote_flush && need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
@@ -3375,10 +3356,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
spin_unlock(&vcpu->kvm->mmu_lock);
- if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
- kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
- vcpu->arch.update_pte.pfn = bad_pfn;
- }
}

int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 86eb816..7514050 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -325,7 +325,7 @@ no_present:
}

static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte)
+ u64 *spte, const void *pte, unsigned long mmu_seq)
{
pt_element_t gpte;
unsigned pte_access;
@@ -337,12 +337,14 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,

pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
- pfn = vcpu->arch.update_pte.pfn;
- if (is_error_pfn(pfn))
+ pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
return;
- if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
+ }
+ if (mmu_notifier_retry(vcpu, mmu_seq))
return;
- kvm_get_pfn(pfn);
+
/*
* we call mmu_set_spte() with host_writable = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
--
1.7.4
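
A point the diff implies but does not state: FNAME(update_pte) now resolves
the pfn itself while kvm_mmu_pte_write() holds mmu_lock, a spinlock, so the
lookup must not sleep. That is why the pre-fetched vcpu->arch.update_pte state
can go away and gfn_to_pfn_atomic() is used instead of gfn_to_pfn(). A
condensed sketch of the constraint (illustrative, using the 2.6.38-era API):

	spin_lock(&vcpu->kvm->mmu_lock);
	/* ... kvm_mmu_pte_write() walks the affected shadow pages ... */
	/* inside FNAME(update_pte), still under mmu_lock: */
	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); /* never sleeps */
	/* plain gfn_to_pfn() may sleep in get_user_pages() and is illegal here */
	spin_unlock(&vcpu->kvm->mmu_lock);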

2011-03-10 10:22:04

by Avi Kivity

Subject: Re: [PATCH v2 4/4] KVM: MMU: cleanup pte write path

On 03/09/2011 09:43 AM, Xiao Guangrong wrote:
> This patch does:
> - call vcpu->arch.mmu.update_pte directly
> - use gfn_to_pfn_atomic in update_pte path
>
> The suggestion is from Avi.
>
>
>
> - mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
> + mmu_seq = vcpu->kvm->mmu_notifier_seq;
> + smp_rmb();

smp_rmb() should come before, no? But the problem was present in the
original code, too.

> +
> spin_lock(&vcpu->kvm->mmu_lock);
> if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
> gentry = 0;
> @@ -3365,7 +3345,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> if (gentry &&
> !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
> & mask.word))
> - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
> + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
> + mmu_seq);

Okay, we're only iterating over indirect pages, so we won't call
nonpaging_update_pte().

--
error compiling committee.c: too many arguments to function

2011-03-10 10:22:47

by Avi Kivity

Subject: Re: [PATCH v2 1/4] KVM: fix rcu usage in init_rmode_* functions

On 03/09/2011 09:41 AM, Xiao Guangrong wrote:
> fix:
> [ 3494.671786] stack backtrace:
> [ 3494.671789] Pid: 10527, comm: qemu-system-x86 Not tainted 2.6.38-rc6+ #23
> [ 3494.671790] Call Trace:
> [ 3494.671796] [] ? lockdep_rcu_dereference+0x9d/0xa5
> [ 3494.671826] [] ? kvm_memslots+0x6b/0x73 [kvm]
> [ 3494.671834] [] ? gfn_to_memslot+0x16/0x4f [kvm]
> [ 3494.671843] [] ? gfn_to_hva+0x16/0x27 [kvm]
> [ 3494.671851] [] ? kvm_write_guest_page+0x31/0x83 [kvm]
> [ 3494.671861] [] ? kvm_clear_guest_page+0x1a/0x1c [kvm]
> [ 3494.671867] [] ? vmx_set_tss_addr+0x83/0x122 [kvm_intel]
>
> and:
> [ 8328.789599] stack backtrace:
> [ 8328.789601] Pid: 18736, comm: qemu-system-x86 Not tainted 2.6.38-rc6+ #23
> [ 8328.789603] Call Trace:
> [ 8328.789609] [] ? lockdep_rcu_dereference+0x9d/0xa5
> [ 8328.789621] [] ? kvm_memslots+0x6b/0x73 [kvm]
> [ 8328.789628] [] ? gfn_to_memslot+0x16/0x4f [kvm]
> [ 8328.789635] [] ? gfn_to_hva+0x16/0x27 [kvm]
> [ 8328.789643] [] ? kvm_write_guest_page+0x31/0x83 [kvm]
> [ 8328.789699] [] ? kvm_clear_guest_page+0x1a/0x1c [kvm]
> [ 8328.789713] [] ? vmx_create_vcpu+0x316/0x3c8 [kvm_intel]
>

Applied all four patches, thanks.

--
error compiling committee.c: too many arguments to function

2011-03-11 03:40:36

by Xiao Guangrong

Subject: Re: [PATCH v2 4/4] KVM: MMU: cleanup pte write path

On 03/10/2011 06:21 PM, Avi Kivity wrote:
> On 03/09/2011 09:43 AM, Xiao Guangrong wrote:
>> This patch does:
>> - call vcpu->arch.mmu.update_pte directly
>> - use gfn_to_pfn_atomic in update_pte path
>>
>> The suggestion is from Avi.
>>
>>
>>
>> - mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
>> + mmu_seq = vcpu->kvm->mmu_notifier_seq;
>> + smp_rmb();
>
> smp_rmb() should come before, no? but the problem was present in the original code, too.
>

Um, I think smp_rmb() is used to keep the read of mmu_notifier_seq from being
reordered after gfn_to_pfn() in the original code, like this:


CPU A:                                  CPU B:

gfn_to_pfn
                                        invalidate_page
                                        mmu_notifier_seq++
read mmu_notifier_seq

then, CPU A will keep the invalid pfn, and the later mmu_notifier_retry()
check will not catch it.

But after this cleanup patch, we call gfn_to_pfn_atomic() under the
protection of mmu_lock, so I think the mmu_seq code can be removed.
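
For reference, the pattern under discussion, as the fault paths use it (a
condensed sketch of the 2.6.38-era API, not new code):

	mmu_seq = vcpu->kvm->mmu_notifier_seq;	/* snapshot the sequence */
	smp_rmb();		/* order the snapshot before the pfn lookup */
	pfn = gfn_to_pfn(vcpu->kvm, gfn);	/* may sleep; invalidate can run here */

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))	/* seq moved: pfn may be stale */
		goto out_unlock;
	/* ... install the spte using pfn ... */
out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);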


Subject: [PATCH] KVM: MMU: remove mmu_seq verification in kvm_mmu_pte_write

The mmu_seq verification can be removed since we now get the pfn under the
protection of mmu_lock

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/mmu.c | 16 +++++-----------
arch/x86/kvm/paging_tmpl.h | 4 +---
3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c8af099..18a95e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -256,7 +256,7 @@ struct kvm_mmu {
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte, unsigned long mmu_seq);
+ u64 *spte, const void *pte);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 22fae75..2841805 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)

static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
- const void *pte, unsigned long mmu_seq)
+ const void *pte)
{
WARN_ON(1);
}
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
}

static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp,
- u64 *spte,
- const void *new, unsigned long mmu_seq)
+ struct kvm_mmu_page *sp, u64 *spte,
+ const void *new)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}

++vcpu->kvm->stat.mmu_pte_updated;
- vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
+ vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
}

static bool need_remote_flush(u64 old, u64 new)
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvm_mmu_page *sp;
struct hlist_node *node;
LIST_HEAD(invalid_list);
- unsigned long mmu_seq;
u64 entry, gentry, *spte;
unsigned pte_size, page_offset, misaligned, quadrant, offset;
int level, npte, invlpg_counter, r, flooded = 0;
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}

- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
-
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
- mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
- mmu_seq);
+ mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (!remote_flush && need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7514050..3dee563 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -325,7 +325,7 @@ no_present:
}

static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte, unsigned long mmu_seq)
+ u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
@@ -342,8 +342,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
kvm_release_pfn_clean(pfn);
return;
}
- if (mmu_notifier_retry(vcpu, mmu_seq))
- return;

/*
* we call mmu_set_spte() with host_writable = true beacuse that
--
1.7.4