2010-08-28 11:14:21

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 0/5] KVM: MMU: some bugfix for mmu audit code

There are some bugfix patches for mmu audit code:

[PATCH 1/5] KVM: MMU: fix compile warning in audit code
[PATCH 2/5] KVM: MMU: check rmap for every spte
[PATCH 3/5] KVM: MMU: fix wrong not write protected sp report
[PATCH 4/5] KVM: MMU: rewrite audit_mappings_page() function
[PATCH 5/5] KVM: MMU: remove count_rmaps()


2010-08-28 11:15:52

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 1/5] KVM: MMU: fix compile warning in audit code

fix:

arch/x86/kvm/mmu.c: In function ‘kvm_mmu_unprotect_page’:
arch/x86/kvm/mmu.c:1741: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 3 has type ‘gfn_t’
arch/x86/kvm/mmu.c:1745: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 3 has type ‘gfn_t’
arch/x86/kvm/mmu.c: In function ‘mmu_unshadow’:
arch/x86/kvm/mmu.c:1761: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 3 has type ‘gfn_t’
arch/x86/kvm/mmu.c: In function ‘set_spte’:
arch/x86/kvm/mmu.c:2005: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 3 has type ‘gfn_t’
arch/x86/kvm/mmu.c: In function ‘mmu_set_spte’:
arch/x86/kvm/mmu.c:2033: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 7 has type ‘gfn_t’

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 20 ++++++++++----------
1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b0037a7..59bf1d9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1738,11 +1738,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
LIST_HEAD(invalid_list);
int r;

- pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
+ pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;

for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
- pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
+ pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
@@ -1758,7 +1758,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
LIST_HEAD(invalid_list);

for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
- pgprintk("%s: zap %lx %x\n",
+ pgprintk("%s: zap %llx %x\n",
__func__, gfn, sp->role.word);
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
@@ -2002,7 +2002,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
goto set_pte;

if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
- pgprintk("%s: found shadow page for %lx, marking ro\n",
+ pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
ret = 1;
pte_access &= ~ACC_WRITE_MASK;
@@ -2031,7 +2031,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
int rmap_count;

pgprintk("%s: spte %llx access %x write_fault %d"
- " user_fault %d gfn %lx\n",
+ " user_fault %d gfn %llx\n",
__func__, *sptep, pt_access,
write_fault, user_fault, gfn);

@@ -2050,7 +2050,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
__set_spte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
} else if (pfn != spte_to_pfn(*sptep)) {
- pgprintk("hfn old %lx new %lx\n",
+ pgprintk("hfn old %llx new %llx\n",
spte_to_pfn(*sptep), pfn);
drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
@@ -2067,7 +2067,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}

pgprintk("%s: setting spte %llx\n", __func__, *sptep);
- pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
+ pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
is_large_pte(*sptep)? "2MB" : "4kB",
*sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
*sptep, sptep);
@@ -3651,9 +3651,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
if (!gfn_to_memslot(kvm, gfn)) {
if (!printk_ratelimit())
return;
- printk(KERN_ERR "%s: no memslot for gfn %ld\n",
+ printk(KERN_ERR "%s: no memslot for gfn %llx\n",
audit_msg, gfn);
- printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n",
+ printk(KERN_ERR "%s: index %ld of sp (gfn=%llx)\n",
audit_msg, (long int)(sptep - rev_sp->spt),
rev_sp->gfn);
dump_stack();
@@ -3728,7 +3728,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
while (spte) {
if (is_writable_pte(*spte))
printk(KERN_ERR "%s: (%s) shadow page has "
- "writable mappings: gfn %lx role %x\n",
+ "writable mappings: gfn %llx role %x\n",
__func__, audit_msg, sp->gfn,
sp->role.word);
spte = rmap_next(vcpu->kvm, rmapp, spte);
--
1.7.0.4

2010-08-28 11:16:48

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 2/5] KVM: MMU: check rmap for every spte

The read-only spte also has reverse mapping, so fix the code to check them,
also modify the function name to fit its doing

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 57 +++++++++++++++++++++++----------------------------
1 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 59bf1d9..1c784b9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3644,40 +3644,38 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
struct kvm_mmu_page *rev_sp;
gfn_t gfn;

- if (is_writable_pte(*sptep)) {
- rev_sp = page_header(__pa(sptep));
- gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

- if (!gfn_to_memslot(kvm, gfn)) {
- if (!printk_ratelimit())
- return;
- printk(KERN_ERR "%s: no memslot for gfn %llx\n",
- audit_msg, gfn);
- printk(KERN_ERR "%s: index %ld of sp (gfn=%llx)\n",
- audit_msg, (long int)(sptep - rev_sp->spt),
- rev_sp->gfn);
- dump_stack();
- return;
- }
+ rev_sp = page_header(__pa(sptep));
+ gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

- rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
- if (!*rmapp) {
- if (!printk_ratelimit())
- return;
- printk(KERN_ERR "%s: no rmap for writable spte %llx\n",
- audit_msg, *sptep);
- dump_stack();
- }
+ if (!gfn_to_memslot(kvm, gfn)) {
+ if (!printk_ratelimit())
+ return;
+ printk(KERN_ERR "%s: no memslot for gfn %llx\n",
+ audit_msg, gfn);
+ printk(KERN_ERR "%s: index %ld of sp (gfn=%llx)\n",
+ audit_msg, (long int)(sptep - rev_sp->spt),
+ rev_sp->gfn);
+ dump_stack();
+ return;
}

+ rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
+ if (!*rmapp) {
+ if (!printk_ratelimit())
+ return;
+ printk(KERN_ERR "%s: no rmap for writable spte %llx\n",
+ audit_msg, *sptep);
+ dump_stack();
+ }
}

-void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu)
+void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu)
{
mmu_spte_walk(vcpu, inspect_spte_has_rmap);
}

-static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
+static void check_mappings_rmap(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_page *sp;
int i;
@@ -3689,12 +3687,9 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
continue;

for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- u64 ent = pt[i];
-
- if (!(ent & PT_PRESENT_MASK))
- continue;
- if (!is_writable_pte(ent))
+ if (!is_rmap_spte(pt[i]))
continue;
+
inspect_spte_has_rmap(vcpu->kvm, &pt[i]);
}
}
@@ -3703,7 +3698,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)

static void audit_rmap(struct kvm_vcpu *vcpu)
{
- check_writable_mappings_rmap(vcpu);
+ check_mappings_rmap(vcpu);
count_rmaps(vcpu);
}

@@ -3746,7 +3741,7 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
audit_write_protection(vcpu);
if (strcmp("pre pte write", audit_msg) != 0)
audit_mappings(vcpu);
- audit_writable_sptes_have_rmaps(vcpu);
+ audit_sptes_have_rmaps(vcpu);
dbg = olddbg;
}

--
1.7.0.4

2010-08-28 11:18:42

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 3/5] KVM: MMU: fix wrong not write protected sp report

The audit code reports some sp not write protected in current code, it's just the
bug in audit_write_protection(), since:

- the invalid sp not need write protected
- using uninitialize local variable('gfn')
- call kvm_mmu_audit() out of mmu_lock's protection

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 5 +++--
arch/x86/kvm/paging_tmpl.h | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1c784b9..68575dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3708,16 +3708,17 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
struct kvm_memory_slot *slot;
unsigned long *rmapp;
u64 *spte;
- gfn_t gfn;

list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
if (sp->role.direct)
continue;
if (sp->unsync)
continue;
+ if (sp->role.invalid)
+ continue;

slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
- rmapp = &slot->rmap[gfn - slot->base_gfn];
+ rmapp = &slot->rmap[sp->gfn - slot->base_gfn];

spte = rmap_next(vcpu->kvm, rmapp, NULL);
while (spte) {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a4e8389..a0f2feb 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -504,7 +504,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
unsigned long mmu_seq;

pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
- kvm_mmu_audit(vcpu, "pre page fault");

r = mmu_topup_memory_caches(vcpu);
if (r)
@@ -542,6 +541,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
+
+ kvm_mmu_audit(vcpu, "pre page fault");
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
--
1.7.0.4

2010-08-28 11:20:18

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 4/5] KVM: MMU: rewrite audit_mappings_page() function

There is a bugs in this function, we call gfn_to_pfn() and kvm_mmu_gva_to_gpa_read() in
atomic context(kvm_mmu_audit() is called under the spinlock(mmu_lock)'s protection).

This patch fix it by:
- introduce gfn_to_pfn_atomic instead of gfn_to_pfn
- get the mapping gfn from kvm_mmu_page_get_gfn()

And it adds 'notrap' ptes check in unsync/direct sps

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 75 ++++++++++++++++++++++++---------------------
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 15 ++++++++-
3 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 68575dc..0d91f60 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3487,15 +3487,6 @@ EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);

static const char *audit_msg;

-static gva_t canonicalize(gva_t gva)
-{
-#ifdef CONFIG_X86_64
- gva = (long long)(gva << 16) >> 16;
-#endif
- return gva;
-}
-
-
typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep);

static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -3550,39 +3541,53 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));

for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
- u64 ent = pt[i];
+ u64 *sptep = pt + i;
+ struct kvm_mmu_page *sp;
+ gfn_t gfn;
+ pfn_t pfn;
+ hpa_t hpa;

- if (ent == shadow_trap_nonpresent_pte)
- continue;
+ sp = page_header(__pa(sptep));

- va = canonicalize(va);
- if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
- audit_mappings_page(vcpu, ent, va, level - 1);
- else {
- gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
- gfn_t gfn = gpa >> PAGE_SHIFT;
- pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
- hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
+ if (sp->unsync) {
+ if (level != PT_PAGE_TABLE_LEVEL) {
+ printk(KERN_ERR "audit: (%s) error: unsync sp: %p level = %d\n",
+ audit_msg, sp, level);
+ return;
+ }

- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- continue;
+ if (*sptep == shadow_notrap_nonpresent_pte) {
+ printk(KERN_ERR "audit: (%s) error: notrap spte in unsync sp: %p\n",
+ audit_msg, sp);
+ return;
}
+ }

- if (is_shadow_present_pte(ent)
- && (ent & PT64_BASE_ADDR_MASK) != hpa)
- printk(KERN_ERR "xx audit error: (%s) levels %d"
- " gva %lx gpa %llx hpa %llx ent %llx %d\n",
- audit_msg, vcpu->arch.mmu.root_level,
- va, gpa, hpa, ent,
- is_shadow_present_pte(ent));
- else if (ent == shadow_notrap_nonpresent_pte
- && !is_error_hpa(hpa))
- printk(KERN_ERR "audit: (%s) notrap shadow,"
- " valid guest gva %lx\n", audit_msg, va);
- kvm_release_pfn_clean(pfn);
+ if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) {
+ printk(KERN_ERR "audit: (%s) error: notrap spte in direct sp: %p\n",
+ audit_msg, sp);
+ return;
+ }
+
+ if (!is_shadow_present_pte(*sptep) ||
+ !is_last_spte(*sptep, level))
+ return;
+
+ gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+ pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ return;
}
+
+ hpa = pfn << PAGE_SHIFT;
+
+ if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
+ printk(KERN_ERR "xx audit error: (%s) levels %d"
+ " gva %lx pfn %llx hpa %llx ent %llxn",
+ audit_msg, vcpu->arch.mmu.root_level,
+ va, pfn, hpa, *sptep);
}
}

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b837ec8..f2ecdd5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -300,6 +300,7 @@ void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);

pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d08b740..9a73b98 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -999,7 +999,7 @@ pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);

-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
{
unsigned long addr;

@@ -1009,7 +1009,18 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return page_to_pfn(bad_page);
}

- return hva_to_pfn(kvm, addr, false);
+ return hva_to_pfn(kvm, addr, atomic);
+}
+
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_pfn(kvm, gfn, true);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
+
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_pfn(kvm, gfn, false);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);

--
1.7.0.4

2010-08-28 11:21:08

by Xiao Guangrong

[permalink] [raw]
Subject: [PATCH 5/5] KVM: MMU: remove count_rmaps()

Nothing is checked in count_rmaps(), so remove it

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 38 --------------------------------------
1 files changed, 0 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0d91f60..0bff4d5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3606,43 +3606,6 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
2);
}

-static int count_rmaps(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_memslots *slots;
- int nmaps = 0;
- int i, j, k, idx;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
- for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *m = &slots->memslots[i];
- struct kvm_rmap_desc *d;
-
- for (j = 0; j < m->npages; ++j) {
- unsigned long *rmapp = &m->rmap[j];
-
- if (!*rmapp)
- continue;
- if (!(*rmapp & 1)) {
- ++nmaps;
- continue;
- }
- d = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
- while (d) {
- for (k = 0; k < RMAP_EXT; ++k)
- if (d->sptes[k])
- ++nmaps;
- else
- break;
- d = d->more;
- }
- }
- }
- srcu_read_unlock(&kvm->srcu, idx);
- return nmaps;
-}
-
void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
unsigned long *rmapp;
@@ -3704,7 +3667,6 @@ static void check_mappings_rmap(struct kvm_vcpu *vcpu)
static void audit_rmap(struct kvm_vcpu *vcpu)
{
check_mappings_rmap(vcpu);
- count_rmaps(vcpu);
}

static void audit_write_protection(struct kvm_vcpu *vcpu)
--
1.7.0.4

2010-08-29 09:08:32

by Avi Kivity

[permalink] [raw]
Subject: Re: [PATCH 0/5] KVM: MMU: some bugfix for mmu audit code

On 08/28/2010 02:18 PM, Xiao Guangrong wrote:
> There are some bugfix patches for mmu audit code:
>
> [PATCH 1/5] KVM: MMU: fix compile warning in audit code
> [PATCH 2/5] KVM: MMU: check rmap for every spte
> [PATCH 3/5] KVM: MMU: fix wrong not write protected sp report
> [PATCH 4/5] KVM: MMU: rewrite audit_mappings_page() function
> [PATCH 5/5] KVM: MMU: remove count_rmaps()

Applied, thanks.

--
error compiling committee.c: too many arguments to function