2010-07-13 09:46:50

by Xiao Guangrong

Subject: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path

In the speculative path, we should check the guest pte's reserved bits
just as the real processor does.

Reported-by: Marcelo Tosatti <[email protected]>
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 8 ++++++++
arch/x86/kvm/paging_tmpl.h | 5 +++--
2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b93b94f..9fc1524 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2783,6 +2783,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}

+ if (is_rsvd_bits_set(vcpu, gentry, PT_PAGE_TABLE_LEVEL))
+ gentry = 0;
+
mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
@@ -2851,6 +2854,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
while (npte--) {
entry = *spte;
mmu_pte_write_zap_pte(vcpu, sp, spte);
+
+ if (!!is_pae(vcpu) != sp->role.cr4_pae ||
+ is_nx(vcpu) != sp->role.nxe)
+ continue;
+
if (gentry)
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (!remote_flush && need_remote_flush(entry, *spte))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6daeacf..d32484f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -640,8 +640,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return -EINVAL;

gfn = gpte_to_gfn(gpte);
- if (gfn != sp->gfns[i] ||
- !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
+ if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) ||
+ gfn != sp->gfns[i] || !is_present_gpte(gpte) ||
+ !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;

if (is_present_gpte(gpte) || !clear_unsync)
--
1.6.1.2
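
For readers unfamiliar with the check: a guest pte violates the
reserved-bits rule when any bit the current paging mode defines as
reserved is set. A minimal standalone sketch of the idea (illustrative
masks only; the real per-level masks are precomputed from CPUID
MAXPHYADDR and the NX/PSE configuration, using a helper along the lines
of kvm's rsvd_bits()):

#include <stdint.h>

/* Build a mask with bits s..e set, modeled on kvm's rsvd_bits(). */
static inline uint64_t rsvd_bits(int s, int e)
{
        return ((1ull << (e - s + 1)) - 1) << s;
}

/* Sketch: with MAXPHYADDR == 40, physical-address bits 51:40 of a
 * 4K-level pte are reserved, so a speculatively fetched gpte with any
 * of them set must be discarded -- the patch does this by forcing
 * gentry to 0. */
static inline int gpte_rsvd_bits_set(uint64_t gpte, int maxphyaddr)
{
        return (gpte & rsvd_bits(maxphyaddr, 51)) != 0;
}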


2010-07-13 09:47:59

by Xiao Guangrong

Subject: [PATCH 2/4] KVM: MMU: cleanup spte update path

Introduce a set_spte_atomic() helper function to clean up the current code

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 37 +++++++++++++++++++------------------
1 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9fc1524..67dbafa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -303,6 +303,22 @@ static u64 __xchg_spte(u64 *sptep, u64 new_spte)
#endif
}

+static void set_spte_atomic(u64 *sptep, u64 new_spte)
+{
+ pfn_t pfn;
+ u64 old_spte;
+
+ old_spte = __xchg_spte(sptep, new_spte);
+ if (!is_rmap_spte(old_spte))
+ return;
+
+ pfn = spte_to_pfn(old_spte);
+ if (old_spte & shadow_accessed_mask)
+ kvm_set_pfn_accessed(pfn);
+ if (is_writable_pte(old_spte))
+ kvm_set_pfn_dirty(pfn);
+}
+
static void update_spte(u64 *sptep, u64 new_spte)
{
u64 old_spte;
@@ -680,17 +696,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)

static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
{
- pfn_t pfn;
- u64 old_spte;
-
- old_spte = __xchg_spte(sptep, new_spte);
- if (!is_rmap_spte(old_spte))
- return;
- pfn = spte_to_pfn(old_spte);
- if (old_spte & shadow_accessed_mask)
- kvm_set_pfn_accessed(pfn);
- if (is_writable_pte(old_spte))
- kvm_set_pfn_dirty(pfn);
+ set_spte_atomic(sptep, new_spte);
rmap_remove(kvm, sptep);
}

@@ -790,7 +796,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
int need_flush = 0;
- u64 *spte, new_spte, old_spte;
+ u64 *spte, new_spte;
pte_t *ptep = (pte_t *)data;
pfn_t new_pfn;

@@ -811,12 +817,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
new_spte &= ~shadow_accessed_mask;
- if (is_writable_pte(*spte))
- kvm_set_pfn_dirty(spte_to_pfn(*spte));
- old_spte = __xchg_spte(spte, new_spte);
- if (is_shadow_present_pte(old_spte)
- && (old_spte & shadow_accessed_mask))
- mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
+ set_spte_atomic(spte, new_spte);
spte = rmap_next(kvm, rmapp, spte);
}
}
--
1.6.1.2
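
The helper's correctness rests on __xchg_spte() swapping the spte
atomically and returning the old value, so an accessed/dirty update made
concurrently by hardware cannot be lost between a separate read and
write. A hedged sketch of that primitive, assuming the structure it has
in this tree (plain xchg on 64-bit, a cmpxchg64 loop on 32-bit PAE):

static u64 __xchg_spte(u64 *sptep, u64 new_spte)
{
#ifdef CONFIG_X86_64
        /* The whole spte fits in one register: exchange it directly. */
        return xchg(sptep, new_spte);
#else
        /* On 32-bit PAE the spte spans two words, so retry with
         * cmpxchg64 until we swap against an unmodified old value. */
        u64 old_spte;

        do {
                old_spte = *sptep;
        } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte);

        return old_spte;
#endif
}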

2010-07-13 09:49:27

by Xiao Guangrong

Subject: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

In the speculative path the page is not really write-accessed, so there
is no need to mark it dirty; clear the dirty bit in this path and
examine the bit later, when we release the page.

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 24 +++++++++++-------------
1 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 67dbafa..5e9d4a0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -315,21 +315,19 @@ static void set_spte_atomic(u64 *sptep, u64 new_spte)
pfn = spte_to_pfn(old_spte);
if (old_spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
- if (is_writable_pte(old_spte))
+
+ if ((shadow_dirty_mask && (old_spte & shadow_dirty_mask)) ||
+ (!shadow_dirty_mask && is_writable_pte(old_spte)))
kvm_set_pfn_dirty(pfn);
}

static void update_spte(u64 *sptep, u64 new_spte)
{
- u64 old_spte;
-
- if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask)) {
+ if ((!shadow_accessed_mask || (new_spte & shadow_accessed_mask)) &&
+ (!shadow_dirty_mask || (new_spte & shadow_dirty_mask)))
__set_spte(sptep, new_spte);
- } else {
- old_spte = __xchg_spte(sptep, new_spte);
- if (old_spte & shadow_accessed_mask)
- mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
- }
+ else
+ set_spte_atomic(sptep, new_spte);
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -745,7 +743,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
}
spte = rmap_next(kvm, rmapp, spte);
}
- if (write_protected) {
+ if (!shadow_dirty_mask && write_protected) {
pfn_t pfn;

spte = rmap_next(kvm, rmapp, NULL);
@@ -1879,9 +1877,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* whether the guest actually used the pte (in order to detect
* demand paging).
*/
- spte = shadow_base_present_pte | shadow_dirty_mask;
+ spte = shadow_base_present_pte;
if (!speculative)
- spte |= shadow_accessed_mask;
+ spte |= shadow_accessed_mask | shadow_dirty_mask;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (pte_access & ACC_EXEC_MASK)
@@ -2007,7 +2005,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
- if (was_writable)
+ if (!shadow_dirty_mask && was_writable)
kvm_release_pfn_dirty(pfn);
else
kvm_release_pfn_clean(pfn);
--
1.6.1.2
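
The repeated shadow_dirty_mask tests encode one rule: when the spte
format has a hardware dirty bit, trust that bit; when it does not
(shadow_dirty_mask == 0, e.g. EPT in this era), a writable spte is the
only evidence that the page may be dirty. A hedged predicate capturing
the rule the patch applies throughout (name illustrative):

/* Illustrative only: may this dropped spte have dirtied its page? */
static int spte_possibly_dirty(u64 old_spte)
{
        if (shadow_dirty_mask)                  /* hw tracks dirtiness */
                return !!(old_spte & shadow_dirty_mask);

        return is_writable_pte(old_spte);       /* writable => maybe dirty */
}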

2010-07-13 09:50:53

by Xiao Guangrong

Subject: [PATCH 4/4] KVM: MMU: remove valueless output from error messages

After commit 53383eaad08d, '*spte' has already been updated before
rmap_remove() is called (in most cases it is 'shadow_trap_nonpresent_pte'),
so remove this stale information from the error messages.

Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 11 +++++------
1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5e9d4a0..c07a5eb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -662,18 +662,17 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
if (!*rmapp) {
- printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+ printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte);
BUG();
} else if (!(*rmapp & 1)) {
- rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+ rmap_printk("rmap_remove: %p 1->0\n", spte);
if ((u64 *)*rmapp != spte) {
- printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
- spte, *spte);
+ printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte);
BUG();
}
*rmapp = 0;
} else {
- rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+ rmap_printk("rmap_remove: %p many->many\n", spte);
desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
prev_desc = NULL;
while (desc) {
@@ -687,7 +686,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
prev_desc = desc;
desc = desc->more;
}
- pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
+ pr_err("rmap_remove: %p many->many\n", spte);
BUG();
}
}
--
1.6.1.2

2010-07-13 22:09:36

by Marcelo Tosatti

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

On Tue, Jul 13, 2010 at 05:45:27PM +0800, Xiao Guangrong wrote:
> In the speculative path the page is not really write-accessed, so there
> is no need to mark it dirty; clear the dirty bit in this path and
> examine the bit later, when we release the page.
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/kvm/mmu.c | 24 +++++++++++-------------
> 1 files changed, 11 insertions(+), 13 deletions(-)

Unfortunately all pages that kvm creates translations for are marked
dirty due to get_user_pages(w=1), except KSM which makes them read-only
later.

2010-07-13 22:09:40

by Marcelo Tosatti

Subject: Re: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path

On Tue, Jul 13, 2010 at 05:42:48PM +0800, Xiao Guangrong wrote:
> In the speculative path, we should check the guest pte's reserved bits
> just as the real processor does.
>
> Reported-by: Marcelo Tosatti <[email protected]>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/kvm/mmu.c | 8 ++++++++
> arch/x86/kvm/paging_tmpl.h | 5 +++--
> 2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b93b94f..9fc1524 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2783,6 +2783,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> break;
> }
>
> + if (is_rsvd_bits_set(vcpu, gentry, PT_PAGE_TABLE_LEVEL))
> + gentry = 0;
> +
> mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
> spin_lock(&vcpu->kvm->mmu_lock);
> if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
> @@ -2851,6 +2854,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> while (npte--) {
> entry = *spte;
> mmu_pte_write_zap_pte(vcpu, sp, spte);
> +
> + if (!!is_pae(vcpu) != sp->role.cr4_pae ||
> + is_nx(vcpu) != sp->role.nxe)
> + continue;
> +

This breaks remote_flush assignment below.
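
For context, stitching together the hunks quoted above, the tail of the
pte-write loop in kvm_mmu_pte_write() reads roughly as follows (abridged
sketch reconstructed from the diff context, not the full function):

while (npte--) {
        entry = *spte;
        mmu_pte_write_zap_pte(vcpu, sp, spte);

        if (!!is_pae(vcpu) != sp->role.cr4_pae ||
            is_nx(vcpu) != sp->role.nxe)
                continue;       /* also skips the flush test below */

        if (gentry)
                mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
        if (!remote_flush && need_remote_flush(entry, *spte))
                remote_flush = true;
        ...
}

Because the spte has already been zapped, bailing out with 'continue'
means need_remote_flush() is never consulted for that entry, so a stale
translation can survive in a remote TLB.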

2010-07-14 01:12:56

by Xiao Guangrong

Subject: Re: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path



Marcelo Tosatti wrote:
>> entry = *spte;
>> mmu_pte_write_zap_pte(vcpu, sp, spte);
>> +
>> + if (!!is_pae(vcpu) != sp->role.cr4_pae ||
>> + is_nx(vcpu) != sp->role.nxe)
>> + continue;
>> +
>
> This breaks remote_flush assignment below.

Ah, Oops, will fix

2010-07-14 01:28:22

by Xiao Guangrong

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly



Marcelo Tosatti wrote:
> On Tue, Jul 13, 2010 at 05:45:27PM +0800, Xiao Guangrong wrote:
>> In the speculative path the page is not really write-accessed, so
>> there is no need to mark it dirty; clear the dirty bit in this path
>> and examine the bit later, when we release the page.
>>
>> Signed-off-by: Xiao Guangrong <[email protected]>
>> ---
>> arch/x86/kvm/mmu.c | 24 +++++++++++-------------
>> 1 files changed, 11 insertions(+), 13 deletions(-)
>
> Unfortunately all pages that kvm creates translations for are marked
> dirty due to get_user_pages(w=1), except KSM which makes them read-only
> later.

Marcelo, I have looked into the get_user_pages() function but could not
catch where it makes the page dirty; could you point it out for me? :-)

2010-07-14 05:57:28

by Xiao Guangrong

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly



Xiao Guangrong wrote:

>
> spte = rmap_next(kvm, rmapp, NULL);
> @@ -1879,9 +1877,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> * whether the guest actually used the pte (in order to detect
> * demand paging).
> */
> - spte = shadow_base_present_pte | shadow_dirty_mask;
> + spte = shadow_base_present_pte;
> if (!speculative)
> - spte |= shadow_accessed_mask;
> + spte |= shadow_accessed_mask | shadow_dirty_mask;

It breaks read-only shadow pages; I'll update it... please ignore this version...

2010-07-14 12:47:09

by Marcelo Tosatti

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

On Wed, Jul 14, 2010 at 09:24:22AM +0800, Xiao Guangrong wrote:
>
>
> Marcelo Tosatti wrote:
> > On Tue, Jul 13, 2010 at 05:45:27PM +0800, Xiao Guangrong wrote:
> >> In the speculative path the page is not really write-accessed, so
> >> there is no need to mark it dirty; clear the dirty bit in this path
> >> and examine the bit later, when we release the page.
> >>
> >> Signed-off-by: Xiao Guangrong <[email protected]>
> >> ---
> >> arch/x86/kvm/mmu.c | 24 +++++++++++-------------
> >> 1 files changed, 11 insertions(+), 13 deletions(-)
> >
> > Unfortunately all pages that kvm creates translations for are marked
> > dirty due to get_user_pages(w=1), except KSM which makes them read-only
> > later.
>
> Marcelo, I have looked into the get_user_pages() function but could not
> catch where it makes the page dirty; could you point it out for me? :-)

See the set_page_dirty() call in mm/memory.c::follow_page().
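
For readers without the tree at hand, the relevant fragment of
mm/memory.c::follow_page() in kernels of this era looks roughly like
this (paraphrased, not copied verbatim):

if (flags & FOLL_TOUCH) {
        if ((flags & FOLL_WRITE) &&
            !pte_dirty(pte) && !PageDirty(page))
                set_page_dirty(page);
        /* get_user_pages(w=1) passes FOLL_WRITE, so a write-intent
         * lookup dirties the page up front, before any guest write. */
        mark_page_accessed(page);
}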

2010-07-14 13:07:06

by Avi Kivity

Subject: Re: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path

On 07/13/2010 12:42 PM, Xiao Guangrong wrote:
> In the speculative path, we should check the guest pte's reserved bits
> just as the real processor does.
>
> Reported-by: Marcelo Tosatti <[email protected]>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/kvm/mmu.c | 8 ++++++++
> arch/x86/kvm/paging_tmpl.h | 5 +++--
> 2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b93b94f..9fc1524 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2783,6 +2783,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> break;
> }
>
> + if (is_rsvd_bits_set(vcpu, gentry, PT_PAGE_TABLE_LEVEL))
> + gentry = 0;
> +
> mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
> spin_lock(&vcpu->kvm->mmu_lock);
> if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
> @@ -2851,6 +2854,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> while (npte--) {
> entry = *spte;
> mmu_pte_write_zap_pte(vcpu, sp, spte);
> +
> + if (!!is_pae(vcpu) != sp->role.cr4_pae ||
> + is_nx(vcpu) != sp->role.nxe)
> + continue;
> +
>

Do we also need to check cr0.wp? I think so.

> if (gentry)
> mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
>

Please move the checks to mmu_pte_write_new_pte(); it's a more logical
place.

It means the reserved bits check happens multiple times, but that's ok.

Also, you can use arch.mmu.base_role to compare:

static const kvm_mmu_page_role mask = { .level = -1U, .cr4_pae = 1,
... };

if ((sp->role.word ^ base_role.word) & mask.word)
return;

> @@ -640,8 +640,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> return -EINVAL;
>
> gfn = gpte_to_gfn(gpte);
> - if (gfn != sp->gfns[i] ||
> - !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
> + if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) ||
> + gfn != sp->gfns[i] || !is_present_gpte(gpte) ||
> + !(gpte & PT_ACCESSED_MASK)) {
> u64 nonpresent;
>
> if (is_present_gpte(gpte) || !clear_unsync)
>

Eventually we have to reduce the number of paths. But let's fix things
first.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
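
Spelled out, the base_role suggestion could look roughly like this
inside the pte-write loop (a sketch assuming kvm_mmu_page_role remains
a union over a packed 'word'; the exact field list is open, per the
cr0.wp question above):

union kvm_mmu_page_role mask = { .word = 0 };

mask.level = -1U;
mask.cr4_pae = 1;
mask.nxe = 1;           /* and possibly mask.cr0_wp = 1, see above */

/* Only the role bits selected by 'mask' take part in the comparison,
 * so shadow pages created under a different paging mode are skipped. */
if ((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word)
        continue;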

2010-07-14 13:23:09

by Xiao Guangrong

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly



Marcelo Tosatti wrote:
> On Wed, Jul 14, 2010 at 09:24:22AM +0800, Xiao Guangrong wrote:
>>
>> Marcelo Tosatti wrote:
>>> On Tue, Jul 13, 2010 at 05:45:27PM +0800, Xiao Guangrong wrote:
>>>> In the speculative path the page is not really write-accessed, so
>>>> there is no need to mark it dirty; clear the dirty bit in this path
>>>> and examine the bit later, when we release the page.
>>>>
>>>> Signed-off-by: Xiao Guangrong <[email protected]>
>>>> ---
>>>> arch/x86/kvm/mmu.c | 24 +++++++++++-------------
>>>> 1 files changed, 11 insertions(+), 13 deletions(-)
>>> Unfortunately all pages that kvm creates translations for are marked
>>> dirty due to get_user_pages(w=1), except KSM which makes them read-only
>>> later.
>> Marcelo, I have looked into the get_user_pages() function but could
>> not catch where it makes the page dirty; could you point it out for
>> me? :-)
>
> See the set_page_dirty() call in mm/memory.c::follow_page().

Yeah, you are right, and I want to use another way to do it, since
tracking the dirty bit is too heavy; it is also dangerous if we miss
setting a page dirty.

How about just tracking the accessed bit for the speculative path: we
mark the page both accessed and dirty (if it is writable) only if the
accessed bit is set?

2010-07-14 13:28:52

by Xiao Guangrong

Subject: Re: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path



Avi Kivity wrote:

>> +
>> + if (!!is_pae(vcpu) != sp->role.cr4_pae ||
>> + is_nx(vcpu) != sp->role.nxe)
>> + continue;
>> +
>>
>
> Do we also need to check cr0.wp? I think so.

I think it's not too bad, since we only decrease the access rights: for
example, if we mark the mapping read-only for a cr0.wp=0 page, a later
write access will cause a #PF while read access still works.

>
>> if (gentry)
>> mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
>>
>
> Please move the checks to mmu_pte_write_new_pte(), it's a more logical
> place.
>
> It means the reserved bits check happens multiple times, but that's ok.
>

OK

> Also, you can use arch.mmu.base_role to compare:
>
> static const kvm_mmu_page_role mask = { .level = -1U, .cr4_pae = 1,
> ... };
>
> if ((sp->role.word ^ base_role.word) & mask.word)
> return;

OK, will update it :-)

2010-07-14 14:06:55

by Marcelo Tosatti

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

On Wed, Jul 14, 2010 at 09:18:58PM +0800, Xiao Guangrong wrote:
>
>
> Marcelo Tosatti wrote:
> > On Wed, Jul 14, 2010 at 09:24:22AM +0800, Xiao Guangrong wrote:
> >>
> >> Marcelo Tosatti wrote:
> >>> On Tue, Jul 13, 2010 at 05:45:27PM +0800, Xiao Guangrong wrote:
> >>>> In the speculative path the page is not really write-accessed, so
> >>>> there is no need to mark it dirty; clear the dirty bit in this
> >>>> path and examine the bit later, when we release the page.
> >>>>
> >>>> Signed-off-by: Xiao Guangrong <[email protected]>
> >>>> ---
> >>>> arch/x86/kvm/mmu.c | 24 +++++++++++-------------
> >>>> 1 files changed, 11 insertions(+), 13 deletions(-)
> >>> Unfortunately all pages that kvm creates translations for are marked
> >>> dirty due to get_user_pages(w=1), except KSM which makes them read-only
> >>> later.
> >> Marcelo, I have looked into the get_user_pages() function but could
> >> not catch where it makes the page dirty; could you point it out for
> >> me? :-)
> >
> > See the set_page_dirty() call in mm/memory.c::follow_page().
>
> Yeah, you are right, and I want to use another way to do it, since
> tracking the dirty bit is too heavy; it is also dangerous if we miss
> setting a page dirty.
>
> How about just tracking the accessed bit for the speculative path: we
> mark the page both accessed and dirty (if it is writable) only if the
> accessed bit is set?

A useful thing to do would be to allow read-only mappings in the fault
path (Lai sent a few patches in that direction some time ago but there
was no follow-up).

So in the case of a read-only fault from the guest, you'd inform
get_user_pages() that read-only access is acceptable (so swapcache pages
can be mapped, or qemu can mprotect(PROT_READ) guest memory).

2010-07-14 14:25:13

by Avi Kivity

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

On 07/14/2010 05:06 PM, Marcelo Tosatti wrote:
>
>> Yeah, you are right, and I want to use another way to do it, since
>> tracking the dirty bit is too heavy; it is also dangerous if we miss
>> setting a page dirty.
>>
>> How about just tracking the accessed bit for the speculative path: we
>> mark the page both accessed and dirty (if it is writable) only if the
>> accessed bit is set?
>>
> A useful thing to do would be to allow read-only mappings in the fault
> path (Lai sent a few patches in that direction some time ago but there
> was no follow-up).
>
> So in the case of a read-only fault from the guest, you'd inform
> get_user_pages() that read-only access is acceptable (so swapcache pages
> can be mapped, or qemu can mprotect(PROT_READ) guest memory).
>

I'd like a get_user_pages_ptes_fast() that returns the pte along with
the page. It could be used for a couple of purposes:

- on read faults or speculative mappings, ask for read access, but allow
write if the pte is writeable
- stick the page size into free bits (need 2 to support 1G pages) to
speed up host_mapping_level()

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
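
No such helper exists at this point; modeled on the existing
get_user_pages_fast(), the prototype being floated might look like this
(hypothetical signature, name and parameters assumed):

/* Hypothetical: like get_user_pages_fast(), but also return a snapshot
 * of each pte so the caller can inspect writability and page-size bits
 * without a second page-table walk. */
int get_user_pages_ptes_fast(unsigned long start, int nr_pages, int write,
                             struct page **pages, pte_t *ptes);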

2010-07-14 14:27:29

by Avi Kivity

Subject: Re: [PATCH 1/4] KVM: MMU: fix missing reserved bits check in speculative path

On 07/14/2010 04:24 PM, Xiao Guangrong wrote:
>
> Avi Kivity wrote:
>
>
>>> +
>>> + if (!!is_pae(vcpu) != sp->role.cr4_pae ||
>>> + is_nx(vcpu) != sp->role.nxe)
>>> + continue;
>>> +
>>>
>>>
>> Do we also need to check cr0.wp? I think so.
>>
> I think it's not too bad, since we only decrease the access rights: for
> example, if we mark the mapping read-only for a cr0.wp=0 page, a later
> write access will cause a #PF while read access still works.
>

If current cr0.wp=0 and sp->role.cr0_wp=1, that's fine, but the other
way round can relax permissions too much, marking a kernel page writeable.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

2010-07-15 07:48:58

by Xiao Guangrong

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly



Marcelo Tosatti wrote:

>> How about just tracking the accessed bit for the speculative path: we
>> mark the page both accessed and dirty (if it is writable) only if the
>> accessed bit is set?
>
> A useful thing to do would be to allow read-only mappings in the fault
> path (Lai sent a few patches in that direction some time ago but there
> was no follow-up).
>
> So in the case of a read-only fault from the guest, you'd inform
> get_user_pages() that read-only access is acceptable (so swapcache pages
> can be mapped, or qemu can mprotect(PROT_READ) guest memory).
>

Yeah, it's great work; I guess Lai will post the new version soon.

And even if we do this, I think the page dirty tracking is still
needed, right? Then how about my new idea to track page dirty state for
the speculative path, as the draft patch below does:

@@ -687,10 +687,11 @@ static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
if (!is_rmap_spte(old_spte))
return;
pfn = spte_to_pfn(old_spte);
- if (old_spte & shadow_accessed_mask)
+ if (old_spte & shadow_accessed_mask) {
kvm_set_pfn_accessed(pfn);
- if (is_writable_pte(old_spte))
- kvm_set_pfn_dirty(pfn);
+ if (is_writable_pte(old_spte))
+ kvm_set_pfn_dirty(pfn);
+ }
rmap_remove(kvm, sptep);
}

@@ -1920,8 +1921,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* demand paging).
*/
spte = shadow_base_present_pte | shadow_dirty_mask;
- if (!speculative)
+ if (!speculative) {
spte |= shadow_accessed_mask;
+ if (is_writable_pte(*sptep))
+ kvm_set_pfn_dirty(pfn);
+ }
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (pte_access & ACC_EXEC_MASK)

It uses the accessed bit to track both page-accessed and page-dirty
state, and it's rather cheap...


2010-07-15 17:53:39

by Marcelo Tosatti

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly

On Thu, Jul 15, 2010 at 03:44:48PM +0800, Xiao Guangrong wrote:
>
>
> Marcelo Tosatti wrote:
>
> >> How about just tracking the accessed bit for the speculative path:
> >> we mark the page both accessed and dirty (if it is writable) only
> >> if the accessed bit is set?
> >
> > A useful thing to do would be to allow read-only mappings in the fault
> > path (Lai sent a few patches in that direction some time ago but there
> > was no follow-up).
> >
> > So in the case of a read-only fault from the guest, you'd inform
> > get_user_pages() that read-only access is acceptable (so swapcache pages
> > can be mapped, or qemu can mprotect(PROT_READ) guest memory).
> >
>
> Yeah, it's great work; I guess Lai will post the new version soon.
>
> And even if we do this, I think the page dirty tracking is still
> needed, right? Then how about my new idea to track page dirty state
> for the speculative path, as the draft patch below does:
>
> @@ -687,10 +687,11 @@ static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
> if (!is_rmap_spte(old_spte))
> return;
> pfn = spte_to_pfn(old_spte);
> - if (old_spte & shadow_accessed_mask)
> + if (old_spte & shadow_accessed_mask) {
> kvm_set_pfn_accessed(pfn);
> - if (is_writable_pte(old_spte))
> - kvm_set_pfn_dirty(pfn);
> + if (is_writable_pte(old_spte))
> + kvm_set_pfn_dirty(pfn);
> + }
> rmap_remove(kvm, sptep);
> }
>
> @@ -1920,8 +1921,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> * demand paging).
> */
> spte = shadow_base_present_pte | shadow_dirty_mask;
> - if (!speculative)
> + if (!speculative) {
> spte |= shadow_accessed_mask;
> + if (is_writable_pte(*sptep))
> + kvm_set_pfn_dirty(pfn);
> + }
> if (!dirty)
> pte_access &= ~ACC_WRITE_MASK;
> if (pte_access & ACC_EXEC_MASK)
>
> It uses the accessed bit to track both page-accessed and page-dirty
> state, and it's rather cheap...

Xiao,

I don't understand it. What are you trying to achieve?

2010-07-16 02:03:00

by Xiao Guangrong

Subject: Re: [PATCH 3/4] KVM: MMU: track dirty page in speculative path properly



Marcelo Tosatti wrote:

>> It uses the accessed bit to track both page-accessed and page-dirty
>> state, and it's rather cheap...
>
> Xiao,
>
> I don't understand it. What are you trying to achieve?
>

Marcelo,

The issue we are trying to fix in this patch is that we mark the page
dirty in the speculative path. I'm not sure whether this bug is gone
after Lai's patch; if it is, this patch is no longer needed, otherwise
the later patch is the cheaper way to fix the issue.

In the later patch, we use pte.a to track whether the speculative
mapping has been accessed; only if it has been accessed do we mark the
page dirty, just as we do when tracking page-accessed state.
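
Condensed, the proposal is that accessed/dirty state reaches the struct
page only when hardware set pte.a on the mapping, i.e. the speculative
spte was actually used. A sketch of the release side, mirroring the
draft diff above (function name illustrative):

/* Propagate accessed/dirty state only for mappings the guest really
 * touched (pte.a set by hardware on first use). */
static void release_spte_example(u64 old_spte)
{
        pfn_t pfn = spte_to_pfn(old_spte);

        if (!(old_spte & shadow_accessed_mask))
                return;         /* speculative and never used: clean */

        kvm_set_pfn_accessed(pfn);
        if (is_writable_pte(old_spte))
                kvm_set_pfn_dirty(pfn);
}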