Tracing 'async' and '*pfn' is useless, since 'async' is always true
and '*pfn' is always 'fault_pfn'.
We can trace 'gva' and 'gfn' instead; this helps us to see the
life-cycle of an async_pf.
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/mmu.c | 2 +-
include/trace/events/kvm.h | 12 +++++++-----
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 39cc0c6..5275c50 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2627,7 +2627,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
put_page(pfn_to_page(*pfn));
if (!no_apf && can_do_async_pf(vcpu)) {
- trace_kvm_try_async_get_page(async, *pfn);
+ trace_kvm_try_async_get_page(gva, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 9c2cc6a..30063c6 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -188,18 +188,20 @@ TRACE_EVENT(kvm_age_page,
#ifdef CONFIG_KVM_ASYNC_PF
TRACE_EVENT(
kvm_try_async_get_page,
- TP_PROTO(bool async, u64 pfn),
- TP_ARGS(async, pfn),
+ TP_PROTO(u64 gva, u64 gfn),
+ TP_ARGS(gva, gfn),
TP_STRUCT__entry(
- __field(__u64, pfn)
+ __field(u64, gva)
+ __field(u64, gfn)
),
TP_fast_assign(
- __entry->pfn = (!async) ? pfn : (u64)-1;
+ __entry->gva = gva;
+ __entry->gfn = gfn;
),
- TP_printk("pfn %#llx", __entry->pfn)
+ TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
);
TRACE_EVENT(
--
1.7.0.4
Use 'DECLARE_EVENT_CLASS' to clean up the async_pf tracepoints
Acked-by: Gleb Natapov <[email protected]>
Signed-off-by: Xiao Guangrong <[email protected]>
---
include/trace/events/kvm.h | 76 ++++++++++++++++++++-----------------------
1 files changed, 35 insertions(+), 41 deletions(-)
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 30063c6..7bec396 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -186,59 +186,71 @@ TRACE_EVENT(kvm_age_page,
);
#ifdef CONFIG_KVM_ASYNC_PF
-TRACE_EVENT(
- kvm_try_async_get_page,
+DECLARE_EVENT_CLASS(kvm_async_get_page_class,
+
TP_PROTO(u64 gva, u64 gfn),
+
TP_ARGS(gva, gfn),
TP_STRUCT__entry(
- __field(u64, gva)
+ __field(__u64, gva)
__field(u64, gfn)
- ),
+ ),
TP_fast_assign(
__entry->gva = gva;
__entry->gfn = gfn;
- ),
+ ),
TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
);
-TRACE_EVENT(
- kvm_async_pf_not_present,
+DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
+
+ TP_PROTO(u64 gva, u64 gfn),
+
+ TP_ARGS(gva, gfn)
+);
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+
+ TP_PROTO(u64 gva, u64 gfn),
+
+ TP_ARGS(gva, gfn)
+);
+
+DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
+
TP_PROTO(u64 token, u64 gva),
+
TP_ARGS(token, gva),
TP_STRUCT__entry(
__field(__u64, token)
__field(__u64, gva)
- ),
+ ),
TP_fast_assign(
__entry->token = token;
__entry->gva = gva;
- ),
+ ),
+
+ TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
- TP_printk("token %#llx gva %#llx not present", __entry->token,
- __entry->gva)
);
-TRACE_EVENT(
- kvm_async_pf_ready,
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
+
TP_PROTO(u64 token, u64 gva),
- TP_ARGS(token, gva),
- TP_STRUCT__entry(
- __field(__u64, token)
- __field(__u64, gva)
- ),
+ TP_ARGS(token, gva)
+);
- TP_fast_assign(
- __entry->token = token;
- __entry->gva = gva;
- ),
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
+
+ TP_PROTO(u64 token, u64 gva),
- TP_printk("token %#llx gva %#llx ready", __entry->token, __entry->gva)
+ TP_ARGS(token, gva)
);
TRACE_EVENT(
@@ -262,24 +274,6 @@ TRACE_EVENT(
__entry->address, __entry->pfn)
);
-TRACE_EVENT(
- kvm_async_pf_doublefault,
- TP_PROTO(u64 gva, u64 gfn),
- TP_ARGS(gva, gfn),
-
- TP_STRUCT__entry(
- __field(u64, gva)
- __field(u64, gfn)
- ),
-
- TP_fast_assign(
- __entry->gva = gva;
- __entry->gfn = gfn;
- ),
-
- TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
-);
-
#endif
#endif /* _TRACE_KVM_MAIN_H */
--
1.7.0.4
Don't search later slots if the slot is empty
Acked-by: Gleb Natapov <[email protected]>
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/x86.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2cfdf2d..9b543f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6206,8 +6206,8 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
u32 key = kvm_async_pf_hash_fn(gfn);
for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
- (vcpu->arch.apf.gfns[key] != gfn ||
- vcpu->arch.apf.gfns[key] == ~0); i++)
+ (vcpu->arch.apf.gfns[key] != gfn &&
+ vcpu->arch.apf.gfns[key] != ~0); i++)
key = kvm_async_pf_next_probe(key);
return key;
--
1.7.0.4
The current code checks async pf completion outside of the wait context, like this:
like this:
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
r = vcpu_enter_guest(vcpu);
else {
......
kvm_vcpu_block(vcpu)
^- waiting until 'async_pf.done' is not empty
}
kvm_check_async_pf_completion(vcpu)
^- removes an entry from async_pf.done
So, if we check async pf completion first, we can get blocked in
kvm_vcpu_block.
Fix this by marking the vcpu as unhalted in the
kvm_check_async_pf_completion() path.
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/x86.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b543f4..4da8485 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6280,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
vcpu->arch.fault.address = work->arch.token;
kvm_inject_page_fault(vcpu);
}
+ vcpu->arch.apf.halted = false;
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
--
1.7.0.4
If there is no need to inject an async #PF into the PV guest, we can
handle more completed apfs at one time, so the guest #PF can be
retried as early as possible.
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/x86.c | 8 ++++++--
virt/kvm/async_pf.c | 28 ++++++++++++++++------------
3 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1be0058..c95b3ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -818,7 +818,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+/* return true if we can handle more completed apfs, false otherwise */
+bool kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4da8485..189664a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6265,7 +6265,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
}
}
-void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_async_pf_ready(work->arch.token, work->gva);
@@ -6274,13 +6274,17 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+ vcpu->arch.apf.halted = false;
+
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
vcpu->arch.fault.error_code = 0;
vcpu->arch.fault.address = work->arch.token;
kvm_inject_page_fault(vcpu);
+ return false;
}
- vcpu->arch.apf.halted = false;
+
+ return true;
}
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 60df9e0..d57ec92 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -123,25 +123,29 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
+ bool ret;
if (list_empty_careful(&vcpu->async_pf.done) ||
!kvm_arch_can_inject_async_page_present(vcpu))
return;
- spin_lock(&vcpu->async_pf.lock);
- work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
- list_del(&work->link);
- spin_unlock(&vcpu->async_pf.lock);
+ do {
+ spin_lock(&vcpu->async_pf.lock);
+ work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
+ link);
+ list_del(&work->link);
+ spin_unlock(&vcpu->async_pf.lock);
- if (work->page)
- kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ if (work->page)
+ kvm_arch_async_page_ready(vcpu, work);
+ ret = kvm_arch_async_page_present(vcpu, work);
- list_del(&work->queue);
- vcpu->async_pf.queued--;
- if (work->page)
- put_page(work->page);
- kmem_cache_free(async_pf_cache, work);
+ list_del(&work->queue);
+ vcpu->async_pf.queued--;
+ if (work->page)
+ put_page(work->page);
+ kmem_cache_free(async_pf_cache, work);
+ } while (ret && !list_empty_careful(&vcpu->async_pf.done));
}
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
--
1.7.0.4
In kvm_async_pf_wakeup_all(), a dummy apf is added to vcpu->async_pf.done
without holding vcpu->async_pf.lock; this breaks if we are handling apfs
at the same time.
Also use 'list_empty_careful()' instead of 'list_empty()'
Signed-off-by: Xiao Guangrong <[email protected]>
---
virt/kvm/async_pf.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d57ec92..6ef3373 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -200,7 +200,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
- if (!list_empty(&vcpu->async_pf.done))
+ if (!list_empty_careful(&vcpu->async_pf.done))
return 0;
work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
@@ -211,7 +211,10 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
get_page(bad_page);
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
+ spin_lock(&vcpu->async_pf.lock);
list_add_tail(&work->link, &vcpu->async_pf.done);
+ spin_unlock(&vcpu->async_pf.lock);
+
vcpu->async_pf.queued++;
return 0;
}
--
1.7.0.4
Don't make a KVM_REQ_UNHALT request after an async pf is completed, since
it can break the guest's 'HLT' instruction.
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/kvm/x86.c | 13 ++++++++++---
include/linux/kvm_host.h | 6 ++++++
virt/kvm/kvm_main.c | 9 ++++++++-
3 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 189664a..c57fb38 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6105,13 +6105,20 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ if ((vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
- || !list_empty_careful(&vcpu->async_pf.done)
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| vcpu->arch.nmi_pending ||
(kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ kvm_cpu_has_interrupt(vcpu)))
+ return 1;
+
+ if (!list_empty_careful(&vcpu->async_pf.done)) {
+ vcpu->arch.apf.halted = false;
+ return 2;
+ }
+
+ return 0;
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 462b982..8def043 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -436,6 +436,12 @@ void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
+
+/*
+ * return value: > 0 if the vcpu is runnable, 0 if not.
+ * Especially, if the return value == 1, we should make a
+ * 'KVM_REQ_UNHALT' request.
+ */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
void kvm_free_physmem(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dbe1d6a..9ccf105 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1375,14 +1375,21 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
DEFINE_WAIT(wait);
+ int ret;
for (;;) {
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
- if (kvm_arch_vcpu_runnable(vcpu)) {
+ ret = kvm_arch_vcpu_runnable(vcpu);
+
+ if (ret == 1) {
kvm_make_request(KVM_REQ_UNHALT, vcpu);
break;
}
+
+ if (ret > 1)
+ break;
+
if (kvm_cpu_has_pending_timer(vcpu))
break;
if (signal_pending(current))
--
1.7.0.4
On Mon, Nov 01, 2010 at 05:02:35PM +0800, Xiao Guangrong wrote:
> If it's no need to inject async #PF to PV guest we can handle
> more completed apfs at one time, so we can retry guest #PF
> as early as possible
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 3 ++-
> arch/x86/kvm/x86.c | 8 ++++++--
> virt/kvm/async_pf.c | 28 ++++++++++++++++------------
> 3 files changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1be0058..c95b3ff 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -818,7 +818,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
>
> void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
> struct kvm_async_pf *work);
> -void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> +/* return true if we can handle more completed apfs, false otherwise */
> +bool kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> struct kvm_async_pf *work);
> void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
> struct kvm_async_pf *work);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4da8485..189664a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6265,7 +6265,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
> }
> }
>
> -void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> +bool kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> struct kvm_async_pf *work)
> {
> trace_kvm_async_pf_ready(work->arch.token, work->gva);
> @@ -6274,13 +6274,17 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> else
> kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
>
> + vcpu->arch.apf.halted = false;
> +
> if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
> !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
> vcpu->arch.fault.error_code = 0;
> vcpu->arch.fault.address = work->arch.token;
> kvm_inject_page_fault(vcpu);
> + return false;
> }
> - vcpu->arch.apf.halted = false;
> +
> + return true;
> }
>
> bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 60df9e0..d57ec92 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -123,25 +123,29 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
> void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
> {
> struct kvm_async_pf *work;
> + bool ret;
>
> if (list_empty_careful(&vcpu->async_pf.done) ||
> !kvm_arch_can_inject_async_page_present(vcpu))
> return;
>
> - spin_lock(&vcpu->async_pf.lock);
> - work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
> - list_del(&work->link);
> - spin_unlock(&vcpu->async_pf.lock);
> + do {
> + spin_lock(&vcpu->async_pf.lock);
> + work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
> + link);
> + list_del(&work->link);
> + spin_unlock(&vcpu->async_pf.lock);
>
> - if (work->page)
> - kvm_arch_async_page_ready(vcpu, work);
> - kvm_arch_async_page_present(vcpu, work);
> + if (work->page)
> + kvm_arch_async_page_ready(vcpu, work);
> + ret = kvm_arch_async_page_present(vcpu, work);
>
> - list_del(&work->queue);
> - vcpu->async_pf.queued--;
> - if (work->page)
> - put_page(work->page);
> - kmem_cache_free(async_pf_cache, work);
> + list_del(&work->queue);
> + vcpu->async_pf.queued--;
> + if (work->page)
> + put_page(work->page);
> + kmem_cache_free(async_pf_cache, work);
> + } while (ret && !list_empty_careful(&vcpu->async_pf.done));
> }
>
No need to change kvm_arch_async_page_present() to return anything. You
can do a while loop like this:
while (!list_empty_careful(&vcpu->async_pf.done) &&
kvm_arch_can_inject_async_page_present(vcpu)) {
}
If the kvm_arch_async_page_present() call injects an exception,
kvm_arch_can_inject_async_page_present() will return false on the next
iteration.
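For reference, kvm_arch_can_inject_async_page_present() at this point looks
roughly like the sketch below (reconstructed from the surrounding discussion,
not the verbatim source); an injected exception makes
kvm_event_needs_reinjection() true, which is why the suggested loop stops:
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * Sketch only: a guest that has not enabled APF can always take
	 * another completion; a PV guest can take one only if no event,
	 * including a just-injected 'page ready' fault, is still pending.
	 */
	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
		return true;

	return !kvm_event_needs_reinjection(vcpu) &&
	       kvm_x86_ops->interrupt_allowed(vcpu);
}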
--
Gleb.
On Mon, Nov 01, 2010 at 05:01:28PM +0800, Xiao Guangrong wrote:
> In current code, it checks async pf completion out of the wait context,
> like this:
>
> if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> !vcpu->arch.apf.halted)
> r = vcpu_enter_guest(vcpu);
> else {
> ......
> kvm_vcpu_block(vcpu)
> ^- waiting until 'async_pf.done' is not empty
> }
>
> kvm_check_async_pf_completion(vcpu)
> ^- delete list from async_pf.done
>
> So, if we check aysnc pf completion first, it can be blocked at
> kvm_vcpu_block
>
> Fixed by mark the vcpu is unhalted in kvm_check_async_pf_completion()
> path
>
> Signed-off-by: Xiao Guangrong <[email protected]>
Acked-by: Gleb Natapov <[email protected]>
> ---
> arch/x86/kvm/x86.c | 1 +
> 1 files changed, 1 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9b543f4..4da8485 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6280,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> vcpu->arch.fault.address = work->arch.token;
> kvm_inject_page_fault(vcpu);
> }
> + vcpu->arch.apf.halted = false;
> }
>
> bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
> --
> 1.7.0.4
--
Gleb.
On 11/01/2010 05:24 PM, Gleb Natapov wrote:
>> - put_page(work->page);
>> - kmem_cache_free(async_pf_cache, work);
>> + list_del(&work->queue);
>> + vcpu->async_pf.queued--;
>> + if (work->page)
>> + put_page(work->page);
>> + kmem_cache_free(async_pf_cache, work);
>> + } while (ret && !list_empty_careful(&vcpu->async_pf.done));
>> }
>>
> No need to change kvm_arch_async_page_present() to return anything. You
> can do while loop like this:
>
> while (!list_empty_careful(&vcpu->async_pf.done) &&
> kvm_arch_can_inject_async_page_present(vcpu)) {
> }
>
> If kvm_arch_async_page_present() call injects exception
> kvm_arch_can_inject_async_page_present() will return false on next
> iteration.
Yeah, that's a better way, I'll fix it, thanks Gleb!
On Mon, Nov 01, 2010 at 05:05:00PM +0800, Xiao Guangrong wrote:
> Don't make a KVM_REQ_UNHALT request after async pf is completed since it
> can break guest's 'HLT' instruction.
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> arch/x86/kvm/x86.c | 13 ++++++++++---
> include/linux/kvm_host.h | 6 ++++++
> virt/kvm/kvm_main.c | 9 ++++++++-
> 3 files changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 189664a..c57fb38 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6105,13 +6105,20 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
>
> int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
> {
> - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> + if ((vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> !vcpu->arch.apf.halted)
> - || !list_empty_careful(&vcpu->async_pf.done)
> || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> || vcpu->arch.nmi_pending ||
> (kvm_arch_interrupt_allowed(vcpu) &&
> - kvm_cpu_has_interrupt(vcpu));
> + kvm_cpu_has_interrupt(vcpu)))
> + return 1;
> +
> + if (!list_empty_careful(&vcpu->async_pf.done)) {
> + vcpu->arch.apf.halted = false;
> + return 2;
> + }
kvm_arch_vcpu_runnable() shouldn't change vcpu state. I also don't like the
way it propagates internal x86 state to kvm_vcpu_block().
Maybe what you are looking for is the patch below? (not tested)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2cfdf2d..f7aed95 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
{
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
+ if (list_empty_careful(&vcpu->async_pf.done))
+ vcpu->arch.mp_state =
+ KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
@@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
vcpu->arch.fault.error_code = 0;
vcpu->arch.fault.address = work->arch.token;
kvm_inject_page_fault(vcpu);
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
}
--
Gleb.
On Mon, Nov 01, 2010 at 05:03:44PM +0800, Xiao Guangrong wrote:
> In kvm_async_pf_wakeup_all(), we add a dummy apf to vcpu->async_pf.done
> without holding vcpu->async_pf.lock, it will break if we are handling apfs
> at this time.
>
This should never happen with a well-behaved guest, but a malicious guest
can do it on purpose.
> Also use 'list_empty_careful()' instead of 'list_empty()'
>
> Signed-off-by: Xiao Guangrong <[email protected]>
Acked-by: Gleb Natapov <[email protected]>
> ---
> virt/kvm/async_pf.c | 5 ++++-
> 1 files changed, 4 insertions(+), 1 deletions(-)
>
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index d57ec92..6ef3373 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -200,7 +200,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
> {
> struct kvm_async_pf *work;
>
> - if (!list_empty(&vcpu->async_pf.done))
> + if (!list_empty_careful(&vcpu->async_pf.done))
> return 0;
>
> work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
> @@ -211,7 +211,10 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
> get_page(bad_page);
> INIT_LIST_HEAD(&work->queue); /* for list_del to work */
>
> + spin_lock(&vcpu->async_pf.lock);
> list_add_tail(&work->link, &vcpu->async_pf.done);
> + spin_unlock(&vcpu->async_pf.lock);
> +
> vcpu->async_pf.queued++;
> return 0;
> }
> --
> 1.7.0.4
--
Gleb.
On Mon, Nov 01, 2010 at 04:58:43PM +0800, Xiao Guangrong wrote:
> Tracing 'async' and *pfn is useless, since 'async' is always true,
> and '*pfn' is always "fault_pfn'
>
> We can trace 'gva' and 'gfn' instead, it can help us to see the
> life-cycle of an async_pf
>
> Signed-off-by: Xiao Guangrong <[email protected]>
Acked-by: Gleb Natapov <[email protected]>
> ---
> arch/x86/kvm/mmu.c | 2 +-
> include/trace/events/kvm.h | 12 +++++++-----
> 2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 39cc0c6..5275c50 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2627,7 +2627,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
> put_page(pfn_to_page(*pfn));
>
> if (!no_apf && can_do_async_pf(vcpu)) {
> - trace_kvm_try_async_get_page(async, *pfn);
> + trace_kvm_try_async_get_page(gva, gfn);
> if (kvm_find_async_pf_gfn(vcpu, gfn)) {
> trace_kvm_async_pf_doublefault(gva, gfn);
> kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
> index 9c2cc6a..30063c6 100644
> --- a/include/trace/events/kvm.h
> +++ b/include/trace/events/kvm.h
> @@ -188,18 +188,20 @@ TRACE_EVENT(kvm_age_page,
> #ifdef CONFIG_KVM_ASYNC_PF
> TRACE_EVENT(
> kvm_try_async_get_page,
> - TP_PROTO(bool async, u64 pfn),
> - TP_ARGS(async, pfn),
> + TP_PROTO(u64 gva, u64 gfn),
> + TP_ARGS(gva, gfn),
>
> TP_STRUCT__entry(
> - __field(__u64, pfn)
> + __field(u64, gva)
> + __field(u64, gfn)
> ),
>
> TP_fast_assign(
> - __entry->pfn = (!async) ? pfn : (u64)-1;
> + __entry->gva = gva;
> + __entry->gfn = gfn;
> ),
>
> - TP_printk("pfn %#llx", __entry->pfn)
> + TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
> );
>
> TRACE_EVENT(
> --
> 1.7.0.4
--
Gleb.
On 11/01/2010 08:55 PM, Gleb Natapov wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2cfdf2d..f7aed95 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> {
> switch(vcpu->arch.mp_state) {
> case KVM_MP_STATE_HALTED:
> - vcpu->arch.mp_state =
> - KVM_MP_STATE_RUNNABLE;
> + if (list_empty_careful(&vcpu->async_pf.done))
> + vcpu->arch.mp_state =
> + KVM_MP_STATE_RUNNABLE;
If an nmi/interrupt and an apf completion event occur at the same time, we will
fail to exit the halt state. Maybe we can check the pending events here; see the patch below, please.
> case KVM_MP_STATE_RUNNABLE:
> vcpu->arch.apf.halted = false;
> break;
> @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> vcpu->arch.fault.error_code = 0;
> vcpu->arch.fault.address = work->arch.token;
> kvm_inject_page_fault(vcpu);
> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> }
> }
I have a stupid question: why do we make the vcpu runnable here? Sorry, I don't
know the kvm pv guest code too well. :-(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2044302..27a712b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5267,6 +5267,12 @@ out:
return r;
}
+static bool kvm_vcpu_leave_halt_state(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.nmi_pending ||
+ (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_has_interrupt(vcpu));
+}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -5299,8 +5305,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
{
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
+ if (kvm_vcpu_leave_halt_state(vcpu))
+ vcpu->arch.mp_state =
+ KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
@@ -6113,9 +6120,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
- || vcpu->arch.nmi_pending ||
- (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ || kvm_vcpu_leave_halt_state(vcpu);
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
On Tue, Nov 02, 2010 at 10:30:10AM +0800, Xiao Guangrong wrote:
> On 11/01/2010 08:55 PM, Gleb Natapov wrote:
>
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 2cfdf2d..f7aed95 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> > {
> > switch(vcpu->arch.mp_state) {
> > case KVM_MP_STATE_HALTED:
> > - vcpu->arch.mp_state =
> > - KVM_MP_STATE_RUNNABLE;
> > + if (list_empty_careful(&vcpu->async_pf.done))
> > + vcpu->arch.mp_state =
> > + KVM_MP_STATE_RUNNABLE;
>
> if nmi/interrupt and apfs completed event occur at the same time, we will miss to
> exit halt sate. Maybe we can check the pending event here, see below patch please.
>
No, we will not. If an nmi/interrupt and an apf completion event occur at the
same time, kvm_vcpu_block() will exit with KVM_REQ_UNHALT set, but the cpu will
not be unhalted because of the list_empty_careful(&vcpu->async_pf.done)
check. The vcpu will then process the pending apf completion and enter
kvm_vcpu_block() again, which will immediately exit because
kvm_arch_vcpu_runnable() will return true since there is a pending
nmi/interrupt. This time the vcpu will be unhalted.
> > case KVM_MP_STATE_RUNNABLE:
> > vcpu->arch.apf.halted = false;
> > break;
> > @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> > vcpu->arch.fault.error_code = 0;
> > vcpu->arch.fault.address = work->arch.token;
> > kvm_inject_page_fault(vcpu);
> > + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> > }
> > }
>
> Have a stupid question, why we make the vcpu runnable here? Sorry i don't know
> kvm pv guest to much. :-(
Because kvm_arch_vcpu_runnable() does not check for pending exceptions.
Since we no longer unhalt the vcpu when an apf completion happens, we need to
unhalt it here. But, as I said, the patch is untested.
--
Gleb.
On 11/02/2010 02:56 PM, Gleb Natapov wrote:
> On Tue, Nov 02, 2010 at 10:30:10AM +0800, Xiao Guangrong wrote:
>> On 11/01/2010 08:55 PM, Gleb Natapov wrote:
>>
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 2cfdf2d..f7aed95 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>>> {
>>> switch(vcpu->arch.mp_state) {
>>> case KVM_MP_STATE_HALTED:
>>> - vcpu->arch.mp_state =
>>> - KVM_MP_STATE_RUNNABLE;
>>> + if (list_empty_careful(&vcpu->async_pf.done))
>>> + vcpu->arch.mp_state =
>>> + KVM_MP_STATE_RUNNABLE;
>>
>> if nmi/interrupt and apfs completed event occur at the same time, we will miss to
>> exit halt sate. Maybe we can check the pending event here, see below patch please.
>>
> No, we will not. If nmi/interrupt and apfs completed event occur at the same
> time kvm_vcpu_block() will exit with KVM_REQ_UNHALT set, but cpu will
> not be unhalted because of list_empty_careful(&vcpu->async_pf.done)
> check. Vcpu then will process pending apf completion and enter
> kvm_vcpu_block() again which will immediately exit because
> kvm_arch_vcpu_runnable() will return true since there is pending
> nmi/interrupt. This time vcpu will be unhalted.
Thanks for your explanation, but if there is an nmi/interrupt pending,
kvm_arch_can_inject_async_page_present() always returns false in the PV guest
case, so it cannot process the pending apf completion; does the vcpu then
remain in the halt state forever?
Also, the PV guest can only handle one apf completion at a time, so it cannot
ensure vcpu->async_pf.done is empty after kvm_check_async_pf_completion()
>
>>> case KVM_MP_STATE_RUNNABLE:
>>> vcpu->arch.apf.halted = false;
>>> break;
>>> @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
>>> vcpu->arch.fault.error_code = 0;
>>> vcpu->arch.fault.address = work->arch.token;
>>> kvm_inject_page_fault(vcpu);
>>> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>>> }
>>> }
>>
>> Have a stupid question, why we make the vcpu runnable here? Sorry i don't know
>> kvm pv guest to much. :-(
> Because kvm_arch_vcpu_runnable() does not check for pending exceptions.
> Since now we do not unhalt vcpu when apf completion happens we need to
> unhalt it here. But, as I said, the patch is untested.
>
As far as I know, an exception cannot make the guest exit the halt state, only an NMI/interrupt can do it, yes? :-)
On Tue, Nov 02, 2010 at 03:31:26PM +0800, Xiao Guangrong wrote:
> On 11/02/2010 02:56 PM, Gleb Natapov wrote:
> > On Tue, Nov 02, 2010 at 10:30:10AM +0800, Xiao Guangrong wrote:
> >> On 11/01/2010 08:55 PM, Gleb Natapov wrote:
> >>
> >>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>> index 2cfdf2d..f7aed95 100644
> >>> --- a/arch/x86/kvm/x86.c
> >>> +++ b/arch/x86/kvm/x86.c
> >>> @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> >>> {
> >>> switch(vcpu->arch.mp_state) {
> >>> case KVM_MP_STATE_HALTED:
> >>> - vcpu->arch.mp_state =
> >>> - KVM_MP_STATE_RUNNABLE;
> >>> + if (list_empty_careful(&vcpu->async_pf.done))
> >>> + vcpu->arch.mp_state =
> >>> + KVM_MP_STATE_RUNNABLE;
> >>
> >> if nmi/interrupt and apfs completed event occur at the same time, we will miss to
> >> exit halt sate. Maybe we can check the pending event here, see below patch please.
> >>
> > No, we will not. If nmi/interrupt and apfs completed event occur at the same
> > time kvm_vcpu_block() will exit with KVM_REQ_UNHALT set, but cpu will
> > not be unhalted because of list_empty_careful(&vcpu->async_pf.done)
> > check. Vcpu then will process pending apf completion and enter
> > kvm_vcpu_block() again which will immediately exit because
> > kvm_arch_vcpu_runnable() will return true since there is pending
> > nmi/interrupt. This time vcpu will be unhalted.
>
> Thanks for your explanation, but if it has nmi/interrupt pending,
> kvm_arch_can_inject_async_page_present() always return false in PV guest case,
> it can not process pending apf completion, so, the vcpu is remain halt state
> forever?
>
kvm_event_needs_reinjection() checks for nmi/interrupts that
need reinjection (not injection). Those are nmi/interrupts that
were injected but whose injection failed for some reason. For an nmi, for
instance, kvm_arch_vcpu_runnable() checks vcpu->arch.nmi_pending,
but kvm_event_needs_reinjection() checks vcpu->arch.nmi_injected.
An NMI moves from nmi_pending to nmi_injected when it is injected into the
vcpu for the first time. The CPU cannot be halted in this state.
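For reference, a reconstructed sketch of the helper being described (not the
verbatim source; the field names are taken from this discussion):
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
	/*
	 * Events that were already injected and still have to be
	 * delivered. Contrast with nmi_pending, which is what
	 * kvm_arch_vcpu_runnable() looks at.
	 */
	return vcpu->arch.exception.pending ||
	       vcpu->arch.interrupt.pending ||
	       vcpu->arch.nmi_injected;
}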
> Also, the pv guest can only handle an apf completion at one time, it can not ensure
> vcpu->async_pf.done is empty after kvm_check_async_pf_completion()
>
In the case of a PV guest it will be woken up on apf completion by
kvm_arch_async_page_present() below.
> >
> >>> case KVM_MP_STATE_RUNNABLE:
> >>> vcpu->arch.apf.halted = false;
> >>> break;
> >>> @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> >>> vcpu->arch.fault.error_code = 0;
> >>> vcpu->arch.fault.address = work->arch.token;
> >>> kvm_inject_page_fault(vcpu);
> >>> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> >>> }
> >>> }
> >>
> >> Have a stupid question, why we make the vcpu runnable here? Sorry i don't know
> >> kvm pv guest to much. :-(
> > Because kvm_arch_vcpu_runnable() does not check for pending exceptions.
> > Since now we do not unhalt vcpu when apf completion happens we need to
> > unhalt it here. But, as I said, the patch is untested.
> >
>
> As i know, exception can not let guest exit halt state, only NMI/interruption can do it, yes? :-)
On physical HW an exception cannot happen while the cpu is in the halt state,
but with PV we define what the guest can and cannot expect, so when the guest
explicitly enables apf it should be able to handle it during halt.
--
Gleb.
On 11/02/2010 03:45 PM, Gleb Natapov wrote:
> kvm_event_needs_reinjection() checks for nmi/interrupts that
> need reinjection (not injection). Those are nmi/interrupts that
> was injected but injection failed for some reason. For nmi, for
> instance, kvm_arch_vcpu_runnable() checks vcpu->arch.nmi_pending,
> but kvm_event_needs_reinjection() checks for vcpu->arch.nmi_injected.
> NMI moves from nmi_pending to nmi_injected when it is injected into vcpu
> for the first time. CPU cannot be halted in this state.
>
Yeah, nmi is handled that way, but for an interrupt:
If the interrupt controller is in userspace (!irqchip_in_kernel(v->kvm)),
kvm_arch_vcpu_runnable() checks vcpu->arch.interrupt.pending and
kvm_event_needs_reinjection() also checks vcpu->arch.interrupt.pending.
Consider this case:
- The guest vcpu executes 'HLT'
- The vcpu is woken up, an interrupt is injected, and an apf completes at this time
- Then the vcpu can't handle the apf completion and the .done list stays non-empty.
Can this case happen? Sorry if I missed it again.
>> Also, the pv guest can only handle an apf completion at one time, it can not ensure
>> vcpu->async_pf.done is empty after kvm_check_async_pf_completion()
>>
> In case of PV guest it will be woken up by apf completion by
> kvm_arch_async_page_present() below.
......
>> As i know, exception can not let guest exit halt state, only NMI/interruption can do it, yes? :-)
> On physical HW exception cannot happen while cpu is in halt state, but
> with PV we define what guest can and cannot expect, so when guest
> explicitly enables apf it should be able to handle it during halt.
>
Ah, I see, thank you very much.
On Tue, Nov 02, 2010 at 05:09:42PM +0800, Xiao Guangrong wrote:
> On 11/02/2010 03:45 PM, Gleb Natapov wrote:
>
> > kvm_event_needs_reinjection() checks for nmi/interrupts that
> > need reinjection (not injection). Those are nmi/interrupts that
> > was injected but injection failed for some reason. For nmi, for
> > instance, kvm_arch_vcpu_runnable() checks vcpu->arch.nmi_pending,
> > but kvm_event_needs_reinjection() checks for vcpu->arch.nmi_injected.
> > NMI moves from nmi_pending to nmi_injected when it is injected into vcpu
> > for the first time. CPU cannot be halted in this state.
> >
>
> Yeah, nmi is handled like this way, but for interrupt:
> If interruption controller is in userspace (!irqchip_in_kernel(v->kvm)),
> kvm_arch_vcpu_runnable() checks vcpu->arch.interrupt.pending and
> kvm_event_needs_reinjection() also checks vcpu->arch.interrupt.pending.
>
> Consider this case:
>
> - Guest vcpu executes 'HLT'
> - wakeup the vcpu and inject interrupt and apfs is completed at this time
> - then the vcpu can't handle apf completion and .done list keeps nonempty.
>
> Can this case happen? Sorry if i missed it again.
>
If the irqchip is in userspace, apf is disabled (see mmu.c:can_do_async_pf()).
The reason for this is that when !irqchip_in_kernel(v->kvm) the cpu sleeps in
userspace during halt, so all events that can cause it to be unhalted
should be generated in userspace too. This is also the reason you can't have
the pit in the kernel and the irqchip in userspace.
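For reference, the check being referred to looks roughly like this; a
reconstructed sketch of mmu.c:can_do_async_pf() around this kernel version,
not the verbatim source:
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
{
	/*
	 * Sketch only: async_pf requires the in-kernel irqchip, no event
	 * waiting to be re-injected, and a guest that can currently take
	 * the 'page not present' fault.
	 */
	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
		     kvm_event_needs_reinjection(vcpu)))
		return false;

	return kvm_x86_ops->interrupt_allowed(vcpu);
}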
--
Gleb.
On 11/02/2010 05:14 PM, Gleb Natapov wrote:
> If irqchip is in userspace apf is disabled (see mmu.c:can_do_async_pf()).
> The reason for this is that when irqchip_in_kernel(v->kvm) cpu sleeps in
> userspace during halt, so all event that can cause it to be unhalted
> should be generated in userspace too. This is also the reason you can't have
> pit in kernel and irqchip in userpsace.
>
Oh, thank you very much for answering so many questions, and your patch
looks good to me! ;-)
If there is no need to inject an async #PF into the PV guest, we can
handle more completed apfs at one time, so the guest #PF can be
retried as early as possible.
Signed-off-by: Xiao Guangrong <[email protected]>
---
virt/kvm/async_pf.c | 32 ++++++++++++++++----------------
1 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 60df9e0..100c66e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -124,24 +124,24 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
- if (list_empty_careful(&vcpu->async_pf.done) ||
- !kvm_arch_can_inject_async_page_present(vcpu))
- return;
-
- spin_lock(&vcpu->async_pf.lock);
- work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link);
- list_del(&work->link);
- spin_unlock(&vcpu->async_pf.lock);
+ while (!list_empty_careful(&vcpu->async_pf.done) &&
+ kvm_arch_can_inject_async_page_present(vcpu)) {
+ spin_lock(&vcpu->async_pf.lock);
+ work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
+ link);
+ list_del(&work->link);
+ spin_unlock(&vcpu->async_pf.lock);
- if (work->page)
- kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ if (work->page)
+ kvm_arch_async_page_ready(vcpu, work);
+ kvm_arch_async_page_present(vcpu, work);
- list_del(&work->queue);
- vcpu->async_pf.queued--;
- if (work->page)
- put_page(work->page);
- kmem_cache_free(async_pf_cache, work);
+ list_del(&work->queue);
+ vcpu->async_pf.queued--;
+ if (work->page)
+ put_page(work->page);
+ kmem_cache_free(async_pf_cache, work);
+ }
}
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
--
1.7.0.4
On Tue, Nov 02, 2010 at 05:35:35PM +0800, Xiao Guangrong wrote:
> If it's no need to inject async #PF to PV guest we can handle
> more completed apfs at one time, so we can retry guest #PF
> as early as possible
>
> Signed-off-by: Xiao Guangrong <[email protected]>
Acked-by: Gleb Natapov <[email protected]>
--
Gleb.
On Tue, Nov 02, 2010 at 05:30:16PM +0800, Xiao Guangrong wrote:
> On 11/02/2010 05:14 PM, Gleb Natapov wrote:
>
> > If irqchip is in userspace apf is disabled (see mmu.c:can_do_async_pf()).
> > The reason for this is that when irqchip_in_kernel(v->kvm) cpu sleeps in
> > userspace during halt, so all event that can cause it to be unhalted
> > should be generated in userspace too. This is also the reason you can't have
> > pit in kernel and irqchip in userpsace.
> >
>
> Oh, thank you very much for answering so many questions, and your patch is
> looks good for me! ;-)
It is still not tested though :)
--
Gleb.
On 11/01/2010 08:55 PM, Gleb Natapov wrote:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2cfdf2d..f7aed95 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> {
> switch(vcpu->arch.mp_state) {
> case KVM_MP_STATE_HALTED:
> - vcpu->arch.mp_state =
> - KVM_MP_STATE_RUNNABLE;
> + if (list_empty_careful(&vcpu->async_pf.done))
> + vcpu->arch.mp_state =
> + KVM_MP_STATE_RUNNABLE;
> case KVM_MP_STATE_RUNNABLE:
> vcpu->arch.apf.halted = false;
> break;
> @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> vcpu->arch.fault.error_code = 0;
> vcpu->arch.fault.address = work->arch.token;
> kvm_inject_page_fault(vcpu);
> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> }
> }
>
Tested with full virtualization guests, it works well for me. Thanks!
On Wed, Nov 03, 2010 at 05:47:53PM +0800, Xiao Guangrong wrote:
> On 11/01/2010 08:55 PM, Gleb Natapov wrote:
>
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 2cfdf2d..f7aed95 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> > {
> > switch(vcpu->arch.mp_state) {
> > case KVM_MP_STATE_HALTED:
> > - vcpu->arch.mp_state =
> > - KVM_MP_STATE_RUNNABLE;
> > + if (list_empty_careful(&vcpu->async_pf.done))
> > + vcpu->arch.mp_state =
> > + KVM_MP_STATE_RUNNABLE;
> > case KVM_MP_STATE_RUNNABLE:
> > vcpu->arch.apf.halted = false;
> > break;
> > @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> > vcpu->arch.fault.error_code = 0;
> > vcpu->arch.fault.address = work->arch.token;
> > kvm_inject_page_fault(vcpu);
> > + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> > }
> > }
> >
>
> Tested with full virtualization guests, it works well for me. Thanks!
Thank you. Will repost properly.
--
Gleb.
On Wed, Nov 03, 2010 at 11:45:08AM +0200, Gleb Natapov wrote:
> On Wed, Nov 03, 2010 at 05:47:53PM +0800, Xiao Guangrong wrote:
> > On 11/01/2010 08:55 PM, Gleb Natapov wrote:
> >
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 2cfdf2d..f7aed95 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -5295,8 +5295,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> > > {
> > > switch(vcpu->arch.mp_state) {
> > > case KVM_MP_STATE_HALTED:
> > > - vcpu->arch.mp_state =
> > > - KVM_MP_STATE_RUNNABLE;
> > > + if (list_empty_careful(&vcpu->async_pf.done))
> > > + vcpu->arch.mp_state =
> > > + KVM_MP_STATE_RUNNABLE;
> > > case KVM_MP_STATE_RUNNABLE:
> > > vcpu->arch.apf.halted = false;
> > > break;
> > > @@ -6279,6 +6280,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> > > vcpu->arch.fault.error_code = 0;
> > > vcpu->arch.fault.address = work->arch.token;
> > > kvm_inject_page_fault(vcpu);
> > > + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> > > }
> > > }
> > >
> >
> > Tested with full virtualization guests, it works well for me. Thanks!
> Thank you. Will repost properly.
There is no need to optimize this. The current code is simpler and less
error-prone. Applied 1-6, thanks.