See individual patches. The savings are mostly due to avoiding
srcu_read_unlock/srcu_read_lock and, if halt_poll_ns is set,
ktime_get.
Paolo
Paolo Bonzini (2):
KVM: x86: extract guest running logic from __vcpu_run
KVM: x86: optimize delivery of TSC deadline timer interrupt
arch/x86/kvm/x86.c | 71 ++++++++++++++++++++++++++++++------------------------
1 file changed, 40 insertions(+), 31 deletions(-)
--
1.8.3.1
Rename the old __vcpu_run to vcpu_run, and extract its main logic
to a new function.
The next patch will add a new early-exit condition to __vcpu_run;
extracting the logic now avoids extra indentation.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/x86.c | 67 +++++++++++++++++++++++++++++-------------------------
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70be41b3..0b8dd13676ef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6165,7 +6165,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
}
/*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
*/
@@ -6383,42 +6383,45 @@ out:
return r;
}
+static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted)
+ return vcpu_enter_guest(vcpu);
+
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+ return 1;
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+ kvm_apic_accept_events(vcpu);
+ switch(vcpu->arch.mp_state) {
+ case KVM_MP_STATE_HALTED:
+ vcpu->arch.pv.pv_unhalted = false;
+ vcpu->arch.mp_state =
+ KVM_MP_STATE_RUNNABLE;
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.apf.halted = false;
+ break;
+ case KVM_MP_STATE_INIT_RECEIVED:
+ break;
+ default:
+ return -EINTR;
+ break;
+ }
+ return 1;
+}
+
+static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- r = 1;
- while (r > 0) {
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
- r = vcpu_enter_guest(vcpu);
- else {
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
- kvm_apic_accept_events(vcpu);
- switch(vcpu->arch.mp_state) {
- case KVM_MP_STATE_HALTED:
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
- case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.apf.halted = false;
- break;
- case KVM_MP_STATE_INIT_RECEIVED:
- break;
- default:
- r = -EINTR;
- break;
- }
- }
- }
-
+ for (;;) {
+ r = __vcpu_run(kvm, vcpu);
if (r <= 0)
break;
@@ -6430,6 +6433,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
+ break;
}
kvm_check_async_pf_completion(vcpu);
@@ -6438,6 +6442,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
+ break;
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6569,7 +6574,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
- r = __vcpu_run(vcpu);
+ r = vcpu_run(vcpu);
out:
post_kvm_run_save(vcpu);
--
1.8.3.1
The newly-added tracepoint shows the following results on
the tscdeadline_latency test:
qemu-kvm-8387 [002] 6425.558974: kvm_vcpu_wakeup: poll time 10407 ns
qemu-kvm-8387 [002] 6425.558984: kvm_vcpu_wakeup: poll time 0 ns
qemu-kvm-8387 [002] 6425.561242: kvm_vcpu_wakeup: poll time 10477 ns
qemu-kvm-8387 [002] 6425.561251: kvm_vcpu_wakeup: poll time 0 ns
and so on. This is because we need to go through kvm_vcpu_block again
after the timer IRQ is injected. Avoid it by polling once before
entering kvm_vcpu_block.
On my machine (Xeon E5 Sandy Bridge) this removes about 500 cycles (7%)
from the latency of the TSC deadline timer.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/x86.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b8dd13676ef..1e766033ebff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6389,11 +6389,15 @@ static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted)
return vcpu_enter_guest(vcpu);
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
- return 1;
+ if (kvm_arch_vcpu_runnable(vcpu))
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ else {
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+ return 1;
+ }
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
--
1.8.3.1
On Fri, Feb 06, 2015 at 01:16:59PM +0100, Paolo Bonzini wrote:
> The newly-added tracepoint shows the following results on
> the tscdeadline_latency test:
>
> qemu-kvm-8387 [002] 6425.558974: kvm_vcpu_wakeup: poll time 10407 ns
> qemu-kvm-8387 [002] 6425.558984: kvm_vcpu_wakeup: poll time 0 ns
> qemu-kvm-8387 [002] 6425.561242: kvm_vcpu_wakeup: poll time 10477 ns
> qemu-kvm-8387 [002] 6425.561251: kvm_vcpu_wakeup: poll time 0 ns
>
> and so on. This is because we need to go through kvm_vcpu_block again
> after the timer IRQ is injected. Avoid it by polling once before
> entering kvm_vcpu_block.
>
> On my machine (Xeon E5 Sandy Bridge) this removes about 500 cycles (7%)
> from the latency of the TSC deadline timer.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/x86.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0b8dd13676ef..1e766033ebff 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6389,11 +6389,15 @@ static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
> !vcpu->arch.apf.halted)
> return vcpu_enter_guest(vcpu);
>
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> - kvm_vcpu_block(vcpu);
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> - if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
> - return 1;
> + if (kvm_arch_vcpu_runnable(vcpu))
> + clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
> + else {
Why the clear_bit? Only kvm_vcpu_block in the section below sets it,
and that section clears it as well.
Can remove another 300 cycles from do_div when programming LAPIC
tscdeadline timer.
> + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_block(vcpu);
> + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
> + return 1;
> + }
>
> kvm_apic_accept_events(vcpu);
> switch(vcpu->arch.mp_state) {
> --
> 1.8.3.1
On 06/02/2015 21:51, Marcelo Tosatti wrote:
> On Fri, Feb 06, 2015 at 01:16:59PM +0100, Paolo Bonzini wrote:
>> The newly-added tracepoint shows the following results on
>> the tscdeadline_latency test:
>>
>> qemu-kvm-8387 [002] 6425.558974: kvm_vcpu_wakeup: poll time 10407 ns
>> qemu-kvm-8387 [002] 6425.558984: kvm_vcpu_wakeup: poll time 0 ns
>> qemu-kvm-8387 [002] 6425.561242: kvm_vcpu_wakeup: poll time 10477 ns
>> qemu-kvm-8387 [002] 6425.561251: kvm_vcpu_wakeup: poll time 0 ns
>>
>> and so on. This is because we need to go through kvm_vcpu_block again
>> after the timer IRQ is injected. Avoid it by polling once before
>> entering kvm_vcpu_block.
>>
>> On my machine (Xeon E5 Sandy Bridge) this removes about 500 cycles (7%)
>> from the latency of the TSC deadline timer.
>>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> arch/x86/kvm/x86.c | 14 +++++++++-----
>> 1 file changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 0b8dd13676ef..1e766033ebff 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6389,11 +6389,15 @@ static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
>> !vcpu->arch.apf.halted)
>> return vcpu_enter_guest(vcpu);
>>
>> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
>> - kvm_vcpu_block(vcpu);
>> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>> - if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
>> - return 1;
>> + if (kvm_arch_vcpu_runnable(vcpu))
>> + clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
>> + else {
>
> Why the clear_bit? Only kvm_vcpu_block in the section below sets it,
> and that section clears it as well.
You're right.
> Can remove another 300 cycles from do_div when programming LAPIC
> tscdeadline timer.
Do you mean using something like lib/reciprocal_div.c? Good idea,
though that's not latency, it's just being slow. :)
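For illustration, a minimal userspace sketch of the reciprocal-multiply
idea (this is not the lib/reciprocal_div.c API, and the divisor value is
made up; the real library code also handles the exactness corner cases):

#include <stdint.h>
#include <stdio.h>

/* Precompute ceil(2^32 / d) once for a fixed divisor d. */
static uint32_t recip_init(uint32_t d)
{
	return (uint32_t)((((uint64_t)1 << 32) + d - 1) / d);
}

/* a / d becomes a multiply and a shift, with no divide instruction. */
static uint32_t recip_div(uint32_t a, uint32_t m)
{
	return (uint32_t)(((uint64_t)a * m) >> 32);
}

int main(void)
{
	uint32_t tsc_khz = 2893300;                /* hypothetical divisor */
	uint32_t m = recip_init(tsc_khz);

	printf("%u\n", recip_div(1000000000u, m)); /* roughly 1e9 / tsc_khz */
	return 0;
}

The idea being that, assuming the divisor used by the do_div there (the
TSC rate) stays fixed, the cost of the division can be paid once at
setup instead of on every timer reprogram.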
Paolo
On Fri, Feb 6, 2015 at 4:16 AM, Paolo Bonzini <[email protected]> wrote:
> Rename the old __vcpu_run to vcpu_run, and extract its main logic
> to a new function.
>
> The next patch will add a new early-exit condition to __vcpu_run;
> extracting the logic now avoids extra indentation.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
Reviewed-by: David Matlack <[email protected]>
> arch/x86/kvm/x86.c | 67 +++++++++++++++++++++++++++++-------------------------
> 1 file changed, 36 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bd7a70be41b3..0b8dd13676ef 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6165,7 +6165,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
> }
>
> /*
> - * Returns 1 to let __vcpu_run() continue the guest execution loop without
> + * Returns 1 to let vcpu_run() continue the guest execution loop without
> * exiting to the userspace. Otherwise, the value will be returned to the
> * userspace.
> */
> @@ -6383,42 +6383,45 @@ out:
> return r;
> }
>
> +static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
Kind of confusing function body given the name. Maybe put
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
return vcpu_enter_guest(vcpu);
back in vcpu_run and rename this function? vcpu_wait?
> +{
> + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> + !vcpu->arch.apf.halted)
> + return vcpu_enter_guest(vcpu);
> +
> + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> + kvm_vcpu_block(vcpu);
> + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
> + return 1;
>
> -static int __vcpu_run(struct kvm_vcpu *vcpu)
> + kvm_apic_accept_events(vcpu);
> + switch(vcpu->arch.mp_state) {
> + case KVM_MP_STATE_HALTED:
> + vcpu->arch.pv.pv_unhalted = false;
> + vcpu->arch.mp_state =
> + KVM_MP_STATE_RUNNABLE;
> + case KVM_MP_STATE_RUNNABLE:
> + vcpu->arch.apf.halted = false;
> + break;
> + case KVM_MP_STATE_INIT_RECEIVED:
> + break;
> + default:
> + return -EINTR;
> + break;
> + }
> + return 1;
> +}
> +
> +static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu_run_loop might be a clearer name.
> {
> int r;
> struct kvm *kvm = vcpu->kvm;
>
> vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>
> - r = 1;
> - while (r > 0) {
> - if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> - !vcpu->arch.apf.halted)
> - r = vcpu_enter_guest(vcpu);
> - else {
> - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> - kvm_vcpu_block(vcpu);
> - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
> - kvm_apic_accept_events(vcpu);
> - switch(vcpu->arch.mp_state) {
> - case KVM_MP_STATE_HALTED:
> - vcpu->arch.pv.pv_unhalted = false;
> - vcpu->arch.mp_state =
> - KVM_MP_STATE_RUNNABLE;
> - case KVM_MP_STATE_RUNNABLE:
> - vcpu->arch.apf.halted = false;
> - break;
> - case KVM_MP_STATE_INIT_RECEIVED:
> - break;
> - default:
> - r = -EINTR;
> - break;
> - }
> - }
> - }
> -
> + for (;;) {
> + r = __vcpu_run(kvm, vcpu);
> if (r <= 0)
> break;
>
> @@ -6430,6 +6433,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> r = -EINTR;
> vcpu->run->exit_reason = KVM_EXIT_INTR;
> ++vcpu->stat.request_irq_exits;
> + break;
> }
>
> kvm_check_async_pf_completion(vcpu);
> @@ -6438,6 +6442,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> r = -EINTR;
> vcpu->run->exit_reason = KVM_EXIT_INTR;
> ++vcpu->stat.signal_exits;
> + break;
The removal of the loop condition and the addition of these "break"s
change the loop behavior slightly, but I think it's safe. We'll start
breaking before checking need_resched(), but we're about to return to
userspace anyway so we'll reschedule then.
> }
> if (need_resched()) {
> srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> @@ -6569,7 +6574,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> } else
> WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
>
> - r = __vcpu_run(vcpu);
> + r = vcpu_run(vcpu);
>
> out:
> post_kvm_run_save(vcpu);
> --
> 1.8.3.1
>
>
On 02/06/2015 07:16 AM, Paolo Bonzini wrote:
> Rename the old __vcpu_run to vcpu_run, and extract its main logic
> to a new function.
>
> The next patch will add a new early-exit condition to __vcpu_run;
> extracting the logic now avoids extra indentation.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
--
All rights reversed
On 09/02/2015 23:44, David Matlack wrote:
>> >
>> > +static inline int __vcpu_run(struct kvm *kvm, struct kvm_vcpu *vcpu)
> Kind of confusing function body given the name. Maybe put
>
> if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
> !vcpu->arch.apf.halted)
> return vcpu_enter_guest(vcpu);
>
> back in vcpu_run and rename this function? vcpu_wait?
Yeah, vcpu_wait or vcpu_block (since it calls kvm_vcpu_block) sounds good.
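For reference, a rough compilable model of how that reshuffle might look
(stub types, return values and names here are illustrative only, not the
eventual kernel code): the runnable check stays in vcpu_run() and the
renamed helper only deals with the blocked case.

#include <stdbool.h>

struct kvm_vcpu { bool runnable; bool halted; };

/* Stub: pretend the guest immediately needs to exit to userspace. */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { (void)vcpu; return 0; }

/* Was __vcpu_run: only the srcu unlock, kvm_vcpu_block(), srcu relock
 * and mp_state handling would live here. */
static int vcpu_block(struct kvm_vcpu *vcpu) { (void)vcpu; return 1; }

static int vcpu_run(struct kvm_vcpu *vcpu)
{
	int r;

	for (;;) {
		if (vcpu->runnable && !vcpu->halted)
			r = vcpu_enter_guest(vcpu);
		else
			r = vcpu_block(vcpu);
		if (r <= 0)
			break;
		/* request_irq / signal / need_resched handling ... */
	}
	return r;
}

int main(void)
{
	struct kvm_vcpu vcpu = { .runnable = true, .halted = false };
	return vcpu_run(&vcpu);
}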
Paolo
On Sat, Feb 07, 2015 at 09:33:09PM +0100, Paolo Bonzini wrote:
> > Can remove another 300 cycles from do_div when programming LAPIC
> > tscdeadline timer.
>
> Do you mean using something like lib/reciprocal_div.c?
Yes.
> Good idea,
> though that's not latency, it's just being slow. :)
It adds to latency, yes (the guest will reprogram the
APIC timer before resuming userspace execution).