The idea is from Xen: when sending a call-function IPI-many to vCPUs,
yield if any of the IPI target vCPUs was preempted. A 17% performance
increase in the ebizzy benchmark can be observed in an over-subscribed
environment. (w/ kvm-pv-tlb disabled, testing TLB flush via
call-function IPI-many, since call-function IPIs are not easy to
trigger from a userspace workload.)
v1 -> v2:
* check map is not NULL
* check map->phys_map[dest_id] is not NULL
Wanpeng Li (3):
KVM: X86: Implement PV sched yield in linux guest
KVM: X86: Implement PV sched yield hypercall
KVM: X86: Expose PV_SCHED_YIELD CPUID feature bit to guest
Documentation/virtual/kvm/cpuid.txt | 4 ++++
Documentation/virtual/kvm/hypercalls.txt | 11 +++++++++++
arch/x86/include/uapi/asm/kvm_para.h | 1 +
arch/x86/kernel/kvm.c | 21 +++++++++++++++++++++
arch/x86/kvm/cpuid.c | 3 ++-
arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
include/uapi/linux/kvm_para.h | 1 +
7 files changed, 64 insertions(+), 1 deletion(-)
--
2.7.4
From: Wanpeng Li <[email protected]>
When sending a call-function IPI-many to vCPUs, yield if any of
the IPI target vCPUs was preempted. We simply select the first
preempted target vCPU we find, since the state of the target vCPUs
can change underneath us and we want to avoid race conditions.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
Documentation/virtual/kvm/hypercalls.txt | 11 +++++++++++
arch/x86/include/uapi/asm/kvm_para.h | 1 +
arch/x86/kernel/kvm.c | 21 +++++++++++++++++++++
include/uapi/linux/kvm_para.h | 1 +
4 files changed, 34 insertions(+)
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index da24c13..da21065 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+Usage example: When sending a call-function IPI-many to vCPUs, yield if
+any of the IPI target vCPUs was preempted.
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 19980ec..d0bf77c 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,6 +29,7 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
+#define KVM_FEATURE_PV_SCHED_YIELD 12
#define KVM_HINTS_REALTIME 0
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 3f0cc82..54400c2 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -540,6 +540,21 @@ static void kvm_setup_pv_ipi(void)
pr_info("KVM setup pv IPIs\n");
}
+static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
+{
+ int cpu;
+
+ native_send_call_func_ipi(mask);
+
+ /* Make sure other vCPUs get a chance to run if they need to. */
+ for_each_cpu(cpu, mask) {
+ if (vcpu_is_preempted(cpu)) {
+ kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
+ break;
+ }
+ }
+}
+
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@@ -651,6 +666,12 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+ if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
+ pr_info("KVM setup pv sched yield\n");
+ }
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 6c0ce49..8b86609 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -28,6 +28,7 @@
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10
+#define KVM_HC_SCHED_YIELD 11
/*
* hypercalls use architecture specific
--
2.7.4
From: Wanpeng Li <[email protected]>
The target vCPUs are in runnable state after vcpu_kick and are
suitable as yield targets. This patch implements the sched yield
hypercall.
A 17% performance increase in the ebizzy benchmark can be observed in
an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
flush via call-function IPI-many, since call-function IPIs are not
easy to trigger from a userspace workload.)
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7e57de..2ceef51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7172,6 +7172,26 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
+void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
+{
+ struct kvm_vcpu *target;
+ struct kvm_apic_map *map;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (unlikely(!map))
+ goto out;
+
+ if (map->phys_map[dest_id]->vcpu) {
+ target = map->phys_map[dest_id]->vcpu;
+ kvm_vcpu_yield_to(target);
+ }
+
+out:
+ rcu_read_unlock();
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -7218,6 +7238,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
+ case KVM_HC_SCHED_YIELD:
+ kvm_sched_yield(vcpu->kvm, a0);
+ ret = 0;
+ break;
default:
ret = -KVM_ENOSYS;
break;
--
2.7.4
From: Wanpeng Li <[email protected]>
Expose the PV_SCHED_YIELD feature bit to the guest; the guest can
check this feature bit before using paravirtualized sched yield.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
Documentation/virtual/kvm/cpuid.txt | 4 ++++
arch/x86/kvm/cpuid.c | 3 ++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 97ca194..1c39683 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -66,6 +66,10 @@ KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
|| || before using paravirtualized
|| || send IPIs.
------------------------------------------------------------------------------
+KVM_FEATURE_PV_SCHED_YIELD || 12 || guest checks this feature bit
+ || || before using paravirtualized
+ || || sched yield.
+------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e18a9f9..c018fc8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -643,7 +643,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
- (1 << KVM_FEATURE_PV_SEND_IPI);
+ (1 << KVM_FEATURE_PV_SEND_IPI) |
+ (1 << KVM_FEATURE_PV_SCHED_YIELD);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
--
2.7.4
From: Wanpeng Li <[email protected]>
The target vCPUs are in runnable state after vcpu_kick and are
suitable as yield targets. This patch implements the sched yield
hypercall.
A 17% performance increase in the ebizzy benchmark can be observed in
an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
flush via call-function IPI-many, since call-function IPIs are not
easy to trigger from a userspace workload.)
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/x86.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7e57de..2f9ec08 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7172,6 +7172,28 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
+void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
+{
+ struct kvm_vcpu *target;
+ struct kvm_apic_map *map;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (unlikely(!map))
+ goto out;
+
+ if (map->phys_map[dest_id]->vcpu) {
+ target = map->phys_map[dest_id]->vcpu;
+ rcu_read_unlock();
+ kvm_vcpu_yield_to(target);
+ }
+
+out:
+ if (!target)
+ rcu_read_unlock();
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -7218,6 +7240,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
+ case KVM_HC_SCHED_YIELD:
+ kvm_sched_yield(vcpu->kvm, a0);
+ ret = 0;
+ break;
default:
ret = -KVM_ENOSYS;
break;
--
2.7.4
Hi Wanpeng,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v5.2-rc2 next-20190524]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Wanpeng-Li/KVM-X86-Implement-PV-sched-yield-hypercall/20190528-132021
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>
All errors (new ones prefixed by >>):
arch/x86//kvm/x86.c: In function 'kvm_emulate_hypercall':
>> arch/x86//kvm/x86.c:7243:7: error: 'KVM_HC_SCHED_YIELD' undeclared (first use in this function); did you mean 'KVM_HC_SEND_IPI'?
case KVM_HC_SCHED_YIELD:
^~~~~~~~~~~~~~~~~~
KVM_HC_SEND_IPI
arch/x86//kvm/x86.c:7243:7: note: each undeclared identifier is reported only once for each function it appears in
vim +7243 arch/x86//kvm/x86.c
7196
7197 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7198 {
7199 unsigned long nr, a0, a1, a2, a3, ret;
7200 int op_64_bit;
7201
7202 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7203 return kvm_hv_hypercall(vcpu);
7204
7205 nr = kvm_rax_read(vcpu);
7206 a0 = kvm_rbx_read(vcpu);
7207 a1 = kvm_rcx_read(vcpu);
7208 a2 = kvm_rdx_read(vcpu);
7209 a3 = kvm_rsi_read(vcpu);
7210
7211 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7212
7213 op_64_bit = is_64_bit_mode(vcpu);
7214 if (!op_64_bit) {
7215 nr &= 0xFFFFFFFF;
7216 a0 &= 0xFFFFFFFF;
7217 a1 &= 0xFFFFFFFF;
7218 a2 &= 0xFFFFFFFF;
7219 a3 &= 0xFFFFFFFF;
7220 }
7221
7222 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
7223 ret = -KVM_EPERM;
7224 goto out;
7225 }
7226
7227 switch (nr) {
7228 case KVM_HC_VAPIC_POLL_IRQ:
7229 ret = 0;
7230 break;
7231 case KVM_HC_KICK_CPU:
7232 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7233 ret = 0;
7234 break;
7235 #ifdef CONFIG_X86_64
7236 case KVM_HC_CLOCK_PAIRING:
7237 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7238 break;
7239 #endif
7240 case KVM_HC_SEND_IPI:
7241 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7242 break;
> 7243 case KVM_HC_SCHED_YIELD:
7244 kvm_sched_yield(vcpu->kvm, a0);
7245 ret = 0;
7246 break;
7247 default:
7248 ret = -KVM_ENOSYS;
7249 break;
7250 }
7251 out:
7252 if (!op_64_bit)
7253 ret = (u32)ret;
7254 kvm_rax_write(vcpu, ret);
7255
7256 ++vcpu->stat.hypercalls;
7257 return kvm_skip_emulated_instruction(vcpu);
7258 }
7259 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
7260
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Tue, 28 May 2019 at 14:08, kbuild test robot <[email protected]> wrote:
>
> Hi Wanpeng,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on kvm/linux-next]
> [also build test ERROR on v5.2-rc2 next-20190524]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url: https://github.com/0day-ci/linux/commits/Wanpeng-Li/KVM-X86-Implement-PV-sched-yield-hypercall/20190528-132021
> base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
> config: x86_64-allyesconfig (attached as .config)
> compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64
>
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <[email protected]>
>
> All errors (new ones prefixed by >>):
>
> arch/x86//kvm/x86.c: In function 'kvm_emulate_hypercall':
> >> arch/x86//kvm/x86.c:7243:7: error: 'KVM_HC_SCHED_YIELD' undeclared (first use in this function); did you mean 'KVM_HC_SEND_IPI'?
> case KVM_HC_SCHED_YIELD:
> ^~~~~~~~~~~~~~~~~~
> KVM_HC_SEND_IPI
> arch/x86//kvm/x86.c:7243:7: note: each undeclared identifier is reported only once for each function it appears in
It's a false report; KVM_HC_SCHED_YIELD is declared in patch 1/3.
>
> vim +7243 arch/x86//kvm/x86.c
>
> 7196
> 7197 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> 7198 {
> 7199 unsigned long nr, a0, a1, a2, a3, ret;
> 7200 int op_64_bit;
> 7201
> 7202 if (kvm_hv_hypercall_enabled(vcpu->kvm))
> 7203 return kvm_hv_hypercall(vcpu);
> 7204
> 7205 nr = kvm_rax_read(vcpu);
> 7206 a0 = kvm_rbx_read(vcpu);
> 7207 a1 = kvm_rcx_read(vcpu);
> 7208 a2 = kvm_rdx_read(vcpu);
> 7209 a3 = kvm_rsi_read(vcpu);
> 7210
> 7211 trace_kvm_hypercall(nr, a0, a1, a2, a3);
> 7212
> 7213 op_64_bit = is_64_bit_mode(vcpu);
> 7214 if (!op_64_bit) {
> 7215 nr &= 0xFFFFFFFF;
> 7216 a0 &= 0xFFFFFFFF;
> 7217 a1 &= 0xFFFFFFFF;
> 7218 a2 &= 0xFFFFFFFF;
> 7219 a3 &= 0xFFFFFFFF;
> 7220 }
> 7221
> 7222 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
> 7223 ret = -KVM_EPERM;
> 7224 goto out;
> 7225 }
> 7226
> 7227 switch (nr) {
> 7228 case KVM_HC_VAPIC_POLL_IRQ:
> 7229 ret = 0;
> 7230 break;
> 7231 case KVM_HC_KICK_CPU:
> 7232 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
> 7233 ret = 0;
> 7234 break;
> 7235 #ifdef CONFIG_X86_64
> 7236 case KVM_HC_CLOCK_PAIRING:
> 7237 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
> 7238 break;
> 7239 #endif
> 7240 case KVM_HC_SEND_IPI:
> 7241 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> 7242 break;
> > 7243 case KVM_HC_SCHED_YIELD:
> 7244 kvm_sched_yield(vcpu->kvm, a0);
> 7245 ret = 0;
> 7246 break;
> 7247 default:
> 7248 ret = -KVM_ENOSYS;
> 7249 break;
> 7250 }
> 7251 out:
> 7252 if (!op_64_bit)
> 7253 ret = (u32)ret;
> 7254 kvm_rax_write(vcpu, ret);
> 7255
> 7256 ++vcpu->stat.hypercalls;
> 7257 return kvm_skip_emulated_instruction(vcpu);
> 7258 }
> 7259 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
> 7260
>
> ---
> 0-DAY kernel test infrastructure Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Wanpeng,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on kvm/linux-next]
[also build test WARNING on v5.2-rc2 next-20190524]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Wanpeng-Li/KVM-X86-Implement-PV-sched-yield-hypercall/20190528-132021
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
reproduce:
# apt-get install sparse
# sparse version: v0.6.1-rc1-7-g2b96cd8-dirty
make ARCH=x86_64 allmodconfig
make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
arch/x86/kvm/x86.c:2379:38: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void const [noderef] <asn:1> * @@ got const [noderef] <asn:1> * @@
arch/x86/kvm/x86.c:2379:38: sparse: expected void const [noderef] <asn:1> *
arch/x86/kvm/x86.c:2379:38: sparse: got unsigned char [usertype] *
arch/x86/kvm/x86.c:7181:15: sparse: sparse: incompatible types in comparison expression (different address spaces):
arch/x86/kvm/x86.c:7181:15: sparse: struct kvm_apic_map [noderef] <asn:4> *
arch/x86/kvm/x86.c:7181:15: sparse: struct kvm_apic_map *
arch/x86/kvm/x86.c:7243:14: sparse: sparse: undefined identifier 'KVM_HC_SCHED_YIELD'
>> arch/x86/kvm/x86.c:7243:14: sparse: sparse: incompatible types for 'case' statement
arch/x86/kvm/x86.c:9408:16: sparse: sparse: incompatible types in comparison expression (different address spaces):
arch/x86/kvm/x86.c:9408:16: sparse: struct kvm_apic_map [noderef] <asn:4> *
arch/x86/kvm/x86.c:9408:16: sparse: struct kvm_apic_map *
>> arch/x86/kvm/x86.c:7193:9: sparse: sparse: context imbalance in 'kvm_sched_yield' - wrong count at exit
arch/x86/kvm/x86.c:7243:14: sparse: sparse: Expected constant expression in case statement
vim +/case +7243 arch/x86/kvm/x86.c
7174
7175 void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
7176 {
7177 struct kvm_vcpu *target;
7178 struct kvm_apic_map *map;
7179
7180 rcu_read_lock();
7181 map = rcu_dereference(kvm->arch.apic_map);
7182
7183 if (unlikely(!map))
7184 goto out;
7185
7186 if (map->phys_map[dest_id]->vcpu) {
7187 target = map->phys_map[dest_id]->vcpu;
7188 rcu_read_unlock();
7189 kvm_vcpu_yield_to(target);
7190 }
7191
7192 out:
> 7193 if (!target)
7194 rcu_read_unlock();
7195 }
7196
7197 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7198 {
7199 unsigned long nr, a0, a1, a2, a3, ret;
7200 int op_64_bit;
7201
7202 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7203 return kvm_hv_hypercall(vcpu);
7204
7205 nr = kvm_rax_read(vcpu);
7206 a0 = kvm_rbx_read(vcpu);
7207 a1 = kvm_rcx_read(vcpu);
7208 a2 = kvm_rdx_read(vcpu);
7209 a3 = kvm_rsi_read(vcpu);
7210
7211 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7212
7213 op_64_bit = is_64_bit_mode(vcpu);
7214 if (!op_64_bit) {
7215 nr &= 0xFFFFFFFF;
7216 a0 &= 0xFFFFFFFF;
7217 a1 &= 0xFFFFFFFF;
7218 a2 &= 0xFFFFFFFF;
7219 a3 &= 0xFFFFFFFF;
7220 }
7221
7222 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
7223 ret = -KVM_EPERM;
7224 goto out;
7225 }
7226
7227 switch (nr) {
7228 case KVM_HC_VAPIC_POLL_IRQ:
7229 ret = 0;
7230 break;
7231 case KVM_HC_KICK_CPU:
7232 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7233 ret = 0;
7234 break;
7235 #ifdef CONFIG_X86_64
7236 case KVM_HC_CLOCK_PAIRING:
7237 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7238 break;
7239 #endif
7240 case KVM_HC_SEND_IPI:
7241 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7242 break;
> 7243 case KVM_HC_SCHED_YIELD:
7244 kvm_sched_yield(vcpu->kvm, a0);
7245 ret = 0;
7246 break;
7247 default:
7248 ret = -KVM_ENOSYS;
7249 break;
7250 }
7251 out:
7252 if (!op_64_bit)
7253 ret = (u32)ret;
7254 kvm_rax_write(vcpu, ret);
7255
7256 ++vcpu->stat.hypercalls;
7257 return kvm_skip_emulated_instruction(vcpu);
7258 }
7259 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
7260
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
The idea is from Xen: when sending a call-function IPI-many to vCPUs,
yield if any of the IPI target vCPUs was preempted. A 17% performance
increase in the ebizzy benchmark can be observed in an over-subscribed
environment. (w/ kvm-pv-tlb disabled, testing TLB flush via
call-function IPI-many, since call-function IPIs are not easy to
trigger from a userspace workload.)
v1 -> v2:
* check map is not NULL
* check map->phys_map[dest_id] is not NULL
* make kvm_sched_yield static
* change dest_id to unsigned long
Wanpeng Li (3):
KVM: X86: Implement PV sched yield in linux guest
KVM: X86: Implement PV sched yield hypercall
KVM: X86: Expose PV_SCHED_YIELD CPUID feature bit to guest
Documentation/virtual/kvm/cpuid.txt | 4 ++++
Documentation/virtual/kvm/hypercalls.txt | 11 +++++++++++
arch/x86/include/uapi/asm/kvm_para.h | 1 +
arch/x86/kernel/kvm.c | 21 +++++++++++++++++++++
arch/x86/kvm/cpuid.c | 3 ++-
arch/x86/kvm/x86.c | 26 ++++++++++++++++++++++++++
include/uapi/linux/kvm_para.h | 1 +
7 files changed, 66 insertions(+), 1 deletion(-)
--
2.7.4
On 28.05.19 02:53, Wanpeng Li wrote:
> From: Wanpeng Li <[email protected]>
>
> The target vCPUs are in runnable state after vcpu_kick and are
> suitable as yield targets. This patch implements the sched yield
> hypercall.
>
> A 17% performance increase in the ebizzy benchmark can be observed in
> an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
> flush via call-function IPI-many, since call-function IPIs are not
> easy to trigger from a userspace workload.)
>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
FWIW, we do have a similar interface in s390.
See arch/s390/kvm/diag.c __diag_time_slice_end_directed for our implementation.
> ---
> arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e7e57de..2ceef51 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7172,6 +7172,26 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
> kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
> }
>
> +void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
> +{
> + struct kvm_vcpu *target;
> + struct kvm_apic_map *map;
> +
> + rcu_read_lock();
> + map = rcu_dereference(kvm->arch.apic_map);
> +
> + if (unlikely(!map))
> + goto out;
> +
> + if (map->phys_map[dest_id]->vcpu) {
> + target = map->phys_map[dest_id]->vcpu;
> + kvm_vcpu_yield_to(target);
> + }
> +
> +out:
> + rcu_read_unlock();
> +}
> +
> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> {
> unsigned long nr, a0, a1, a2, a3, ret;
> @@ -7218,6 +7238,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> case KVM_HC_SEND_IPI:
> ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> break;
> + case KVM_HC_SCHED_YIELD:
> + kvm_sched_yield(vcpu->kvm, a0);
> + ret = 0;
> + break;
> default:
> ret = -KVM_ENOSYS;
> break;
>
On Tue, 28 May 2019 at 17:12, Christian Borntraeger
<[email protected]> wrote:
>
> On 28.05.19 02:53, Wanpeng Li wrote:
> > From: Wanpeng Li <[email protected]>
> >
> > The target vCPUs are in runnable state after vcpu_kick and are
> > suitable as yield targets. This patch implements the sched yield
> > hypercall.
> >
> > A 17% performance increase in the ebizzy benchmark can be observed in
> > an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
> > flush via call-function IPI-many, since call-function IPIs are not
> > easy to trigger from a userspace workload.)
> >
> > Cc: Paolo Bonzini <[email protected]>
> > Cc: Radim Krčmář <[email protected]>
> > Signed-off-by: Wanpeng Li <[email protected]>
>
> FWIW, we do have a similar interface in s390.
>
> See arch/s390/kvm/diag.c __diag_time_slice_end_directed for our implementation.
Good to know this. :)
Regards,
Wanpeng Li
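[Editor's note: for context, the s390 handler mentioned above (the
diagnose 0x9c directed yield) resolves a guest-supplied target CPU id
to a vCPU and yields to it. Below is a simplified sketch, paraphrased
from arch/s390/kvm/diag.c around v5.2 rather than quoted verbatim;
details such as statistics counters and tracing are omitted.]

static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu *tcpu;
	int tid;

	/* The target CPU address is passed in a general-purpose register
	 * selected by a field of the instruction. */
	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];

	/* Yielding to ourselves is a no-op. */
	if (tid == vcpu->vcpu_id)
		return 0;

	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
	if (tcpu)
		kvm_vcpu_yield_to(tcpu);
	return 0;
}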
> On 28 May 2019, at 3:53, Wanpeng Li <[email protected]> wrote:
>
> From: Wanpeng Li <[email protected]>
>
> The target vCPUs are in runnable state after vcpu_kick and are
> suitable as yield targets. This patch implements the sched yield
> hypercall.
>
> A 17% performance increase in the ebizzy benchmark can be observed in
> an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
> flush via call-function IPI-many, since call-function IPIs are not
> easy to trigger from a userspace workload.)
>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e7e57de..2ceef51 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7172,6 +7172,26 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
> kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
> }
>
> +void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
> +{
> + struct kvm_vcpu *target;
> + struct kvm_apic_map *map;
> +
> + rcu_read_lock();
> + map = rcu_dereference(kvm->arch.apic_map);
> +
> + if (unlikely(!map))
> + goto out;
> +
We should have a bounds-check here on “dest_id”.
-Liran
> + if (map->phys_map[dest_id]->vcpu) {
> + target = map->phys_map[dest_id]->vcpu;
> + kvm_vcpu_yield_to(target);
> + }
> +
> +out:
> + rcu_read_unlock();
> +}
> +
> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> {
> unsigned long nr, a0, a1, a2, a3, ret;
> @@ -7218,6 +7238,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> case KVM_HC_SEND_IPI:
> ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> break;
> + case KVM_HC_SCHED_YIELD:
> + kvm_sched_yield(vcpu->kvm, a0);
> + ret = 0;
> + break;
> default:
> ret = -KVM_ENOSYS;
> break;
> --
> 2.7.4
>
On Wed, 29 May 2019 at 20:28, Liran Alon <[email protected]> wrote:
>
>
>
> > On 28 May 2019, at 3:53, Wanpeng Li <[email protected]> wrote:
> >
> > From: Wanpeng Li <[email protected]>
> >
> > The target vCPUs are in runnable state after vcpu_kick and are
> > suitable as yield targets. This patch implements the sched yield
> > hypercall.
> >
> > A 17% performance increase in the ebizzy benchmark can be observed in
> > an over-subscribed environment. (w/ kvm-pv-tlb disabled, testing TLB
> > flush via call-function IPI-many, since call-function IPIs are not
> > easy to trigger from a userspace workload.)
> >
> > Cc: Paolo Bonzini <[email protected]>
> > Cc: Radim Krčmář <[email protected]>
> > Signed-off-by: Wanpeng Li <[email protected]>
> > ---
> > arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
> > 1 file changed, 24 insertions(+)
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index e7e57de..2ceef51 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -7172,6 +7172,26 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
> > kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
> > }
> >
> > +void kvm_sched_yield(struct kvm *kvm, u64 dest_id)
> > +{
> > + struct kvm_vcpu *target;
> > + struct kvm_apic_map *map;
> > +
> > + rcu_read_lock();
> > + map = rcu_dereference(kvm->arch.apic_map);
> > +
> > + if (unlikely(!map))
> > + goto out;
> > +
>
> We should have a bounds-check here on “dest_id”.
Yeah, fix it in v3.
Regards,
Wanpeng Li
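[Editor's note: for reference, a bounds-checked variant could look like
the sketch below. This is hypothetical, illustrating the dest_id check
Liran asked for, the NULL check on the map entry, and moving the yield
out of the RCU read-side critical section (addressing the sparse
"context imbalance" warning); the actual v3 patch may differ.]

static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
{
	struct kvm_vcpu *target = NULL;
	struct kvm_apic_map *map;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	/* Bounds-check dest_id and NULL-check the map entry before use. */
	if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
		target = map->phys_map[dest_id]->vcpu;

	rcu_read_unlock();

	/* Yield outside the RCU read-side critical section; the vCPU's
	 * lifetime is tied to the VM, so target remains valid here. */
	if (target)
		kvm_vcpu_yield_to(target);
}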