Hyper-V provides a para-virtualization hypercall,
HvFlushGuestPhysicalAddressSpace, to flush nested VM address space
mappings in the L1 hypervisor and thereby reduce the overhead of
flushing the EPT TLB across vCPUs. The traditional way is to send IPIs
to all affected vCPUs and execute INVEPT on each one, which triggers
several vmexits for IPI and INVEPT emulation. The PV hypercall flushes
the specified EPT table on all vCPUs via one single hypercall.
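In effect, the series makes the generic remote TLB flush try the
hypercall first and fall back to the IPI path. As a rough sketch of
the resulting control flow (illustrative only: flush_ept_for_vm() is
a made-up wrapper, while the two callees are the real entry points
added by the patches below):

/* Illustrative contrast of the two flush paths. */
static void flush_ept_for_vm(struct kvm *kvm)
{
	/* PV path: one hypercall flushes the EPT TLB on all vCPUs. */
	if (!kvm_arch_hv_flush_remote_tlb(kvm))
		return;

	/*
	 * Traditional path: IPI every vCPU; each IPI and the INVEPT
	 * it triggers must be emulated by the L1 hypervisor, costing
	 * several vmexits per vCPU.
	 */
	kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH);
}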
Changes since v1:
- Fix compilation error for non-x86 platforms.
- Use ept_pointers_match to track whether the EPT table pointers
  are identical, and get the EPT pointer from struct
  vcpu_vmx->ept_pointer.
- Add hyperv_nested_flush_guest_mapping ftrace support
Lan Tianyu (5):
X86/Hyper-V: Add flush HvFlushGuestPhysicalAddressSpace hypercall
support
KVM: Add tlb remote flush callback in kvm_x86_ops.
KVM/VMX: Add identical ept table pointer check
KVM/x86: Add tlb_remote_flush callback support for vmx
X86/Hyper-V: Add hyperv_nested_flush_guest_mapping ftrace support
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/nested.c | 67 +++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
arch/x86/include/asm/kvm_host.h | 11 ++++++
arch/x86/include/asm/mshyperv.h | 2 ++
arch/x86/include/asm/trace/hyperv.h | 14 ++++++++
arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++
include/linux/kvm_host.h | 7 ++++
virt/kvm/kvm_main.c | 11 +++++-
9 files changed, 179 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/hyperv/nested.c
--
2.14.3
Hyper-V supports a pv hypercall, HvFlushGuestPhysicalAddressSpace, to
flush nested VM address space mappings in the L1 hypervisor, reducing
the overhead of flushing the EPT TLB across vCPUs. This patch
implements it.
Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/nested.c | 64 ++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
arch/x86/include/asm/mshyperv.h | 2 ++
4 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/hyperv/nested.c
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index b173d404e3df..b21ee65c4101 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,2 +1,2 @@
-obj-y := hv_init.o mmu.o
+obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
new file mode 100644
index 000000000000..74dd38b5221d
--- /dev/null
+++ b/arch/x86/hyperv/nested.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V nested virtualization code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : Lan Tianyu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ */
+
+
+#include <linux/types.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+#include <asm/tlbflush.h>
+
+int hyperv_flush_guest_mapping(u64 as)
+{
+ struct hv_guest_mapping_flush **flush_pcpu;
+ struct hv_guest_mapping_flush *flush;
+ u64 status;
+ unsigned long flags;
+ int ret = -EFAULT;
+
+ if (!hv_hypercall_pg)
+ goto fault;
+
+ local_irq_save(flags);
+
+ flush_pcpu = (struct hv_guest_mapping_flush **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto fault;
+ }
+
+ flush->address_space = as;
+ flush->flags = 0;
+
+ status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
+ flush, NULL);
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ ret = 0;
+
+fault:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b8c89265baf0..08e24f552030 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -309,6 +309,7 @@ struct ms_hyperv_tsc_page {
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
struct hv_reenlightenment_control {
@@ -350,6 +351,7 @@ struct hv_tsc_emulation_status {
#define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
@@ -741,6 +743,12 @@ struct ipi_arg_ex {
struct hv_vpset vp_set;
};
+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+ u64 address_space;
+ u64 flags;
+};
+
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_tlb_flush {
u64 address_space;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..a6a615b49876 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -302,6 +302,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
+int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -321,6 +322,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
return NULL;
}
+static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
--
2.14.3
This patch adds hyperv_nested_flush_guest_mapping ftrace support to
trace the HvFlushGuestPhysicalAddressSpace hypercall.
Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/hyperv/nested.c | 3 +++
arch/x86/include/asm/trace/hyperv.h | 14 ++++++++++++++
2 files changed, 17 insertions(+)
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index 74dd38b5221d..42a3232f2835 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -25,6 +25,8 @@
#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
+#include <asm/trace/hyperv.h>
+
int hyperv_flush_guest_mapping(u64 as)
{
struct hv_guest_mapping_flush **flush_pcpu;
@@ -59,6 +61,7 @@ int hyperv_flush_guest_mapping(u64 as)
ret = 0;
fault:
+ trace_hyperv_nested_flush_guest_mapping(as, ret);
return ret;
}
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4253bca99989..e1ffe61de8d6 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end)
);
+TRACE_EVENT(hyperv_nested_flush_guest_mapping,
+ TP_PROTO(u64 as, int ret),
+ TP_ARGS(as, ret),
+
+ TP_STRUCT__entry(
+ __field(u64, as)
+ __field(int, ret)
+ ),
+ TP_fast_assign(__entry->as = as;
+ __entry->ret = ret;
+ ),
+ TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
+ );
+
#endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH
--
2.14.3
Register the tlb_remote_flush callback for VMX when the Hyper-V
capability of nested guest mapping flush is detected. The interface
helps to reduce the overhead of flushing the EPT table across vCPUs
for a nested VM. The traditional way is to send IPIs to all affected
vCPUs and execute INVEPT on each one, which triggers several vmexits
for IPI and INVEPT emulation. Hyper-V provides this hypercall to
perform the flush for all vCPUs.
Signed-off-by: Lan Tianyu <[email protected]>
---
Changes since v1:
Use ept_pointers_match to track whether the EPT table pointers are
identical, and get the EPT pointer from struct vcpu_vmx->ept_pointer.
---
arch/x86/kvm/vmx.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8142b2da430a..55fe14d1d4d4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4778,6 +4778,25 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
}
}
+static int hv_remote_flush_tlb(struct kvm *kvm)
+{
+ int ret;
+
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
+ if (!to_kvm_vmx(kvm)->ept_pointers_match) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = hyperv_flush_guest_mapping(
+ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+
+out:
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ return ret;
+}
+
static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
@@ -4968,7 +4987,7 @@ static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
u64 tmp_eptp = INVALID_PAGE;
int i;
- if (!kvm_x86_ops->tlb_remote_flush)
+ if (!kvm_x86_ops->hv_tlb_remote_flush)
return;
spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
@@ -7570,6 +7589,12 @@ static __init int hardware_setup(void)
if (enable_ept && !cpu_has_vmx_ept_2m_page())
kvm_disable_largepages();
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
+ && enable_ept)
+ kvm_x86_ops->hv_tlb_remote_flush = hv_remote_flush_tlb;
+#endif
+
if (!cpu_has_vmx_ple()) {
ple_gap = 0;
ple_window = 0;
--
2.14.3
This patch checks the EPT table pointer of each vCPU when the EPT
tables are set, and records whether all EPT table pointers of a
single VM are identical. This is in support of the para-virt EPT
flush hypercall.
Signed-off-by: Lan Tianyu <[email protected]>
---
Changes since v1:
Replace identical_ept_pointer with ept_pointers_match and
check kvm_x86_ops->tlb_remote_flush in check_ept_pointer().
---
arch/x86/kvm/vmx.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1689f433f3a0..8142b2da430a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -194,6 +194,9 @@ struct kvm_vmx {
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
+
+ bool ept_pointers_match;
+ spinlock_t ept_pointer_lock;
};
#define NR_AUTOLOAD_MSRS 8
@@ -853,6 +856,7 @@ struct vcpu_vmx {
*/
u64 msr_ia32_feature_control;
u64 msr_ia32_feature_control_valid_bits;
+ u64 ept_pointer;
};
enum segment_cache_field {
@@ -4958,6 +4962,32 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}
+static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 tmp_eptp = INVALID_PAGE;
+ int i;
+
+ if (!kvm_x86_ops->tlb_remote_flush)
+ return;
+
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ to_vmx(vcpu)->ept_pointer = eptp;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!VALID_PAGE(tmp_eptp)) {
+ tmp_eptp = to_vmx(vcpu)->ept_pointer;
+ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
+ to_kvm_vmx(kvm)->ept_pointers_match = false;
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ return;
+ }
+ }
+
+ to_kvm_vmx(kvm)->ept_pointers_match = true;
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+}
+
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
unsigned long guest_cr3;
@@ -4967,6 +4997,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
+ check_ept_pointer(vcpu, eptp);
+
if (enable_unrestricted_guest || is_paging(vcpu) ||
is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
@@ -10383,6 +10415,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
static int vmx_vm_init(struct kvm *kvm)
{
+ spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
if (!ple_gap)
kvm->arch.pause_in_guest = true;
return 0;
--
2.14.3
This patch provides a way for platforms to register an hv TLB remote
flush callback, which helps to optimize TLB flush operations across
vCPUs in the nested virtualization case.
Signed-off-by: Lan Tianyu <[email protected]>
---
Changes since v1:
Add kvm_arch_hv_flush_remote_tlb() to avoid a compilation issue
on non-x86 platforms.
---
arch/x86/include/asm/kvm_host.h | 11 +++++++++++
include/linux/kvm_host.h | 7 +++++++
virt/kvm/kvm_main.c | 11 ++++++++++-
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c13cd28d9d1b..d89e4204816c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -973,6 +973,7 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ int (*hv_tlb_remote_flush)(struct kvm *kvm);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1117,6 +1118,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
return kvm_x86_ops->vm_free(kvm);
}
+#define __KVM_HAVE_HV_FLUSH_REMOTE_TLB
+static inline int kvm_arch_hv_flush_remote_tlb(struct kvm *kvm)
+{
+ if (kvm_x86_ops->hv_tlb_remote_flush &&
+ !kvm_x86_ops->hv_tlb_remote_flush(kvm))
+ return 0;
+ else
+ return -EFAULT;
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4ee7bc548a83..0c2c36cb041b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -827,6 +827,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
+#ifndef __KVM_HAVE_HV_FLUSH_REMOTE_TLB
+static inline int kvm_arch_hv_flush_remote_tlb(struct kvm *kvm)
+{
+ return -EFAULT;
+}
+#endif
+
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..c2b5e3273848 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -256,11 +256,20 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
+ long dirty_count;
+
+ /*
+ * Call kvm_arch_hv_flush_remote_tlb() first and fall back to the
+ * old way when it returns failure.
+ */
+ if (!kvm_arch_hv_flush_remote_tlb(kvm))
+ return;
+
/*
* Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
* kvm_make_all_cpus_request.
*/
- long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
+ dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
/*
* We want to publish modifications to the page tables before reading
--
2.14.3
From: Tianyu Lan <[email protected]> Monday, July 9, 2018 2:03 AM
> Hyper-V supports a pv hypercall, HvFlushGuestPhysicalAddressSpace, to
> flush nested VM address space mappings in the L1 hypervisor, reducing
> the overhead of flushing the EPT TLB across vCPUs. This patch
> implements it.
>
> Signed-off-by: Lan Tianyu <[email protected]>
> ---
> arch/x86/hyperv/Makefile | 2 +-
> arch/x86/hyperv/nested.c | 64 ++++++++++++++++++++++++++++++++++++++
> arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
> arch/x86/include/asm/mshyperv.h | 2 ++
> 4 files changed, 75 insertions(+), 1 deletion(-)
> create mode 100644 arch/x86/hyperv/nested.c
> +#include <linux/types.h>
> +#include <asm/hyperv-tlfs.h>
> +#include <asm/mshyperv.h>
> +#include <asm/tlbflush.h>
> +
> +int hyperv_flush_guest_mapping(u64 as)
> +{
> + struct hv_guest_mapping_flush **flush_pcpu;
> + struct hv_guest_mapping_flush *flush;
> + u64 status;
> + unsigned long flags;
> + int ret = -EFAULT;
> +
> + if (!hv_hypercall_pg)
> + goto fault;
> +
> + local_irq_save(flags);
> +
> + flush_pcpu = (struct hv_guest_mapping_flush **)
> + this_cpu_ptr(hyperv_pcpu_input_arg);
> +
> + flush = *flush_pcpu;
> +
> + if (unlikely(!flush)) {
> + local_irq_restore(flags);
> + goto fault;
> + }
> +
> + flush->address_space = as;
> + flush->flags = 0;
> +
> + status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
> + flush, NULL);
Did you consider using a "fast" hypercall? Unless there's some reason I'm
not aware of, a "fast" hypercall would be perfect here as there are 16 bytes
of input and no output. Vitaly recently added hv_do_fast_hypercall16()
in the linux-next tree. See __send_ipi_mask() in hv_apic.c in linux-next
for an example of usage. With a fast hypercall, you don't need the code for
getting the per-cpu input arg or the code for local irq save/restore, so the
code that is left is a lot faster and simpler.
Michael
> + local_irq_restore(flags);
> +
> + if (!(status & HV_HYPERCALL_RESULT_MASK))
> + ret = 0;
> +
> +fault:
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
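A minimal sketch of the rework Michael suggests, assuming the
hv_do_fast_hypercall16(code, input1, input2) signature from
linux-next (a suggestion only, not code from the series):

int hyperv_flush_guest_mapping(u64 as)
{
	u64 status;

	if (!hv_hypercall_pg)
		return -EFAULT;

	/*
	 * 16 bytes of input (address_space and flags == 0), no
	 * output: both values travel in registers, so no per-cpu
	 * input page and no local irq save/restore are needed.
	 */
	status = hv_do_fast_hypercall16(
			HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
			as, 0);

	return (status & HV_HYPERCALL_RESULT_MASK) ? -EFAULT : 0;
}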
Hi Michael:
Thanks for your review.
On 7/11/2018 5:29 AM, Michael Kelley (EOSG) wrote:
> From: Tianyu Lan <[email protected]> Monday, July 9, 2018 2:03 AM
>> Hyper-V supports a pv hypercall, HvFlushGuestPhysicalAddressSpace, to
>> flush nested VM address space mappings in the L1 hypervisor, reducing
>> the overhead of flushing the EPT TLB across vCPUs. This patch
>> implements it.
>>
>> Signed-off-by: Lan Tianyu <[email protected]>
>> ---
>> arch/x86/hyperv/Makefile | 2 +-
>> arch/x86/hyperv/nested.c | 64 ++++++++++++++++++++++++++++++++++++++
>> arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
>> arch/x86/include/asm/mshyperv.h | 2 ++
>> 4 files changed, 75 insertions(+), 1 deletion(-)
>> create mode 100644 arch/x86/hyperv/nested.c
>> +#include <linux/types.h>
>> +#include <asm/hyperv-tlfs.h>
>> +#include <asm/mshyperv.h>
>> +#include <asm/tlbflush.h>
>> +
>> +int hyperv_flush_guest_mapping(u64 as)
>> +{
>> + struct hv_guest_mapping_flush **flush_pcpu;
>> + struct hv_guest_mapping_flush *flush;
>> + u64 status;
>> + unsigned long flags;
>> + int ret = -EFAULT;
>> +
>> + if (!hv_hypercall_pg)
>> + goto fault;
>> +
>> + local_irq_save(flags);
>> +
>> + flush_pcpu = (struct hv_guest_mapping_flush **)
>> + this_cpu_ptr(hyperv_pcpu_input_arg);
>> +
>> + flush = *flush_pcpu;
>> +
>> + if (unlikely(!flush)) {
>> + local_irq_restore(flags);
>> + goto fault;
>> + }
>> +
>> + flush->address_space = as;
>> + flush->flags = 0;
>> +
>> + status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
>> + flush, NULL);
>
> Did you consider using a "fast" hypercall? Unless there's some reason I'm
> not aware of, a "fast" hypercall would be perfect here as there are 16 bytes
> of input and no output. Vitaly recently added hv_do_fast_hypercall16()
> in the linux-next tree. See __send_ipi_mask() in hv_apic.c in linux-next
> for an example of usage. With a fast hypercall, you don't need the code for
> getting the per-cpu input arg or the code for local irq save/restore, so the
> code that is left is a lot faster and simpler.
>
> Michael
>
Good suggestion. But the "fast" hypercall is still not available in
the kvm-next branch; it's in the x86 tip repo. We may rework this
with the "fast" hypercall in the next kernel development cycle if
this patchset is accepted for 4.19.
>> + local_irq_restore(flags);
>> +
>> + if (!(status & HV_HYPERCALL_RESULT_MASK))
>> + ret = 0;
>> +
>> +fault:
>> + return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
On 09/07/2018 11:02, Tianyu Lan wrote:
> +
> + /*
> + * Call kvm_arch_hv_flush_remote_tlb() first and fall back to the
> + * old way when it returns failure.
> + */
Please call it "kvm_arch_flush_remote_tlbs", since Hyper-V should not be
mentioned in the generic code.
Paolo
On 09/07/2018 11:02, Tianyu Lan wrote:
> +static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + u64 tmp_eptp = INVALID_PAGE;
> + int i;
> +
> + if (!kvm_x86_ops->tlb_remote_flush)
> + return;
> +
> + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
> + to_vmx(vcpu)->ept_pointer = eptp;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!VALID_PAGE(tmp_eptp)) {
> + tmp_eptp = to_vmx(vcpu)->ept_pointer;
> + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
> + to_kvm_vmx(kvm)->ept_pointers_match = false;
> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
> + return;
> + }
> + }
> +
> + to_kvm_vmx(kvm)->ept_pointers_match = true;
> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
> +}
> +
Is there any reason to do the check here, rather than the first time the
TLB flush is invoked? You could:
- have a tristate (true, false, check) value for ept_pointers_match
- reset it to "check" in vmx_set_cr3
- set it to either true or false in tlb_remote_flush if it is check, and
do the hypercall if it is true.
Paolo
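A rough sketch of that tristate scheme (the enum values and the
check_ept_pointer_match() helper are illustrative names only, with
ept_pointers_match changed from bool to the enum):

enum ept_pointers_status {
	EPT_POINTERS_CHECK = 0,		/* reset in vmx_set_cr3() */
	EPT_POINTERS_MATCH,
	EPT_POINTERS_MISMATCH,
};

static int hv_remote_flush_tlb(struct kvm *kvm)
{
	int ret = -EFAULT;

	spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);

	/*
	 * Resolve the tristate lazily, on the first remote flush
	 * after some vCPU changed its EPT pointer.
	 */
	if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
		check_ept_pointer_match(kvm);

	if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_MATCH)
		ret = hyperv_flush_guest_mapping(
			to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);

	spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
	return ret;
}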
On 09/07/2018 11:02, Tianyu Lan wrote:
> + /*
> + * Call kvm_arch_hv_tlb_remote first and go back old way when
> + * return failure.
> + */
> + if (!kvm_arch_hv_flush_remote_tlb(kvm))
> + return;
> +
> /*
> * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
> * kvm_make_all_cpus_request.
> */
> - long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
> + dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
Also, the call to kvm_arch_flush_remote_tlbs should not replace the
whole function. It should be something like:
long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
/*
* We want to publish modifications to the page tables before reading
* mode. Pairs with a memory barrier in arch-specific code.
* - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest
* and smp_mb in walk_shadow_page_lockless_begin/end.
* - powerpc: smp_mb in kvmppc_prepare_to_enter.
*
* There is already an smp_mb__after_atomic() before
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
if (!kvm_arch_hv_flush_remote_tlb(kvm) ||
kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
Otherwise, kvm_mmu_notifier_invalidate_range_start will incorrectly skip
a TLB flush.
Thanks,
Paolo
Hi Paolo:
Thanks for the review.
On 7/18/2018 8:01 PM, Paolo Bonzini wrote:
> On 09/07/2018 11:02, Tianyu Lan wrote:
>> + /*
>> + * Call kvm_arch_hv_flush_remote_tlb() first and fall back to the
>> + * old way when it returns failure.
>> + */
>> + if (!kvm_arch_hv_flush_remote_tlb(kvm))
>> + return;
>> +
>> /*
>> * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
>> * kvm_make_all_cpus_request.
>> */
>> - long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
>> + dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
>
> Also, the call to kvm_arch_flush_remote_tlbs should not replace the
> whole function. It should be something like:
>
> long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
>
> /*
> * We want to publish modifications to the page tables before reading
> * mode. Pairs with a memory barrier in arch-specific code.
> * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest
> * and smp_mb in walk_shadow_page_lockless_begin/end.
> * - powerpc: smp_mb in kvmppc_prepare_to_enter.
> *
> * There is already an smp_mb__after_atomic() before
> * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
> * barrier here.
> */
> if (!kvm_arch_hv_flush_remote_tlb(kvm) ||
> kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
> ++kvm->stat.remote_tlb_flush;
> cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
>
> Otherwise, kvm_mmu_notifier_invalidate_range_start will incorrectly skip
> a TLB flush.
Nice catch. Will update in the next version.
>
> Thanks,
>
> Paolo
>
On 7/18/2018 7:59 PM, Paolo Bonzini wrote:
> On 09/07/2018 11:02, Tianyu Lan wrote:
>> +static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
>> +{
>> + struct kvm *kvm = vcpu->kvm;
>> + u64 tmp_eptp = INVALID_PAGE;
>> + int i;
>> +
>> + if (!kvm_x86_ops->tlb_remote_flush)
>> + return;
>> +
>> + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + to_vmx(vcpu)->ept_pointer = eptp;
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + if (!VALID_PAGE(tmp_eptp)) {
>> + tmp_eptp = to_vmx(vcpu)->ept_pointer;
>> + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
>> + to_kvm_vmx(kvm)->ept_pointers_match = false;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + return;
>> + }
>> + }
>> +
>> + to_kvm_vmx(kvm)->ept_pointers_match = true;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> +}
>> +
>
> Is there any reason to do the check here, rather than the first time the
> TLB flush is invoked? You could:
>
> - have a tristate (true, false, check) value for ept_pointers_match
>
> - reset it to "check" in vmx_set_cr3
>
> - set it to either true or false in tlb_remote_flush if it is check, and
> do the hypercall if it is true.
>
Thanks for your suggestion. Will update.
> Paolo
>