2018-07-02 14:20:53

by Tianyu Lan

Subject: [PATCH 0/4] KVM/x86/hyper-V: Introduce PV guest address space mapping flush support

Hyper-V provides the para-virtualization hypercall HvFlushGuestPhysicalAddressSpace
to flush nested VM address space mappings in the L1 hypervisor, which reduces the
overhead of flushing the EPT TLB across vCPUs. The traditional way is to send IPIs
to all affected vCPUs and execute INVEPT on each of them, which triggers several
vmexits for IPI and INVEPT emulation. The PV hypercall flushes the specified EPT
table on all vCPUs via one single hypercall.
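
As an illustration (a simplified sketch, not code from this series; the
helper names here are made up), the control flow being optimized is:

	/*
	 * Illustrative sketch only: try the single PV hypercall
	 * first, and fall back to the traditional IPI + INVEPT
	 * path if it is unavailable or fails.
	 */
	if (pv_flush_supported && !hv_flush_address_space(eptp))
		return;				/* one vmexit total */
	flush_with_ipis_and_invept(kvm);	/* vmexits per vCPU */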

Lan Tianyu (4):
X86/Hyper-V: Add flush HvFlushGuestPhysicalAddressSpace hypercall
support
KVM: Add tlb remote flush callback in kvm_x86_ops.
KVM/VMX: Add identical ept table pointer check
KVM/x86: Add tlb_remote_flush callback support for vmcs

arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/nested.c | 64 ++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/asm/mshyperv.h | 2 ++
arch/x86/kvm/vmx.c | 56 +++++++++++++++++++++++++++++++++
virt/kvm/kvm_main.c | 12 ++++++-
7 files changed, 143 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/hyperv/nested.c
--
2.14.3


2018-07-02 14:19:01

by Tianyu Lan

Subject: [PATCH 1/4] X86/Hyper-V: Add flush HvFlushGuestPhysicalAddressSpace hypercall support

Hyper-V supports the PV hypercall HvFlushGuestPhysicalAddressSpace, which
flushes nested VM address space mappings in the L1 hypervisor and reduces
the overhead of flushing the EPT TLB across vCPUs. This patch implements
a wrapper for it.
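
For reference, a minimal usage sketch (illustrative only; the real
KVM-side caller is added in patch 4/4, and the fallback helper here is
hypothetical):

	/*
	 * Usage sketch: "eptp" is the EPT pointer programmed into
	 * the VMCS; a return value of 0 means the flush succeeded.
	 */
	if (hyperv_flush_guest_mapping(eptp))
		fallback_flush_with_invept(kvm);  /* hypothetical helper */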

Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/nested.c | 64 ++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 8 +++++
arch/x86/include/asm/mshyperv.h | 2 ++
4 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/hyperv/nested.c

diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index b173d404e3df..b21ee65c4101 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,2 +1,2 @@
-obj-y := hv_init.o mmu.o
+obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
new file mode 100644
index 000000000000..74dd38b5221d
--- /dev/null
+++ b/arch/x86/hyperv/nested.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V nested virtualization code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : Lan Tianyu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ */
+
+
+#include <linux/types.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+#include <asm/tlbflush.h>
+
+int hyperv_flush_guest_mapping(u64 as)
+{
+ struct hv_guest_mapping_flush **flush_pcpu;
+ struct hv_guest_mapping_flush *flush;
+ u64 status;
+ unsigned long flags;
+ int ret = -EFAULT;
+
+ if (!hv_hypercall_pg)
+ goto fault;
+
+ local_irq_save(flags);
+
+ flush_pcpu = (struct hv_guest_mapping_flush **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto fault;
+ }
+
+ flush->address_space = as;
+ flush->flags = 0;
+
+ status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
+ flush, NULL);
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ ret = 0;
+
+fault:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b8c89265baf0..08e24f552030 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -309,6 +309,7 @@ struct ms_hyperv_tsc_page {
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106

/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)

struct hv_reenlightenment_control {
@@ -350,6 +351,7 @@ struct hv_tsc_emulation_status {
#define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af

#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
@@ -741,6 +743,12 @@ struct ipi_arg_ex {
struct hv_vpset vp_set;
};

+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+ u64 address_space;
+ u64 flags;
+};
+
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_tlb_flush {
u64 address_space;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..a6a615b49876 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -302,6 +302,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
+int hyperv_flush_guest_mapping(u64 as);

#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -321,6 +322,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
return NULL;
}
+static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */

#ifdef CONFIG_HYPERV_TSCPAGE
--
2.14.3

2018-07-02 14:19:24

by Tianyu Lan

Subject: [PATCH 4/4] KVM/x86: Add tlb_remote_flush callback support for vmcs

Register the tlb_remote_flush callback for VMX when the Hyper-V capability
for nested guest mapping flush is detected. The interface helps reduce the
overhead of flushing the EPT table across vCPUs for a nested VM. The
traditional way is to send IPIs to all affected vCPUs and execute INVEPT
on each of them, which triggers several vmexits for IPI and INVEPT
emulation. Hyper-V provides a hypercall that performs the flush for all
vCPUs at once.
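
Put together, the call path this series establishes looks roughly like
this (a simplified sketch, not literal code):

	kvm_flush_remote_tlbs(kvm)
	    -> kvm_x86_ops->tlb_remote_flush(kvm)	/* patch 2/4 */
	       == hv_remote_flush_tlb(kvm)		/* this patch */
	       -> hyperv_flush_guest_mapping(
			to_kvm_vmx(kvm)->identical_ept_pointer) /* patch 1/4 */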

Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/kvm/vmx.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0b1e4e9fef2b..e42ed4833983 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4778,6 +4778,25 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
}
}

+static int hv_remote_flush_tlb(struct kvm *kvm)
+{
+ int ret;
+
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
+ if (!VALID_PAGE(to_kvm_vmx(kvm)->identical_ept_pointer)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ ret = hyperv_flush_guest_mapping(
+ to_kvm_vmx(kvm)->identical_ept_pointer);
+
+out:
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ return ret;
+}
+
static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
@@ -7567,6 +7586,12 @@ static __init int hardware_setup(void)
if (enable_ept && !cpu_has_vmx_ept_2m_page())
kvm_disable_largepages();

+#if IS_ENABLED(CONFIG_HYPERV)
+ if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
+ && enable_ept)
+ kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
+#endif
+
if (!cpu_has_vmx_ple()) {
ple_gap = 0;
ple_window = 0;
--
2.14.3

2018-07-02 14:19:47

by Tianyu Lan

Subject: [PATCH 3/4] KVM/VMX: Add identical ept table pointer check

This patch checks the EPT table pointer of each vCPU when EPT tables are
set, and stores the identical EPT table pointer if all EPT table pointers
of a single VM are the same. This is in support of the para-virt EPT
flush hypercall.

Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/kvm/vmx.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1689f433f3a0..0b1e4e9fef2b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -194,6 +194,9 @@ struct kvm_vmx {
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
+
+ u64 identical_ept_pointer;
+ spinlock_t ept_pointer_lock;
};

#define NR_AUTOLOAD_MSRS 8
@@ -853,6 +856,7 @@ struct vcpu_vmx {
*/
u64 msr_ia32_feature_control;
u64 msr_ia32_feature_control_valid_bits;
+ u64 ept_pointer;
};

enum segment_cache_field {
@@ -4958,6 +4962,29 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}

+static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 tmp_eptp = INVALID_PAGE;
+ int i;
+
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ to_vmx(vcpu)->ept_pointer = eptp;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!VALID_PAGE(tmp_eptp)) {
+ tmp_eptp = to_vmx(vcpu)->ept_pointer;
+ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
+ to_kvm_vmx(kvm)->identical_ept_pointer = INVALID_PAGE;
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ return;
+ }
+ }
+
+ to_kvm_vmx(kvm)->identical_ept_pointer = tmp_eptp;
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+}
+
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
unsigned long guest_cr3;
@@ -4967,6 +4994,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
+ check_ept_pointer(vcpu, eptp);
+
if (enable_unrestricted_guest || is_paging(vcpu) ||
is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
@@ -10383,6 +10412,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)

static int vmx_vm_init(struct kvm *kvm)
{
+ spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
if (!ple_gap)
kvm->arch.pause_in_guest = true;
return 0;
--
2.14.3

2018-07-02 14:19:47

by Tianyu Lan

Subject: [PATCH 2/4] KVM: Add tlb remote flush callback in kvm_x86_ops.

This patch provides a way for platforms to register a TLB remote flush
callback; this helps optimize TLB flushes across vCPUs in the nested
virtualization case.

Signed-off-by: Lan Tianyu <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 12 +++++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c13cd28d9d1b..94d7437d84cb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -973,6 +973,7 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);

void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ int (*tlb_remote_flush)(struct kvm *kvm);

void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..580bed127dd0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -256,11 +256,21 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
+ long dirty_count;
+
+ /*
+ * Call tlb_remote_flush first and fall back to the old
+ * way if it returns failure.
+ */
+ if (kvm_x86_ops->tlb_remote_flush &&
+ !kvm_x86_ops->tlb_remote_flush(kvm))
+ return;
+
/*
* Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
* kvm_make_all_cpus_request.
*/
- long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
+ dirty_count = smp_load_acquire(&kvm->tlbs_dirty);

/*
* We want to publish modifications to the page tables before reading
--
2.14.3

2018-07-02 15:12:30

by Vitaly Kuznetsov

Subject: Re: [PATCH 3/4] KVM/VMX: Add identical ept table pointer check

Tianyu Lan <[email protected]> writes:

> This patch checks the EPT table pointer of each vCPU when EPT tables are
> set, and stores the identical EPT table pointer if all EPT table pointers
> of a single VM are the same. This is in support of the para-virt EPT
> flush hypercall.
>
> Signed-off-by: Lan Tianyu <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1689f433f3a0..0b1e4e9fef2b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -194,6 +194,9 @@ struct kvm_vmx {
> unsigned int tss_addr;
> bool ept_identity_pagetable_done;
> gpa_t ept_identity_map_addr;
> +
> + u64 identical_ept_pointer;
> + spinlock_t ept_pointer_lock;
> };
>
> #define NR_AUTOLOAD_MSRS 8
> @@ -853,6 +856,7 @@ struct vcpu_vmx {
> */
> u64 msr_ia32_feature_control;
> u64 msr_ia32_feature_control_valid_bits;
> + u64 ept_pointer;
> };
>
> enum segment_cache_field {
> @@ -4958,6 +4962,29 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
> return eptp;
> }
>
> +static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + u64 tmp_eptp = INVALID_PAGE;
> + int i;
> +
> + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
> + to_vmx(vcpu)->ept_pointer = eptp;
> +
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + if (!VALID_PAGE(tmp_eptp)) {
> + tmp_eptp = to_vmx(vcpu)->ept_pointer;
> + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
> + to_kvm_vmx(kvm)->identical_ept_pointer = INVALID_PAGE;
> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
> + return;
> + }
> + }
> +
> + to_kvm_vmx(kvm)->identical_ept_pointer = tmp_eptp;
> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);

It seems we can get away with identical_ept_pointer being just 'bool':
go through the vCPU list and compare each vCPU's ept_pointer with the
ept_pointer of the current vCPU. It would also make sense to rename it
to something like 'ept_pointers_match'.
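
E.g. something along these lines (a completely untested sketch):

	struct kvm_vcpu *tmp;
	bool match = true;
	int i;

	/* compare every vCPU's EPT pointer against the current one */
	kvm_for_each_vcpu(i, tmp, kvm)
		if (to_vmx(tmp)->ept_pointer != to_vmx(vcpu)->ept_pointer) {
			match = false;
			break;
		}
	to_kvm_vmx(kvm)->ept_pointers_match = match;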

I'm also not sure we need a dedicated ept_pointer_lock, can't we just
use the already existent mmu_lock from struct kvm?

> +}
> +
> static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> {
> unsigned long guest_cr3;
> @@ -4967,6 +4994,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> if (enable_ept) {
> eptp = construct_eptp(vcpu, cr3);
> vmcs_write64(EPT_POINTER, eptp);
> + check_ept_pointer(vcpu, eptp);

Do we always get here when we need to? E.g., do we need to enforce
CPU_BASED_CR3_STORE_EXITING?

> +
> if (enable_unrestricted_guest || is_paging(vcpu) ||
> is_guest_mode(vcpu))
> guest_cr3 = kvm_read_cr3(vcpu);
> @@ -10383,6 +10412,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>
> static int vmx_vm_init(struct kvm *kvm)
> {
> + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
> +
> if (!ple_gap)
> kvm->arch.pause_in_guest = true;
> return 0;

--
Vitaly

2018-07-03 02:42:20

by kernel test robot

Subject: Re: [PATCH 2/4] KVM: Add tlb remote flush callback in kvm_x86_ops.

Hi Tianyu,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.18-rc3 next-20180702]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Tianyu-Lan/KVM-x86-hyper-V-Introduce-PV-guest-address-space-mapping-flush-support/20180703-012046
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=powerpc

All errors (new ones prefixed by >>):

arch/powerpc/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_flush_remote_tlbs':
>> arch/powerpc/kvm/../../../virt/kvm/kvm_main.c:265:6: error: 'kvm_x86_ops' undeclared (first use in this function); did you mean 'kvm_xive_ops'?
if (kvm_x86_ops->tlb_remote_flush &&
^~~~~~~~~~~
kvm_xive_ops
arch/powerpc/kvm/../../../virt/kvm/kvm_main.c:265:6: note: each undeclared identifier is reported only once for each function it appears in

vim +265 arch/powerpc/kvm/../../../virt/kvm/kvm_main.c

255
256 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
257 void kvm_flush_remote_tlbs(struct kvm *kvm)
258 {
259 long dirty_count;
260
261 /*
262 * Call tlb_remote_flush first and fall back to the old
263 * way if it returns failure.
264 */
> 265 if (kvm_x86_ops->tlb_remote_flush &&
266 !kvm_x86_ops->tlb_remote_flush(kvm))
267 return;
268
269 /*
270 * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
271 * kvm_make_all_cpus_request.
272 */
273 dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
274
275 /*
276 * We want to publish modifications to the page tables before reading
277 * mode. Pairs with a memory barrier in arch-specific code.
278 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest
279 * and smp_mb in walk_shadow_page_lockless_begin/end.
280 * - powerpc: smp_mb in kvmppc_prepare_to_enter.
281 *
282 * There is already an smp_mb__after_atomic() before
283 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
284 * barrier here.
285 */
286 if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
287 ++kvm->stat.remote_tlb_flush;
288 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
289 }
290 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
291 #endif
292
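
For the record, one way to resolve this kind of breakage (a hedged
sketch, not something proposed in this thread) is to keep arch-specific
ops out of the generic virt/kvm/kvm_main.c by routing through an arch
hook that defaults to a stub, with only x86 overriding it:

	/* sketch: generic header side, stub unless the arch opts in */
	#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
	static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
	{
		return -ENOTSUPP;
	}
	#endif

	/* sketch: kvm_flush_remote_tlbs() would then start with */
	if (!kvm_arch_flush_remote_tlb(kvm))
		return;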

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation



2018-07-04 06:19:39

by Tianyu Lan

Subject: Re: [PATCH 3/4] KVM/VMX: Add identical ept table pointer check

Hi Vitaly:
Thanks for your review.

On 7/2/2018 11:09 PM, Vitaly Kuznetsov wrote:
> Tianyu Lan <[email protected]> writes:
>
>> This patch checks the EPT table pointer of each vCPU when EPT tables
>> are set, and stores the identical EPT table pointer if all EPT table
>> pointers of a single VM are the same. This is in support of the
>> para-virt EPT flush hypercall.
>>
>> Signed-off-by: Lan Tianyu <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 31 +++++++++++++++++++++++++++++++
>> 1 file changed, 31 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1689f433f3a0..0b1e4e9fef2b 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -194,6 +194,9 @@ struct kvm_vmx {
>> unsigned int tss_addr;
>> bool ept_identity_pagetable_done;
>> gpa_t ept_identity_map_addr;
>> +
>> + u64 identical_ept_pointer;
>> + spinlock_t ept_pointer_lock;
>> };
>>
>> #define NR_AUTOLOAD_MSRS 8
>> @@ -853,6 +856,7 @@ struct vcpu_vmx {
>> */
>> u64 msr_ia32_feature_control;
>> u64 msr_ia32_feature_control_valid_bits;
>> + u64 ept_pointer;
>> };
>>
>> enum segment_cache_field {
>> @@ -4958,6 +4962,29 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
>> return eptp;
>> }
>>
>> +static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
>> +{
>> + struct kvm *kvm = vcpu->kvm;
>> + u64 tmp_eptp = INVALID_PAGE;
>> + int i;
>> +
>> + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + to_vmx(vcpu)->ept_pointer = eptp;
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + if (!VALID_PAGE(tmp_eptp)) {
>> + tmp_eptp = to_vmx(vcpu)->ept_pointer;
>> + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
>> + to_kvm_vmx(kvm)->identical_ept_pointer = INVALID_PAGE;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + return;
>> + }
>> + }
>> +
>> + to_kvm_vmx(kvm)->identical_ept_pointer = tmp_eptp;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>
> It seems we can get away with identical_ept_pointer being just 'bool':
> go through the vCPU list and compare each vCPU's ept_pointer with the
> ept_pointer of the current vCPU. It would also make sense to rename it
> to something like 'ept_pointers_match'.

Yes, that's another approach. But kvm_flush_remote_tlbs() only passes
struct kvm, so we would still need to pick an arbitrary vCPU (maybe
always vcpu0) to get the EPT pointer when we call the flush hypercall.

>
> I'm also not sure we need a dedicated ept_pointer_lock; can't we just
> use the already existing mmu_lock from struct kvm?

The lock is to make sure the identical EPT pointer won't be changed
while the flush hypercall is in progress. kvm_flush_remote_tlbs() is
already called under mmu_lock protection (e.g. from
kvm_mmu_notifier_invalidate_range_start()), so we can't reuse that lock
in hv_remote_flush_tlb(); otherwise it would deadlock.
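
Roughly, the call chain that would deadlock is:

	kvm_mmu_notifier_invalidate_range_start()
	    spin_lock(&kvm->mmu_lock);
	    kvm_flush_remote_tlbs(kvm)
	        hv_remote_flush_tlb(kvm)
	            spin_lock(&kvm->mmu_lock);	/* <-- would deadlock */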

>> +}
>> +
>> static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>> {
>> unsigned long guest_cr3;
>> @@ -4967,6 +4994,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>> if (enable_ept) {
>> eptp = construct_eptp(vcpu, cr3);
>> vmcs_write64(EPT_POINTER, eptp);
>> + check_ept_pointer(vcpu, eptp);
>
> Do we always get here when we need to? E.g., do we need to enforce
> CPU_BASED_CR3_STORE_EXITING?
>

vmx_set_cr3() is the only place where the EPT table pointer is set, so
it is always called when the EPT table pointer changes.

When EPT is enabled, CPU_BASED_CR3_STORE_EXITING is not necessary,
because we don't need to shadow the CR3 page table.

>> +
>> if (enable_unrestricted_guest || is_paging(vcpu) ||
>> is_guest_mode(vcpu))
>> guest_cr3 = kvm_read_cr3(vcpu);
>> @@ -10383,6 +10412,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>> static int vmx_vm_init(struct kvm *kvm)
>> {
>> + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> +
>> if (!ple_gap)
>> kvm->arch.pause_in_guest = true;
>> return 0;
>

2018-07-04 06:27:21

by Tianyu Lan

Subject: Re: [PATCH 3/4] KVM/VMX: Add identical ept table pointer check

Hi Sean:
Thanks for your review.


On 7/3/2018 1:29 AM, Sean Christopherson wrote:
> On Mon, Jul 02, 2018 at 02:17:29PM +0000, Tianyu Lan wrote:
>> This patch checks the EPT table pointer of each vCPU when EPT tables
>> are set, and stores the identical EPT table pointer if all EPT table
>> pointers of a single VM are the same. This is in support of the
>> para-virt EPT flush hypercall.
>>
>> Signed-off-by: Lan Tianyu <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 31 +++++++++++++++++++++++++++++++
>> 1 file changed, 31 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1689f433f3a0..0b1e4e9fef2b 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -194,6 +194,9 @@ struct kvm_vmx {
>> unsigned int tss_addr;
>> bool ept_identity_pagetable_done;
>> gpa_t ept_identity_map_addr;
>> +
>> + u64 identical_ept_pointer;
>> + spinlock_t ept_pointer_lock;
>> };
>>
>> #define NR_AUTOLOAD_MSRS 8
>> @@ -853,6 +856,7 @@ struct vcpu_vmx {
>> */
>> u64 msr_ia32_feature_control;
>> u64 msr_ia32_feature_control_valid_bits;
>> + u64 ept_pointer;
>> };
>>
>> enum segment_cache_field {
>> @@ -4958,6 +4962,29 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
>> return eptp;
>> }
>>
>> +static void check_ept_pointer(struct kvm_vcpu *vcpu, u64 eptp)
>> +{
>> + struct kvm *kvm = vcpu->kvm;
>> + u64 tmp_eptp = INVALID_PAGE;
>> + int i;
>> +
>> + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + to_vmx(vcpu)->ept_pointer = eptp;
>> +
>> + kvm_for_each_vcpu(i, vcpu, kvm) {
>> + if (!VALID_PAGE(tmp_eptp)) {
>> + tmp_eptp = to_vmx(vcpu)->ept_pointer;
>> + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
>> + to_kvm_vmx(kvm)->identical_ept_pointer = INVALID_PAGE;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> + return;
>> + }
>> + }
>> +
>> + to_kvm_vmx(kvm)->identical_ept_pointer = tmp_eptp;
>> + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> +}
>> +
>> static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>> {
>> unsigned long guest_cr3;
>> @@ -4967,6 +4994,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>> if (enable_ept) {
>> eptp = construct_eptp(vcpu, cr3);
>> vmcs_write64(EPT_POINTER, eptp);
>> + check_ept_pointer(vcpu, eptp);
>
> Shouldn't this call, or the function itself, be conditional on Hyper-V
> or remote flushing, e.g. by checking kvm_x86_ops->tlb_remote_flush?
>

Yes, good suggestion; I will update it in the next version.

>> +
>> if (enable_unrestricted_guest || is_paging(vcpu) ||
>> is_guest_mode(vcpu))
>> guest_cr3 = kvm_read_cr3(vcpu);
>> @@ -10383,6 +10412,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>> static int vmx_vm_init(struct kvm *kvm)
>> {
>> + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
>> +
>> if (!ple_gap)
>> kvm->arch.pause_in_guest = true;
>> return 0;
>> --
>> 2.14.3