Since the current implementation of VMCS12 does a memcpy in and out
of guest memory, we do not need current_vmcs12 and current_vmcs12_page
anymore. current_vmptr is enough to read and write the VMCS12.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/vmx.c | 23 ++++++-----------------
1 file changed, 6 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b37161808352..142f16ebdca2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -416,9 +416,6 @@ struct nested_vmx {
/* The guest-physical address of the current VMCS L1 keeps for L2 */
gpa_t current_vmptr;
- /* The host-usable pointer to the above */
- struct page *current_vmcs12_page;
- struct vmcs12 *current_vmcs12;
/*
* Cache of the guest's VMCS, existing outside of guest memory.
* Loaded from guest memory during VMPTRLD. Flushed to guest
@@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
if (vmx->nested.current_vmptr == -1ull)
return;
- /* current_vmptr and current_vmcs12 are always set/reset together */
- if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
- return;
-
if (enable_shadow_vmcs) {
/* copy to memory all shadowed fields in case
they were modified */
@@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
vmx->nested.posted_intr_nv = -1;
/* Flush VMCS12 to guest memory */
- memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
- VMCS12_SIZE);
+ kvm_vcpu_write_guest_page(&vmx->vcpu,
+ vmx->nested.current_vmptr >> PAGE_SHIFT,
+ vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
- kunmap(vmx->nested.current_vmcs12_page);
- nested_release_page(vmx->nested.current_vmcs12_page);
vmx->nested.current_vmptr = -1ull;
- vmx->nested.current_vmcs12 = NULL;
}
/*
@@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
}
nested_release_vmcs12(vmx);
- vmx->nested.current_vmcs12 = new_vmcs12;
- vmx->nested.current_vmcs12_page = page;
/*
* Load VMCS12 from guest memory since it is not already
* cached.
*/
- memcpy(vmx->nested.cached_vmcs12,
- vmx->nested.current_vmcs12, VMCS12_SIZE);
+ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
+ kunmap(page);
+
set_current_vmptr(vmx, vmptr);
}
@@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
- vmx->nested.current_vmcs12 = NULL;
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
--
1.8.3.1
On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <[email protected]> wrote:
> Since the current implementation of VMCS12 does a memcpy in and out
> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> anymore. current_vmptr is enough to read and write the VMCS12.
This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
VMCS12 page by using kvm_write_guest. nested_release_page() only marks
the struct page dirty.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 23 ++++++-----------------
> 1 file changed, 6 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index b37161808352..142f16ebdca2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -416,9 +416,6 @@ struct nested_vmx {
>
> /* The guest-physical address of the current VMCS L1 keeps for L2 */
> gpa_t current_vmptr;
> - /* The host-usable pointer to the above */
> - struct page *current_vmcs12_page;
> - struct vmcs12 *current_vmcs12;
> /*
> * Cache of the guest's VMCS, existing outside of guest memory.
> * Loaded from guest memory during VMPTRLD. Flushed to guest
> @@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
> if (vmx->nested.current_vmptr == -1ull)
> return;
>
> - /* current_vmptr and current_vmcs12 are always set/reset together */
> - if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
> - return;
> -
> if (enable_shadow_vmcs) {
> /* copy to memory all shadowed fields in case
> they were modified */
> @@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
> vmx->nested.posted_intr_nv = -1;
>
> /* Flush VMCS12 to guest memory */
> - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
> - VMCS12_SIZE);
> + kvm_vcpu_write_guest_page(&vmx->vcpu,
> + vmx->nested.current_vmptr >> PAGE_SHIFT,
> + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
Have you hit any "suspicious RCU usage" error messages during VM
teardown with this patch? We did when we replaced memcpy with
kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
held in one of the teardown paths. kvm_write_guest() expects it to be
held in order to access memslots.
We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
that patch along with a few other nVMX dirty tracking related patches
I've been meaning to get upstreamed.
>
> - kunmap(vmx->nested.current_vmcs12_page);
> - nested_release_page(vmx->nested.current_vmcs12_page);
> vmx->nested.current_vmptr = -1ull;
> - vmx->nested.current_vmcs12 = NULL;
> }
>
> /*
> @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> }
>
> nested_release_vmcs12(vmx);
> - vmx->nested.current_vmcs12 = new_vmcs12;
> - vmx->nested.current_vmcs12_page = page;
> /*
> * Load VMCS12 from guest memory since it is not already
> * cached.
> */
> - memcpy(vmx->nested.cached_vmcs12,
> - vmx->nested.current_vmcs12, VMCS12_SIZE);
> + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
> + kunmap(page);
+ nested_release_page_clean(page);
> +
> set_current_vmptr(vmx, vmptr);
> }
>
> @@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>
> vmx->nested.posted_intr_nv = -1;
> vmx->nested.current_vmptr = -1ull;
> - vmx->nested.current_vmcs12 = NULL;
>
> vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
>
> --
> 1.8.3.1
>
On 27.07.2017 15:54, Paolo Bonzini wrote:
> Since the current implementation of VMCS12 does a memcpy in and out
> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> anymore. current_vmptr is enough to read and write the VMCS12.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
This looks like the right thing to do!
(and as mentioned, also properly marks the page as dirty)
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David
2017-07-28 1:20 GMT+08:00 David Matlack <[email protected]>:
> On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <[email protected]> wrote:
>> Since the current implementation of VMCS12 does a memcpy in and out
>> of guest memory, we do not need current_vmcs12 and current_vmcs12_page
>> anymore. current_vmptr is enough to read and write the VMCS12.
>
> This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
> VMCS12 page by using kvm_write_guest. nested_release_page() only marks
> the struct page dirty.
>
>>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 23 ++++++-----------------
>> 1 file changed, 6 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index b37161808352..142f16ebdca2 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -416,9 +416,6 @@ struct nested_vmx {
>>
>> /* The guest-physical address of the current VMCS L1 keeps for L2 */
>> gpa_t current_vmptr;
>> - /* The host-usable pointer to the above */
>> - struct page *current_vmcs12_page;
>> - struct vmcs12 *current_vmcs12;
>> /*
>> * Cache of the guest's VMCS, existing outside of guest memory.
>> * Loaded from guest memory during VMPTRLD. Flushed to guest
>> @@ -7183,10 +7180,6 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>> if (vmx->nested.current_vmptr == -1ull)
>> return;
>>
>> - /* current_vmptr and current_vmcs12 are always set/reset together */
>> - if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
>> - return;
>> -
>> if (enable_shadow_vmcs) {
>> /* copy to memory all shadowed fields in case
>> they were modified */
>> @@ -7199,13 +7192,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>> vmx->nested.posted_intr_nv = -1;
>>
>> /* Flush VMCS12 to guest memory */
>> - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12,
>> - VMCS12_SIZE);
>> + kvm_vcpu_write_guest_page(&vmx->vcpu,
>> + vmx->nested.current_vmptr >> PAGE_SHIFT,
>> + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
>
> Have you hit any "suspicious RCU usage" error messages during VM
Yeah, I observe this splat when testing Paolo's patch today.
[87214.855344] =============================
[87214.855346] WARNING: suspicious RCU usage
[87214.855348] 4.13.0-rc2+ #2 Tainted: G OE
[87214.855350] -----------------------------
[87214.855352] ./include/linux/kvm_host.h:573 suspicious
rcu_dereference_check() usage!
[87214.855353]
other info that might help us debug this:
[87214.855355]
rcu_scheduler_active = 2, debug_locks = 1
[87214.855357] 1 lock held by qemu-system-x86/17059:
[87214.855359] #0: (&vcpu->mutex){+.+.+.}, at: [<ffffffffc051bb12>]
vcpu_load+0x22/0x80 [kvm]
[87214.855396]
stack backtrace:
[87214.855399] CPU: 3 PID: 17059 Comm: qemu-system-x86 Tainted: G
OE 4.13.0-rc2+ #2
[87214.855401] Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY,
BIOS FBKTC1AUS 02/16/2016
[87214.855403] Call Trace:
[87214.855408] dump_stack+0x99/0xce
[87214.855413] lockdep_rcu_suspicious+0xc5/0x100
[87214.855423] kvm_vcpu_gfn_to_memslot+0x166/0x180 [kvm]
[87214.855432] kvm_vcpu_write_guest_page+0x24/0x50 [kvm]
[87214.855438] free_nested.part.76+0x76/0x270 [kvm_intel]
[87214.855443] vmx_free_vcpu+0x7a/0xc0 [kvm_intel]
[87214.855454] kvm_arch_destroy_vm+0x104/0x1d0 [kvm]
[87214.855463] kvm_put_kvm+0x17a/0x2b0 [kvm]
[87214.855473] kvm_vm_release+0x21/0x30 [kvm]
[87214.855477] __fput+0xfb/0x240
[87214.855482] ____fput+0xe/0x10
[87214.855485] task_work_run+0x7e/0xb0
[87214.855490] do_exit+0x323/0xcf0
[87214.855494] ? get_signal+0x318/0x930
[87214.855498] ? _raw_spin_unlock_irq+0x2c/0x60
[87214.855503] do_group_exit+0x50/0xd0
[87214.855507] get_signal+0x24f/0x930
[87214.855514] do_signal+0x37/0x750
[87214.855518] ? __might_fault+0x3e/0x90
[87214.855523] ? __might_fault+0x85/0x90
[87214.855527] ? exit_to_usermode_loop+0x2b/0x100
[87214.855531] ? __this_cpu_preempt_check+0x13/0x20
[87214.855535] exit_to_usermode_loop+0xab/0x100
[87214.855539] syscall_return_slowpath+0x153/0x160
[87214.855542] entry_SYSCALL_64_fastpath+0xc0/0xc2
[87214.855545] RIP: 0033:0x7ff40d24a26d
Regards,
Wanpeng Li
> teardown with this patch? We did when we replaced memcpy with
> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
> held in one of the teardown paths. kvm_write_guest() expects it to be
> held in order to access memslots.
>
> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
> that patch along with a few other nVMX dirty tracking related patches
> I've been meaning to get upstreamed.
>
>>
>> - kunmap(vmx->nested.current_vmcs12_page);
>> - nested_release_page(vmx->nested.current_vmcs12_page);
>> vmx->nested.current_vmptr = -1ull;
>> - vmx->nested.current_vmcs12 = NULL;
>> }
>>
>> /*
>> @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>> }
>>
>> nested_release_vmcs12(vmx);
>> - vmx->nested.current_vmcs12 = new_vmcs12;
>> - vmx->nested.current_vmcs12_page = page;
>> /*
>> * Load VMCS12 from guest memory since it is not already
>> * cached.
>> */
>> - memcpy(vmx->nested.cached_vmcs12,
>> - vmx->nested.current_vmcs12, VMCS12_SIZE);
>> + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
>> + kunmap(page);
>
> + nested_release_page_clean(page);
>
>> +
>> set_current_vmptr(vmx, vmptr);
>> }
>>
>> @@ -9354,7 +9344,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>
>> vmx->nested.posted_intr_nv = -1;
>> vmx->nested.current_vmptr = -1ull;
>> - vmx->nested.current_vmcs12 = NULL;
>>
>> vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
>>
>> --
>> 1.8.3.1
>>
On 27/07/2017 19:20, David Matlack wrote:
>> + kvm_vcpu_write_guest_page(&vmx->vcpu,
>> + vmx->nested.current_vmptr >> PAGE_SHIFT,
>> + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
> Have you hit any "suspicious RCU usage" error messages during VM
> teardown with this patch? We did when we replaced memcpy with
> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
> held in one of the teardown paths. kvm_write_guest() expects it to be
> held in order to access memslots.
>
> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
> that patch along with a few other nVMX dirty tracking related patches
> I've been meaning to get upstreamed.
Oh, right. I had this other (untested) patch in the queue after
Christian recently annotated everything with RCU checks:
Paolo
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 890b706d1943..07e3b02a1be3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -477,7 +477,8 @@ struct kvm {
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock));
+ lockdep_is_held(&kvm->slots_lock) ||
+ !refcount_read(&kvm->users_count));
}
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
@@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock));
+ lockdep_is_held(&kvm->slots_lock) ||
+ !refcount_read(&kvm->users_count));
}
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f3f74271f1a9..6a21c98b22bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -655,7 +655,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
r = kvm_arch_init_vm(kvm, type);
@@ -701,6 +700,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
+ refcount_set(&kvm->users_count, 1);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
@@ -717,10 +717,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
hardware_disable_all();
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
- kfree(rcu_access_pointer(kvm->buses[i]));
+ kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm,
- rcu_dereference_protected(kvm->memslots[i], 1));
+ kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -754,9 +754,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
- struct kvm_io_bus *bus;
+ struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
- bus = rcu_dereference_protected(kvm->buses[i], 1);
if (bus)
kvm_io_bus_destroy(bus);
kvm->buses[i] = NULL;
@@ -770,8 +769,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm,
- rcu_dereference_protected(kvm->memslots[i], 1));
+ kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
On 07/28/2017 08:57 AM, Paolo Bonzini wrote:
> On 27/07/2017 19:20, David Matlack wrote:
>>> + kvm_vcpu_write_guest_page(&vmx->vcpu,
>>> + vmx->nested.current_vmptr >> PAGE_SHIFT,
>>> + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
>> Have you hit any "suspicious RCU usage" error messages during VM
>> teardown with this patch? We did when we replaced memcpy with
>> kvm_write_guest a while back. IIRC it was due to kvm->srcu not being
>> held in one of the teardown paths. kvm_write_guest() expects it to be
>> held in order to access memslots.
>>
>> We fixed this by skipping the VMCS12 flush during VMXOFF. I'll send
>> that patch along with a few other nVMX dirty tracking related patches
>> I've been meaning to get upstreamed.
>
> Oh, right. I had this other (untested) patch in the queue after
> Christian recently annotated everything with RCU checks:
>
So you make the checks not trigger for users_count == 0 to cope with
the teardown paths?
Since for users_count==0 all file descriptors are gone, no
memslot/bus can be changed by userspace so this makes sense.
> Paolo
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 890b706d1943..07e3b02a1be3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -477,7 +477,8 @@ struct kvm {
> static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
> {
> return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
> - lockdep_is_held(&kvm->slots_lock));
> + lockdep_is_held(&kvm->slots_lock) ||
> + !refcount_read(&kvm->users_count));
> }
>
> static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
> @@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
> static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
> {
> return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
> - lockdep_is_held(&kvm->slots_lock));
> + lockdep_is_held(&kvm->slots_lock) ||
> + !refcount_read(&kvm->users_count));
> }
>
> static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index f3f74271f1a9..6a21c98b22bf 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -655,7 +655,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
> mutex_init(&kvm->lock);
> mutex_init(&kvm->irq_lock);
> mutex_init(&kvm->slots_lock);
> - refcount_set(&kvm->users_count, 1);
> INIT_LIST_HEAD(&kvm->devices);
>
> r = kvm_arch_init_vm(kvm, type);
> @@ -701,6 +700,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
> if (r)
> goto out_err;
>
> + refcount_set(&kvm->users_count, 1);
> spin_lock(&kvm_lock);
> list_add(&kvm->vm_list, &vm_list);
> spin_unlock(&kvm_lock);
> @@ -717,10 +717,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
> hardware_disable_all();
> out_err_no_disable:
> for (i = 0; i < KVM_NR_BUSES; i++)
> - kfree(rcu_access_pointer(kvm->buses[i]));
> + kfree(kvm_get_bus(kvm, i));
> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> - kvm_free_memslots(kvm,
> - rcu_dereference_protected(kvm->memslots[i], 1));
> + kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
> kvm_arch_free_vm(kvm);
> mmdrop(current->mm);
> return ERR_PTR(r);
> @@ -754,9 +754,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
> spin_unlock(&kvm_lock);
> kvm_free_irq_routing(kvm);
> for (i = 0; i < KVM_NR_BUSES; i++) {
> - struct kvm_io_bus *bus;
> + struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
>
> - bus = rcu_dereference_protected(kvm->buses[i], 1);
> if (bus)
> kvm_io_bus_destroy(bus);
> kvm->buses[i] = NULL;
> @@ -770,8 +769,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
> kvm_arch_destroy_vm(kvm);
> kvm_destroy_devices(kvm);
> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> - kvm_free_memslots(kvm,
> - rcu_dereference_protected(kvm->memslots[i], 1));
> + kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
> cleanup_srcu_struct(&kvm->irq_srcu);
> cleanup_srcu_struct(&kvm->srcu);
> kvm_arch_free_vm(kvm);
>
2017-07-27 10:20-0700, David Matlack:
> On Thu, Jul 27, 2017 at 6:54 AM, Paolo Bonzini <[email protected]> wrote:
> > Since the current implementation of VMCS12 does a memcpy in and out
> > of guest memory, we do not need current_vmcs12 and current_vmcs12_page
> > anymore. current_vmptr is enough to read and write the VMCS12.
>
> This patch also fixes dirty tracking (memslot->dirty_bitmap) of the
> VMCS12 page by using kvm_write_guest. nested_release_page() only marks
> the struct page dirty.
>
> >
> > Signed-off-by: Paolo Bonzini <[email protected]>
> > ---
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > @@ -7623,14 +7614,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> > }
> >
> > nested_release_vmcs12(vmx);
> > - vmx->nested.current_vmcs12 = new_vmcs12;
> > - vmx->nested.current_vmcs12_page = page;
> > /*
> > * Load VMCS12 from guest memory since it is not already
> > * cached.
> > */
> > - memcpy(vmx->nested.cached_vmcs12,
> > - vmx->nested.current_vmcs12, VMCS12_SIZE);
> > + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
> > + kunmap(page);
>
> + nested_release_page_clean(page);
Added this and your note about the dirty bit when applying,
thanks.