2017-08-02 15:56:12

by Paolo Bonzini

[permalink] [raw]
Subject: [PATCH] KVM: avoid using rcu_dereference_protected

During teardown, accesses to memslots and buses are using
rcu_dereference_protected with an always-true condition because
these accesses are done outside the usual mutexes. This
is because the last reference is gone and there cannot be any
concurrent modifications, but rcu_dereference_protected is
ugly and unobvious.

Instead, check the refcount in kvm_get_bus and __kvm_memslots.

Signed-off-by: Paolo Bonzini <[email protected]>
---
include/linux/kvm_host.h | 6 ++++--
virt/kvm/kvm_main.c | 11 ++++-------
2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index aa492cb12ce8..294082cbff0b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -469,7 +469,8 @@ struct kvm {
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock));
+ lockdep_is_held(&kvm->slots_lock) ||
+ !refcount_read(&kvm->users_count));
}

static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
@@ -562,7 +563,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock));
+ lockdep_is_held(&kvm->slots_lock) ||
+ !refcount_read(&kvm->users_count));
}

static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7001657be535..26ae5cb726b1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -705,10 +705,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
hardware_disable_all();
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
- kfree(rcu_access_pointer(kvm->buses[i]));
+ kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm,
- rcu_dereference_protected(kvm->memslots[i], 1));
+ kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -741,9 +740,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
- struct kvm_io_bus *bus;
+ struct kvm_io_bus *bus = kvm_get_bus(kvm, i);

- bus = rcu_dereference_protected(kvm->buses[i], 1);
if (bus)
kvm_io_bus_destroy(bus);
kvm->buses[i] = NULL;
@@ -757,8 +755,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm,
- rcu_dereference_protected(kvm->memslots[i], 1));
+ kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
--
1.8.3.1


2017-08-02 20:16:40

by Radim Krčmář

[permalink] [raw]
Subject: Re: [PATCH] KVM: avoid using rcu_dereference_protected

2017-08-02 17:55+0200, Paolo Bonzini:
> During teardown, accesses to memslots and buses are using
> rcu_dereference_protected with an always-true condition because
> these accesses are done outside the usual mutexes. This
> is because the last reference is gone and there cannot be any
> concurrent modifications, but rcu_dereference_protected is
> ugly and unobvious.
>
> Instead, check the refcount in kvm_get_bus and __kvm_memslots.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---

Probably looks nicer than temporarily taking the slots_lock.
Queued for 4.13,

thanks.

2017-09-13 10:16:57

by Christian Borntraeger

[permalink] [raw]
Subject: Re: [PATCH] KVM: avoid using rcu_dereference_protected

Looks like this is now causing the following splat if
kvm_arch_init_vm fails:

[ 13.107404] =============================
[ 13.107408] WARNING: suspicious RCU usage
[ 13.107412] 4.13.0+ #105 Not tainted
[ 13.107415] -----------------------------
[ 13.107418] ./include/linux/kvm_host.h:481 suspicious rcu_dereference_check() usage!
[ 13.107421]
[ 13.107421] other info that might help us debug this:
[ 13.107421]
[ 13.107425]
[ 13.107425] rcu_scheduler_active = 2, debug_locks = 1
[ 13.107429] no locks held by qemu-system-s39/79.
[ 13.107432]
[ 13.107432] stack backtrace:
[ 13.107436] CPU: 0 PID: 79 Comm: qemu-system-s39 Not tainted 4.13.0+ #105
[ 13.107439] Hardware name: IBM 2964 NC9 704 (KVM/Linux)
[ 13.107442] Call Trace:
[ 13.107448] ([<00000000001140b2>] show_stack+0xea/0xf0)
[ 13.107452] [<00000000008a68a4>] dump_stack+0x94/0xd8
[ 13.107456] [<0000000000134c12>] kvm_dev_ioctl+0x372/0x7a0
[ 13.107460] [<000000000038f940>] do_vfs_ioctl+0xa8/0x6c8
[ 13.107463] [<0000000000390004>] SyS_ioctl+0xa4/0xb8
[ 13.107468] [<00000000008c7a8c>] system_call+0xc4/0x27c
[ 13.107471] no locks held by qemu-system-s39/79.
[ 13.107474]
[ 13.107477] =============================


Something like this seems to do the trick.

Signed-off-by: Christian Borntraeger <[email protected]>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6ed1c20..2d7df5c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -674,6 +674,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
out_err_no_srcu:
hardware_disable_all();
out_err_no_disable:
+ refcount_set(&kvm->users_count, 0);
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)


If ok, let me know and I will spin a proper patch.



On 08/02/2017 05:55 PM, Paolo Bonzini wrote:
> During teardown, accesses to memslots and buses are using
> rcu_dereference_protected with an always-true condition because
> these accesses are done outside the usual mutexes. This
> is because the last reference is gone and there cannot be any
> concurrent modifications, but rcu_dereference_protected is
> ugly and unobvious.
>
> Instead, check the refcount in kvm_get_bus and __kvm_memslots.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> include/linux/kvm_host.h | 6 ++++--
> virt/kvm/kvm_main.c | 11 ++++-------
> 2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index aa492cb12ce8..294082cbff0b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -469,7 +469,8 @@ struct kvm {
> static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
> {
> return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
> - lockdep_is_held(&kvm->slots_lock));
> + lockdep_is_held(&kvm->slots_lock) ||
> + !refcount_read(&kvm->users_count));
> }
>
> static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
> @@ -562,7 +563,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
> {
> return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
> - lockdep_is_held(&kvm->slots_lock));
> + lockdep_is_held(&kvm->slots_lock) ||
> + !refcount_read(&kvm->users_count));
> }
>
> static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 7001657be535..26ae5cb726b1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -705,10 +705,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
> hardware_disable_all();
> out_err_no_disable:
> for (i = 0; i < KVM_NR_BUSES; i++)
> - kfree(rcu_access_pointer(kvm->buses[i]));
> + kfree(kvm_get_bus(kvm, i));
> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> - kvm_free_memslots(kvm,
> - rcu_dereference_protected(kvm->memslots[i], 1));
> + kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
> kvm_arch_free_vm(kvm);
> mmdrop(current->mm);
> return ERR_PTR(r);
> @@ -741,9 +740,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
> spin_unlock(&kvm_lock);
> kvm_free_irq_routing(kvm);
> for (i = 0; i < KVM_NR_BUSES; i++) {
> - struct kvm_io_bus *bus;
> + struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
>
> - bus = rcu_dereference_protected(kvm->buses[i], 1);
> if (bus)
> kvm_io_bus_destroy(bus);
> kvm->buses[i] = NULL;
> @@ -757,8 +755,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
> kvm_arch_destroy_vm(kvm);
> kvm_destroy_devices(kvm);
> for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> - kvm_free_memslots(kvm,
> - rcu_dereference_protected(kvm->memslots[i], 1));
> + kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
> cleanup_srcu_struct(&kvm->irq_srcu);
> cleanup_srcu_struct(&kvm->srcu);
> kvm_arch_free_vm(kvm);
>