From: Isaku Yamahata <[email protected]>
Because kvm_count_lock unnecessarily complicates the KVM locking convention,
drop kvm_count_lock and instead protect kvm_usage_count with kvm_lock for
simplicity. The kvm_arch_hardware_enable/disable() callbacks depend on the
non-preemptibility provided by the spin lock. Add preempt_disable/enable()
around the hardware enable/disable callbacks to keep that assumption.
Opportunistically add some comments on locking.
Suggested-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
---
Documentation/virt/kvm/locking.rst | 14 +++-----
virt/kvm/kvm_main.c | 56 +++++++++++++++++++++++-------
2 files changed, 49 insertions(+), 21 deletions(-)
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 845a561629f1..8957e32aa724 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -216,15 +216,11 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
-
-``kvm_count_lock``
-^^^^^^^^^^^^^^^^^^
-
-:Type: raw_spinlock_t
-:Arch: any
-:Protects: - hardware virtualization enable/disable
-:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
- migration.
+ - kvm_usage_count
+ - hardware virtualization enable/disable
+:Comment: Use cpus_read_lock() for hardware virtualization enable/disable
+ because hardware enabling/disabling must be atomic /wrt
+ CPU hotplug. The lock order is cpus lock => kvm_lock.
``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc55447c4dba..05ede37edc31 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
*/
DEFINE_MUTEX(kvm_lock);
-static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -4996,6 +4995,8 @@ static void hardware_enable_nolock(void *caller_name)
int cpu = raw_smp_processor_id();
int r;
+ WARN_ON_ONCE(preemptible());
+
if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
@@ -5019,7 +5020,7 @@ static int kvm_online_cpu(unsigned int cpu)
if (ret)
return ret;
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
/*
* Abort the CPU online process if hardware virtualization cannot
* be enabled. Otherwise running VMs would encounter unrecoverable
@@ -5028,13 +5029,20 @@ static int kvm_online_cpu(unsigned int cpu)
if (kvm_usage_count) {
WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
+ /*
+ * The arch callback kvm_arch_hardware_enable() assumes that
+ * preemption is disabled for historical reasons. Disable
+ * preemption until all arch callbacks are fixed.
+ */
+ preempt_disable();
hardware_enable_nolock((void *)__func__);
+ preempt_enable();
if (atomic_read(&hardware_enable_failed)) {
atomic_set(&hardware_enable_failed, 0);
ret = -EIO;
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return ret;
}
@@ -5042,6 +5050,8 @@ static void hardware_disable_nolock(void *junk)
{
int cpu = raw_smp_processor_id();
+ WARN_ON_ONCE(preemptible());
+
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -5050,10 +5060,18 @@ static void hardware_disable_nolock(void *junk)
static int kvm_offline_cpu(unsigned int cpu)
{
- raw_spin_lock(&kvm_count_lock);
- if (kvm_usage_count)
+ mutex_lock(&kvm_lock);
+ if (kvm_usage_count) {
+ /*
+ * The arch callback kvm_arch_hardware_disable() assumes that
+ * preemption is disabled for historical reasons. Disable
+ * preemption until all arch callbacks are fixed.
+ */
+ preempt_disable();
hardware_disable_nolock(NULL);
- raw_spin_unlock(&kvm_count_lock);
+ preempt_enable();
+ }
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -5068,9 +5086,11 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_count_lock);
+ cpus_read_lock();
+ mutex_lock(&kvm_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
+ cpus_read_unlock();
}
static int hardware_enable_all(void)
@@ -5088,7 +5108,7 @@ static int hardware_enable_all(void)
* Disable CPU hotplug to prevent this case from happening.
*/
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -5101,7 +5121,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();
return r;
@@ -5708,8 +5728,18 @@ static void kvm_init_debug(void)
static int kvm_suspend(void)
{
- if (kvm_usage_count)
+ /*
+ * The caller ensures that CPU hotplug is disabled by
+ * cpu_hotplug_disable() and other CPUs are offlined. No need for
+ * locking.
+ */
+ lockdep_assert_not_held(&kvm_lock);
+
+ if (kvm_usage_count) {
+ preempt_disable();
hardware_disable_nolock(NULL);
+ preempt_enable();
+ }
return 0;
}
@@ -5723,8 +5753,10 @@ static void kvm_resume(void)
return; /* FIXME: disable KVM */
if (kvm_usage_count) {
- lockdep_assert_not_held(&kvm_count_lock);
+ lockdep_assert_not_held(&kvm_lock);
+ preempt_disable();
hardware_enable_nolock((void *)__func__);
+ preempt_enable();
}
}
--
2.25.1
On Thu, Sep 08, 2022 at 04:25:26PM -0700, [email protected] wrote:
>-
>-``kvm_count_lock``
>-^^^^^^^^^^^^^^^^^^
>-
>-:Type: raw_spinlock_t
>-:Arch: any
>-:Protects: - hardware virtualization enable/disable
>-:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
>- migration.
>+ - kvm_usage_count
>+ - hardware virtualization enable/disable
>+:Comment: Use cpus_read_lock() for hardware virtualization enable/disable
>+ because hardware enabling/disabling must be atomic /wrt
>+ migration. The lock order is cpus lock => kvm_lock.
Probably "/wrt CPU hotplug" is better.
>
>@@ -5708,8 +5728,18 @@ static void kvm_init_debug(void)
>
> static int kvm_suspend(void)
> {
>- if (kvm_usage_count)
>+ /*
>+ * The caller ensures that CPU hotlug is disabled by
^hotplug
>+ * cpu_hotplug_disable() and other CPUs are offlined. No need for
>+ * locking.
>+ */
>+ lockdep_assert_not_held(&kvm_lock);
>+
>+ if (kvm_usage_count) {
>+ preempt_disable();
> hardware_disable_nolock(NULL);
>+ preempt_enable();
kvm_suspend() is called with interrupts disabled, so there is no need to
disable preemption.
/**
* syscore_suspend - Execute all the registered system core suspend callbacks.
*
* This function is executed with one CPU on-line and disabled interrupts.
*/
int syscore_suspend(void)
>+ }
> return 0;
> }
>
>@@ -5723,8 +5753,10 @@ static void kvm_resume(void)
> return; /* FIXME: disable KVM */
>
> if (kvm_usage_count) {
>- lockdep_assert_not_held(&kvm_count_lock);
>+ lockdep_assert_not_held(&kvm_lock);
>+ preempt_disable();
> hardware_enable_nolock((void *)__func__);
>+ preempt_enable();
ditto.
On Fri, Sep 09, 2022 at 11:05:34AM +0800,
Chao Gao <[email protected]> wrote:
> >+ * cpu_hotplug_disable() and other CPUs are offlined. No need for
> >+ * locking.
> >+ */
> >+ lockdep_assert_not_held(&kvm_lock);
> >+
> >+ if (kvm_usage_count) {
> >+ preempt_disable();
> > hardware_disable_nolock(NULL);
> >+ preempt_enable();
>
> kvm_suspend() is called with interrupt disabled. So, no need to disable
> preemption.
>
> /**
> * syscore_suspend - Execute all the registered system core suspend callbacks.
> *
> * This function is executed with one CPU on-line and disabled interrupts.
> */
> int syscore_suspend(void)
Thanks, I'll fix it with a comment.
--
Isaku Yamahata <[email protected]>