2022-05-09 01:48:06

by Isaku Yamahata

[permalink] [raw]
Subject: [RFC PATCH v6 003/104] KVM: Refactor CPU compatibility check on module initialiization

From: Isaku Yamahata <[email protected]>

Although non-x86 arch doesn't break as long as I inspected code, it's by
code inspection. This should be reviewed by each arch maintainers.

kvm_init() checks CPU compatibility by calling
kvm_arch_check_processor_compat() on all online CPUs. Move the callback
to hardware_enable_nolock() and add hardware_enable_all() and
hardware_disable_all().
Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
to do arch specific initialization that required hardware_enable_all().
This makes a room for TDX module to initialize on kvm module loading. TDX
module requires all online cpu to enable VMX by VMXON.

If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
must be called before kvm_init(). In fact kvm_intel() does. Although
other arch doesn't as long as I checked as follows, it should be reviewed
by each arch maintainers.

Before this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- kvm_arch_check_processor_compat() on each CPUs
- post arch specific initialization ---- (*)

- when creating/deleting first/last VM
- kvm_arch_hardware_enable() on each CPUs --- (A)
- kvm_arch_hardware_disable() on each CPUs --- (B)

After this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- kvm_arch_hardware_enable() on each CPUs (A)
- kvm_arch_check_processor_compat() on each CPUs
- kvm_arch_hardware_disable() on each CPUs (B)
- post arch specific initialization --- (*)

Code inspection result:
(A)/(B) can depend on (*) before this patch. If there is dependency, such
initialization must be moved before kvm_init() with this patch. VMX does
in fact. As long as I inspected other archs and find only mips has it.

- arch/mips/kvm/mips.c
module init function, kvm_mips_init(), does some initialization after
kvm_init(). Compile test only. Needs review.

- arch/x86/kvm/x86.c
- uses vm_list which is statically initialized.
- static_call(kvm_x86_hardware_enable)();
- SVM: (*) is empty.
- VMX: needs fix

- arch/powerpc/kvm/powerpc.c
kvm_arch_hardware_enable/disable() are nop

- arch/s390/kvm/kvm-s390.c
kvm_arch_hardware_enable/disable() are nop

- arch/arm64/kvm/arm.c
module init function, arm_init(), calls only kvm_init().
(*) is empty

- arch/riscv/kvm/main.c
module init function, riscv_kvm_init(), calls only kvm_init().
(*) is empty

Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/mips/kvm/mips.c | 12 +++++++-----
arch/x86/kvm/vmx/vmx.c | 15 +++++++++++----
virt/kvm/kvm_main.c | 20 ++++++++++----------
3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 092d09fb6a7e..17228584485d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1643,11 +1643,6 @@ static int __init kvm_mips_init(void)
}

ret = kvm_mips_entry_setup();
- if (ret)
- return ret;
-
- ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
if (ret)
return ret;

@@ -1656,6 +1651,13 @@ static int __init kvm_mips_init(void)

register_die_notifier(&kvm_mips_csr_die_notifier);

+ ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+
+ if (ret) {
+ unregister_die_notifier(&kvm_mips_csr_die_notifier);
+ return ret;
+ }
+
return 0;
}

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e30493fe4553..9bc46c1e64d9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8254,6 +8254,15 @@ static void vmx_exit(void)
}
module_exit(vmx_exit);

+/* initialize before kvm_init() so that hardware_enable/disable() can work. */
+static void __init vmx_init_early(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+}
+
static int __init vmx_init(void)
{
int r, cpu;
@@ -8291,6 +8300,7 @@ static int __init vmx_init(void)
}
#endif

+ vmx_init_early();
r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
@@ -8309,11 +8319,8 @@ static int __init vmx_init(void)
return r;
}

- for_each_possible_cpu(cpu) {
- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
+ for_each_possible_cpu(cpu)
pi_init_cpu(cpu);
- }

#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ec365291c625..0ff03889aa5d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4883,8 +4883,13 @@ static void hardware_enable_nolock(void *junk)

cpumask_set_cpu(cpu, cpus_hardware_enabled);

+ r = kvm_arch_check_processor_compat();
+ if (r)
+ goto out;
+
r = kvm_arch_hardware_enable();

+out:
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
@@ -5681,11 +5686,6 @@ void kvm_unregister_perf_callbacks(void)
}
#endif

-static void check_processor_compat(void *rtn)
-{
- *(int *)rtn = kvm_arch_check_processor_compat();
-}
-
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
@@ -5716,11 +5716,11 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r < 0)
goto out_free_1;

- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_processor_compat, &r, 1);
- if (r < 0)
- goto out_free_2;
- }
+ /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
+ r = hardware_enable_all();
+ if (r)
+ goto out_free_2;
+ hardware_disable_all();

r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);
--
2.25.1



2022-05-24 02:28:28

by Sagi Shahar

[permalink] [raw]
Subject: Re: [RFC PATCH v6 003/104] KVM: Refactor CPU compatibility check on module initialiization

On Thu, May 5, 2022 at 11:15 AM <[email protected]> wrote:
>
> From: Isaku Yamahata <[email protected]>
>
> Although non-x86 arch doesn't break as long as I inspected code, it's by
> code inspection. This should be reviewed by each arch maintainers.
>
> kvm_init() checks CPU compatibility by calling
> kvm_arch_check_processor_compat() on all online CPUs. Move the callback
> to hardware_enable_nolock() and add hardware_enable_all() and
> hardware_disable_all().
> Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
> to do arch specific initialization that required hardware_enable_all().

There's no reference to kvm_arch_post_hardware_enable_setup in this
patch. Looks like it's introduced in a later patch in the series.

This patch might be clearer if the kvm_arch_post_hardware_enable_setup
is introduced and used here. Otherwise, the commit log should be
updated to make it clear that kvm_arch_post_hardware_enable_setup is
introduced in a later patch in the series.

> This makes a room for TDX module to initialize on kvm module loading. TDX
> module requires all online cpu to enable VMX by VMXON.
>
> If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
> must be called before kvm_init(). In fact kvm_intel() does. Although
> other arch doesn't as long as I checked as follows, it should be reviewed
> by each arch maintainers.
>
> Before this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_check_processor_compat() on each CPUs
> - post arch specific initialization ---- (*)
>
> - when creating/deleting first/last VM
> - kvm_arch_hardware_enable() on each CPUs --- (A)
> - kvm_arch_hardware_disable() on each CPUs --- (B)
>
> After this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_hardware_enable() on each CPUs (A)
> - kvm_arch_check_processor_compat() on each CPUs
> - kvm_arch_hardware_disable() on each CPUs (B)
> - post arch specific initialization --- (*)
>
> Code inspection result:
> (A)/(B) can depend on (*) before this patch. If there is dependency, such
> initialization must be moved before kvm_init() with this patch. VMX does
> in fact. As long as I inspected other archs and find only mips has it.
>
> - arch/mips/kvm/mips.c
> module init function, kvm_mips_init(), does some initialization after
> kvm_init(). Compile test only. Needs review.
>
> - arch/x86/kvm/x86.c
> - uses vm_list which is statically initialized.
> - static_call(kvm_x86_hardware_enable)();
> - SVM: (*) is empty.
> - VMX: needs fix
>
> - arch/powerpc/kvm/powerpc.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/s390/kvm/kvm-s390.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/arm64/kvm/arm.c
> module init function, arm_init(), calls only kvm_init().
> (*) is empty
>
> - arch/riscv/kvm/main.c
> module init function, riscv_kvm_init(), calls only kvm_init().
> (*) is empty
>
> Co-developed-by: Sean Christopherson <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/mips/kvm/mips.c | 12 +++++++-----
> arch/x86/kvm/vmx/vmx.c | 15 +++++++++++----
> virt/kvm/kvm_main.c | 20 ++++++++++----------
> 3 files changed, 28 insertions(+), 19 deletions(-)
>
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 092d09fb6a7e..17228584485d 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -1643,11 +1643,6 @@ static int __init kvm_mips_init(void)
> }
>
> ret = kvm_mips_entry_setup();
> - if (ret)
> - return ret;
> -
> - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> -
> if (ret)
> return ret;
>
> @@ -1656,6 +1651,13 @@ static int __init kvm_mips_init(void)
>
> register_die_notifier(&kvm_mips_csr_die_notifier);
>
> + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> +
> + if (ret) {
> + unregister_die_notifier(&kvm_mips_csr_die_notifier);
> + return ret;
> + }
> +
> return 0;
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e30493fe4553..9bc46c1e64d9 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8254,6 +8254,15 @@ static void vmx_exit(void)
> }
> module_exit(vmx_exit);
>
> +/* initialize before kvm_init() so that hardware_enable/disable() can work. */
> +static void __init vmx_init_early(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> +}
> +
> static int __init vmx_init(void)
> {
> int r, cpu;
> @@ -8291,6 +8300,7 @@ static int __init vmx_init(void)
> }
> #endif
>
> + vmx_init_early();
> r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> @@ -8309,11 +8319,8 @@ static int __init vmx_init(void)
> return r;
> }
>
> - for_each_possible_cpu(cpu) {
> - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> -
> + for_each_possible_cpu(cpu)
> pi_init_cpu(cpu);
> - }
>
> #ifdef CONFIG_KEXEC_CORE
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index ec365291c625..0ff03889aa5d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4883,8 +4883,13 @@ static void hardware_enable_nolock(void *junk)
>
> cpumask_set_cpu(cpu, cpus_hardware_enabled);
>
> + r = kvm_arch_check_processor_compat();
> + if (r)
> + goto out;
> +
> r = kvm_arch_hardware_enable();
>
> +out:
> if (r) {
> cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> atomic_inc(&hardware_enable_failed);
> @@ -5681,11 +5686,6 @@ void kvm_unregister_perf_callbacks(void)
> }
> #endif
>
> -static void check_processor_compat(void *rtn)
> -{
> - *(int *)rtn = kvm_arch_check_processor_compat();
> -}
> -
> int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> struct module *module)
> {
> @@ -5716,11 +5716,11 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> if (r < 0)
> goto out_free_1;
>
> - for_each_online_cpu(cpu) {
> - smp_call_function_single(cpu, check_processor_compat, &r, 1);
> - if (r < 0)
> - goto out_free_2;
> - }
> + /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
> + r = hardware_enable_all();
> + if (r)
> + goto out_free_2;
> + hardware_disable_all();
>
> r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
> kvm_starting_cpu, kvm_dying_cpu);
> --
> 2.25.1
>

Sagi

2022-05-27 07:51:17

by Isaku Yamahata

[permalink] [raw]
Subject: Re: [RFC PATCH v6 003/104] KVM: Refactor CPU compatibility check on module initialiization

On Mon, May 23, 2022 at 03:27:10PM -0700,
Sagi Shahar <[email protected]> wrote:

> On Thu, May 5, 2022 at 11:15 AM <[email protected]> wrote:
> >
> > From: Isaku Yamahata <[email protected]>
> >
> > Although non-x86 arch doesn't break as long as I inspected code, it's by
> > code inspection. This should be reviewed by each arch maintainers.
> >
> > kvm_init() checks CPU compatibility by calling
> > kvm_arch_check_processor_compat() on all online CPUs. Move the callback
> > to hardware_enable_nolock() and add hardware_enable_all() and
> > hardware_disable_all().
> > Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
> > to do arch specific initialization that required hardware_enable_all().
>
> There's no reference to kvm_arch_post_hardware_enable_setup in this
> patch. Looks like it's introduced in a later patch in the series.
>
> This patch might be clearer if the kvm_arch_post_hardware_enable_setup
> is introduced and used here. Otherwise, the commit log should be
> updated to make it clear that kvm_arch_post_hardware_enable_setup is
> introduced in a later patch in the series.
>
> > This makes a room for TDX module to initialize on kvm module loading. TDX
> > module requires all online cpu to enable VMX by VMXON.
> >
> > If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
> > must be called before kvm_init(). In fact kvm_intel() does. Although
> > other arch doesn't as long as I checked as follows, it should be reviewed
> > by each arch maintainers.

Thank you for pointing it out. I moved the addition of
kvm_arch_post_hardware_enable_setup() to this patch. The related patch is as
follows.

The patch that actually uses kvm_arch_post_hardware_enable_setup() is
"011/104 KVM: TDX: Initialize TDX module when loading kvm_intel.ko". It's
a bit far from this patch. I intentionally positioned this patch early in
this series because this patch requires cross-arch review.

Thanks,

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1aead3921a16..55dd08cca5d2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1441,6 +1441,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void *opaque);
+int kvm_arch_post_hardware_enable_setup(void *opaque);
void kvm_arch_hardware_unsetup(void);
int kvm_arch_check_processor_compat(void);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ec365291c625..55b292fda733 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4883,8 +4883,13 @@ static void hardware_enable_nolock(void *junk)

cpumask_set_cpu(cpu, cpus_hardware_enabled);

+ r = kvm_arch_check_processor_compat();
+ if (r)
+ goto out;
+
r = kvm_arch_hardware_enable();

+out:
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
@@ -5681,9 +5686,9 @@ void kvm_unregister_perf_callbacks(void)
}
#endif

-static void check_processor_compat(void *rtn)
+__weak int kvm_arch_post_hardware_enable_setup(void *opaque)
{
- *(int *)rtn = kvm_arch_check_processor_compat();
+ return 0;
}

int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
@@ -5716,11 +5721,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r < 0)
goto out_free_1;

- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_processor_compat, &r, 1);
- if (r < 0)
- goto out_free_2;
- }
+ /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
+ r = hardware_enable_all();
+ if (r)
+ goto out_free_2;
+ /*
+ * Arch specific initialization that requires to enable virtualization
+ * feature. e.g. TDX module initialization requires VMXON on all
+ * present CPUs.
+ */
+ kvm_arch_post_hardware_enable_setup(opaque);
+ hardware_disable_all();

r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);


--
Isaku Yamahata <[email protected]>