2022-06-27 21:56:30

by Isaku Yamahata

[permalink] [raw]
Subject: [PATCH v7 003/102] KVM: Refactor CPU compatibility check on module initialiization

From: Isaku Yamahata <[email protected]>

Although non-x86 arch doesn't break as long as I inspected code, it's by
code inspection. This should be reviewed by each arch maintainers.

kvm_init() checks CPU compatibility by calling
kvm_arch_check_processor_compat() on all online CPUs. Move the callback
to hardware_enable_nolock() and add hardware_enable_all() and
hardware_disable_all().
Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
to do arch specific initialization that required hardware_enable_all().
This makes a room for TDX module to initialize on kvm module loading. TDX
module requires all online cpu to enable VMX by VMXON.

If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
must be called before kvm_init(). In fact kvm_intel() does. Although
other arch doesn't as long as I checked as follows, it should be reviewed
by each arch maintainers.

Before this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- kvm_arch_check_processor_compat() on each CPUs
- post arch specific initialization ---- (*)

- when creating/deleting first/last VM
- kvm_arch_hardware_enable() on each CPUs --- (A)
- kvm_arch_hardware_disable() on each CPUs --- (B)

After this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- kvm_arch_hardware_enable() on each CPUs (A)
- kvm_arch_check_processor_compat() on each CPUs
- kvm_arch_hardware_disable() on each CPUs (B)
- post arch specific initialization --- (*)

Code inspection result:
(A)/(B) can depend on (*) before this patch. If there is dependency, such
initialization must be moved before kvm_init() with this patch. VMX does
in fact. As long as I inspected other archs and find only mips has it.

- arch/mips/kvm/mips.c
module init function, kvm_mips_init(), does some initialization after
kvm_init(). Compile test only. Needs review.

- arch/x86/kvm/x86.c
- uses vm_list which is statically initialized.
- static_call(kvm_x86_hardware_enable)();
- SVM: (*) is empty.
- VMX: needs fix

- arch/powerpc/kvm/powerpc.c
kvm_arch_hardware_enable/disable() are nop

- arch/s390/kvm/kvm-s390.c
kvm_arch_hardware_enable/disable() are nop

- arch/arm64/kvm/arm.c
module init function, arm_init(), calls only kvm_init().
(*) is empty

- arch/riscv/kvm/main.c
module init function, riscv_kvm_init(), calls only kvm_init().
(*) is empty

Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/mips/kvm/mips.c | 12 +++++++-----
arch/x86/kvm/vmx/vmx.c | 15 +++++++++++----
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 25 ++++++++++++++++++-------
4 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 092d09fb6a7e..17228584485d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1643,11 +1643,6 @@ static int __init kvm_mips_init(void)
}

ret = kvm_mips_entry_setup();
- if (ret)
- return ret;
-
- ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
if (ret)
return ret;

@@ -1656,6 +1651,13 @@ static int __init kvm_mips_init(void)

register_die_notifier(&kvm_mips_csr_die_notifier);

+ ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+
+ if (ret) {
+ unregister_die_notifier(&kvm_mips_csr_die_notifier);
+ return ret;
+ }
+
return 0;
}

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 31e7630203fd..d3b68a6dec48 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8372,6 +8372,15 @@ static void vmx_exit(void)
}
module_exit(vmx_exit);

+/* initialize before kvm_init() so that hardware_enable/disable() can work. */
+static void __init vmx_init_early(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+}
+
static int __init vmx_init(void)
{
int r, cpu;
@@ -8409,6 +8418,7 @@ static int __init vmx_init(void)
}
#endif

+ vmx_init_early();
r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
@@ -8427,11 +8437,8 @@ static int __init vmx_init(void)
return r;
}

- for_each_possible_cpu(cpu) {
- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
+ for_each_possible_cpu(cpu)
pi_init_cpu(cpu);
- }

#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d4f130a9f5c8..79a4988fd51f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1441,6 +1441,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void *opaque);
+int kvm_arch_post_hardware_enable_setup(void *opaque);
void kvm_arch_hardware_unsetup(void);
int kvm_arch_check_processor_compat(void);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a5bada53f1fe..cee799265ce6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4899,8 +4899,13 @@ static void hardware_enable_nolock(void *junk)

cpumask_set_cpu(cpu, cpus_hardware_enabled);

+ r = kvm_arch_check_processor_compat();
+ if (r)
+ goto out;
+
r = kvm_arch_hardware_enable();

+out:
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
@@ -5697,9 +5702,9 @@ void kvm_unregister_perf_callbacks(void)
}
#endif

-static void check_processor_compat(void *rtn)
+__weak int kvm_arch_post_hardware_enable_setup(void *opaque)
{
- *(int *)rtn = kvm_arch_check_processor_compat();
+ return 0;
}

int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
@@ -5732,11 +5737,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r < 0)
goto out_free_1;

- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_processor_compat, &r, 1);
- if (r < 0)
- goto out_free_2;
- }
+ /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
+ r = hardware_enable_all();
+ if (r)
+ goto out_free_2;
+ /*
+ * Arch specific initialization that requires to enable virtualization
+ * feature. e.g. TDX module initialization requires VMXON on all
+ * present CPUs.
+ */
+ kvm_arch_post_hardware_enable_setup(opaque);
+ hardware_disable_all();

r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);
--
2.25.1


2022-07-12 01:59:59

by Kai Huang

[permalink] [raw]
Subject: Re: [PATCH v7 003/102] KVM: Refactor CPU compatibility check on module initialiization

On Mon, 2022-06-27 at 14:52 -0700, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> Although non-x86 arch doesn't break as long as I inspected code, it's by
> code inspection. This should be reviewed by each arch maintainers.
>
> kvm_init() checks CPU compatibility by calling
> kvm_arch_check_processor_compat() on all online CPUs. Move the callback
> to hardware_enable_nolock() and add hardware_enable_all() and
> hardware_disable_all().
> Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
> to do arch specific initialization that required hardware_enable_all().
> This makes a room for TDX module to initialize on kvm module loading. TDX
> module requires all online cpu to enable VMX by VMXON.
>
> If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
> must be called before kvm_init(). In fact kvm_intel() does. Although
> other arch doesn't as long as I checked as follows, it should be reviewed
> by each arch maintainers.
>
> Before this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_check_processor_compat() on each CPUs
> - post arch specific initialization ---- (*)
>
> - when creating/deleting first/last VM
> - kvm_arch_hardware_enable() on each CPUs --- (A)
> - kvm_arch_hardware_disable() on each CPUs --- (B)
>
> After this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_hardware_enable() on each CPUs (A)
> - kvm_arch_check_processor_compat() on each CPUs
> - kvm_arch_hardware_disable() on each CPUs (B)
> - post arch specific initialization --- (*)
>
> Code inspection result:
> (A)/(B) can depend on (*) before this patch. If there is dependency, such
> initialization must be moved before kvm_init() with this patch. VMX does
> in fact. As long as I inspected other archs and find only mips has it.
>
> - arch/mips/kvm/mips.c
> module init function, kvm_mips_init(), does some initialization after
> kvm_init(). Compile test only. Needs review.
>
> - arch/x86/kvm/x86.c
> - uses vm_list which is statically initialized.
> - static_call(kvm_x86_hardware_enable)();
> - SVM: (*) is empty.
> - VMX: needs fix
>
> - arch/powerpc/kvm/powerpc.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/s390/kvm/kvm-s390.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/arm64/kvm/arm.c
> module init function, arm_init(), calls only kvm_init().
> (*) is empty
>
> - arch/riscv/kvm/main.c
> module init function, riscv_kvm_init(), calls only kvm_init().
> (*) is empty
>
> Co-developed-by: Sean Christopherson <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/mips/kvm/mips.c | 12 +++++++-----
> arch/x86/kvm/vmx/vmx.c | 15 +++++++++++----
> include/linux/kvm_host.h | 1 +
> virt/kvm/kvm_main.c | 25 ++++++++++++++++++-------
> 4 files changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 092d09fb6a7e..17228584485d 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -1643,11 +1643,6 @@ static int __init kvm_mips_init(void)
> }
>
> ret = kvm_mips_entry_setup();
> - if (ret)
> - return ret;
> -
> - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> -
> if (ret)
> return ret;
>
> @@ -1656,6 +1651,13 @@ static int __init kvm_mips_init(void)
>
> register_die_notifier(&kvm_mips_csr_die_notifier);
>
> + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> +
> + if (ret) {
> + unregister_die_notifier(&kvm_mips_csr_die_notifier);
> + return ret;
> + }
> +
> return 0;
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 31e7630203fd..d3b68a6dec48 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8372,6 +8372,15 @@ static void vmx_exit(void)
> }
> module_exit(vmx_exit);
>
> +/* initialize before kvm_init() so that hardware_enable/disable() can work. */
> +static void __init vmx_init_early(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> +}
> +
> static int __init vmx_init(void)
> {
> int r, cpu;
> @@ -8409,6 +8418,7 @@ static int __init vmx_init(void)
> }
> #endif
>
> + vmx_init_early();
> r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> @@ -8427,11 +8437,8 @@ static int __init vmx_init(void)
> return r;
> }
>
> - for_each_possible_cpu(cpu) {
> - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> -
> + for_each_possible_cpu(cpu)
> pi_init_cpu(cpu);
> - }
>
> #ifdef CONFIG_KEXEC_CORE
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d4f130a9f5c8..79a4988fd51f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1441,6 +1441,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
> int kvm_arch_hardware_enable(void);
> void kvm_arch_hardware_disable(void);
> int kvm_arch_hardware_setup(void *opaque);
> +int kvm_arch_post_hardware_enable_setup(void *opaque);
> void kvm_arch_hardware_unsetup(void);
> int kvm_arch_check_processor_compat(void);
> int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index a5bada53f1fe..cee799265ce6 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4899,8 +4899,13 @@ static void hardware_enable_nolock(void *junk)
>
> cpumask_set_cpu(cpu, cpus_hardware_enabled);
>
> + r = kvm_arch_check_processor_compat();
> + if (r)
> + goto out;
> +
> r = kvm_arch_hardware_enable();
>
> +out:
> if (r) {
> cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> atomic_inc(&hardware_enable_failed);
> @@ -5697,9 +5702,9 @@ void kvm_unregister_perf_callbacks(void)
> }
> #endif
>
> -static void check_processor_compat(void *rtn)
> +__weak int kvm_arch_post_hardware_enable_setup(void *opaque)
> {
> - *(int *)rtn = kvm_arch_check_processor_compat();
> + return 0;
> }
>
> int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> @@ -5732,11 +5737,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> if (r < 0)
> goto out_free_1;
>
> - for_each_online_cpu(cpu) {
> - smp_call_function_single(cpu, check_processor_compat, &r, 1);
> - if (r < 0)
> - goto out_free_2;
> - }
> + /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
> + r = hardware_enable_all();
> + if (r)
> + goto out_free_2;
> + /*
> + * Arch specific initialization that requires to enable virtualization
> + * feature. e.g. TDX module initialization requires VMXON on all
> + * present CPUs.
> + */
> + kvm_arch_post_hardware_enable_setup(opaque);

Please see my reply to your patch "KVM: TDX: Initialize TDX module when loading
kvm_intel.ko".

The introduce of __weak kvm_arch_post_hardware_enable_setup() should be in that
patch since it has nothing to do the job you claimed to do in this patch.

And by removing it, this patch can be taken out of TDX series and upstreamed
separately.

> + hardware_disable_all();
>
> r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
> kvm_starting_cpu, kvm_dying_cpu);

2022-07-13 03:31:37

by Kai Huang

[permalink] [raw]
Subject: Re: [PATCH v7 003/102] KVM: Refactor CPU compatibility check on module initialiization

On Mon, 2022-06-27 at 14:52 -0700, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> Although non-x86 arch doesn't break as long as I inspected code, it's by
> code inspection. This should be reviewed by each arch maintainers.

This first paragraph doesn't make sense to me. At this moment, we don't know
why you need this patch at all.

>
> kvm_init() checks CPU compatibility by calling
> kvm_arch_check_processor_compat() on all online CPUs.  
>

What's the problem here which requires you need to do ..

> Move the callback
> to hardware_enable_nolock() and add hardware_enable_all() and
> hardware_disable_all().

.. this?


> Add arch specific callback kvm_arch_post_hardware_enable_setup() for arch
> to do arch specific initialization that required hardware_enable_all().
> This makes a room for TDX module to initialize on kvm module loading. TDX
> module requires all online cpu to enable VMX by VMXON.

So to me this is the reason why you need to do hardware_enable_all() in
kvm_init(). There's nothing wrong with "kvm_init() checks CPU compatibility by
calling kvm_arch_check_processor_compat() on all online CPUs", right?

In this case, shouldn't we say something like "opportunistically move
kvm_arch_check_processor_compat() to hardware_enable_nolock()" because this is
not the reason that you want this patch, correct?

Also, maybe I am missing something obviously, but why do you need to do
hardware_disable_all() right after hardware_enable_all() in kvm_init()? Could
you at least put some explanation in the changelog?

And again, it's better to add one sentence or so to explain why do you want to
init TDX module during module loading time.

>
> If kvm_arch_hardware_enable/disable() depend on (*) part, such dependency
> must be called before kvm_init().  
>

I don't follow the logic here.  If kvm_arch_hardware_enable() depends on
something, then you need to put kvm_arch_hardware_enable() after that, or move
that forward. But why such dependency must be called "before kvm_init()"?

Also, I think you are talking about the problem that _after_ you move
hardware_enable_all() to kvm_init(), but not problem in existing code, right?


> In fact kvm_intel() does.  
>

No such function kvm_intel().

Again, what's the issue here? Can you add more sentences to explain the
_problem_, or _why_?


> Although
> other arch doesn't as long as I checked as follows, it should be reviewed
> by each arch maintainers.
>
> Before this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_check_processor_compat() on each CPUs
> - post arch specific initialization ---- (*)
>
> - when creating/deleting first/last VM
> - kvm_arch_hardware_enable() on each CPUs --- (A)
> - kvm_arch_hardware_disable() on each CPUs --- (B)
>
> After this patch:
> - Arch module initialization
> - kvm_init()
> - kvm_arch_init()
> - kvm_arch_hardware_enable() on each CPUs (A)
> - kvm_arch_check_processor_compat() on each CPUs

Even with this patch, unless I am seeing mistakenly, kvm_arch_hardware_enable()
is called _after_ kvm_arch_check_processor_compat().

> - kvm_arch_hardware_disable() on each CPUs (B)
> - post arch specific initialization --- (*)
>
> Code inspection result:
> (A)/(B) can depend on (*) before this patch.  If there is dependency, such
> initialization must be moved before kvm_init() with this patch.  
>

Must be moved to before (A)/(B), right?

> VMX does
> in fact.  
>

More details will help, please.

> As long as I inspected other archs and find only mips has it.
>
> - arch/mips/kvm/mips.c
> module init function, kvm_mips_init(), does some initialization after
> kvm_init(). Compile test only. Needs review.

Is "Needs review" changelog material?

>
> - arch/x86/kvm/x86.c
> - uses vm_list which is statically initialized.

I can hardly see how "vm_list is statically initialized" is causing any problem
here. Exactly what's the problem here??

> - static_call(kvm_x86_hardware_enable)();
> - SVM: (*) is empty.
> - VMX: needs fix

What's the problem, and how are you going to fix? Shouldn't this be in
changelog?

>
> - arch/powerpc/kvm/powerpc.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/s390/kvm/kvm-s390.c
> kvm_arch_hardware_enable/disable() are nop
>
> - arch/arm64/kvm/arm.c
> module init function, arm_init(), calls only kvm_init().
> (*) is empty
>
> - arch/riscv/kvm/main.c
> module init function, riscv_kvm_init(), calls only kvm_init().
> (*) is empty
>
> Co-developed-by: Sean Christopherson <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/mips/kvm/mips.c | 12 +++++++-----
> arch/x86/kvm/vmx/vmx.c | 15 +++++++++++----
> include/linux/kvm_host.h | 1 +
> virt/kvm/kvm_main.c | 25 ++++++++++++++++++-------
> 4 files changed, 37 insertions(+), 16 deletions(-)
>
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 092d09fb6a7e..17228584485d 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -1643,11 +1643,6 @@ static int __init kvm_mips_init(void)
> }
>
> ret = kvm_mips_entry_setup();
> - if (ret)
> - return ret;
> -
> - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> -
> if (ret)
> return ret;
>
> @@ -1656,6 +1651,13 @@ static int __init kvm_mips_init(void)
>
> register_die_notifier(&kvm_mips_csr_die_notifier);
>
> + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> +
> + if (ret) {
> + unregister_die_notifier(&kvm_mips_csr_die_notifier);
> + return ret;
> + }
> +

I don't understand how moving "hardware_enable_all()/hardware_disable_all()" to
kvm_init() is related to this change.

Anyway, at least some comments?

> return 0;
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 31e7630203fd..d3b68a6dec48 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8372,6 +8372,15 @@ static void vmx_exit(void)
> }
> module_exit(vmx_exit);
>
> +/* initialize before kvm_init() so that hardware_enable/disable() can work. */

There's no function named hardware_enable() or hardware_disable().

> +static void __init vmx_init_early(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> +}
> +

Perhaps I am missing something, but I couldn't see why this must be done before
kvm_init(). Please give some comments?

> static int __init vmx_init(void)
> {
> int r, cpu;
> @@ -8409,6 +8418,7 @@ static int __init vmx_init(void)
> }
> #endif
>
> + vmx_init_early();
> r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> @@ -8427,11 +8437,8 @@ static int __init vmx_init(void)
> return r;
> }
>
> - for_each_possible_cpu(cpu) {
> - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> -
> + for_each_possible_cpu(cpu)
> pi_init_cpu(cpu);
> - }
>
> #ifdef CONFIG_KEXEC_CORE
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index d4f130a9f5c8..79a4988fd51f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1441,6 +1441,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
> int kvm_arch_hardware_enable(void);
> void kvm_arch_hardware_disable(void);
> int kvm_arch_hardware_setup(void *opaque);
> +int kvm_arch_post_hardware_enable_setup(void *opaque);
> void kvm_arch_hardware_unsetup(void);
> int kvm_arch_check_processor_compat(void);
> int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index a5bada53f1fe..cee799265ce6 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4899,8 +4899,13 @@ static void hardware_enable_nolock(void *junk)
>
> cpumask_set_cpu(cpu, cpus_hardware_enabled);
>
> + r = kvm_arch_check_processor_compat();
> + if (r)
> + goto out;
> +
> r = kvm_arch_hardware_enable();
>
> +out:
> if (r) {
> cpumask_clear_cpu(cpu, cpus_hardware_enabled);
> atomic_inc(&hardware_enable_failed);
> @@ -5697,9 +5702,9 @@ void kvm_unregister_perf_callbacks(void)
> }
> #endif
>
> -static void check_processor_compat(void *rtn)
> +__weak int kvm_arch_post_hardware_enable_setup(void *opaque)
> {
> - *(int *)rtn = kvm_arch_check_processor_compat();
> + return 0;
> }
>
> int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> @@ -5732,11 +5737,17 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> if (r < 0)
> goto out_free_1;
>
> - for_each_online_cpu(cpu) {
> - smp_call_function_single(cpu, check_processor_compat, &r, 1);
> - if (r < 0)
> - goto out_free_2;
> - }
> + /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
> + r = hardware_enable_all();
> + if (r)
> + goto out_free_2;
> + /*
> + * Arch specific initialization that requires to enable virtualization
> + * feature. e.g. TDX module initialization requires VMXON on all
> + * present CPUs.
> + */
> + kvm_arch_post_hardware_enable_setup(opaque);

So after digging history and looking at the code again, I guess perhaps it's
also fine to introduce this __weak version here (since you have given the reason
to do so in changelog), but in this way perhaps it's better to put this patch
and the patch to load TDX module closely so it's easier to review.

Or to me it's also fine to move this chunk to the patch to init TDX module as I
replied before.

> + hardware_disable_all();

IMO needs a comment why do we need to do hardware_disable_all() here. It
doesn't make a lot sense to me.

>
> r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
> kvm_starting_cpu, kvm_dying_cpu);

2022-07-13 03:33:47

by Kai Huang

[permalink] [raw]
Subject: Re: [PATCH v7 003/102] KVM: Refactor CPU compatibility check on module initialiization


> > + /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
> > + r = hardware_enable_all();
> > + if (r)
> > + goto out_free_2;
> > + /*
> > + * Arch specific initialization that requires to enable virtualization
> > + * feature. e.g. TDX module initialization requires VMXON on all
> > + * present CPUs.
> > + */
> > + kvm_arch_post_hardware_enable_setup(opaque);
>
> Please see my reply to your patch "KVM: TDX: Initialize TDX module when loading
> kvm_intel.ko".
>
> The introduce of __weak kvm_arch_post_hardware_enable_setup() should be in that
> patch since it has nothing to do the job you claimed to do in this patch.
>
> And by removing it, this patch can be taken out of TDX series and upstreamed
> separately.

I tried to dig more about the history. Please see my another reply and ignore
this.

2022-07-27 22:41:45

by Isaku Yamahata

[permalink] [raw]
Subject: Re: [PATCH v7 003/102] KVM: Refactor CPU compatibility check on module initialiization

Here is the updated version.

commit 7d042749b631f668ed9e99044228f16c212161bc
Author: Isaku Yamahata <[email protected]>
Date: Fri Apr 22 16:56:51 2022 -0700

KVM: Refactor CPU compatibility check on module initialization

TDX module requires its initialization. It requires VMX to be enabled.
Although there are several options of when to initialize it, the choice is
the initialization time of the KVM kernel module. There is no usable
arch-specific hook for the TDX module to utilize during the KVM kernel module
initialization. The code doesn't enable/disable hardware (VMX in TDX case)
during the kernel module initialization. Add a hook for enabling hardware,
arch-specific initialization, and disabling hardware during KVM kernel
module initialization to make a room for TDX module initialization. The
current KVM enables hardware when the first VM is created and disables
hardware when the last VM is destroyed. When no VM is running, hardware is
disabled. To follow these semantics, the kernel module initialization needs
to disable hardware. Opportunistically refactor the code to enable/disable
hardware.

Add hadware_enable_all() and hardware_disable_all() to kvm_init() and
introduce a new arch-specific callback function,
kvm_arch_post_hardware_enable_setup, for arch to do arch-specific
initialization that requires hardware_enable_all(). Opportunistically,
move kvm_arch_check_processor_compat() to to hardware_enabled_nolock().
TDX module initialization code will go into
kvm_arch_post_hardware_enable_setup().

This patch reorders some function calls as below from (*) (**) (A) and (B)
to (A) (B) and (*). Here (A) and (B) depends on (*), but not (**). By
code inspection, only mips and VMX has the code of (*). No other
arch has empty (*). So refactor mips and VMX and eliminate the
necessity hook for (*) instead of adding an unused hook.

Before this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- kvm_arch_check_processor_compat() on each CPUs
- post-arch-specific initialization -- (*): (A) and (B) depends on this
- post-arch-specific initialization -- (**): no dependency to (A) and (B)

- When creating/deleting the first/last VM
- kvm_arch_hardware_enable() on each CPUs -- (A)
- kvm_arch_hardware_disable() on each CPUs -- (B)

After this patch:
- Arch module initialization
- kvm_init()
- kvm_arch_init()
- arch-specific initialization -- (*)
- kvm_arch_check_processor_compat() on each CPUs
- kvm_arch_hardware_enable() on each CPUs -- (A)
- kvm_arch_hardware_disable() on each CPUs -- (B)
- post-arch-specific initialization -- (**)

- When creating/deleting the first/last VM (no logic change)
- kvm_arch_hardware_enable() on each CPUs -- (A)
- kvm_arch_hardware_disable() on each CPUs -- (B)

Code inspection result:
As long as I inspected, I found only mips and VMX have non-empty (*) or
non-empty (A) or (B).
x86: tested on a real machine
mips: compile test only
powerpc, s390, arm, riscv: code inspection only

- arch/mips/kvm/mips.c
module init function, kvm_mips_init(), does some initialization after
kvm_init(). Compile test only.

- arch/x86/kvm/x86.c
- uses vm_list which is statically initialized.
- static_call(kvm_x86_hardware_enable)();
- SVM: (*) and (**) are empty.
- VMX: initialize percpu variable loaded_vmcss_on_cpu that VMXON uses.

- arch/powerpc/kvm/powerpc.c
kvm_arch_hardware_enable/disable() are nop

- arch/s390/kvm/kvm-s390.c
kvm_arch_hardware_enable/disable() are nop

- arch/arm64/kvm/arm.c
module init function, arm_init(), calls only kvm_init().
(*) and (**) are empty

- arch/riscv/kvm/main.c
module init function, riscv_kvm_init(), calls only kvm_init().
(*) and (**) are empty

Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Isaku Yamahata <[email protected]>

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 092d09fb6a7e..fd7339cff57c 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1642,12 +1642,11 @@ static int __init kvm_mips_init(void)
return -EOPNOTSUPP;
}

+ /*
+ * kvm_init() calls kvm_arch_hardware_enable/disable(). The early
+ * initialization is needed before calling kvm_init().
+ */
ret = kvm_mips_entry_setup();
- if (ret)
- return ret;
-
- ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
if (ret)
return ret;

@@ -1656,6 +1655,13 @@ static int __init kvm_mips_init(void)

register_die_notifier(&kvm_mips_csr_die_notifier);

+ ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+
+ if (ret) {
+ unregister_die_notifier(&kvm_mips_csr_die_notifier);
+ return ret;
+ }
+
return 0;
}

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 111e0c42479a..5c59b4ea6524 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8442,6 +8442,23 @@ static void vmx_exit(void)
}
module_exit(vmx_exit);

+/*
+ * Early initialization before kvm_init() so that vmx_hardware_enable/disable()
+ * can work.
+ */
+static void __init vmx_init_early(void)
+{
+ int cpu;
+
+ /*
+ * vmx_hardware_disable() accesses loaded_vmcss_on_cpu list.
+ * Initialize the variable before kvm_init() that calls
+ * vmx_hardware_enable/disable().
+ */
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+}
+
static int __init vmx_init(void)
{
int r, cpu;
@@ -8479,6 +8496,7 @@ static int __init vmx_init(void)
}
#endif

+ vmx_init_early();
r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
@@ -8499,11 +8517,8 @@ static int __init vmx_init(void)

vmx_setup_fb_clear_ctrl();

- for_each_possible_cpu(cpu) {
- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
+ for_each_possible_cpu(cpu)
pi_init_cpu(cpu);
- }

#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d4f130a9f5c8..79a4988fd51f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1441,6 +1441,7 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void *opaque);
+int kvm_arch_post_hardware_enable_setup(void *opaque);
void kvm_arch_hardware_unsetup(void);
int kvm_arch_check_processor_compat(void);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a5bada53f1fe..51b8ac5faca5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4899,8 +4899,13 @@ static void hardware_enable_nolock(void *junk)

cpumask_set_cpu(cpu, cpus_hardware_enabled);

+ r = kvm_arch_check_processor_compat();
+ if (r)
+ goto out;
+
r = kvm_arch_hardware_enable();

+out:
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
@@ -5697,9 +5702,9 @@ void kvm_unregister_perf_callbacks(void)
}
#endif

-static void check_processor_compat(void *rtn)
+__weak int kvm_arch_post_hardware_enable_setup(void *opaque)
{
- *(int *)rtn = kvm_arch_check_processor_compat();
+ return 0;
}

int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
@@ -5732,11 +5737,23 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r < 0)
goto out_free_1;

- for_each_online_cpu(cpu) {
- smp_call_function_single(cpu, check_processor_compat, &r, 1);
- if (r < 0)
- goto out_free_2;
- }
+ /* hardware_enable_nolock() checks CPU compatibility on each CPUs. */
+ r = hardware_enable_all();
+ if (r)
+ goto out_free_2;
+ /*
+ * Arch specific initialization that requires to enable virtualization
+ * feature. e.g. TDX module initialization requires VMXON on all
+ * present CPUs.
+ */
+ kvm_arch_post_hardware_enable_setup(opaque);
+ /*
+ * Make hardware disabled after the KVM module initialization. KVM
+ * enables hardware when the first KVM VM is created and disables
+ * hardware when the last KVM VM is destroyed. When no KVM VM is
+ * running, hardware is disabled. Keep that semantics.
+ */
+ hardware_disable_all();

r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
kvm_starting_cpu, kvm_dying_cpu);

--
Isaku Yamahata <[email protected]>