From: Isaku Yamahata <[email protected]>
Currently, the KVM VMX module initialization/exit functions are each a
single function. Refactor the KVM VMX module initialization functions
into a KVM common part and a VMX part so that a TDX-specific part can
be added cleanly. Opportunistically refactor the module exit function
as well.

The current module initialization flow is: 1) calculate the sizes of
the VMX kvm structure and the VMX vcpu structure, 2) report those
sizes to the KVM common layer and do the KVM common initialization,
and 3) do the VMX-specific system-wide initialization.

Refactor the KVM VMX module initialization function into smaller
functions with a wrapper so that the VMX-specific logic stays in
vmx.c, out of main.c, the file shared between VMX and TDX. The
wrapper in main.c is "vt_init() { vmx_pre_kvm_init(); kvm_init();
vmx_init(); }", with vmx_pre_kvm_init() and vmx_init() implemented in
vmx.c. vmx_pre_kvm_init() calculates the size and required alignment
of the VMX vcpu structure, kvm_init() does the system-wide
initialization of the KVM common layer, and vmx_init() does the
system-wide VMX initialization.

The KVM architecture-common layer allocates struct kvm with the size
reported by the architecture-specific code. The KVM VMX module
defines its structure as struct vmx_kvm { struct kvm; VMX-specific
members; } and uses it as its kvm structure. The vcpu structure is
handled similarly. The TDX KVM patches will define TDX-specific kvm
and vcpu structures and add tdx_pre_kvm_init() to report their sizes
to the KVM common layer.
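
For illustration, this is the shape such a containing structure
takes; the member name below is made up for the example and is not
part of this patch:

  struct vmx_kvm {
          struct kvm kvm;         /* common part, placed first so the
                                   * same pointer works for both */
          unsigned int example_member;    /* VMX-specific members here */
  };

  /* VMX code gets back its container from a struct kvm pointer: */
  static inline struct vmx_kvm *to_vmx_kvm(struct kvm *kvm)
  {
          return container_of(kvm, struct vmx_kvm, kvm);
  }
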
The current module exit function is also a single function, a
combination of VMX-specific logic and common KVM logic. Refactor it
into the VMX-specific logic and the KVM common logic. This is just
refactoring to keep the VMX-specific logic in vmx.c, out of main.c.

Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/x86/kvm/vmx/main.c | 33 +++++++++++++
arch/x86/kvm/vmx/vmx.c | 97 +++++++++++++++++++-------------------
arch/x86/kvm/vmx/x86_ops.h | 5 +-
3 files changed, 86 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index b79fcc8d81dd..8ff13c7881f2 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -165,3 +165,36 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {

.runtime_ops = &vt_x86_ops,
};
+
+static int __init vt_init(void)
+{
+ unsigned int vcpu_size = 0, vcpu_align = 0;
+ int r;
+
+ vmx_pre_kvm_init(&vcpu_size, &vcpu_align);
+
+ r = kvm_init(&vt_init_ops, vcpu_size, vcpu_align, THIS_MODULE);
+ if (r)
+ goto err_vmx_post_exit;
+
+ r = vmx_init();
+ if (r)
+ goto err_kvm_exit;
+
+ return 0;
+
+err_kvm_exit:
+ kvm_exit();
+err_vmx_post_exit:
+ vmx_post_kvm_exit();
+ return r;
+}
+module_init(vt_init);
+
+static void vt_exit(void)
+{
+ vmx_exit();
+ kvm_exit();
+ vmx_post_kvm_exit();
+}
+module_exit(vt_exit);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f6f5d0dac579..7838cd177f0e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7929,47 +7929,12 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}

-static void vmx_exit(void)
+void __init vmx_pre_kvm_init(unsigned int *vcpu_size, unsigned int *vcpu_align)
{
-#ifdef CONFIG_KEXEC_CORE
- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
- synchronize_rcu();
-#endif
-
- kvm_exit();
-
-#if IS_ENABLED(CONFIG_HYPERV)
- if (static_branch_unlikely(&enable_evmcs)) {
- int cpu;
- struct hv_vp_assist_page *vp_ap;
- /*
- * Reset everything to support using non-enlightened VMCS
- * access later (e.g. when we reload the module with
- * enlightened_vmcs=0)
- */
- for_each_online_cpu(cpu) {
- vp_ap = hv_get_vp_assist_page(cpu);
-
- if (!vp_ap)
- continue;
-
- vp_ap->nested_control.features.directhypercall = 0;
- vp_ap->current_nested_vmcs = 0;
- vp_ap->enlighten_vmentry = 0;
- }
-
- static_branch_disable(&enable_evmcs);
- }
-#endif
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
-}
-module_exit(vmx_exit);
-
-static int __init vmx_init(void)
-{
- int r, cpu;
+ if (sizeof(struct vcpu_vmx) > *vcpu_size)
+ *vcpu_size = sizeof(struct vcpu_vmx);
+ if (__alignof__(struct vcpu_vmx) > *vcpu_align)
+ *vcpu_align = __alignof__(struct vcpu_vmx);

#if IS_ENABLED(CONFIG_HYPERV)
/*
@@ -8004,11 +7969,38 @@ static int __init vmx_init(void)
enlightened_vmcs = false;
}
#endif
+}

- r = kvm_init(&vt_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- return r;
+void vmx_post_kvm_exit(void)
+{
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (static_branch_unlikely(&enable_evmcs)) {
+ int cpu;
+ struct hv_vp_assist_page *vp_ap;
+ /*
+ * Reset everything to support using non-enlightened VMCS
+ * access later (e.g. when we reload the module with
+ * enlightened_vmcs=0)
+ */
+ for_each_online_cpu(cpu) {
+ vp_ap = hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->nested_control.features.directhypercall = 0;
+ vp_ap->current_nested_vmcs = 0;
+ vp_ap->enlighten_vmentry = 0;
+ }
+
+ static_branch_disable(&enable_evmcs);
+ }
+#endif
+}
+
+int __init vmx_init(void)
+{
+ int r, cpu;

/*
* Must be called after kvm_init() so enable_ept is properly set
@@ -8018,10 +8010,8 @@ static int __init vmx_init(void)
* mitigation mode.
*/
r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
- if (r) {
- vmx_exit();
+ if (r)
return r;
- }

for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
@@ -8045,4 +8035,15 @@ static int __init vmx_init(void)

return 0;
}
-module_init(vmx_init);
+
+void vmx_exit(void)
+{
+#ifdef CONFIG_KEXEC_CORE
+ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
+ synchronize_rcu();
+#endif
+
+ vmx_cleanup_l1d_flush();
+
+ allow_smaller_maxphyaddr = false;
+}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index ccf98e79d8c3..7da541e1c468 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -8,7 +8,10 @@

#include "x86.h"

-extern struct kvm_x86_init_ops vt_init_ops __initdata;
+void __init vmx_pre_kvm_init(unsigned int *vcpu_size, unsigned int *vcpu_align);
+int __init vmx_init(void);
+void vmx_exit(void);
+void vmx_post_kvm_exit(void);

__init int vmx_cpu_has_kvm_support(void);
__init int vmx_disabled_by_bios(void);
--
2.25.1

On 3/4/22 20:48, [email protected] wrote:
> [...]
> -static int __init vmx_init(void)
> -{
> - int r, cpu;
> + if (sizeof(struct vcpu_vmx) > *vcpu_size)
> + *vcpu_size = sizeof(struct vcpu_vmx);
> + if (__alignof__(struct vcpu_vmx) > *vcpu_align)
> + *vcpu_align = __alignof__(struct vcpu_vmx);
Please keep these four lines in vt_init, and rename the rest of
vmx_pre_kvm_init to hv_vp_assist_page_init. Likewise, rename
vmx_post_kvm_exit to hv_vp_assist_page_exit.
Adjusting the vcpu_size and vcpu_align for TDX (I guess) can be added
later when TDX ops are introduced.
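
Something like this, where the hv_vp_assist_page_*() bodies are just
the eVMCS parts moved out of vmx_pre_kvm_init()/vmx_post_kvm_exit()
(sketch only, untested):

  static int __init vt_init(void)
  {
          unsigned int vcpu_size = sizeof(struct vcpu_vmx);
          unsigned int vcpu_align = __alignof__(struct vcpu_vmx);
          int r;

          hv_vp_assist_page_init();

          r = kvm_init(&vt_init_ops, vcpu_size, vcpu_align, THIS_MODULE);
          if (r)
                  goto err_hv_exit;

          r = vmx_init();
          if (r)
                  goto err_kvm_exit;

          return 0;

  err_kvm_exit:
          kvm_exit();
  err_hv_exit:
          hv_vp_assist_page_exit();
          return r;
  }
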
Paolo

Thanks for the review.
On Sun, Mar 13, 2022 at 02:54:15PM +0100,
Paolo Bonzini <[email protected]> wrote:
> On 3/4/22 20:48, [email protected] wrote:
> > -static int __init vmx_init(void)
> > -{
> > - int r, cpu;
> > + if (sizeof(struct vcpu_vmx) > *vcpu_size)
> > + *vcpu_size = sizeof(struct vcpu_vmx);
> > + if (__alignof__(struct vcpu_vmx) > *vcpu_align)
> > + *vcpu_align = __alignof__(struct vcpu_vmx);
>
> Please keep these four lines in vt_init, and rename the rest of
> vmx_pre_kvm_init to hv_vp_assist_page_init. Likewise, rename
> vmx_post_kvm_exit to hv_vp_assist_page_exit.
>
> Adjusting the vcpu_size and vcpu_align for TDX (I guess) can be added later
> when TDX ops are introduced.
Sure. I'll make it like:

  vcpu_size = max(sizeof(struct vcpu_vmx), sizeof(struct vcpu_tdx));
...
Thanks,
--
Isaku Yamahata <[email protected]>