From: Isaku Yamahata <[email protected]>
Currently, KVM VMX module initialization/exit functions are a single
function each. Refactor KVM VMX module initialization functions into KVM
common part and VMX part so that TDX specific part can be added cleanly.
Opportunistically refactor module exit function as well.
The current module initialization flow is,
0.) Check if VMX is supported,
1.) hyper-v specific initialization,
2.) system-wide x86 specific and vendor specific initialization,
3.) Final VMX specific system-wide initialization,
4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
5.) report those sizes to the KVM common layer and KVM common
initialization
Refactor the KVM VMX module initialization function into functions with a
wrapper function to separate VMX logic in vmx.c from a file, main.c, common
among VMX and TDX. Introduce a wrapper function for vmx_init().
The KVM architecture common layer allocates struct kvm with reported size
for architecture-specific code. The KVM VMX module defines its structure
as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
struct vmx_kvm. Similar for vcpu structure. TDX KVM patches will define
TDX specific kvm and vcpu structures.
The current module exit function is also a single function, a combination
of VMX specific logic and common KVM logic. Refactor it into VMX specific
logic and KVM common logic. This is just refactoring to keep the VMX
specific logic in vmx.c from main.c.
Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/x86/kvm/vmx/main.c | 51 +++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.c | 54 +++++---------------------------------
arch/x86/kvm/vmx/x86_ops.h | 13 ++++++++-
3 files changed, 69 insertions(+), 49 deletions(-)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index a59559ff140e..3f49e8e38b6b 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -165,3 +165,54 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
.runtime_ops = &vt_x86_ops,
.pmu_ops = &intel_pmu_ops,
};
+
+static int __init vt_init(void)
+{
+ unsigned int vcpu_size, vcpu_align;
+ int r;
+
+ if (!kvm_is_vmx_supported())
+ return -EOPNOTSUPP;
+
+ /*
+ * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
+ * to unwind if a later step fails.
+ */
+ hv_init_evmcs();
+
+ r = kvm_x86_vendor_init(&vt_init_ops);
+ if (r)
+ return r;
+
+ r = vmx_init();
+ if (r)
+ goto err_vmx_init;
+
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ vt_x86_ops.vm_size = sizeof(struct kvm_vmx);
+ vcpu_size = sizeof(struct vcpu_vmx);
+ vcpu_align = __alignof__(struct vcpu_vmx);
+ r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
+ return 0;
+
+err_kvm_init:
+ vmx_exit();
+err_vmx_init:
+ kvm_x86_vendor_exit();
+ return r;
+}
+module_init(vt_init);
+
+static void vt_exit(void)
+{
+ kvm_exit();
+ kvm_x86_vendor_exit();
+ vmx_exit();
+}
+module_exit(vt_exit);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 47a9a647ae3a..3bbd07412f00 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -553,7 +553,7 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
return 0;
}
-static __init void hv_init_evmcs(void)
+__init void hv_init_evmcs(void)
{
int cpu;
@@ -589,7 +589,7 @@ static __init void hv_init_evmcs(void)
}
}
-static void hv_reset_evmcs(void)
+void hv_reset_evmcs(void)
{
struct hv_vp_assist_page *vp_ap;
@@ -613,10 +613,6 @@ static void hv_reset_evmcs(void)
vp_ap->current_nested_vmcs = 0;
vp_ap->enlighten_vmentry = 0;
}
-
-#else /* IS_ENABLED(CONFIG_HYPERV) */
-static void hv_init_evmcs(void) {}
-static void hv_reset_evmcs(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
/*
@@ -2741,7 +2737,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return 0;
}
-static bool kvm_is_vmx_supported(void)
+bool kvm_is_vmx_supported(void)
{
int cpu = raw_smp_processor_id();
@@ -8381,7 +8377,7 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void __vmx_exit(void)
+void vmx_exit(void)
{
allow_smaller_maxphyaddr = false;
@@ -8392,32 +8388,10 @@ static void __vmx_exit(void)
vmx_cleanup_l1d_flush();
}
-static void vmx_exit(void)
-{
- kvm_exit();
- kvm_x86_vendor_exit();
-
- __vmx_exit();
-}
-module_exit(vmx_exit);
-
-static int __init vmx_init(void)
+int __init vmx_init(void)
{
int r, cpu;
- if (!kvm_is_vmx_supported())
- return -EOPNOTSUPP;
-
- /*
- * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
- * to unwind if a later step fails.
- */
- hv_init_evmcs();
-
- r = kvm_x86_vendor_init(&vt_init_ops);
- if (r)
- return r;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8427,7 +8401,7 @@ static int __init vmx_init(void)
*/
r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
if (r)
- goto err_l1d_flush;
+ return r;
vmx_setup_fb_clear_ctrl();
@@ -8451,21 +8425,5 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
- /*
- * Common KVM initialization _must_ come last, after this, /dev/kvm is
- * exposed to userspace!
- */
- r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
- THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
return 0;
-
-err_kvm_init:
- __vmx_exit();
-err_l1d_flush:
- kvm_x86_vendor_exit();
- return r;
}
-module_init(vmx_init);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index e9ec4d259ff5..051b5c4b5c2f 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -8,11 +8,22 @@
#include "x86.h"
-__init int vmx_hardware_setup(void);
+#if IS_ENABLED(CONFIG_HYPERV)
+__init void hv_init_evmcs(void);
+void hv_reset_evmcs(void);
+#else /* IS_ENABLED(CONFIG_HYPERV) */
+static inline void hv_init_evmcs(void) {}
+static inline void hv_reset_evmcs(void) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
+bool kvm_is_vmx_supported(void);
+int __init vmx_init(void);
+void vmx_exit(void);
extern struct kvm_x86_ops vt_x86_ops __initdata;
extern struct kvm_x86_init_ops vt_init_ops __initdata;
+__init int vmx_hardware_setup(void);
void vmx_hardware_unsetup(void);
int vmx_check_processor_compat(void);
int vmx_hardware_enable(void);
--
2.25.1
On Monday, March 13, 2023 1:55 AM, [email protected] wrote:
> Currently, KVM VMX module initialization/exit functions are a single function
> each. Refactor KVM VMX module initialization functions into KVM common
> part and VMX part so that TDX specific part can be added cleanly.
> Opportunistically refactor module exit function as well.
>
> The current module initialization flow is,
> 0.) Check if VMX is supported,
> 1.) hyper-v specific initialization,
> 2.) system-wide x86 specific and vendor specific initialization,
> 3.) Final VMX specific system-wide initialization,
> 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> 5.) report those sizes to the KVM common layer and KVM common
> initialization
>
> Refactor the KVM VMX module initialization function into functions with a
> wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> among VMX and TDX. Introduce a wrapper function for vmx_init().
>
> The KVM architecture common layer allocates struct kvm with reported size for
> architecture-specific code. The KVM VMX module defines its structure as
> struct vmx_kvm { struct kvm; VMX specific members;} and uses it as struct vmx
> kvm. Similar for vcpu structure. TDX KVM patches will define TDX specific kvm
> and vcpu structures.
>
> The current module exit function is also a single function, a combination of
> VMX specific logic and common KVM logic. Refactor it into VMX specific logic
> and KVM common logic. This is just refactoring to keep the VMX specific logic
> in vmx.c from main.c.
>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/x86/kvm/vmx/main.c | 51 +++++++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/vmx.c | 54 +++++---------------------------------
> arch/x86/kvm/vmx/x86_ops.h | 13 ++++++++-
> 3 files changed, 69 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index
> a59559ff140e..3f49e8e38b6b 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -165,3 +165,54 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> .runtime_ops = &vt_x86_ops,
> .pmu_ops = &intel_pmu_ops,
> };
> +
> +static int __init vt_init(void)
> +{
> + unsigned int vcpu_size, vcpu_align;
> + int r;
> +
> + if (!kvm_is_vmx_supported())
> + return -EOPNOTSUPP;
> +
> + /*
> + * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> + * to unwind if a later step fails.
> + */
> + hv_init_evmcs();
> +
> + r = kvm_x86_vendor_init(&vt_init_ops);
> + if (r)
> + return r;
> +
> + r = vmx_init();
> + if (r)
> + goto err_vmx_init;
> +
> + /*
> + * Common KVM initialization _must_ come last, after this, /dev/kvm is
> + * exposed to userspace!
> + */
> + vt_x86_ops.vm_size = sizeof(struct kvm_vmx);
> + vcpu_size = sizeof(struct vcpu_vmx);
> + vcpu_align = __alignof__(struct vcpu_vmx);
> + r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
> + if (r)
> + goto err_kvm_init;
> +
> + return 0;
> +
> +err_kvm_init:
> + vmx_exit();
> +err_vmx_init:
> + kvm_x86_vendor_exit();
> + return r;
> +}
> +module_init(vt_init);
I had a patch to fix a bug here, maybe you can take it:
kvm_x86_vendor_init copies vt_x86_ops to kvm_x86_ops. vt_x86_ops.vm_size
needs to be updated before calling kvm_x86_vendor_init so that kvm_x86_ops
can get the correct vm_size.
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 41f07d92aaf6..37a8c212c653 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -1126,18 +1126,6 @@ static int __init vt_init(void)
*/
hv_init_evmcs();
- r = kvm_x86_vendor_init(&vt_init_ops);
- if (r)
- return r;
-
- r = vmx_init();
- if (r)
- goto err_vmx_init;
-
- r = tdx_init();
- if (r)
- goto err_tdx_init;
-
/*
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
@@ -1153,6 +1141,19 @@ static int __init vt_init(void)
vcpu_align = max_t(unsigned int, vcpu_align,
__alignof__(struct vcpu_tdx));
}
+
+ r = kvm_x86_vendor_init(&vt_init_ops);
+ if (r)
+ return r;
+
+ r = vmx_init();
+ if (r)
+ goto err_vmx_init;
+
+ r = tdx_init();
+ if (r)
+ goto err_tdx_init;
+
r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
if (r)
goto err_kvm_init;
--
On Mon, Mar 13, 2023 at 02:49:03PM +0000,
"Wang, Wei W" <[email protected]> wrote:
> On Monday, March 13, 2023 1:55 AM, [email protected] wrote:
> > Currently, KVM VMX module initialization/exit functions are a single function
> > each. Refactor KVM VMX module initialization functions into KVM common
> > part and VMX part so that TDX specific part can be added cleanly.
> > Opportunistically refactor module exit function as well.
> >
> > The current module initialization flow is,
> > 0.) Check if VMX is supported,
> > 1.) hyper-v specific initialization,
> > 2.) system-wide x86 specific and vendor specific initialization,
> > 3.) Final VMX specific system-wide initialization,
> > 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> > 5.) report those sizes to the KVM common layer and KVM common
> > initialization
> >
> > Refactor the KVM VMX module initialization function into functions with a
> > wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> > among VMX and TDX. Introduce a wrapper function for vmx_init().
> >
> > The KVM architecture common layer allocates struct kvm with reported size for
> > architecture-specific code. The KVM VMX module defines its structure as
> > struct vmx_kvm { struct kvm; VMX specific members;} and uses it as struct vmx
> > kvm. Similar for vcpu structure. TDX KVM patches will define TDX specific kvm
> > and vcpu structures.
> >
> > The current module exit function is also a single function, a combination of
> > VMX specific logic and common KVM logic. Refactor it into VMX specific logic
> > and KVM common logic. This is just refactoring to keep the VMX specific logic
> > in vmx.c from main.c.
> >
> > Signed-off-by: Isaku Yamahata <[email protected]>
> > ---
> > arch/x86/kvm/vmx/main.c | 51 +++++++++++++++++++++++++++++++++++
> > arch/x86/kvm/vmx/vmx.c | 54 +++++---------------------------------
> > arch/x86/kvm/vmx/x86_ops.h | 13 ++++++++-
> > 3 files changed, 69 insertions(+), 49 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index
> > a59559ff140e..3f49e8e38b6b 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -165,3 +165,54 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> > .runtime_ops = &vt_x86_ops,
> > .pmu_ops = &intel_pmu_ops,
> > };
> > +
> > +static int __init vt_init(void)
> > +{
> > + unsigned int vcpu_size, vcpu_align;
> > + int r;
> > +
> > + if (!kvm_is_vmx_supported())
> > + return -EOPNOTSUPP;
> > +
> > + /*
> > + * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> > + * to unwind if a later step fails.
> > + */
> > + hv_init_evmcs();
> > +
> > + r = kvm_x86_vendor_init(&vt_init_ops);
> > + if (r)
> > + return r;
> > +
> > + r = vmx_init();
> > + if (r)
> > + goto err_vmx_init;
> > +
> > + /*
> > + * Common KVM initialization _must_ come last, after this, /dev/kvm is
> > + * exposed to userspace!
> > + */
> > + vt_x86_ops.vm_size = sizeof(struct kvm_vmx);
> > + vcpu_size = sizeof(struct vcpu_vmx);
> > + vcpu_align = __alignof__(struct vcpu_vmx);
> > + r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
> > + if (r)
> > + goto err_kvm_init;
> > +
> > + return 0;
> > +
> > +err_kvm_init:
> > + vmx_exit();
> > +err_vmx_init:
> > + kvm_x86_vendor_exit();
> > + return r;
> > +}
> > +module_init(vt_init);
>
> I had a patch to fix a bug here, maybe you can take it:
>
> kvm_x86_vendor_init copies vt_x86_ops to kvm_x86_ops. vt_x86_ops.vm_size
> needs to be updated before calling kvm_x86_vendor_init so that kvm_x86_ops
> can get the correct vm_size.
Thanks for catching it. With your patch, vm_size is always
max(sizeof struct kvm_vmx, sizeof struct kvm_tdx) even when the admin sets
kvm_intel.tdx=true and tdx is disabled by error.
option 1: Ignore such waste. Your patch. The difference is small and it's only
the error case. Locally I have the following values.
sizeof(struct kvm_vmx) = 44576
sizeof(struct vcpu_vmx) = 10432
sizeof(struct kvm_tdx)= 44632
sizeof(struct vcpu_tdx) = 8192
I suspect the actual allocation size for struct kvm is the same. That's
the reason why I didn't hit problem.
option 2: Explicitly update vm_size after kvm_x86_vendor_init()
struct kvm_x86_ops isn't exported. It would be ugly.
option 3: Allow setup_hardware() to update vm_size.
setup_hardware(void) => setup_hardware(unsigned int *vm_size)
It's confusing because kvm_x86_ops.vm_size is already initialized.
Let's go with option 1 (your patch).
--
Isaku Yamahata <[email protected]>
On Tuesday, March 14, 2023 2:40 AM, Isaku Yamahata wrote:
> > I had a patch to fix a bug here, maybe you can take it:
> >
> > kvm_x86_vendor_init copies vt_x86_ops to kvm_x86_ops.
> > vt_x86_ops.vm_size needs to be updated before calling
> > kvm_x86_vendor_init so that kvm_x86_ops can get the correct vm_size.
>
> Thanks for catching it. With your patch, vm_size is always max(sizeof struct
> kvm_vmx, sizeof struct kvm_tdx) even when the admin sets kvm_intel.tdx=true
> and tdx is disabled by error.
This seems to be another bug, where enable_tdx should be set to false if tdx
fails to be enabled:
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index d6a9f705a2a1..ef1e794efb97 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -4552,6 +4552,7 @@ static int tdx_module_setup(void)
ret = tdx_enable();
if (ret) {
+ enable_tdx = false;
pr_info("Failed to initialize TDX module.\n");
return ret;
}
>
> option 1: Ignore such waste. Your patch. The difference is small and it's only
> the error case. Locally I have the following values.
> sizeof(struct kvm_vmx) = 44576
> sizeof(struct vcpu_vmx) = 10432
> sizeof(struct kvm_tdx)= 44632
> sizeof(struct vcpu_tdx) = 8192
> I suspect the actual allocation size for struct kvm is same. That's
> the reason why I didn't hit problem.
No, the actual allocation size isn't the same.
You didn’t get error notices because the kvm_tdx fields are still located
in the same page as that allocated for kvm_vmx, though that part of memory
isn’t explicitly allocated. If kvm_tdx size grows and crosses the page boundary,
you will see unexpected faults. Or if this part of memory (illegally used by
kvm_tdx) later happens to be given to other kernel components to use, you
would see random errors somewhere.
>
> option 2: Explicitly update vm_size after kvm_x86_vendor_init()
> struct kvm_x86_ops isn't exported. It would be ugly.
>
> option 3: Allow setup_hardware() to update vm_size.
> setup_hardware(void) => setup_hardware(unsigned int *vm_size)
> It's confusing because kvm_x86_ops.vm_size is already initialized.
>
> Let's go with option 1(your patch).
Sounds good.
On 3/13/2023 1:55 AM, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> Currently, KVM VMX module initialization/exit functions are a single
> function each. Refactor KVM VMX module initialization functions into KVM
> common part and VMX part so that TDX specific part can be added cleanly.
> Opportunistically refactor module exit function as well.
>
> The current module initialization flow is,
> 0.) Check if VMX is supported,
> 1.) hyper-v specific initialization,
> 2.) system-wide x86 specific and vendor specific initialization,
> 3.) Final VMX specific system-wide initialization,
> 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> 5.) report those sizes to the KVM common layer and KVM common
> initialization
>
> Refactor the KVM VMX module initialization function into functions with a
> wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> among VMX and TDX. Introduce a wrapper function for vmx_init().
>
> The KVM architecture common layer allocates struct kvm with reported size
> for architecture-specific code. The KVM VMX module defines its structure
> as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
> struct vmx kvm. Similar for vcpu structure. TDX KVM patches will define
> TDX specific kvm and vcpu structures.
>
> The current module exit function is also a single function, a combination
> of VMX specific logic and common KVM logic. Refactor it into VMX specific
> logic and KVM common logic. This is just refactoring to keep the VMX
> specific logic in vmx.c from main.c.
>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/x86/kvm/vmx/main.c | 51 +++++++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/vmx.c | 54 +++++---------------------------------
> arch/x86/kvm/vmx/x86_ops.h | 13 ++++++++-
> 3 files changed, 69 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index a59559ff140e..3f49e8e38b6b 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -165,3 +165,54 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> .runtime_ops = &vt_x86_ops,
> .pmu_ops = &intel_pmu_ops,
> };
> +
> +static int __init vt_init(void)
> +{
> + unsigned int vcpu_size, vcpu_align;
> + int r;
> +
> + if (!kvm_is_vmx_supported())
> + return -EOPNOTSUPP;
> +
> + /*
> + * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> + * to unwind if a later step fails.
> + */
> + hv_init_evmcs();
> +
> + r = kvm_x86_vendor_init(&vt_init_ops);
> + if (r)
> + return r;
> +
> + r = vmx_init();
> + if (r)
> + goto err_vmx_init;
> +
> + /*
> + * Common KVM initialization _must_ come last, after this, /dev/kvm is
> + * exposed to userspace!
> + */
> + vt_x86_ops.vm_size = sizeof(struct kvm_vmx);
nit: why initialize vm_size again? I noticed that it has already been assigned a value when create vt_x86_ops.
+struct kvm_x86_ops vt_x86_ops __initdata = {
...
+ .vm_size = sizeof(struct kvm_vmx),
+ .vm_init = vmx_vm_init,
> + vcpu_size = sizeof(struct vcpu_vmx);
> + vcpu_align = __alignof__(struct vcpu_vmx);
> + r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
> + if (r)
> + goto err_kvm_init;
> +
> + return 0;
> +
> +err_kvm_init:
> + vmx_exit();
> +err_vmx_init:
> + kvm_x86_vendor_exit();
> + return r;
> +}
> +module_init(vt_init);
> +
> +static void vt_exit(void)
> +{
> + kvm_exit();
> + kvm_x86_vendor_exit();
> + vmx_exit();
> +}
> +module_exit(vt_exit);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 47a9a647ae3a..3bbd07412f00 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -553,7 +553,7 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> -static __init void hv_init_evmcs(void)
> +__init void hv_init_evmcs(void)
> {
> int cpu;
>
> @@ -589,7 +589,7 @@ static __init void hv_init_evmcs(void)
> }
> }
>
> -static void hv_reset_evmcs(void)
> +void hv_reset_evmcs(void)
> {
> struct hv_vp_assist_page *vp_ap;
>
> @@ -613,10 +613,6 @@ static void hv_reset_evmcs(void)
> vp_ap->current_nested_vmcs = 0;
> vp_ap->enlighten_vmentry = 0;
> }
> -
> -#else /* IS_ENABLED(CONFIG_HYPERV) */
> -static void hv_init_evmcs(void) {}
> -static void hv_reset_evmcs(void) {}
> #endif /* IS_ENABLED(CONFIG_HYPERV) */
>
> /*
> @@ -2741,7 +2737,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> return 0;
> }
>
> -static bool kvm_is_vmx_supported(void)
> +bool kvm_is_vmx_supported(void)
> {
> int cpu = raw_smp_processor_id();
>
> @@ -8381,7 +8377,7 @@ static void vmx_cleanup_l1d_flush(void)
> l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
> }
>
> -static void __vmx_exit(void)
> +void vmx_exit(void)
> {
> allow_smaller_maxphyaddr = false;
>
> @@ -8392,32 +8388,10 @@ static void __vmx_exit(void)
> vmx_cleanup_l1d_flush();
> }
>
> -static void vmx_exit(void)
> -{
> - kvm_exit();
> - kvm_x86_vendor_exit();
> -
> - __vmx_exit();
> -}
> -module_exit(vmx_exit);
> -
> -static int __init vmx_init(void)
> +int __init vmx_init(void)
> {
> int r, cpu;
>
> - if (!kvm_is_vmx_supported())
> - return -EOPNOTSUPP;
> -
> - /*
> - * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> - * to unwind if a later step fails.
> - */
> - hv_init_evmcs();
> -
> - r = kvm_x86_vendor_init(&vt_init_ops);
> - if (r)
> - return r;
> -
> /*
> * Must be called after common x86 init so enable_ept is properly set
> * up. Hand the parameter mitigation value in which was stored in
> @@ -8427,7 +8401,7 @@ static int __init vmx_init(void)
> */
> r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
> if (r)
> - goto err_l1d_flush;
> + return r;
>
> vmx_setup_fb_clear_ctrl();
>
> @@ -8451,21 +8425,5 @@ static int __init vmx_init(void)
> if (!enable_ept)
> allow_smaller_maxphyaddr = true;
>
> - /*
> - * Common KVM initialization _must_ come last, after this, /dev/kvm is
> - * exposed to userspace!
> - */
> - r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
> - THIS_MODULE);
> - if (r)
> - goto err_kvm_init;
> -
> return 0;
> -
> -err_kvm_init:
> - __vmx_exit();
> -err_l1d_flush:
> - kvm_x86_vendor_exit();
> - return r;
> }
> -module_init(vmx_init);
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index e9ec4d259ff5..051b5c4b5c2f 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -8,11 +8,22 @@
>
> #include "x86.h"
>
> -__init int vmx_hardware_setup(void);
> +#if IS_ENABLED(CONFIG_HYPERV)
> +__init void hv_init_evmcs(void);
> +void hv_reset_evmcs(void);
> +#else /* IS_ENABLED(CONFIG_HYPERV) */
> +static inline void hv_init_evmcs(void) {}
> +static inline void hv_reset_evmcs(void) {}
> +#endif /* IS_ENABLED(CONFIG_HYPERV) */
> +
> +bool kvm_is_vmx_supported(void);
> +int __init vmx_init(void);
> +void vmx_exit(void);
>
> extern struct kvm_x86_ops vt_x86_ops __initdata;
> extern struct kvm_x86_init_ops vt_init_ops __initdata;
>
> +__init int vmx_hardware_setup(void);
> void vmx_hardware_unsetup(void);
> int vmx_check_processor_compat(void);
> int vmx_hardware_enable(void);
On Tue, May 23, 2023 at 10:23:57AM +0800,
"Wen, Qian" <[email protected]> wrote:
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index a59559ff140e..3f49e8e38b6b 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -165,3 +165,54 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> > .runtime_ops = &vt_x86_ops,
> > .pmu_ops = &intel_pmu_ops,
> > };
> > +
> > +static int __init vt_init(void)
> > +{
> > + unsigned int vcpu_size, vcpu_align;
> > + int r;
> > +
> > + if (!kvm_is_vmx_supported())
> > + return -EOPNOTSUPP;
> > +
> > + /*
> > + * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> > + * to unwind if a later step fails.
> > + */
> > + hv_init_evmcs();
> > +
> > + r = kvm_x86_vendor_init(&vt_init_ops);
> > + if (r)
> > + return r;
> > +
> > + r = vmx_init();
> > + if (r)
> > + goto err_vmx_init;
> > +
> > + /*
> > + * Common KVM initialization _must_ come last, after this, /dev/kvm is
> > + * exposed to userspace!
> > + */
> > + vt_x86_ops.vm_size = sizeof(struct kvm_vmx);
>
> nit: why initialize vm_size again? I noticed that it has already been assigned a value when create vt_x86_ops.
Thanks, will remove the line.
--
Isaku Yamahata <[email protected]>