2022-03-04 20:05:37

by Isaku Yamahata

[permalink] [raw]
Subject: [RFC PATCH v5 003/104] KVM: TDX: Detect CPU feature on kernel module initialization

From: Isaku Yamahata <[email protected]>

TDX requires several initialization steps for KVM to create guest TDs.
Detect CPU feature, enable VMX (TDX is based on VMX), detect TDX module
availability, and initialize TDX module. This patch implements the first
step to detect CPU feature. Because VMX isn't enabled yet by VMXON
instruction on KVM kernel module initialization, defer further
initialization step until VMX is enabled by hardware_enable callback.

Introduce a module parameter, enable_tdx, to explicitly enable TDX KVM
support. It's off by default to keep same behavior for those who don't use
TDX. Implement CPU feature detection at KVM kernel module initialization
as hardware_setup callback to check if CPU feature is available and get
some CPU parameters.

Signed-off-by: Isaku Yamahata <[email protected]>
---
arch/x86/kvm/Makefile | 1 +
arch/x86/kvm/vmx/main.c | 15 ++++++++++-
arch/x86/kvm/vmx/tdx.c | 53 ++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/x86_ops.h | 6 +++++
4 files changed, 74 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kvm/vmx/tdx.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ee4d0999f20f..e2c05195cb95 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -24,6 +24,7 @@ kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o vmx/main.o
kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o
+kvm-intel-$(CONFIG_INTEL_TDX_HOST) += vmx/tdx.o

kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index b08ea9c42a11..b79fcc8d81dd 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,19 @@
#include "nested.h"
#include "pmu.h"

+static __init int vt_hardware_setup(void)
+{
+ int ret;
+
+ ret = vmx_hardware_setup();
+ if (ret)
+ return ret;
+
+ tdx_hardware_setup(&vt_x86_ops);
+
+ return 0;
+}
+
struct kvm_x86_ops vt_x86_ops __initdata = {
.name = "kvm_intel",

@@ -147,7 +160,7 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
.cpu_has_kvm_support = vmx_cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
.check_processor_compatibility = vmx_check_processor_compat,
- .hardware_setup = vmx_hardware_setup,
+ .hardware_setup = vt_hardware_setup,
.handle_intel_pt_intr = NULL,

.runtime_ops = &vt_x86_ops,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
new file mode 100644
index 000000000000..1acf08c310c4
--- /dev/null
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/tdx.h>
+
+#include "capabilities.h"
+#include "x86_ops.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "tdx: " fmt
+
+static bool __read_mostly enable_tdx = true;
+module_param_named(tdx, enable_tdx, bool, 0644);
+
+static u64 hkid_mask __ro_after_init;
+static u8 hkid_start_pos __ro_after_init;
+
+static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+ u32 max_pa;
+
+ if (!enable_ept) {
+ pr_warn("Cannot enable TDX with EPT disabled\n");
+ return -EINVAL;
+ }
+
+ if (!platform_has_tdx()) {
+ pr_warn("Cannot enable TDX with SEAMRR disabled\n");
+ return -ENODEV;
+ }
+
+ if (WARN_ON_ONCE(x86_ops->tlb_remote_flush))
+ return -EIO;
+
+ max_pa = cpuid_eax(0x80000008) & 0xff;
+ hkid_start_pos = boot_cpu_data.x86_phys_bits;
+ hkid_mask = GENMASK_ULL(max_pa - 1, hkid_start_pos);
+
+ return 0;
+}
+
+void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+ /*
+ * This function is called at the initialization. No need to protect
+ * enable_tdx.
+ */
+ if (!enable_tdx)
+ return;
+
+ if (__tdx_hardware_setup(&vt_x86_ops))
+ enable_tdx = false;
+}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 40c64fb1f505..ccf98e79d8c3 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -123,4 +123,10 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
#endif
void vmx_setup_mce(struct kvm_vcpu *vcpu);

+#ifdef CONFIG_INTEL_TDX_HOST
+void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+#else
+static inline void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) {}
+#endif
+
#endif /* __KVM_X86_VMX_X86_OPS_H */
--
2.25.1


2022-03-14 12:21:35

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [RFC PATCH v5 003/104] KVM: TDX: Detect CPU feature on kernel module initialization

On 3/4/22 20:48, [email protected] wrote:
> From: Isaku Yamahata <[email protected]>
>
> TDX requires several initialization steps for KVM to create guest TDs.
> Detect CPU feature, enable VMX (TDX is based on VMX), detect TDX module
> availability, and initialize TDX module. This patch implements the first
> step to detect CPU feature. Because VMX isn't enabled yet by VMXON
> instruction on KVM kernel module initialization, defer further
> initialization step until VMX is enabled by hardware_enable callback.
>
> Introduce a module parameter, enable_tdx, to explicitly enable TDX KVM
> support. It's off by default to keep same behavior for those who don't use
> TDX. Implement CPU feature detection at KVM kernel module initialization
> as hardware_setup callback to check if CPU feature is available and get
> some CPU parameters.
>
> Signed-off-by: Isaku Yamahata <[email protected]>
> ---
> arch/x86/kvm/Makefile | 1 +
> arch/x86/kvm/vmx/main.c | 15 ++++++++++-
> arch/x86/kvm/vmx/tdx.c | 53 ++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/x86_ops.h | 6 +++++
> 4 files changed, 74 insertions(+), 1 deletion(-)
> create mode 100644 arch/x86/kvm/vmx/tdx.c
>
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index ee4d0999f20f..e2c05195cb95 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -24,6 +24,7 @@ kvm-$(CONFIG_KVM_XEN) += xen.o
> kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
> vmx/evmcs.o vmx/nested.o vmx/posted_intr.o vmx/main.o
> kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o
> +kvm-intel-$(CONFIG_INTEL_TDX_HOST) += vmx/tdx.o
>
> kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index b08ea9c42a11..b79fcc8d81dd 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -6,6 +6,19 @@
> #include "nested.h"
> #include "pmu.h"
>
> +static __init int vt_hardware_setup(void)
> +{
> + int ret;
> +
> + ret = vmx_hardware_setup();
> + if (ret)
> + return ret;
> +
> + tdx_hardware_setup(&vt_x86_ops);
> +
> + return 0;
> +}
> +
> struct kvm_x86_ops vt_x86_ops __initdata = {
> .name = "kvm_intel",
>
> @@ -147,7 +160,7 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> .cpu_has_kvm_support = vmx_cpu_has_kvm_support,
> .disabled_by_bios = vmx_disabled_by_bios,
> .check_processor_compatibility = vmx_check_processor_compat,
> - .hardware_setup = vmx_hardware_setup,
> + .hardware_setup = vt_hardware_setup,
> .handle_intel_pt_intr = NULL,
>
> .runtime_ops = &vt_x86_ops,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..1acf08c310c4
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,53 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) "tdx: " fmt
> +
> +static bool __read_mostly enable_tdx = true;
> +module_param_named(tdx, enable_tdx, bool, 0644);
> +
> +static u64 hkid_mask __ro_after_init;
> +static u8 hkid_start_pos __ro_after_init;
> +
> +static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + u32 max_pa;
> +
> + if (!enable_ept) {
> + pr_warn("Cannot enable TDX with EPT disabled\n");
> + return -EINVAL;
> + }
> +
> + if (!platform_has_tdx()) {
> + pr_warn("Cannot enable TDX with SEAMRR disabled\n");
> + return -ENODEV;
> + }

This will cause a pr_warn in the logs on all machines that don't have
TDX. Perhaps you can restrict the pr_warn() to machines that have
__seamrr_enabled() == true?

Paolo

> + if (WARN_ON_ONCE(x86_ops->tlb_remote_flush))
> + return -EIO;
> +
> + max_pa = cpuid_eax(0x80000008) & 0xff;
> + hkid_start_pos = boot_cpu_data.x86_phys_bits;
> + hkid_mask = GENMASK_ULL(max_pa - 1, hkid_start_pos);
> +
> + return 0;
> +}
> +
> +void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + /*
> + * This function is called at the initialization. No need to protect
> + * enable_tdx.
> + */
> + if (!enable_tdx)
> + return;
> +
> + if (__tdx_hardware_setup(&vt_x86_ops))
> + enable_tdx = false;
> +}
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 40c64fb1f505..ccf98e79d8c3 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -123,4 +123,10 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
> #endif
> void vmx_setup_mce(struct kvm_vcpu *vcpu);
>
> +#ifdef CONFIG_INTEL_TDX_HOST
> +void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
> +#else
> +static inline void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) {}
> +#endif
> +
> #endif /* __KVM_X86_VMX_X86_OPS_H */

2022-03-15 05:15:32

by Isaku Yamahata

[permalink] [raw]
Subject: Re: [RFC PATCH v5 003/104] KVM: TDX: Detect CPU feature on kernel module initialization

Thanks for comment.

On Sun, Mar 13, 2022 at 02:49:51PM +0100,
Paolo Bonzini <[email protected]> wrote:

> On 3/4/22 20:48, [email protected] wrote:
> > From: Isaku Yamahata <[email protected]>
> > +static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > +{
> > + u32 max_pa;
> > +
> > + if (!enable_ept) {
> > + pr_warn("Cannot enable TDX with EPT disabled\n");
> > + return -EINVAL;
> > + }
> > +
> > + if (!platform_has_tdx()) {
> > + pr_warn("Cannot enable TDX with SEAMRR disabled\n");
> > + return -ENODEV;
> > + }
>
> This will cause a pr_warn in the logs on all machines that don't have TDX.
> Perhaps you can restrict the pr_warn() to machines that have
> __seamrr_enabled() == true?

Makes sense. I'll include the following change.

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 73bb472bd515..aa02c98afd11 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -134,6 +134,7 @@ struct tdsysinfo_struct {
};
} __packed __aligned(TDSYSINFO_STRUCT_ALIGNMENT);

+bool __seamrr_enabled(void);
void tdx_detect_cpu(struct cpuinfo_x86 *c);
int tdx_detect(void);
int tdx_init(void);
@@ -143,6 +144,7 @@ u32 tdx_get_global_keyid(void);
int tdx_keyid_alloc(void);
void tdx_keyid_free(int keyid);
#else
+static inline bool __seamrr_enabled(void) { return false; }
static inline void tdx_detect_cpu(struct cpuinfo_x86 *c) { }
static inline int tdx_detect(void) { return -ENODEV; }
static inline int tdx_init(void) { return -ENODEV; }
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 66dffe815e63..880d8291b380 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -2625,7 +2625,8 @@ static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
}

if (!platform_has_tdx()) {
- pr_warn("Cannot enable TDX with SEAMRR disabled\n");
+ if (__seamrr_enabled())
+ pr_warn("Cannot enable TDX with SEAMRR disabled\n");
return -ENODEV;
}

diff --git a/arch/x86/virt/vmx/tdx.c b/arch/x86/virt/vmx/tdx.c
index d99961b7cb02..bb578a72b2da 100644
--- a/arch/x86/virt/vmx/tdx.c
+++ b/arch/x86/virt/vmx/tdx.c
@@ -186,10 +186,11 @@ static const struct kernel_param_ops tdx_trace_ops = {
module_param_cb(tdx_trace_level, &tdx_trace_ops, &tdx_trace_level, 0644);
MODULE_PARM_DESC(tdx_trace_level, "TDX module trace level");

-static bool __seamrr_enabled(void)
+bool __seamrr_enabled(void)
{
return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
}
+EXPORT_SYMBOL_GPL(__seamrr_enabled);

static void detect_seam_bsp(struct cpuinfo_x86 *c)
{


--
Isaku Yamahata <[email protected]>

2022-04-10 20:05:49

by Sean Christopherson

[permalink] [raw]
Subject: Re: [RFC PATCH v5 003/104] KVM: TDX: Detect CPU feature on kernel module initialization

On Fri, Mar 04, 2022, [email protected] wrote:
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..1acf08c310c4
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,53 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) "tdx: " fmt
> +
> +static bool __read_mostly enable_tdx = true;
> +module_param_named(tdx, enable_tdx, bool, 0644);

This is comically unsafe, userspace must not be allowed to toggle enable_tdx
after KVM is loaded.

> +static u64 hkid_mask __ro_after_init;
> +static u8 hkid_start_pos __ro_after_init;
> +
> +static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + u32 max_pa;
> +
> + if (!enable_ept) {
> + pr_warn("Cannot enable TDX with EPT disabled\n");
> + return -EINVAL;
> + }
> +
> + if (!platform_has_tdx()) {
> + pr_warn("Cannot enable TDX with SEAMRR disabled\n");
> + return -ENODEV;
> + }
> +
> + if (WARN_ON_ONCE(x86_ops->tlb_remote_flush))
> + return -EIO;
> +
> + max_pa = cpuid_eax(0x80000008) & 0xff;
> + hkid_start_pos = boot_cpu_data.x86_phys_bits;
> + hkid_mask = GENMASK_ULL(max_pa - 1, hkid_start_pos);
> +
> + return 0;
> +}
> +
> +void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + /*
> + * This function is called at the initialization. No need to protect
> + * enable_tdx.
> + */
> + if (!enable_tdx)
> + return;
> +
> + if (__tdx_hardware_setup(&vt_x86_ops))
> + enable_tdx = false;

Clearing enable_tdx here unnecessarily risks introducing bugs in the caller,
e.g. acting on enable_tdx before tdx_hardware_setup() is invoked. I'm guessing
this was the result of trying to defer module load until VM creation. With that
gone, the flag can be moved to vmx/main.c, as there should be zero reason for
tdx.c to check/modify enable_tdx, i.e. functions in tdx.c should never be called
if enabled_tdx = false.

An alteranative to

if (enable_tdx && tdx_hardware_setup(&vt_x86_ops))
enable_tdx = false;

would be

enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);

I actually prefer the latter (no "if"), but I already generated and wiped the below
diff before thinking of that.

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index b79fcc8d81dd..43e13c2a804e 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,9 @@
#include "nested.h"
#include "pmu.h"

+static bool __read_mostly enable_tdx = IS_ENABLED(CONFIG_INTEL_TDX_HOST);
+module_param_named(tdx, enable_tdx, bool, 0644);
+
static __init int vt_hardware_setup(void)
{
int ret;
@@ -14,7 +17,8 @@ static __init int vt_hardware_setup(void)
if (ret)
return ret;

- tdx_hardware_setup(&vt_x86_ops);
+ if (enable_tdx && tdx_hardware_setup(&vt_x86_ops))
+ enable_tdx = false;

return 0;
}
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 1acf08c310c4..3f660f323426 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -9,13 +9,10 @@
#undef pr_fmt
#define pr_fmt(fmt) "tdx: " fmt

-static bool __read_mostly enable_tdx = true;
-module_param_named(tdx, enable_tdx, bool, 0644);
-
static u64 hkid_mask __ro_after_init;
static u8 hkid_start_pos __ro_after_init;

-static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+static int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
{
u32 max_pa;

@@ -38,16 +35,3 @@ static int __init __tdx_hardware_setup(struct kvm_x86_ops *x86_ops)

return 0;
}
-
-void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
-{
- /*
- * This function is called at the initialization. No need to protect
- * enable_tdx.
- */
- if (!enable_tdx)
- return;
-
- if (__tdx_hardware_setup(&vt_x86_ops))
- enable_tdx = false;
-}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index ccf98e79d8c3..fd60128eb10a 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -124,9 +124,9 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
void vmx_setup_mce(struct kvm_vcpu *vcpu);

#ifdef CONFIG_INTEL_TDX_HOST
-void __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
#else
-static inline void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) {}
+static inline int void tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return 0; }
#endif

#endif /* __KVM_X86_VMX_X86_OPS_H */