From: Andi Kleen <[email protected]>
Add an early #VE handler to convert early port IOs into TDCALLs.
TDX cannot do port IO directly. The TDX module triggers a #VE
exception to let the guest kernel to emulate operations like
IO ports, by converting them into TDCALLs to call the host.
A fully featured #VE handler support for port IO will be added
later in this patch set (in patch titled "x86/tdx: Handle port
I/O). But it can be used only at later point in the boot
process. So to support port IO in early boot code, add a
minimal support in early exception handler. This is similar to
what AMD SEV does.
This is mainly to support early_printk's serial driver, as
well as potentially the VGA driver (although it is expected
not to be used).
The early handler only does IO calls and nothing else, and
anything that goes wrong results in a normal early exception
panic.
It cannot share the code paths with the normal #VE handler
because it needs to avoid using trace calls or printk.
This early handler allows us to use the normal in*/out*
macros without patching them for every driver. We don't
expect IO port IO to be performance critical at all, so an
extra #VE exception is no problem. There are also no concerns
with nesting, since there should be no NMIs this early.
Signed-off-by: Andi Kleen <[email protected]>
---
arch/x86/include/asm/tdx.h | 6 ++++
arch/x86/kernel/head64.c | 4 +++
arch/x86/kernel/tdx.c | 59 ++++++++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 53f844200909..e880a9dd40d3 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -72,6 +72,7 @@ u64 __tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15,
struct tdx_hypercall_output *out);
bool tdx_protected_guest_has(unsigned long flag);
+bool tdg_early_handle_ve(struct pt_regs *regs);
#else // !CONFIG_INTEL_TDX_GUEST
@@ -87,6 +88,11 @@ static inline bool tdx_protected_guest_has(unsigned long flag)
return false;
}
+static inline bool tdg_early_handle_ve(struct pt_regs *regs)
+{
+ return false;
+}
+
#endif /* CONFIG_INTEL_TDX_GUEST */
#ifdef CONFIG_INTEL_TDX_GUEST_KVM
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 75f2401cb5db..23d1ff4626aa 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -410,6 +410,10 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
return;
+ if (IS_ENABLED(CONFIG_INTEL_TDX_GUEST) &&
+ trapnr == X86_TRAP_VE && tdg_early_handle_ve(regs))
+ return;
+
early_fixup_exception(regs, trapnr);
}
diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index 858e7f3d8f36..ca3442b7accf 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -13,6 +13,10 @@
#define TDINFO 1
#define TDGETVEINFO 3
+#define VE_GET_IO_TYPE(exit_qual) (((exit_qual) & 8) ? 0 : 1)
+#define VE_GET_IO_SIZE(exit_qual) (((exit_qual) & 7) + 1)
+#define VE_GET_PORT_NUM(exit_qual) ((exit_qual) >> 16)
+
static struct {
unsigned int gpa_width;
unsigned long attributes;
@@ -256,6 +260,61 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
return ret;
}
+/*
+ * Handle early IO, mainly for early printks serial output.
+ * This avoids anything that doesn't work early on, like tracing
+ * or printks, by calling the low level functions directly. Any
+ * problems are handled by falling back to a standard early exception.
+ *
+ * Assumes the IO instruction was using ax, which is enforced
+ * by the standard io.h macros.
+ */
+static __init bool tdx_early_io(struct ve_info *ve, struct pt_regs *regs)
+{
+ struct tdx_hypercall_output outh;
+ int out = VE_GET_IO_TYPE(ve->exit_qual);
+ int size = VE_GET_IO_SIZE(ve->exit_qual);
+ int port = VE_GET_PORT_NUM(ve->exit_qual);
+ int ret;
+
+ if (out) {
+ ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
+ size, 1, port,
+ regs->ax,
+ &outh);
+ } else {
+ u64 mask = GENMASK(8 * size, 0);
+
+ ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
+ size, 0, port,
+ regs->ax, &outh);
+ if (!ret) {
+ regs->ax &= ~mask;
+ regs->ax |= outh.r11 & mask;
+ }
+ }
+
+ return !ret;
+}
+
+/*
+ * Early #VE exception handler. Just used to handle port IOs
+ * for early_printk. If anything goes wrong handle it like
+ * a normal early exception.
+ */
+__init bool tdg_early_handle_ve(struct pt_regs *regs)
+{
+ struct ve_info ve;
+
+ if (tdg_get_ve_info(&ve))
+ return false;
+
+ if (ve.exit_reason == EXIT_REASON_IO_INSTRUCTION)
+ return tdx_early_io(&ve, regs);
+
+ return false;
+}
+
void __init tdx_early_init(void)
{
if (!cpuid_has_tdx_guest())
--
2.25.1
On Wed, 2021-05-26 at 21:23 -0700, Kuppuswamy Sathyanarayanan wrote:
> From: Andi Kleen <[email protected]>
>
> Add an early #VE handler to convert early port IOs into TDCALLs.
>
> TDX cannot do port IO directly. The TDX module triggers a #VE
> exception to let the guest kernel to emulate operations like
> IO ports, by converting them into TDCALLs to call the host.
s,kernel to emulate operations like IO ports,kernel emulate port I/O,
>
> A fully featured #VE handler support for port IO will be added
> later in this patch set (in patch titled "x86/tdx: Handle port
> I/O). But it can be used only at later point in the boot
> process. So to support port IO in early boot code, add a
> minimal support in early exception handler. This is similar to
> what AMD SEV does.
Clarify "fully featured". I naively thought that the notes below about
trace and printk were implying that the full featured #VE handler will
use printk() so it can't also use #VE since printk() would recurse into
the #VE handler if the serial console is using port IO.
...but that does not seem to be the reason since:
http://lore.kernel.org/r/20210527042356.3983284-4-sathyanarayanan.kuppuswamy@linux.intel.com
...is also using #VE for port IO emulation?
> This is mainly to support early_printk's serial driver, as
> well as potentially the VGA driver (although it is expected
> not to be used).
>
> The early handler only does IO calls and nothing else, and
> anything that goes wrong results in a normal early exception
> panic.
>
> It cannot share the code paths with the normal #VE handler
> because it needs to avoid using trace calls or printk.
>
> This early handler allows us to use the normal in*/out*
> macros without patching them for every driver. We don't
> expect IO port IO to be performance critical at all, so an
> extra #VE exception is no problem.
"There is no expectation that early port IO is performance critical, so
the #VE emulation cost is worth the simplicity benefit of not patching
out port IO usage in early code."
> There are also no concerns
> with nesting, since there should be no NMIs this early.
>
> Signed-off-by: Andi Kleen <[email protected]>
Missing a Sathya signed-off-by.
> ---
> arch/x86/include/asm/tdx.h | 6 ++++
> arch/x86/kernel/head64.c | 4 +++
> arch/x86/kernel/tdx.c | 59 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 69 insertions(+)
>
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 53f844200909..e880a9dd40d3 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -72,6 +72,7 @@ u64 __tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15,
> struct tdx_hypercall_output *out);
>
> bool tdx_protected_guest_has(unsigned long flag);
> +bool tdg_early_handle_ve(struct pt_regs *regs);
>
> #else // !CONFIG_INTEL_TDX_GUEST
>
> @@ -87,6 +88,11 @@ static inline bool tdx_protected_guest_has(unsigned long flag)
> return false;
> }
>
> +static inline bool tdg_early_handle_ve(struct pt_regs *regs)
> +{
> + return false;
> +}
> +
> #endif /* CONFIG_INTEL_TDX_GUEST */
>
> #ifdef CONFIG_INTEL_TDX_GUEST_KVM
> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> index 75f2401cb5db..23d1ff4626aa 100644
> --- a/arch/x86/kernel/head64.c
> +++ b/arch/x86/kernel/head64.c
> @@ -410,6 +410,10 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
> trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
> return;
>
> + if (IS_ENABLED(CONFIG_INTEL_TDX_GUEST) &&
This explicit IS_ENABLED() is unnecessary given tdg_early_handle_ve()
returns false in the CONFIG_INTEL_TDX_GUEST=n case as defined above.
> + trapnr == X86_TRAP_VE && tdg_early_handle_ve(regs))
> + return;
> +
> early_fixup_exception(regs, trapnr);
> }
>
> diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
> index 858e7f3d8f36..ca3442b7accf 100644
> --- a/arch/x86/kernel/tdx.c
> +++ b/arch/x86/kernel/tdx.c
> @@ -13,6 +13,10 @@
> #define TDINFO 1
> #define TDGETVEINFO 3
>
> +#define VE_GET_IO_TYPE(exit_qual) (((exit_qual) & 8) ? 0 : 1)
How about VE_IS_IO_OUT()? To match its usage as a flag below...
> +#define VE_GET_IO_SIZE(exit_qual) (((exit_qual) & 7) + 1)
> +#define VE_GET_PORT_NUM(exit_qual) ((exit_qual) >> 16)
> +
> static struct {
> unsigned int gpa_width;
> unsigned long attributes;
> @@ -256,6 +260,61 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
> return ret;
> }
>
> +/*
> + * Handle early IO, mainly for early printks serial output.
> + * This avoids anything that doesn't work early on, like tracing
> + * or printks, by calling the low level functions directly. Any
> + * problems are handled by falling back to a standard early exception.
> + *
> + * Assumes the IO instruction was using ax, which is enforced
> + * by the standard io.h macros.
> + */
> +static __init bool tdx_early_io(struct ve_info *ve, struct pt_regs *regs)
> +{
> + struct tdx_hypercall_output outh;
> + int out = VE_GET_IO_TYPE(ve->exit_qual);
> + int size = VE_GET_IO_SIZE(ve->exit_qual);
> + int port = VE_GET_PORT_NUM(ve->exit_qual);
> + int ret;
> +
...and if @out is a flag then the below can be simplified to:
ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, out, port,
regs->ax, &outh);
if (!out && !ret) {
u64 mask = GENMASK(8 * size, 0);
regs->ax &= ~mask;
regs->ax |= outh.r11 & mask;
}
return !ret;
With the above fixups and clarifications you can add:
Reviewed-by: Dan Williams <[email protected]>
...but the discussion here about fully featured leaves me confused
about the approach taken in the next patch.
> + if (out) {
> + ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
> + size, 1, port,
> + regs->ax,
> + &outh);
> + } else {
> + u64 mask = GENMASK(8 * size, 0);
> +
> + ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
> + size, 0, port,
> + regs->ax, &outh);
> + if (!ret) {
> + regs->ax &= ~mask;
> + regs->ax |= outh.r11 & mask;
> + }
> + }
> +
> + return !ret;
> +}
> +
> +/*
> + * Early #VE exception handler. Just used to handle port IOs
> + * for early_printk. If anything goes wrong handle it like
> + * a normal early exception.
> + */
> +__init bool tdg_early_handle_ve(struct pt_regs *regs)
> +{
> + struct ve_info ve;
> +
> + if (tdg_get_ve_info(&ve))
> + return false;
> +
> + if (ve.exit_reason == EXIT_REASON_IO_INSTRUCTION)
> + return tdx_early_io(&ve, regs);
> +
> + return false;
> +}
> +
> void __init tdx_early_init(void)
> {
> if (!cpuid_has_tdx_guest())