Subject: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O

From: "Kirill A. Shutemov" <[email protected]>

TDX hypervisors cannot emulate instructions directly. This
includes port IO which is normally emulated in the hypervisor.
All port IO instructions inside TDX trigger the #VE exception
in the guest and would be normally emulated there.

For the really early code in the decompressor, #VE cannot be
used because the IDT needed for handling the exception is not
set-up, and some other infrastructure needed by the handler
is missing. So to support port IO in decompressor code, add
support for paravirt based I/O port virtualization.

Also string I/O is not supported in TDX guest. So, unroll the
string I/O operation into a loop operating on one element at
a time. This method is similar to AMD SEV, so just extend the
support for TDX guest platform.

Co-developed-by: Kuppuswamy Sathyanarayanan <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
---
arch/x86/boot/compressed/Makefile | 1 +
arch/x86/boot/compressed/tdcall.S | 3 ++
arch/x86/boot/compressed/tdx.c | 28 ++++++++++++++++++
arch/x86/include/asm/io.h | 7 +++--
arch/x86/include/asm/tdx.h | 47 ++++++++++++++++++++++++++++++-
arch/x86/kernel/tdx.c | 39 +++++++++++++++++++++++++
6 files changed, 122 insertions(+), 3 deletions(-)
create mode 100644 arch/x86/boot/compressed/tdcall.S

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index a2554621cefe..a944a2038797 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -97,6 +97,7 @@ endif

vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdcall.o

vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
new file mode 100644
index 000000000000..aafadc136c88
--- /dev/null
+++ b/arch/x86/boot/compressed/tdcall.S
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "../../kernel/tdcall.S"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
index 0a87c1775b67..cb20962c7da6 100644
--- a/arch/x86/boot/compressed/tdx.c
+++ b/arch/x86/boot/compressed/tdx.c
@@ -4,6 +4,8 @@
*/

#include <asm/tdx.h>
+#include <asm/vmx.h>
+#include <vdso/limits.h>

static int __ro_after_init tdx_guest = -1;

@@ -30,3 +32,29 @@ bool is_tdx_guest(void)
return !!tdx_guest;
}

+/*
+ * Helper function used for making hypercall for "out"
+ * instruction. It will be called from __out IO
+ * macro (in tdx.h).
+ */
+void tdg_out(int size, int port, unsigned int value)
+{
+ __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
+ port, value, NULL);
+}
+
+/*
+ * Helper function used for making hypercall for "in"
+ * instruction. It will be called from __in IO macro
+ * (in tdx.h). If IO is failed, it will return all 1s.
+ */
+unsigned int tdg_in(int size, int port)
+{
+ struct tdx_hypercall_output out = {0};
+ int err;
+
+ err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
+ port, 0, &out);
+
+ return err ? UINT_MAX : out.r11;
+}
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ef7a686a55a9..daa75c8eef5d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -40,6 +40,7 @@

#include <linux/string.h>
#include <linux/compiler.h>
+#include <linux/protected_guest.h>
#include <asm/page.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
@@ -309,7 +310,8 @@ static inline unsigned type in##bwl##_p(int port) \
\
static inline void outs##bwl(int port, const void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
+ if (sev_key_active() || \
+ protected_guest_has(VM_UNROLL_STRING_IO)) { \
unsigned type *value = (unsigned type *)addr; \
while (count) { \
out##bwl(*value, port); \
@@ -325,7 +327,8 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
\
static inline void ins##bwl(int port, void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
+ if (sev_key_active() || \
+ protected_guest_has(VM_UNROLL_STRING_IO)) { \
unsigned type *value = (unsigned type *)addr; \
while (count) { \
*value = in##bwl(port); \
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index e880a9dd40d3..6ba2dcea533f 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -5,6 +5,8 @@

#define TDX_CPUID_LEAF_ID 0x21

+#ifndef __ASSEMBLY__
+
#ifdef CONFIG_INTEL_TDX_GUEST

#include <asm/cpufeature.h>
@@ -74,6 +76,48 @@ u64 __tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15,
bool tdx_protected_guest_has(unsigned long flag);
bool tdg_early_handle_ve(struct pt_regs *regs);

+void tdg_out(int size, int port, unsigned int value);
+unsigned int tdg_in(int size, int port);
+
+/* Helper function for converting {b,w,l} to byte size */
+static inline int tdx_get_iosize(char *str)
+{
+ if (str[0] == 'w')
+ return 2;
+ else if (str[0] == 'l')
+ return 4;
+
+ return 1;
+}
+
+/*
+ * To support I/O port access in decompressor or early kernel init
+ * code, since #VE exception handler cannot be used, use paravirt
+ * model to implement __in/__out macros which will in turn be used
+ * by in{b,w,l}()/out{b,w,l} I/O helper macros used in kernel. You
+ * can find the __in/__out macro usage in arch/x86/include/asm/io.h
+ */
+#ifdef BOOT_COMPRESSED_MISC_H
+#define __out(bwl, bw) \
+do { \
+ if (is_tdx_guest()) { \
+ tdg_out(tdx_get_iosize(#bwl), port, value); \
+ } else { \
+ asm volatile("out" #bwl " %" #bw "0, %w1" : : \
+ "a"(value), "Nd"(port)); \
+ } \
+} while (0)
+#define __in(bwl, bw) \
+do { \
+ if (is_tdx_guest()) { \
+ value = tdg_in(tdx_get_iosize(#bwl), port); \
+ } else { \
+ asm volatile("in" #bwl " %w1, %" #bw "0" : \
+ "=a"(value) : "Nd"(port)); \
+ } \
+} while (0)
+#endif
+
#else // !CONFIG_INTEL_TDX_GUEST

static inline bool is_tdx_guest(void)
@@ -161,6 +205,7 @@ static inline long tdx_kvm_hypercall4(unsigned int nr, unsigned long p1,
{
return -ENODEV;
}
-#endif /* CONFIG_INTEL_TDX_GUEST_KVM */

+#endif /* CONFIG_INTEL_TDX_GUEST_KVM */
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index ca3442b7accf..4a84487ee8ff 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -202,6 +202,42 @@ static void tdg_handle_cpuid(struct pt_regs *regs)
regs->dx = out.r15;
}

+void tdg_out(int size, int port, unsigned int value)
+{
+ tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1, port, value);
+}
+
+unsigned int tdg_in(int size, int port)
+{
+ struct tdx_hypercall_output out = {0};
+ u64 err;
+
+ err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
+ port, 0, &out);
+
+ return err ? UINT_MAX : out.r11;
+}
+
+static void tdg_handle_io(struct pt_regs *regs, u32 exit_qual)
+{
+ bool string = exit_qual & 16;
+ int out, size, port;
+
+ /* I/O strings ops are unrolled at build time. */
+ BUG_ON(string);
+
+ out = VE_GET_IO_TYPE(exit_qual);
+ size = VE_GET_IO_SIZE(exit_qual);
+ port = VE_GET_PORT_NUM(exit_qual);
+
+ if (out) {
+ tdg_out(size, port, regs->ax);
+ } else {
+ regs->ax &= ~GENMASK(8 * size, 0);
+ regs->ax |= tdg_in(size, port) & GENMASK(8 * size, 0);
+ }
+}
+
unsigned long tdg_get_ve_info(struct ve_info *ve)
{
u64 ret;
@@ -248,6 +284,9 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
case EXIT_REASON_CPUID:
tdg_handle_cpuid(regs);
break;
+ case EXIT_REASON_IO_INSTRUCTION:
+ tdg_handle_io(regs, ve->exit_qual);
+ break;
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return -EFAULT;
--
2.25.1


2021-06-05 18:55:45

by Dan Williams

[permalink] [raw]
Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O

On Wed, May 26, 2021 at 9:24 PM Kuppuswamy Sathyanarayanan
<[email protected]> wrote:
>
> From: "Kirill A. Shutemov" <[email protected]>
>
> TDX hypervisors cannot emulate instructions directly. This
> includes port IO which is normally emulated in the hypervisor.
> All port IO instructions inside TDX trigger the #VE exception
> in the guest and would be normally emulated there.
>
> For the really early code in the decompressor, #VE cannot be
> used because the IDT needed for handling the exception is not
> set-up, and some other infrastructure needed by the handler
> is missing. So to support port IO in decompressor code, add
> support for paravirt based I/O port virtualization.
>
> Also string I/O is not supported in TDX guest. So, unroll the
> string I/O operation into a loop operating on one element at
> a time. This method is similar to AMD SEV, so just extend the
> support for TDX guest platform.

Given early port IO is broken out in its own previous I think it makes
sense to break out the decompressor port IO enabling from final
runtime port IO support.

The argument in the previous patch about using #VE emulation in the
early code was collisions with trace and printk support in the "fully
featured" #VE handler later in the series. My interpretation of that
collision was due to the possibility of the #VE handler going into
infinite recursion if a printk in the handler triggered port IO. It
seems I do not have the right picture of the constraints. Given the
runtime kernel can direct replace in/out macros I would expect a
statement of the tradeoff with #VE emulation and why the post
decompressor code is still using emulation.

>
> Co-developed-by: Kuppuswamy Sathyanarayanan <[email protected]>
> Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Reviewed-by: Andi Kleen <[email protected]>
> ---
> arch/x86/boot/compressed/Makefile | 1 +
> arch/x86/boot/compressed/tdcall.S | 3 ++
> arch/x86/boot/compressed/tdx.c | 28 ++++++++++++++++++
> arch/x86/include/asm/io.h | 7 +++--
> arch/x86/include/asm/tdx.h | 47 ++++++++++++++++++++++++++++++-
> arch/x86/kernel/tdx.c | 39 +++++++++++++++++++++++++
> 6 files changed, 122 insertions(+), 3 deletions(-)
> create mode 100644 arch/x86/boot/compressed/tdcall.S
>
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index a2554621cefe..a944a2038797 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -97,6 +97,7 @@ endif
>
> vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
> vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o
> +vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdcall.o
>
> vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
> efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
> diff --git a/arch/x86/boot/compressed/tdcall.S b/arch/x86/boot/compressed/tdcall.S
> new file mode 100644
> index 000000000000..aafadc136c88
> --- /dev/null
> +++ b/arch/x86/boot/compressed/tdcall.S
> @@ -0,0 +1,3 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include "../../kernel/tdcall.S"
> diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
> index 0a87c1775b67..cb20962c7da6 100644
> --- a/arch/x86/boot/compressed/tdx.c
> +++ b/arch/x86/boot/compressed/tdx.c
> @@ -4,6 +4,8 @@
> */
>
> #include <asm/tdx.h>
> +#include <asm/vmx.h>
> +#include <vdso/limits.h>
>
> static int __ro_after_init tdx_guest = -1;
>
> @@ -30,3 +32,29 @@ bool is_tdx_guest(void)
> return !!tdx_guest;
> }
>
> +/*
> + * Helper function used for making hypercall for "out"
> + * instruction. It will be called from __out IO
> + * macro (in tdx.h).
> + */
> +void tdg_out(int size, int port, unsigned int value)
> +{
> + __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
> + port, value, NULL);
> +}
> +
> +/*
> + * Helper function used for making hypercall for "in"
> + * instruction. It will be called from __in IO macro
> + * (in tdx.h). If IO is failed, it will return all 1s.
> + */
> +unsigned int tdg_in(int size, int port)
> +{
> + struct tdx_hypercall_output out = {0};
> + int err;
> +
> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
> + port, 0, &out);
> +
> + return err ? UINT_MAX : out.r11;
> +}

The previous patch open coded tdg_{in,out} and this one provides
helpers. I think at a minimum they should be consistent and pick one
style.

> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index ef7a686a55a9..daa75c8eef5d 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -40,6 +40,7 @@
>
> #include <linux/string.h>
> #include <linux/compiler.h>
> +#include <linux/protected_guest.h>
> #include <asm/page.h>
> #include <asm/early_ioremap.h>
> #include <asm/pgtable_types.h>
> @@ -309,7 +310,8 @@ static inline unsigned type in##bwl##_p(int port) \
> \
> static inline void outs##bwl(int port, const void *addr, unsigned long count) \
> { \
> - if (sev_key_active()) { \
> + if (sev_key_active() || \
> + protected_guest_has(VM_UNROLL_STRING_IO)) { \
> unsigned type *value = (unsigned type *)addr; \
> while (count) { \
> out##bwl(*value, port); \
> @@ -325,7 +327,8 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
> \
> static inline void ins##bwl(int port, void *addr, unsigned long count) \
> { \
> - if (sev_key_active()) { \
> + if (sev_key_active() || \
> + protected_guest_has(VM_UNROLL_STRING_IO)) { \
> unsigned type *value = (unsigned type *)addr; \
> while (count) { \
> *value = in##bwl(port); \
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index e880a9dd40d3..6ba2dcea533f 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -5,6 +5,8 @@
>
> #define TDX_CPUID_LEAF_ID 0x21
>
> +#ifndef __ASSEMBLY__
> +
> #ifdef CONFIG_INTEL_TDX_GUEST
>
> #include <asm/cpufeature.h>
> @@ -74,6 +76,48 @@ u64 __tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15,
> bool tdx_protected_guest_has(unsigned long flag);
> bool tdg_early_handle_ve(struct pt_regs *regs);
>
> +void tdg_out(int size, int port, unsigned int value);
> +unsigned int tdg_in(int size, int port);
> +
> +/* Helper function for converting {b,w,l} to byte size */
> +static inline int tdx_get_iosize(char *str)
> +{
> + if (str[0] == 'w')
> + return 2;
> + else if (str[0] == 'l')
> + return 4;
> +
> + return 1;
> +}

This seems like an unnecessary novelty. The BUILDIO() macro in
arch/x86/include/asm/io.h takes a type argument, why can't the size be
explicitly specified rather than inferred from string parsing?

> +
> +/*
> + * To support I/O port access in decompressor or early kernel init
> + * code, since #VE exception handler cannot be used, use paravirt
> + * model to implement __in/__out macros which will in turn be used
> + * by in{b,w,l}()/out{b,w,l} I/O helper macros used in kernel. You
> + * can find the __in/__out macro usage in arch/x86/include/asm/io.h
> + */
> +#ifdef BOOT_COMPRESSED_MISC_H
> +#define __out(bwl, bw) \
> +do { \
> + if (is_tdx_guest()) { \
> + tdg_out(tdx_get_iosize(#bwl), port, value); \
> + } else { \
> + asm volatile("out" #bwl " %" #bw "0, %w1" : : \
> + "a"(value), "Nd"(port)); \
> + } \
> +} while (0)
> +#define __in(bwl, bw) \
> +do { \
> + if (is_tdx_guest()) { \
> + value = tdg_in(tdx_get_iosize(#bwl), port); \
> + } else { \
> + asm volatile("in" #bwl " %w1, %" #bw "0" : \
> + "=a"(value) : "Nd"(port)); \
> + } \
> +} while (0)
> +#endif
> +
> #else // !CONFIG_INTEL_TDX_GUEST
>
> static inline bool is_tdx_guest(void)
> @@ -161,6 +205,7 @@ static inline long tdx_kvm_hypercall4(unsigned int nr, unsigned long p1,
> {
> return -ENODEV;
> }
> -#endif /* CONFIG_INTEL_TDX_GUEST_KVM */
>
> +#endif /* CONFIG_INTEL_TDX_GUEST_KVM */
> +#endif /* __ASSEMBLY__ */
> #endif /* _ASM_X86_TDX_H */
> diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
> index ca3442b7accf..4a84487ee8ff 100644
> --- a/arch/x86/kernel/tdx.c
> +++ b/arch/x86/kernel/tdx.c
> @@ -202,6 +202,42 @@ static void tdg_handle_cpuid(struct pt_regs *regs)
> regs->dx = out.r15;
> }
>
> +void tdg_out(int size, int port, unsigned int value)
> +{
> + tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1, port, value);
> +}
> +
> +unsigned int tdg_in(int size, int port)
> +{
> + struct tdx_hypercall_output out = {0};
> + u64 err;
> +
> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
> + port, 0, &out);
> +
> + return err ? UINT_MAX : out.r11;
> +}
> +
> +static void tdg_handle_io(struct pt_regs *regs, u32 exit_qual)
> +{
> + bool string = exit_qual & 16;
> + int out, size, port;
> +
> + /* I/O strings ops are unrolled at build time. */
> + BUG_ON(string);

...and here is why I think the WBINVD patch is bogus, or at least
inconsistent with the decision taken here. If it's ok to BUG_ON
instructions that "can't happen" due to care taken to ensure build
time guarantees then it is ok to skip WBINVD handling with the same
care taken to prevent its usage at build time.

> +
> + out = VE_GET_IO_TYPE(exit_qual);
> + size = VE_GET_IO_SIZE(exit_qual);
> + port = VE_GET_PORT_NUM(exit_qual);
> +
> + if (out) {
> + tdg_out(size, port, regs->ax);
> + } else {
> + regs->ax &= ~GENMASK(8 * size, 0);
> + regs->ax |= tdg_in(size, port) & GENMASK(8 * size, 0);
> + }
> +}
> +
> unsigned long tdg_get_ve_info(struct ve_info *ve)
> {
> u64 ret;
> @@ -248,6 +284,9 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
> case EXIT_REASON_CPUID:
> tdg_handle_cpuid(regs);
> break;
> + case EXIT_REASON_IO_INSTRUCTION:
> + tdg_handle_io(regs, ve->exit_qual);
> + break;
> default:
> pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
> return -EFAULT;
> --
> 2.25.1
>

Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O



On 6/5/21 11:52 AM, Dan Williams wrote:
> On Wed, May 26, 2021 at 9:24 PM Kuppuswamy Sathyanarayanan
> <[email protected]> wrote:
>>
>> From: "Kirill A. Shutemov" <[email protected]>
>>
>> TDX hypervisors cannot emulate instructions directly. This
>> includes port IO which is normally emulated in the hypervisor.
>> All port IO instructions inside TDX trigger the #VE exception
>> in the guest and would be normally emulated there.
>>
>> For the really early code in the decompressor, #VE cannot be
>> used because the IDT needed for handling the exception is not
>> set-up, and some other infrastructure needed by the handler
>> is missing. So to support port IO in decompressor code, add
>> support for paravirt based I/O port virtualization.
>>
>> Also string I/O is not supported in TDX guest. So, unroll the
>> string I/O operation into a loop operating on one element at
>> a time. This method is similar to AMD SEV, so just extend the
>> support for TDX guest platform.
>
> Given early port IO is broken out in its own previous I think it makes
> sense to break out the decompressor port IO enabling from final
> runtime port IO support.

Patch titled "x86/tdx: Handle early IO operations" mainly adds
IO #VE support in early exception handler. Decompression code IO
support does not have dependency on it. You still think it is
better to move it that patch?

>
> The argument in the previous patch about using #VE emulation in the
> early code was collisions with trace and printk support in the "fully
> featured" #VE handler later in the series. My interpretation of that
> collision was due to the possibility of the #VE handler going into
> infinite recursion if a printk in the handler triggered port IO. It

No. AFAIK, It has nothing to do with infinite recursion. We are just
highlighting the fact that when kernel uses early exception handler
support, we cannot use code path that enables tracing support. So we
use simplest way to trigger IO hypercalls.

if (early #VE exception path)
handle_io_ve()
__tdx_hypercall

if (normal #VE path)
handle_io_ve()
__tdx_hypercall (current version)
// Later on when adding tracing support, we will replace it
// with trace hypercalls.
__trace_tdx_hypercall

As you can see in above design flow, later on when adding tracing
support we will have split the early #IO handling code from
normal IO handling code. So instead of using common code now and
refactor it later on, we just use different code path for both
of them.

> seems I do not have the right picture of the constraints. Given the
> runtime kernel can direct replace in/out macros I would expect a
> statement of the tradeoff with #VE emulation and why the post
> decompressor code is still using emulation.

Currently decompression code cannot use #VE based IO emulation. It does
not know how to handle #VE exceptions. Also, It is much easier to replace
IO calls with TDX hypercalls in decompression code when compared with
teaching how to handle #VE exceptions in decompression code.

>
>>
>> Co-developed-by: Kuppuswamy Sathyanarayanan <[email protected]>
>> Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
>> Signed-off-by: Kirill A. Shutemov <[email protected]>
>> Reviewed-by: Andi Kleen <[email protected]>
>> ---
>> arch/x86/boot/compressed/Makefile | 1 +
>> arch/x86/boot/compressed/tdcall.S | 3 ++
>> arch/x86/boot/compressed/tdx.c | 28 ++++++++++++++++++
>> arch/x86/include/asm/io.h | 7 +++--
>> arch/x86/include/asm/tdx.h | 47 ++++++++++++++++++++++++++++++-
>> arch/x86/kernel/tdx.c | 39 +++++++++++++++++++++++++
>> 6 files changed, 122 insertions(+), 3 deletions(-)
>> create mode 100644 arch/x86/boot/compressed/tdcall.S
>>
>> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
>> index a2554621cefe..a944a2038797 100644
>> --- a/arch/x86/boot/compressed/Makefile
>> +++ b/arch/x86/boot/compressed/Makefile
>> @@ -97,6 +97,7 @@ endif
>>

>> static int __ro_after_init tdx_guest = -1;
>>
>> @@ -30,3 +32,29 @@ bool is_tdx_guest(void)
>> return !!tdx_guest;
>> }
>>
>> +/*
>> + * Helper function used for making hypercall for "out"
>> + * instruction. It will be called from __out IO
>> + * macro (in tdx.h).
>> + */
>> +void tdg_out(int size, int port, unsigned int value)
>> +{
>> + __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
>> + port, value, NULL);
>> +}
>> +
>> +/*
>> + * Helper function used for making hypercall for "in"
>> + * instruction. It will be called from __in IO macro
>> + * (in tdx.h). If IO is failed, it will return all 1s.
>> + */
>> +unsigned int tdg_in(int size, int port)
>> +{
>> + struct tdx_hypercall_output out = {0};
>> + int err;
>> +
>> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
>> + port, 0, &out);
>> +
>> + return err ? UINT_MAX : out.r11;
>> +}
>
> The previous patch open coded tdg_{in,out} and this one provides
> helpers. I think at a minimum they should be consistent and pick one
> style.

As I have mentioned above, early IO #VE handler is a special case. we
don't want to complicate its code path with debug or tracing support.
So it is not a good comparison target.

In this case, the reason for adding helper function is to make it easier
for calling it from tdx.h.

>
>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>> index ef7a686a55a9..daa75c8eef5d 100644
>> --- a/arch/x86/include/asm/io.h
>> +++ b/arch/x86/include/asm/io.h
>> @@ -40,6 +40,7 @@
>>

snip

>> +
>> +/* Helper function for converting {b,w,l} to byte size */
>> +static inline int tdx_get_iosize(char *str)
>> +{
>> + if (str[0] == 'w')
>> + return 2;
>> + else if (str[0] == 'l')
>> + return 4;
>> +
>> + return 1;
>> +}
>
> This seems like an unnecessary novelty. The BUILDIO() macro in
> arch/x86/include/asm/io.h takes a type argument, why can't the size be
> explicitly specified rather than inferred from string parsing?

I don't want to make changes to generic macros in io.h if it can be
avoided. It follows similar argument/type in all arch/* code. Also, it
is easier to handle TDX as a special case here.


--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2021-06-05 21:11:38

by Dan Williams

[permalink] [raw]
Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O

On Sat, Jun 5, 2021 at 1:08 PM Kuppuswamy, Sathyanarayanan
<[email protected]> wrote:
>
>
>
> On 6/5/21 11:52 AM, Dan Williams wrote:
> > On Wed, May 26, 2021 at 9:24 PM Kuppuswamy Sathyanarayanan
> > <[email protected]> wrote:
> >>
> >> From: "Kirill A. Shutemov" <[email protected]>
> >>
> >> TDX hypervisors cannot emulate instructions directly. This
> >> includes port IO which is normally emulated in the hypervisor.
> >> All port IO instructions inside TDX trigger the #VE exception
> >> in the guest and would be normally emulated there.
> >>
> >> For the really early code in the decompressor, #VE cannot be
> >> used because the IDT needed for handling the exception is not
> >> set-up, and some other infrastructure needed by the handler
> >> is missing. So to support port IO in decompressor code, add
> >> support for paravirt based I/O port virtualization.
> >>
> >> Also string I/O is not supported in TDX guest. So, unroll the
> >> string I/O operation into a loop operating on one element at
> >> a time. This method is similar to AMD SEV, so just extend the
> >> support for TDX guest platform.
> >
> > Given early port IO is broken out in its own previous I think it makes
> > sense to break out the decompressor port IO enabling from final
> > runtime port IO support.
>
> Patch titled "x86/tdx: Handle early IO operations" mainly adds
> IO #VE support in early exception handler. Decompression code IO
> support does not have dependency on it. You still think it is
> better to move it that patch?
>

No, I was suggesting three patches instead of 2:

early
decompressor
final-runtime

> >
> > The argument in the previous patch about using #VE emulation in the
> > early code was collisions with trace and printk support in the "fully
> > featured" #VE handler later in the series. My interpretation of that
> > collision was due to the possibility of the #VE handler going into
> > infinite recursion if a printk in the handler triggered port IO. It
>
> No. AFAIK, It has nothing to do with infinite recursion. We are just
> highlighting the fact that when kernel uses early exception handler
> support, we cannot use code path that enables tracing support. So we
> use simplest way to trigger IO hypercalls.

Ok, then how does this approach handle printk from the #VE handler if
printk issues port IO?

>
> if (early #VE exception path)
> handle_io_ve()
> __tdx_hypercall
>
> if (normal #VE path)
> handle_io_ve()
> __tdx_hypercall (current version)
> // Later on when adding tracing support, we will replace it
> // with trace hypercalls.
> __trace_tdx_hypercall
>
> As you can see in above design flow, later on when adding tracing
> support we will have split the early #IO handling code from
> normal IO handling code. So instead of using common code now and
> refactor it later on, we just use different code path for both
> of them.

Could you put that in the changelog, it was non-obvious to me.

> > seems I do not have the right picture of the constraints. Given the
> > runtime kernel can direct replace in/out macros I would expect a
> > statement of the tradeoff with #VE emulation and why the post
> > decompressor code is still using emulation.
>
> Currently decompression code cannot use #VE based IO emulation. It does
> not know how to handle #VE exceptions. Also, It is much easier to replace
> IO calls with TDX hypercalls in decompression code when compared with
> teaching how to handle #VE exceptions in decompression code.

Ok, but that does not answer the background behind the decision to use
emulation rather than direct replacement of port IO instructions in
the final kernel runtime image.

This patch mixes those 2 concerns and I think it deserves to be broken
out and explained.

>
> >
> >>
> >> Co-developed-by: Kuppuswamy Sathyanarayanan <[email protected]>
> >> Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
> >> Signed-off-by: Kirill A. Shutemov <[email protected]>
> >> Reviewed-by: Andi Kleen <[email protected]>
> >> ---
> >> arch/x86/boot/compressed/Makefile | 1 +
> >> arch/x86/boot/compressed/tdcall.S | 3 ++
> >> arch/x86/boot/compressed/tdx.c | 28 ++++++++++++++++++
> >> arch/x86/include/asm/io.h | 7 +++--
> >> arch/x86/include/asm/tdx.h | 47 ++++++++++++++++++++++++++++++-
> >> arch/x86/kernel/tdx.c | 39 +++++++++++++++++++++++++
> >> 6 files changed, 122 insertions(+), 3 deletions(-)
> >> create mode 100644 arch/x86/boot/compressed/tdcall.S
> >>
> >> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> >> index a2554621cefe..a944a2038797 100644
> >> --- a/arch/x86/boot/compressed/Makefile
> >> +++ b/arch/x86/boot/compressed/Makefile
> >> @@ -97,6 +97,7 @@ endif
> >>
>
> >> static int __ro_after_init tdx_guest = -1;
> >>
> >> @@ -30,3 +32,29 @@ bool is_tdx_guest(void)
> >> return !!tdx_guest;
> >> }
> >>
> >> +/*
> >> + * Helper function used for making hypercall for "out"
> >> + * instruction. It will be called from __out IO
> >> + * macro (in tdx.h).
> >> + */
> >> +void tdg_out(int size, int port, unsigned int value)
> >> +{
> >> + __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
> >> + port, value, NULL);
> >> +}
> >> +
> >> +/*
> >> + * Helper function used for making hypercall for "in"
> >> + * instruction. It will be called from __in IO macro
> >> + * (in tdx.h). If IO is failed, it will return all 1s.
> >> + */
> >> +unsigned int tdg_in(int size, int port)
> >> +{
> >> + struct tdx_hypercall_output out = {0};
> >> + int err;
> >> +
> >> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
> >> + port, 0, &out);
> >> +
> >> + return err ? UINT_MAX : out.r11;
> >> +}
> >
> > The previous patch open coded tdg_{in,out} and this one provides
> > helpers. I think at a minimum they should be consistent and pick one
> > style.
>
> As I have mentioned above, early IO #VE handler is a special case. we
> don't want to complicate its code path with debug or tracing support.
> So it is not a good comparison target.

This patch and the last do the same thing in 2 different ways. One of
them should match the other even if the helpers are not directly
reused.

> In this case, the reason for adding helper function is to make it easier
> for calling it from tdx.h.
> >
> >> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> >> index ef7a686a55a9..daa75c8eef5d 100644
> >> --- a/arch/x86/include/asm/io.h
> >> +++ b/arch/x86/include/asm/io.h
> >> @@ -40,6 +40,7 @@
> >>
>
> snip
>
> >> +
> >> +/* Helper function for converting {b,w,l} to byte size */
> >> +static inline int tdx_get_iosize(char *str)
> >> +{
> >> + if (str[0] == 'w')
> >> + return 2;
> >> + else if (str[0] == 'l')
> >> + return 4;
> >> +
> >> + return 1;
> >> +}
> >
> > This seems like an unnecessary novelty. The BUILDIO() macro in
> > arch/x86/include/asm/io.h takes a type argument, why can't the size be
> > explicitly specified rather than inferred from string parsing?
>
> I don't want to make changes to generic macros in io.h if it can be
> avoided. It follows similar argument/type in all arch/* code. Also, it
> is easier to handle TDX as a special case here.
>

What changes are you talking about to the generic macros? The BUILDIO
macro passes in a size parameter explicitly rather than inferring the
size from the string name of an argument. BUILDIO does not need to
change, it's backend just needs to do the right thing in the TDX case.

Otherwise, "I don't want to" is not a sufficient justification for
avoiding needlessly new design patterns.

Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O



On 6/5/21 2:08 PM, Dan Williams wrote:
> On Sat, Jun 5, 2021 at 1:08 PM Kuppuswamy, Sathyanarayanan
> <[email protected]> wrote:
>>
>>
>>
>> On 6/5/21 11:52 AM, Dan Williams wrote:
>>> On Wed, May 26, 2021 at 9:24 PM Kuppuswamy Sathyanarayanan
>>> <[email protected]> wrote:
>>>>
>>>> From: "Kirill A. Shutemov" <[email protected]>
>>>>
>>>> TDX hypervisors cannot emulate instructions directly. This
>>>> includes port IO which is normally emulated in the hypervisor.
>>>> All port IO instructions inside TDX trigger the #VE exception
>>>> in the guest and would be normally emulated there.
>>>>
>>>> For the really early code in the decompressor, #VE cannot be
>>>> used because the IDT needed for handling the exception is not
>>>> set-up, and some other infrastructure needed by the handler
>>>> is missing. So to support port IO in decompressor code, add
>>>> support for paravirt based I/O port virtualization.
>>>>
>>>> Also string I/O is not supported in TDX guest. So, unroll the
>>>> string I/O operation into a loop operating on one element at
>>>> a time. This method is similar to AMD SEV, so just extend the
>>>> support for TDX guest platform.
>>>
>>> Given early port IO is broken out in its own previous I think it makes
>>> sense to break out the decompressor port IO enabling from final
>>> runtime port IO support.
>>
>> Patch titled "x86/tdx: Handle early IO operations" mainly adds
>> IO #VE support in early exception handler. Decompression code IO
>> support does not have dependency on it. You still think it is
>> better to move it that patch?
>>
>
> No, I was suggesting three patches instead of 2:

Ok. I will move it to separate patch.

>

snip

>>
>> Currently decompression code cannot use #VE based IO emulation. It does
>> not know how to handle #VE exceptions. Also, It is much easier to replace
>> IO calls with TDX hypercalls in decompression code when compared with
>> teaching how to handle #VE exceptions in decompression code.
>
> Ok, but that does not answer the background behind the decision to use
> emulation rather than direct replacement of port IO instructions in
> the final kernel runtime image.

The reason for using #VE based emulation is,

1. It does not require changes to all usages of emulated instructions in
all the drivers.
2. Directly replacing instructions with TDX hypercalls will lead to bloated
image. So we cannot universally adapt this approach.

Reason for not adapting to use #VE approach for decompression code is,

1. #VE handler support does not exist for de-compressor code.
2. Adding such support is more complex and in-efficient (just for
single use case of handling IO instructions).

So we have replaced IO instructions with TDX hypercalls in decompression code.

Did it answer your query?

>
> This patch mixes those 2 concerns and I think it deserves to be broken
> out and explained.
>



>>>> +/*
>>>> + * Helper function used for making hypercall for "out"
>>>> + * instruction. It will be called from __out IO
>>>> + * macro (in tdx.h).
>>>> + */
>>>> +void tdg_out(int size, int port, unsigned int value)
>>>> +{
>>>> + __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
>>>> + port, value, NULL);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Helper function used for making hypercall for "in"
>>>> + * instruction. It will be called from __in IO macro
>>>> + * (in tdx.h). If IO is failed, it will return all 1s.
>>>> + */
>>>> +unsigned int tdg_in(int size, int port)
>>>> +{
>>>> + struct tdx_hypercall_output out = {0};
>>>> + int err;
>>>> +
>>>> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
>>>> + port, 0, &out);
>>>> +
>>>> + return err ? UINT_MAX : out.r11;
>>>> +}
>>>
>>> The previous patch open coded tdg_{in,out} and this one provides
>>> helpers. I think at a minimum they should be consistent and pick one
>>> style.
>>
>> As I have mentioned above, early IO #VE handler is a special case. we
>> don't want to complicate its code path with debug or tracing support.
>> So it is not a good comparison target.
>
> This patch and the last do the same thing in 2 different ways. One of
> them should match the other even if the helpers are not directly
> reused.

I am not sure whether I understand your question. But if the question is
about different implementation, the difference is due to where it is
being called.

In early IO support patch, tdx_early_io() is called from #VE handler.
So there is extra buffer code to extract exception information from
struct ve_info.

In this case, the caller is __in/__out macros. So there is no need
for above mentioned buffer code.

Actual hypercall usage is similar in both cases.

>
>> In this case, the reason for adding helper function is to make it easier
>> for calling it from tdx.h.
>>>
>>>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>>>> index ef7a686a55a9..daa75c8eef5d 100644
>>>> --- a/arch/x86/include/asm/io.h
>>>> +++ b/arch/x86/include/asm/io.h
>>>> @@ -40,6 +40,7 @@
>>>>
>>
>> snip
>>
>>>> +
>>>> +/* Helper function for converting {b,w,l} to byte size */
>>>> +static inline int tdx_get_iosize(char *str)
>>>> +{
>>>> + if (str[0] == 'w')
>>>> + return 2;
>>>> + else if (str[0] == 'l')
>>>> + return 4;
>>>> +
>>>> + return 1;
>>>> +}
>>>
>>> This seems like an unnecessary novelty. The BUILDIO() macro in
>>> arch/x86/include/asm/io.h takes a type argument, why can't the size be
>>> explicitly specified rather than inferred from string parsing?
>>
>> I don't want to make changes to generic macros in io.h if it can be
>> avoided. It follows similar argument/type in all arch/* code. Also, it
>> is easier to handle TDX as a special case here.
>>
>
> What changes are you talking about to the generic macros? The BUILDIO
> macro passes in a size parameter explicitly rather than inferring the
> size from the string name of an argument. BUILDIO does not need to
> change, it's backend just needs to do the right thing in the TDX case.
>
> Otherwise, "I don't want to" is not a sufficient justification for
> avoiding needlessly new design patterns.

I hope this is what you meant?

--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -273,25 +273,25 @@ static inline bool sev_key_active(void) { return false; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */

#ifndef __out
-#define __out(bwl, bw) \
+#define __out(bwl, bw, sz) \
asm volatile("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port))
#endif

#ifndef __in
-#define __in(bwl, bw) \
+#define __in(bwl, bw, sz) \
asm volatile("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port))
#endif

#define BUILDIO(bwl, bw, type) \
static inline void out##bwl(unsigned type value, int port) \
{ \
- __out(bwl, bw); \
+ __out(bwl, bw, sizeof(type)); \
} \
\
static inline unsigned type in##bwl(int port) \
{ \
unsigned type value; \
- __in(bwl, bw); \
+ __in(bwl, bw, sizeof(type)); \
return value; \
}

>

--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2021-06-07 17:19:57

by Dan Williams

[permalink] [raw]
Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O

On Mon, Jun 7, 2021 at 9:25 AM Kuppuswamy, Sathyanarayanan
<[email protected]> wrote:
>
>
>
> On 6/5/21 2:08 PM, Dan Williams wrote:
> > On Sat, Jun 5, 2021 at 1:08 PM Kuppuswamy, Sathyanarayanan
> > <[email protected]> wrote:
> >>
> >>
> >>
> >> On 6/5/21 11:52 AM, Dan Williams wrote:
> >>> On Wed, May 26, 2021 at 9:24 PM Kuppuswamy Sathyanarayanan
> >>> <[email protected]> wrote:
> >>>>
> >>>> From: "Kirill A. Shutemov" <[email protected]>
> >>>>
> >>>> TDX hypervisors cannot emulate instructions directly. This
> >>>> includes port IO which is normally emulated in the hypervisor.
> >>>> All port IO instructions inside TDX trigger the #VE exception
> >>>> in the guest and would be normally emulated there.
> >>>>
> >>>> For the really early code in the decompressor, #VE cannot be
> >>>> used because the IDT needed for handling the exception is not
> >>>> set-up, and some other infrastructure needed by the handler
> >>>> is missing. So to support port IO in decompressor code, add
> >>>> support for paravirt based I/O port virtualization.
> >>>>
> >>>> Also string I/O is not supported in TDX guest. So, unroll the
> >>>> string I/O operation into a loop operating on one element at
> >>>> a time. This method is similar to AMD SEV, so just extend the
> >>>> support for TDX guest platform.
> >>>
> >>> Given early port IO is broken out in its own previous I think it makes
> >>> sense to break out the decompressor port IO enabling from final
> >>> runtime port IO support.
> >>
> >> Patch titled "x86/tdx: Handle early IO operations" mainly adds
> >> IO #VE support in early exception handler. Decompression code IO
> >> support does not have dependency on it. You still think it is
> >> better to move it that patch?
> >>
> >
> > No, I was suggesting three patches instead of 2:
>
> Ok. I will move it to separate patch.
>
> >
>
> snip
>
> >>
> >> Currently decompression code cannot use #VE based IO emulation. It does
> >> not know how to handle #VE exceptions. Also, It is much easier to replace
> >> IO calls with TDX hypercalls in decompression code when compared with
> >> teaching how to handle #VE exceptions in decompression code.
> >
> > Ok, but that does not answer the background behind the decision to use
> > emulation rather than direct replacement of port IO instructions in
> > the final kernel runtime image.
>
> The reason for using #VE based emulation is,
>
> 1. It does not require changes to all usages of emulated instructions in
> all the drivers.
> 2. Directly replacing instructions with TDX hypercalls will lead to bloated
> image. So we cannot universally adapt this approach.
>
> Reason for not adapting to use #VE approach for decompression code is,
>
> 1. #VE handler support does not exist for de-compressor code.
> 2. Adding such support is more complex and in-efficient (just for
> single use case of handling IO instructions).
>
> So we have replaced IO instructions with TDX hypercalls in decompression code.
>
> Did it answer your query?

Yes, all but the concern of printk recursion.

>
> >
> > This patch mixes those 2 concerns and I think it deserves to be broken
> > out and explained.
> >
>
>
>
> >>>> +/*
> >>>> + * Helper function used for making hypercall for "out"
> >>>> + * instruction. It will be called from __out IO
> >>>> + * macro (in tdx.h).
> >>>> + */
> >>>> +void tdg_out(int size, int port, unsigned int value)
> >>>> +{
> >>>> + __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 1,
> >>>> + port, value, NULL);
> >>>> +}
> >>>> +
> >>>> +/*
> >>>> + * Helper function used for making hypercall for "in"
> >>>> + * instruction. It will be called from __in IO macro
> >>>> + * (in tdx.h). If IO is failed, it will return all 1s.
> >>>> + */
> >>>> +unsigned int tdg_in(int size, int port)
> >>>> +{
> >>>> + struct tdx_hypercall_output out = {0};
> >>>> + int err;
> >>>> +
> >>>> + err = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, 0,
> >>>> + port, 0, &out);
> >>>> +
> >>>> + return err ? UINT_MAX : out.r11;
> >>>> +}
> >>>
> >>> The previous patch open coded tdg_{in,out} and this one provides
> >>> helpers. I think at a minimum they should be consistent and pick one
> >>> style.
> >>
> >> As I have mentioned above, early IO #VE handler is a special case. we
> >> don't want to complicate its code path with debug or tracing support.
> >> So it is not a good comparison target.
> >
> > This patch and the last do the same thing in 2 different ways. One of
> > them should match the other even if the helpers are not directly
> > reused.
>
> I am not sure whether I understand your question. But if the question is
> about different implementation, the difference is due to where it is
> being called.
>
> In early IO support patch, tdx_early_io() is called from #VE handler.
> So there is extra buffer code to extract exception information from
> struct ve_info.
>
> In this case, the caller is __in/__out macros. So there is no need
> for above mentioned buffer code.
>
> Actual hypercall usage is similar in both cases.

It's similar when it can be identical. It simply looks like 2 authors
wrote 2 different code paths which is the case. Just huddle and code
it one way, I don't much care which.

>
> >
> >> In this case, the reason for adding helper function is to make it easier
> >> for calling it from tdx.h.
> >>>
> >>>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> >>>> index ef7a686a55a9..daa75c8eef5d 100644
> >>>> --- a/arch/x86/include/asm/io.h
> >>>> +++ b/arch/x86/include/asm/io.h
> >>>> @@ -40,6 +40,7 @@
> >>>>
> >>
> >> snip
> >>
> >>>> +
> >>>> +/* Helper function for converting {b,w,l} to byte size */
> >>>> +static inline int tdx_get_iosize(char *str)
> >>>> +{
> >>>> + if (str[0] == 'w')
> >>>> + return 2;
> >>>> + else if (str[0] == 'l')
> >>>> + return 4;
> >>>> +
> >>>> + return 1;
> >>>> +}
> >>>
> >>> This seems like an unnecessary novelty. The BUILDIO() macro in
> >>> arch/x86/include/asm/io.h takes a type argument, why can't the size be
> >>> explicitly specified rather than inferred from string parsing?
> >>
> >> I don't want to make changes to generic macros in io.h if it can be
> >> avoided. It follows similar argument/type in all arch/* code. Also, it
> >> is easier to handle TDX as a special case here.
> >>
> >
> > What changes are you talking about to the generic macros? The BUILDIO
> > macro passes in a size parameter explicitly rather than inferring the
> > size from the string name of an argument. BUILDIO does not need to
> > change, it's backend just needs to do the right thing in the TDX case.
> >
> > Otherwise, "I don't want to" is not a sufficient justification for
> > avoiding needlessly new design patterns.
>
> I hope this is what you meant?
>
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -273,25 +273,25 @@ static inline bool sev_key_active(void) { return false; }
> #endif /* CONFIG_AMD_MEM_ENCRYPT */
>
> #ifndef __out
> -#define __out(bwl, bw) \
> +#define __out(bwl, bw, sz) \
> asm volatile("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port))
> #endif
>
> #ifndef __in
> -#define __in(bwl, bw) \
> +#define __in(bwl, bw, sz) \
> asm volatile("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port))
> #endif
>
> #define BUILDIO(bwl, bw, type) \
> static inline void out##bwl(unsigned type value, int port) \
> { \
> - __out(bwl, bw); \
> + __out(bwl, bw, sizeof(type)); \
> } \
> \
> static inline unsigned type in##bwl(int port) \
> { \
> unsigned type value; \
> - __in(bwl, bw); \
> + __in(bwl, bw, sizeof(type)); \
> return value; \
> }

Looks good.

Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O



On 6/7/21 10:17 AM, Dan Williams wrote:
>> Did it answer your query?
> Yes, all but the concern of printk recursion.

I think recursion is not possible because printk will
handle it (using console_lock). If another print is
triggered during the current printk handling, it will
be directed to logbuf and delayed.

https://elixir.bootlin.com/linux/latest/source/kernel/printk/printk_safe.c#L382

>

--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2021-06-07 22:04:20

by Dan Williams

[permalink] [raw]
Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O

On Mon, Jun 7, 2021 at 2:52 PM Kuppuswamy, Sathyanarayanan
<[email protected]> wrote:
>
>
>
> On 6/7/21 10:17 AM, Dan Williams wrote:
> >> Did it answer your query?
> > Yes, all but the concern of printk recursion.
>
> I think recursion is not possible because printk will
> handle it (using console_lock). If another print is
> triggered during the current printk handling, it will
> be directed to logbuf and delayed.
>
> https://elixir.bootlin.com/linux/latest/source/kernel/printk/printk_safe.c#L382

That depends on printk_nmi_direct_enter() to set the context, wouldn't
an equivalent printk_ve_direct_enter() context flag be needed as well?

2021-06-08 03:01:36

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC v2-fix-v1 3/3] x86/tdx: Handle port I/O


ps://elixir.bootlin.com/linux/latest/source/kernel/printk/printk_safe.c#L382

> That depends on printk_nmi_direct_enter() to set the context, wouldn't
> an equivalent printk_ve_direct_enter() context flag be needed as well?

Even without it the console semaphore is always trylocked. So recursion
is just not possible.

What would be possible is a endless loop (printk adding more information
to the log buffer, which is then printed etc.), but that's true for
everywhere in the console/serial driver subsystem.


-Andi

Subject: [RFC v2-fix-v2 0/3] x86/tdx: Handle port I/O

This patchset addresses the review comments in the patch titled
"[RFC v2 14/32] x86/tdx: Handle port I/O". Since it requires
patch split, sending these together.

Changes since RFC v2-fix-v1:
* Splitted TDX decompression IO support into a seperate patch.
* Implemented tdg_handle_io() and tdx_early_io() in the similar
way as per review suggestion.
* Added VE_IS_IO_OUT() macro as per review suggestion.
* Added VE_IS_IO_STRING() to check the string I/O case in
tdx_early_io()
* Removed helper function tdg_in() and tdg_out() and directly
called IO hypercall to make the implementation uniform in
decompression code, early IO code and normal IO handler code.

Changes since RFC v2:
 * Removed assembly implementation of port IO emulation code
   and modified __in/__out IO helpers to directly call C function
   for in/out instruction emulation in decompression code.
 * Added helper function tdx_get_iosize() to make it easier for
   calling tdg_out/tdg_int() C functions from decompression code.
 * Added support for early exception handler to support IO
   instruction emulation in early boot kernel code.
 * Removed alternative_ usage and made kernel only use #VE based
   IO instruction emulation support outside the decompression module.
 * Added support for protection_guest_has() API to generalize
   AMD SEV/TDX specific initialization code in common drivers.
 * Fixed commit log and comments as per review comments.


Andi Kleen (1):
x86/tdx: Handle early IO operations

Kirill A. Shutemov (1):
x86/tdx: Handle port I/O

Kuppuswamy Sathyanarayanan (1):
x86/tdx: Handle port I/O in decompression code

arch/x86/boot/compressed/Makefile | 1 +
arch/x86/boot/compressed/tdcall.S | 3 ++
arch/x86/include/asm/io.h | 15 +++---
arch/x86/include/asm/tdx.h | 54 ++++++++++++++++++++
arch/x86/kernel/head64.c | 3 ++
arch/x86/kernel/tdx.c | 84 +++++++++++++++++++++++++++++++
6 files changed, 154 insertions(+), 6 deletions(-)
create mode 100644 arch/x86/boot/compressed/tdcall.S

--
2.25.1

Subject: [RFC v2-fix-v2 3/3] x86/tdx: Handle port I/O

From: "Kirill A. Shutemov" <[email protected]>

TDX hypervisors cannot emulate instructions directly. This
includes port IO which is normally emulated in the hypervisor.
All port IO instructions inside TDX trigger the #VE exception
in the guest and would be normally emulated there.

Also string I/O is not supported in TDX guest. So, unroll the
string I/O operation into a loop operating on one element at
a time. This method is similar to AMD SEV, so just extend the
support for TDX guest platform.

Co-developed-by: Kuppuswamy Sathyanarayanan <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
---
Changes since RFC v2-fix-v1:
* Fixed commit log to adapt to decompression support code split.

arch/x86/include/asm/io.h | 6 ++++--
arch/x86/kernel/tdx.c | 28 ++++++++++++++++++++++++++++
2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 391205dace98..e01d8bf2b37a 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -310,7 +310,8 @@ static inline unsigned type in##bwl##_p(int port) \
\
static inline void outs##bwl(int port, const void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
+ if (sev_key_active() || \
+ protected_guest_has(VM_UNROLL_STRING_IO)) { \
unsigned type *value = (unsigned type *)addr; \
while (count) { \
out##bwl(*value, port); \
@@ -326,7 +327,8 @@ static inline void outs##bwl(int port, const void *addr, unsigned long count) \
\
static inline void ins##bwl(int port, void *addr, unsigned long count) \
{ \
- if (sev_key_active()) { \
+ if (sev_key_active() || \
+ protected_guest_has(VM_UNROLL_STRING_IO)) { \
unsigned type *value = (unsigned type *)addr; \
while (count) { \
*value = in##bwl(port); \
diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index 3410cfc8a988..48a0cc2663ea 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -201,6 +201,31 @@ static void tdg_handle_cpuid(struct pt_regs *regs)
regs->dx = out.r15;
}

+/*
+ * Since the way we fail for string case is different we cannot
+ * reuse tdx_handle_early_io().
+ */
+static void tdg_handle_io(struct pt_regs *regs, u32 exit_qual)
+{
+ struct tdx_hypercall_output outh;
+ int out = VE_IS_IO_OUT(exit_qual);
+ int size = VE_GET_IO_SIZE(exit_qual);
+ int port = VE_GET_PORT_NUM(exit_qual);
+ u64 mask = GENMASK(8 * size, 0);
+ bool string = VE_IS_IO_STRING(exit_qual);
+ int ret;
+
+ /* I/O strings ops are unrolled at build time. */
+ BUG_ON(string);
+
+ ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, out, port,
+ regs->ax, &outh);
+ if (!out) {
+ regs->ax &= ~mask;
+ regs->ax |= (ret ? UINT_MAX : outh.r11) & mask;
+ }
+}
+
unsigned long tdg_get_ve_info(struct ve_info *ve)
{
u64 ret;
@@ -247,6 +272,9 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
case EXIT_REASON_CPUID:
tdg_handle_cpuid(regs);
break;
+ case EXIT_REASON_IO_INSTRUCTION:
+ tdg_handle_io(regs, ve->exit_qual);
+ break;
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return -EFAULT;
--
2.25.1