Subject: [PATCH v1 0/5] Add TDX Guest Support (boot fixes)

Hi All,

Intel's Trust Domain Extensions (TDX) protect guest VMs from malicious
hosts and some physical attacks. This series adds boot code support
and some additional fixes required for successful boot of TDX guest.

This series is the continuation of the patch series titled "Add TDX Guest
Support (Initial support)" and "Add TDX Guest Support (#VE handler support
)", which added initial support and #VE handler support for TDX guests. You
can find the related patchsets in the following links.

https://lore.kernel.org/patchwork/project/lkml/list/?series=502143
https://lore.kernel.org/patchwork/project/lkml/list/?series=503701

Also please note that this series alone is not necessarily fully
functional.

You can find TDX related documents in the following link.

https://software.intel.com/content/www/br/pt/develop/articles/intel-trust-domain-extensions.html

Kuppuswamy Sathyanarayanan (2):
x86/topology: Disable CPU online/offline control for TDX guest
x86: Skip WBINVD instruction for VM guest

Sean Christopherson (3):
x86/boot: Add a trampoline for APs booting in 64-bit mode
x86/boot: Avoid #VE during boot for TDX platforms
x86/tdx: Forcefully disable legacy PIC for TDX guests

arch/x86/boot/compressed/head_64.S | 16 +++++--
arch/x86/boot/compressed/pgtable.h | 2 +-
arch/x86/include/asm/acenv.h | 7 ++-
arch/x86/include/asm/realmode.h | 11 +++++
arch/x86/kernel/head_64.S | 20 +++++++-
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/kernel/tdx.c | 17 +++++++
arch/x86/kernel/topology.c | 3 +-
arch/x86/realmode/rm/header.S | 1 +
arch/x86/realmode/rm/trampoline_64.S | 59 ++++++++++++++++++++++--
arch/x86/realmode/rm/trampoline_common.S | 12 ++++-
11 files changed, 136 insertions(+), 14 deletions(-)

--
2.25.1


Subject: [PATCH v1 1/5] x86/boot: Add a trampoline for APs booting in 64-bit mode

From: Sean Christopherson <[email protected]>

Add a trampoline for booting APs in 64-bit mode via a software handoff
with BIOS, and use the new trampoline for the ACPI MP wake protocol used
by TDX. You can find MADT MP wake protocol details in ACPI specification
r6.4, sec 5.2.12.19.

Extend the real mode IDT pointer by four bytes to support LIDT in 64-bit
mode. For the GDT pointer, create a new entry as the existing storage
for the pointer occupies the zero entry in the GDT itself.

Reported-by: Kai Huang <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Reviewed-by: Dan Williams <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
---
arch/x86/include/asm/realmode.h | 11 +++++++
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/realmode/rm/header.S | 1 +
arch/x86/realmode/rm/trampoline_64.S | 38 ++++++++++++++++++++++++
arch/x86/realmode/rm/trampoline_common.S | 12 +++++++-
5 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 5db5d083c873..0f707521b797 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -25,6 +25,7 @@ struct real_mode_header {
u32 sev_es_trampoline_start;
#endif
#ifdef CONFIG_X86_64
+ u32 trampoline_start64;
u32 trampoline_pgd;
#endif
/* ACPI S3 wakeup */
@@ -88,6 +89,16 @@ static inline void set_real_mode_mem(phys_addr_t mem)
real_mode_header = (struct real_mode_header *) __va(mem);
}

+/* Common helper function to get start IP address */
+static inline unsigned long get_trampoline_start_ip(struct real_mode_header *rmh)
+{
+#ifdef CONFIG_X86_64
+ if (is_tdx_guest())
+ return rmh->trampoline_start64;
+#endif
+ return rmh->trampoline_start;
+}
+
void reserve_real_mode(void);

#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7770245cc7fa..3cde58849d70 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1033,7 +1033,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
int *cpu0_nmi_registered)
{
/* start_ip had better be page-aligned! */
- unsigned long start_ip = real_mode_header->trampoline_start;
+ unsigned long start_ip = get_trampoline_start_ip(real_mode_header);

unsigned long boot_error = 0;
unsigned long timeout;
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 8c1db5bf5d78..2eb62be6d256 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -24,6 +24,7 @@ SYM_DATA_START(real_mode_header)
.long pa_sev_es_trampoline_start
#endif
#ifdef CONFIG_X86_64
+ .long pa_trampoline_start64
.long pa_trampoline_pgd;
#endif
/* ACPI S3 wakeup */
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index cc8391f86cdb..ae112a91592f 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -161,6 +161,19 @@ SYM_CODE_START(startup_32)
ljmpl $__KERNEL_CS, $pa_startup_64
SYM_CODE_END(startup_32)

+SYM_CODE_START(pa_trampoline_compat)
+ /*
+ * In compatibility mode. Prep ESP and DX for startup_32, then disable
+ * paging and complete the switch to legacy 32-bit mode.
+ */
+ movl $rm_stack_end, %esp
+ movw $__KERNEL_DS, %dx
+
+ movl $X86_CR0_PE, %eax
+ movl %eax, %cr0
+ ljmpl $__KERNEL32_CS, $pa_startup_32
+SYM_CODE_END(pa_trampoline_compat)
+
.section ".text64","ax"
.code64
.balign 4
@@ -169,6 +182,20 @@ SYM_CODE_START(startup_64)
jmpq *tr_start(%rip)
SYM_CODE_END(startup_64)

+SYM_CODE_START(trampoline_start64)
+ /*
+ * APs start here on a direct transfer from 64-bit BIOS with identity
+ * mapped page tables. Load the kernel's GDT in order to gear down to
+ * 32-bit mode (to handle 4-level vs. 5-level paging), and to (re)load
+ * segment registers. Load the zero IDT so any fault triggers a
+ * shutdown instead of jumping back into BIOS.
+ */
+ lidt tr_idt(%rip)
+ lgdt tr_gdt64(%rip)
+
+ ljmpl *tr_compat(%rip)
+SYM_CODE_END(trampoline_start64)
+
.section ".rodata","a"
# Duplicate the global descriptor table
# so the kernel can live anywhere
@@ -182,6 +209,17 @@ SYM_DATA_START(tr_gdt)
.quad 0x00cf93000000ffff # __KERNEL_DS
SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)

+SYM_DATA_START(tr_gdt64)
+ .short tr_gdt_end - tr_gdt - 1 # gdt limit
+ .long pa_tr_gdt
+ .long 0
+SYM_DATA_END(tr_gdt64)
+
+SYM_DATA_START(tr_compat)
+ .long pa_trampoline_compat
+ .short __KERNEL32_CS
+SYM_DATA_END(tr_compat)
+
.bss
.balign PAGE_SIZE
SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
index 5033e640f957..4331c32c47f8 100644
--- a/arch/x86/realmode/rm/trampoline_common.S
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -1,4 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
.section ".rodata","a"
.balign 16
-SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
+
+/*
+ * When a bootloader hands off to the kernel in 32-bit mode an
+ * IDT with a 2-byte limit and 4-byte base is needed. When a boot
+ * loader hands off to a kernel 64-bit mode the base address
+ * extends to 8-bytes. Reserve enough space for either scenario.
+ */
+SYM_DATA_START_LOCAL(tr_idt)
+ .short 0
+ .quad 0
+SYM_DATA_END(tr_idt)
--
2.25.1

Subject: [PATCH v1 5/5] x86: Skip WBINVD instruction for VM guest

VM guests that supports ACPI, use standard ACPI mechanisms to signal
sleep state entry (including reboot) to the host. The ACPI
specification mandates WBINVD on any sleep state entry with the
expectation that the platform is only responsible for maintaining the
state of memory over sleep states, not preserving dirty data in any
CPU caches. ACPI cache flushing requirements pre-date the advent of
virtualization. Given guest sleep state entry does not affect any
host power rails it is not required to flush caches. The host is
responsible for maintaining cache state over its own bare metal sleep
state transitions that power-off the cache. A TDX guest, unlike a
typical guest, will machine check if the CPU cache is powered off.

Cc: Rafael J. Wysocki <[email protected]>
Cc: [email protected]
Reviewed-by: Dan Williams <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
---
arch/x86/include/asm/acenv.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
index 9aff97f0de7f..d4162e94bee8 100644
--- a/arch/x86/include/asm/acenv.h
+++ b/arch/x86/include/asm/acenv.h
@@ -10,10 +10,15 @@
#define _ASM_X86_ACENV_H

#include <asm/special_insns.h>
+#include <asm/cpu.h>

/* Asm macros */

-#define ACPI_FLUSH_CPU_CACHE() wbinvd()
+#define ACPI_FLUSH_CPU_CACHE() \
+do { \
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) \
+ wbinvd(); \
+} while (0)

int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);
--
2.25.1

2021-06-10 13:02:47

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v1 5/5] x86: Skip WBINVD instruction for VM guest

On Wed, Jun 9, 2021 at 11:51 PM Kuppuswamy Sathyanarayanan
<[email protected]> wrote:
>
> VM guests that supports ACPI, use standard ACPI mechanisms to signal
> sleep state entry (including reboot) to the host. The ACPI
> specification mandates WBINVD on any sleep state entry with the
> expectation that the platform is only responsible for maintaining the
> state of memory over sleep states, not preserving dirty data in any
> CPU caches. ACPI cache flushing requirements pre-date the advent of
> virtualization. Given guest sleep state entry does not affect any
> host power rails it is not required to flush caches. The host is
> responsible for maintaining cache state over its own bare metal sleep
> state transitions that power-off the cache. A TDX guest, unlike a
> typical guest, will machine check if the CPU cache is powered off.
>
> Cc: Rafael J. Wysocki <[email protected]>
> Cc: [email protected]
> Reviewed-by: Dan Williams <[email protected]>
> Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>

Acked-by: Rafael J. Wysocki <[email protected]>

> ---
> arch/x86/include/asm/acenv.h | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h
> index 9aff97f0de7f..d4162e94bee8 100644
> --- a/arch/x86/include/asm/acenv.h
> +++ b/arch/x86/include/asm/acenv.h
> @@ -10,10 +10,15 @@
> #define _ASM_X86_ACENV_H
>
> #include <asm/special_insns.h>
> +#include <asm/cpu.h>
>
> /* Asm macros */
>
> -#define ACPI_FLUSH_CPU_CACHE() wbinvd()
> +#define ACPI_FLUSH_CPU_CACHE() \
> +do { \
> + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) \
> + wbinvd(); \
> +} while (0)
>
> int __acpi_acquire_global_lock(unsigned int *lock);
> int __acpi_release_global_lock(unsigned int *lock);
> --
> 2.25.1
>