2022-11-21 19:59:02

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 0/6] Support TDX guests on Hyper-V

Intel folks added the generic code to support a TDX guest in April, 2022.

This patchset adds the Hyper-V specific code so that a TDX guest can run
on Hyper-V. Please review. Thanks!

Patch 1, 2 and 3 modify the generic TDX code in arch/x86/coco/tdx/.
I think they should go through the x86 tip.git tree.

Patch 4, 5, and 6 are for Hyper-V drivers and I think they should go
through the hyperv tree. Once Michael Kelley's patchset is merged
(see https://lwn.net/ml/linux-kernel/1668624097-14884-1-git-send-email-mikelley%40microsoft.com/)
I'll need to rebase the 3 patches.

Patch 5 depends on __tdx_ms_hv_hypercall(), which is added by patch 1.

Thanks,
Dexuan

Dexuan Cui (6):
x86/tdx: Support hypercalls for TDX guests on Hyper-V
x86/tdx: Retry TDVMCALL_MAP_GPA() when needed
x86/tdx: Support vmalloc() for tdx_enc_status_changed()
x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests
x86/hyperv: Support hypercalls for TDX guests
Drivers: hv: vmbus: Support TDX guests

MAINTAINERS | 1 +
arch/x86/coco/tdx/tdcall.S | 87 +++++++++++++++++++++
arch/x86/coco/tdx/tdx.c | 110 +++++++++++++++++++++++++--
arch/x86/hyperv/hv_init.c | 27 ++++++-
arch/x86/hyperv/ivm.c | 7 ++
arch/x86/include/asm/hyperv-tlfs.h | 3 +-
arch/x86/include/asm/mshyperv.h | 29 ++++++-
arch/x86/kernel/cpu/mshyperv.c | 30 +++++++-
arch/x86/mm/pat/set_memory.c | 2 +-
drivers/hv/connection.c | 4 +-
drivers/hv/hv.c | 25 ++++++
drivers/hv/hv_common.c | 6 ++
drivers/hv/ring_buffer.c | 2 +-
include/asm-generic/ms_hyperv_info.h | 29 +++++++
include/asm-generic/mshyperv.h | 24 +-----
15 files changed, 345 insertions(+), 41 deletions(-)
create mode 100644 include/asm-generic/ms_hyperv_info.h

--
2.25.1



2022-11-21 20:19:47

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 4/6] x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests

No logic change to SNP/VBS guests.

Signed-off-by: Dexuan Cui <[email protected]>
---
arch/x86/hyperv/ivm.c | 7 +++++++
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 18 ++++++++++++++++--
drivers/hv/hv_common.c | 6 ++++++
5 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 1dbcbd9da74d..0c219f163f71 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -269,6 +269,13 @@ bool hv_isolation_type_snp(void)
return static_branch_unlikely(&isolation_type_snp);
}

+DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
+
+bool hv_isolation_type_tdx(void)
+{
+ return static_branch_unlikely(&isolation_type_tdx);
+}
+
/*
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
*
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..6c0a04d078f5 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -161,7 +161,8 @@
enum hv_isolation_type {
HV_ISOLATION_TYPE_NONE = 0,
HV_ISOLATION_TYPE_VBS = 1,
- HV_ISOLATION_TYPE_SNP = 2
+ HV_ISOLATION_TYPE_SNP = 2,
+ HV_ISOLATION_TYPE_TDX = 3
};

/* Hyper-V specific model specific registers (MSRs) */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fc09b6739922..9d593ab2be26 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -14,6 +14,7 @@
union hv_ghcb;

DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
+DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);

typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
@@ -32,6 +33,8 @@ extern u64 hv_current_partition_id;

extern union hv_ghcb * __percpu *hv_ghcb_pg;

+extern bool hv_isolation_type_tdx(void);
+
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..9ad0b0abf0e0 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -338,9 +338,23 @@ static void __init ms_hyperv_init_platform(void)
#endif
}
/* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
- if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) ||
+ IS_ENABLED(CONFIG_INTEL_TDX_GUEST)) {
+
+ switch (hv_get_isolation_type()) {
+ case HV_ISOLATION_TYPE_VBS:
+ case HV_ISOLATION_TYPE_SNP:
cc_set_vendor(CC_VENDOR_HYPERV);
+ break;
+
+ case HV_ISOLATION_TYPE_TDX:
+ static_branch_enable(&isolation_type_tdx);
+ break;
+
+ default:
+ WARN_ON(1);
+ break;
+ }
}
}

diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..a9a03ab04b97 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -268,6 +268,12 @@ bool __weak hv_isolation_type_snp(void)
}
EXPORT_SYMBOL_GPL(hv_isolation_type_snp);

+bool __weak hv_isolation_type_tdx(void)
+{
+ return false;
+}
+EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
+
void __weak hv_setup_vmbus_handler(void (*handler)(void))
{
}
--
2.25.1


2022-11-21 20:21:44

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

__tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
because Hyper-V uses a different calling convention, so add the
new function __tdx_ms_hv_hypercall().

Signed-off-by: Dexuan Cui <[email protected]>
---
arch/x86/coco/tdx/tdcall.S | 87 +++++++++++++++++++++++++++++++++
arch/x86/include/asm/mshyperv.h | 2 +
2 files changed, 89 insertions(+)

diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index f9eb1134f22d..468b71738485 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -13,6 +13,8 @@
/*
* Bitmasks of exposed registers (with VMM).
*/
+#define TDX_RDX BIT(2)
+#define TDX_R8 BIT(8)
#define TDX_R10 BIT(10)
#define TDX_R11 BIT(11)
#define TDX_R12 BIT(12)
@@ -203,3 +205,88 @@ SYM_FUNC_START(__tdx_hypercall)
REACHABLE
jmp .Lpanic
SYM_FUNC_END(__tdx_hypercall)
+
+/*
+ * __tdx_ms_hv_hypercall() - Make hypercalls to Hype-V using TDVMCALL leaf
+ * of TDCALL instruction
+ *
+ * Transforms values in function call arguments "input control, output_addr,
+ * and input_addr" into the TDCALL register ABI. After TDCALL operation,
+ * Hyper-V has changed the memory pointed by output_addr, and R11 is the
+ * output control code. Note: before the TDCALL operation, the guest must
+ * share the memory pointed by input_addr and output_addr with Hyper-V.
+ *-------------------------------------------------------------------------
+ * TD VMCALL ABI on Hyper-V:
+ *-------------------------------------------------------------------------
+ *
+ * Input Registers:
+ *
+ * RAX - TDCALL instruction leaf number (0 - TDG.VP.VMCALL)
+ * RCX - BITMAP which controls which part of TD Guest GPR
+ * is passed as-is to the VMM and back.
+ * R10 - Set to Hyper-V hypercall input control code.
+ * Note: legal Hyper-V hypercall input control codes
+ * are always non-zero, i.e. they don't conflict with
+ * TDX_HYPERCALL_STANDARD.
+ * R8 - Output physical addr.
+ * RDX - Input physical addr.
+ *
+ * Output Registers:
+ *
+ * RAX - TDCALL instruction status (Not related to hypercall
+ * output).
+ * R11 - Output control code.
+ *
+ *-------------------------------------------------------------------------
+ *
+ * __tdx_ms_hv_hypercall() function ABI:
+ *
+ * @arg (RDI) - Input control code, moved to R10
+ * @arg (RSI) - Output address, moved to R8
+ * @arg (RDX) - Input address. RDX is passed to Hyper-V as-is.
+ *
+ * On successful completion, return the hypercall output control code.
+ */
+SYM_FUNC_START(__tdx_ms_hv_hypercall)
+ FRAME_BEGIN
+
+ /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
+ xor %eax, %eax
+
+ /* Do not leak the value of the output-only register to Hyper-V */
+ xor %r11, %r11
+
+ /* Load input control code */
+ mov %rdi, %r10
+
+ /* Load output addr. NB: input addr is already in RDX. */
+ mov %rsi, %r8
+
+ /* Expose these registers to Hyper-V as-is */
+ mov $(TDX_RDX | TDX_R8 | TDX_R10 |TDX_R11), %ecx
+
+ tdcall
+
+ /*
+ * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+ * something has gone horribly wrong with the TDX module.
+ *
+ * The return status of the hypercall operation is in a separate
+ * register (in R11). Hypercall errors are a part of normal operation
+ * and are handled by callers.
+ */
+ testq %rax, %rax
+ jne .Lpanic_ms_hv
+
+ /* Copy output control code as the function's return value */
+ movq %r11, %rax
+
+ FRAME_END
+
+ RET
+.Lpanic_ms_hv:
+ call __tdx_hypercall_failed
+ /* __tdx_hypercall_failed never returns */
+ REACHABLE
+ jmp .Lpanic_ms_hv
+SYM_FUNC_END(__tdx_ms_hv_hypercall)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..fc09b6739922 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -36,6 +36,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 __tdx_ms_hv_hypercall(u64 control, u64 output_addr, u64 input_addr);
+
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
--
2.25.1


2022-11-21 20:39:09

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 6/6] Drivers: hv: vmbus: Support TDX guests

Intel folks added the generic code to support a TDX guest in April, 2022.
This commit and some earlier commits from me add the Hyper-V specific
code so that a TDX guest can run on Hyper-V.

Signed-off-by: Dexuan Cui <[email protected]>
---
arch/x86/hyperv/hv_init.c | 19 +++++++++++++++----
arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++++
arch/x86/mm/pat/set_memory.c | 2 +-
drivers/hv/connection.c | 4 +++-
drivers/hv/hv.c | 25 +++++++++++++++++++++++++
drivers/hv/ring_buffer.c | 2 +-
6 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 05682c4e327f..694f7fb04e5d 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
static int hv_cpu_init(unsigned int cpu)
{
union hv_vp_assist_msr_contents msr = { 0 };
- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+ struct hv_vp_assist_page **hvp;
int ret;

ret = hv_common_cpu_init(cpu);
@@ -87,6 +87,7 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;

+ hvp = &hv_vp_assist_page[smp_processor_id()];
if (!*hvp) {
if (hv_root_partition) {
/*
@@ -398,11 +399,21 @@ void __init hyperv_init(void)
if (hv_common_init())
return;

- hv_vp_assist_page = kcalloc(num_possible_cpus(),
- sizeof(*hv_vp_assist_page), GFP_KERNEL);
+ /*
+ * The VP assist page is useless to a TDX guest: the only use we
+ * would have for it is lazy EOI, which can not be used with TDX.
+ */
+ if (hv_isolation_type_tdx())
+ hv_vp_assist_page = NULL;
+ else
+ hv_vp_assist_page = kcalloc(num_possible_cpus(),
+ sizeof(*hv_vp_assist_page),
+ GFP_KERNEL);
if (!hv_vp_assist_page) {
ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
- goto common_free;
+
+ if (!hv_isolation_type_tdx())
+ goto common_free;
}

if (hv_isolation_type_snp()) {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index dddccdbc5526..6f597b23ad3e 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -350,7 +350,17 @@ static void __init ms_hyperv_init_platform(void)
case HV_ISOLATION_TYPE_TDX:
static_branch_enable(&isolation_type_tdx);

+ cc_set_vendor(CC_VENDOR_INTEL);
+
ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
+
+ /* Don't use the unsafe Hyper-V TSC page */
+ ms_hyperv.features &=
+ ~HV_MSR_REFERENCE_TSC_AVAILABLE;
+
+ /* HV_REGISTER_CRASH_CTL is unsupported */
+ ms_hyperv.misc_features &=
+ ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
break;

default:
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731de..bb44aaddb230 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (hv_is_isolation_supported())
+ if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
return hv_set_mem_host_visibility(addr, numpages, !enc);

if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 9dc27e5d367a..1ecc3c29e3f7 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -250,12 +250,14 @@ int vmbus_connect(void)
* Isolation VM with AMD SNP needs to access monitor page via
* address space above shared gpa boundary.
*/
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_isolation_type_tdx()) {
vmbus_connection.monitor_pages_pa[0] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages_pa[1] +=
ms_hyperv.shared_gpa_boundary;
+ }

+ if (hv_isolation_type_snp()) {
vmbus_connection.monitor_pages[0]
= memremap(vmbus_connection.monitor_pages_pa[0],
HV_HYP_PAGE_SIZE,
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..03b3257bc1ab 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -18,6 +18,7 @@
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/set_memory.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -119,6 +120,7 @@ int hv_synic_alloc(void)
{
int cpu;
struct hv_per_cpu_context *hv_cpu;
+ int ret;

/*
* First, zero all per-cpu memory areas so hv_synic_free() can
@@ -168,6 +170,21 @@ int hv_synic_alloc(void)
pr_err("Unable to allocate post msg page\n");
goto err;
}
+
+
+ if (hv_isolation_type_tdx()) {
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_message_page, 1);
+ BUG_ON(ret);
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->synic_event_page, 1);
+ BUG_ON(ret);
+
+ ret = set_memory_decrypted(
+ (unsigned long)hv_cpu->post_msg_page, 1);
+ BUG_ON(ret);
+ }
}

return 0;
@@ -225,6 +242,10 @@ void hv_synic_enable_regs(unsigned int cpu)
} else {
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> HV_HYP_PAGE_SHIFT;
+
+ if (hv_isolation_type_tdx())
+ simp.base_simp_gpa += ms_hyperv.shared_gpa_boundary
+ >> HV_HYP_PAGE_SHIFT;
}

hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
@@ -243,6 +264,10 @@ void hv_synic_enable_regs(unsigned int cpu)
} else {
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> HV_HYP_PAGE_SHIFT;
+
+ if (hv_isolation_type_tdx())
+ siefp.base_siefp_gpa += ms_hyperv.shared_gpa_boundary
+ >> HV_HYP_PAGE_SHIFT;
}

hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index c6692fd5ab15..a51da82316ce 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -233,7 +233,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,

ring_info->ring_buffer = (struct hv_ring_buffer *)
vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
- PAGE_KERNEL);
+ pgprot_decrypted(PAGE_KERNEL_NOENC));

kfree(pages_wraparound);
if (!ring_info->ring_buffer)
--
2.25.1


2022-11-21 20:39:20

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

A TDX guest uses the GHCI call rather than hv_hypercall_pg.

In hv_do_hypercall(), Hyper-V requires that the input/output addresses
must have the vTOM bit set. With current Hyper-V, the bit for TDX is
bit 47, which is saved into ms_hyperv.shared_gpa_boundary() in
ms_hyperv_init_platform().

arch/x86/include/asm/mshyperv.h: hv_do_hypercall() needs
"struct ms_hyperv_info", which is defined in
include/asm-generic/mshyperv.h, which can't be included in
arch/x86/include/asm/mshyperv.h because include/asm-generic/mshyperv.h
has vmbus_signal_eom() -> hv_set_register(), which is defined in
arch/x86/include/asm/mshyperv.h.

Break this circular dependency by introducing a new header file
for "struct ms_hyperv_info".

Signed-off-by: Dexuan Cui <[email protected]>
---
MAINTAINERS | 1 +
arch/x86/hyperv/hv_init.c | 8 ++++++++
arch/x86/include/asm/mshyperv.h | 24 ++++++++++++++++++++++-
arch/x86/kernel/cpu/mshyperv.c | 2 ++
include/asm-generic/ms_hyperv_info.h | 29 ++++++++++++++++++++++++++++
include/asm-generic/mshyperv.h | 24 +----------------------
6 files changed, 64 insertions(+), 24 deletions(-)
create mode 100644 include/asm-generic/ms_hyperv_info.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 256f03904987..455ecaf188fe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9537,6 +9537,7 @@ F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
F: include/asm-generic/hyperv-tlfs.h
+F: include/asm-generic/ms_hyperv_info.h
F: include/asm-generic/mshyperv.h
F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 89954490af93..05682c4e327f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -432,6 +432,10 @@ void __init hyperv_init(void)
/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);

+ /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+ if (hv_isolation_type_tdx())
+ goto skip_hypercall_pg_init;
+
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
@@ -471,6 +475,7 @@ void __init hyperv_init(void)
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
}

+skip_hypercall_pg_init:
/*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
@@ -606,6 +611,9 @@ bool hv_is_hyperv_initialized(void)
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return false;

+ /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
+ if (hv_isolation_type_tdx())
+ return true;
/*
* Verify that earlier initialization succeeded by checking
* that the hypercall page is setup
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9d593ab2be26..650b4fae2fd8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,7 +9,7 @@
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
-#include <asm/mshyperv.h>
+#include <asm-generic/ms_hyperv_info.h>

union hv_ghcb;

@@ -48,6 +48,18 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;

#ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+ if (hv_isolation_type_tdx()) {
+ if (input_address)
+ input_address += ms_hyperv.shared_gpa_boundary;
+
+ if (output_address)
+ output_address += ms_hyperv.shared_gpa_boundary;
+
+ return __tdx_ms_hv_hypercall(control, output_address,
+ input_address);
+ }
+#endif
if (!hv_hypercall_pg)
return U64_MAX;

@@ -85,6 +97,11 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+ if (hv_isolation_type_tdx())
+ return __tdx_ms_hv_hypercall(control, 0, input1);
+#endif
+
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -116,6 +133,11 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+#if CONFIG_INTEL_TDX_GUEST
+ if (hv_isolation_type_tdx())
+ return __tdx_ms_hv_hypercall(control, input2, input1);
+#endif
+
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9ad0b0abf0e0..dddccdbc5526 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -349,6 +349,8 @@ static void __init ms_hyperv_init_platform(void)

case HV_ISOLATION_TYPE_TDX:
static_branch_enable(&isolation_type_tdx);
+
+ ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
break;

default:
diff --git a/include/asm-generic/ms_hyperv_info.h b/include/asm-generic/ms_hyperv_info.h
new file mode 100644
index 000000000000..734583dfea99
--- /dev/null
+++ b/include/asm-generic/ms_hyperv_info.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_GENERIC_MS_HYPERV_INFO_H
+#define _ASM_GENERIC_MS_HYPERV_INFO_H
+
+struct ms_hyperv_info {
+ u32 features;
+ u32 priv_high;
+ u32 misc_features;
+ u32 hints;
+ u32 nested_features;
+ u32 max_vp_index;
+ u32 max_lp_index;
+ u32 isolation_config_a;
+ union {
+ u32 isolation_config_b;
+ struct {
+ u32 cvm_type : 4;
+ u32 reserved1 : 1;
+ u32 shared_gpa_boundary_active : 1;
+ u32 shared_gpa_boundary_bits : 6;
+ u32 reserved2 : 20;
+ };
+ };
+ u64 shared_gpa_boundary;
+};
+extern struct ms_hyperv_info ms_hyperv;
+
+#endif
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..2ae3e4e4256b 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -25,29 +25,7 @@
#include <linux/nmi.h>
#include <asm/ptrace.h>
#include <asm/hyperv-tlfs.h>
-
-struct ms_hyperv_info {
- u32 features;
- u32 priv_high;
- u32 misc_features;
- u32 hints;
- u32 nested_features;
- u32 max_vp_index;
- u32 max_lp_index;
- u32 isolation_config_a;
- union {
- u32 isolation_config_b;
- struct {
- u32 cvm_type : 4;
- u32 reserved1 : 1;
- u32 shared_gpa_boundary_active : 1;
- u32 shared_gpa_boundary_bits : 6;
- u32 reserved2 : 20;
- };
- };
- u64 shared_gpa_boundary;
-};
-extern struct ms_hyperv_info ms_hyperv;
+#include <asm-generic/ms_hyperv_info.h>

extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
--
2.25.1


2022-11-21 20:56:42

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

GHCI spec for TDX 1.0 says that the MapGPA call may fail with the R10
error code = TDG.VP.VMCALL_RETRY (1), and the guest must retry this
operation for the pages in the region starting at the GPA specified
in R11.

When a TDX guest runs on Hyper-V, Hyper-V returns the retry error
when hyperv_init() -> swiotlb_update_mem_attributes() ->
set_memory_decrypted() decrypts up to 1GB of swiotlb bounce buffers.

Signed-off-by: Dexuan Cui <[email protected]>
---
arch/x86/coco/tdx/tdx.c | 65 +++++++++++++++++++++++++++++++++++++----
1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 3fee96931ff5..46971cc7d006 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -20,6 +20,8 @@
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001

+#define TDVMCALL_STATUS_RETRY 1
+
/* MMIO direction */
#define EPT_READ 0
#define EPT_WRITE 1
@@ -52,6 +54,25 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
return __tdx_hypercall(&args, 0);
}

+static inline u64 _tdx_hypercall_output_r11(u64 fn, u64 r12, u64 r13, u64 r14,
+ u64 r15, u64 *r11)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = fn,
+ .r12 = r12,
+ .r13 = r13,
+ .r14 = r14,
+ .r15 = r15,
+ };
+
+ u64 ret;
+
+ ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+ *r11 = args.r11;
+ return ret;
+}
+
/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void)
{
@@ -691,6 +712,43 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
return true;
}

+/*
+ * Notify the VMM about page mapping conversion. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface (GHCI),
+ * section "TDG.VP.VMCALL<MapGPA>"
+ */
+static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
+{
+ u64 ret, r11;
+
+ while (1) {
+ ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
+ end - start, 0, 0, &r11);
+ if (!ret)
+ break;
+
+ if (ret != TDVMCALL_STATUS_RETRY)
+ break;
+
+ /*
+ * The guest must retry the operation for the pages in the
+ * region starting at the GPA specified in R11. Make sure R11
+ * contains a sane value.
+ */
+ if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
+ (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
+ return false;
+
+ start = r11;
+
+ /* Set the shared (decrypted) bit. */
+ if (!enc)
+ start |= cc_mkdec(0);
+ }
+
+ return !ret;
+}
+
/*
* Inform the VMM of the guest's intent for this physical page: shared with
* the VMM or private to the guest. The VMM is expected to change its mapping
@@ -707,12 +765,7 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
end |= cc_mkdec(0);
}

- /*
- * Notify the VMM about page mapping conversion. More info about ABI
- * can be found in TDX Guest-Host-Communication Interface (GHCI),
- * section "TDG.VP.VMCALL<MapGPA>"
- */
- if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ if (!tdx_map_gpa(start, end, enc))
return false;

/* private->shared conversion requires only MapGPA call */
--
2.25.1


2022-11-21 21:01:07

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

On 11/21/22 11:51, Dexuan Cui wrote:
> +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> +{
> + u64 ret, r11;

'r11' needs a real, logical name.

> + while (1) {
> + ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
> + end - start, 0, 0, &r11);
> + if (!ret)
> + break;
> +
> + if (ret != TDVMCALL_STATUS_RETRY)
> + break;
> +
> + /*
> + * The guest must retry the operation for the pages in the
> + * region starting at the GPA specified in R11. Make sure R11
> + * contains a sane value.
> + */
> + if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
> + (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
> + return false;

This statement is, um, a wee bit ugly.

First, it's not obvious at all why the address masking is necessary.

Second, it's utterly insane to do that mask to 'r11' twice. Third, it's
silly do logically the same thing to start and end every time through
the loop.

This also seems to have built in the idea that cc_mkdec() *SETS* bits
rather than clearing them. That's true for TDX today, but it's a
horrible chunk of code to leave around because it'll confuse actual
legitimate cc_enc/dec() users.

The more I write about it, the more I dislike it.

Why can't this just be:

if ((map_fail_paddr < start) ||
(map_fail_paddr >= end))
return false;

If the hypervisor returns some crazy address in r11 that isn't masked
like the inputs, just fail.

> + start = r11;
> +
> + /* Set the shared (decrypted) bit. */
> + if (!enc)
> + start |= cc_mkdec(0);

Why is only one side of this necessary? Shouldn't it need to be
something like:

if (enc)
start = cc_mkenc(start);
else
start = cc_mkdec(start);

??

> + }
> +
> + return !ret;
> +}
> +
> /*
> * Inform the VMM of the guest's intent for this physical page: shared with
> * the VMM or private to the guest. The VMM is expected to change its mapping
> @@ -707,12 +765,7 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> end |= cc_mkdec(0);
> }
>
> - /*
> - * Notify the VMM about page mapping conversion. More info about ABI
> - * can be found in TDX Guest-Host-Communication Interface (GHCI),
> - * section "TDG.VP.VMCALL<MapGPA>"
> - */
> - if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> + if (!tdx_map_gpa(start, end, enc))
> return false;
>
> /* private->shared conversion requires only MapGPA call */


2022-11-21 21:11:15

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 4/6] x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests

On 11/21/22 11:51, Dexuan Cui wrote:
> + switch (hv_get_isolation_type()) {
> + case HV_ISOLATION_TYPE_VBS:
> + case HV_ISOLATION_TYPE_SNP:
> cc_set_vendor(CC_VENDOR_HYPERV);
> + break;
> +
> + case HV_ISOLATION_TYPE_TDX:
> + static_branch_enable(&isolation_type_tdx);
> + break;

This makes zero logical sense to me.

Running on Hyper-V, a HV_ISOLATION_TYPE_SNP is CC_VENDOR_HYPERV, but a
HV_ISOLATION_TYPE_TDX guest is *NOT* CC_VENDOR_HYPERV?

2022-11-21 21:26:13

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On 11/21/22 11:51, Dexuan Cui wrote:
> __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
> because Hyper-V uses a different calling convention, so add the
> new function __tdx_ms_hv_hypercall().

Other than R10 being variable here and fixed for __tdx_hypercall(), this
looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
subset of what __tdx_hypercall() can do.

Did I miss something?

Another way of saying this: It seems like you could do this with a new
version of _tdx_hypercall() (and all in C) instead of a new
__tdx_hypercall().

2022-11-21 22:05:18

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 4/6] x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests

On Mon, Nov 21, 2022 at 01:01:45PM -0800, Dave Hansen wrote:
> On 11/21/22 11:51, Dexuan Cui wrote:
> > + switch (hv_get_isolation_type()) {
> > + case HV_ISOLATION_TYPE_VBS:
> > + case HV_ISOLATION_TYPE_SNP:
> > cc_set_vendor(CC_VENDOR_HYPERV);
> > + break;
> > +
> > + case HV_ISOLATION_TYPE_TDX:
> > + static_branch_enable(&isolation_type_tdx);
> > + break;
>
> This makes zero logical sense to me.
>
> Running on Hyper-V, a HV_ISOLATION_TYPE_SNP is CC_VENDOR_HYPERV, but a
> HV_ISOLATION_TYPE_TDX guest is *NOT* CC_VENDOR_HYPERV?

https://lore.kernel.org/r/[email protected]

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-11-22 00:12:10

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

On Mon, Nov 21, 2022 at 11:51:47AM -0800, Dexuan Cui wrote:
> GHCI spec for TDX 1.0 says that the MapGPA call may fail with the R10
> error code = TDG.VP.VMCALL_RETRY (1), and the guest must retry this
> operation for the pages in the region starting at the GPA specified
> in R11.
>
> When a TDX guest runs on Hyper-V, Hyper-V returns the retry error
> when hyperv_init() -> swiotlb_update_mem_attributes() ->
> set_memory_decrypted() decrypts up to 1GB of swiotlb bounce buffers.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> arch/x86/coco/tdx/tdx.c | 65 +++++++++++++++++++++++++++++++++++++----
> 1 file changed, 59 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 3fee96931ff5..46971cc7d006 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -20,6 +20,8 @@
> /* TDX hypercall Leaf IDs */
> #define TDVMCALL_MAP_GPA 0x10001
>
> +#define TDVMCALL_STATUS_RETRY 1
> +
> /* MMIO direction */
> #define EPT_READ 0
> #define EPT_WRITE 1
> @@ -52,6 +54,25 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
> return __tdx_hypercall(&args, 0);
> }
>
> +static inline u64 _tdx_hypercall_output_r11(u64 fn, u64 r12, u64 r13, u64 r14,
> + u64 r15, u64 *r11)
> +{
> + struct tdx_hypercall_args args = {
> + .r10 = TDX_HYPERCALL_STANDARD,
> + .r11 = fn,
> + .r12 = r12,
> + .r13 = r13,
> + .r14 = r14,
> + .r15 = r15,
> + };
> +
> + u64 ret;
> +
> + ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> + *r11 = args.r11;
> + return ret;
> +}
> +

I'm not convinced it deserves a separate helper for one user.
Does it look that ugly if tdx_map_gpa() uses __tdx_hypercall() directly?

> /* Called from __tdx_hypercall() for unrecoverable failure */
> void __tdx_hypercall_failed(void)
> {
> @@ -691,6 +712,43 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
> return true;
> }
>
> +/*
> + * Notify the VMM about page mapping conversion. More info about ABI
> + * can be found in TDX Guest-Host-Communication Interface (GHCI),
> + * section "TDG.VP.VMCALL<MapGPA>"
> + */
> +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> +{
> + u64 ret, r11;
> +
> + while (1) {

Endless? Maybe an upper limit if no progress?

> + ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
> + end - start, 0, 0, &r11);
> + if (!ret)
> + break;
> +
> + if (ret != TDVMCALL_STATUS_RETRY)
> + break;
> +
> + /*
> + * The guest must retry the operation for the pages in the
> + * region starting at the GPA specified in R11. Make sure R11
> + * contains a sane value.
> + */
> + if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
> + (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
> + return false;

Emm. All of them suppose to have shared bit set, why not compare directly
without cc_mkdec() dance?

> +
> + start = r11;
> +
> + /* Set the shared (decrypted) bit. */
> + if (!enc)
> + start |= cc_mkdec(0);
> + }
> +
> + return !ret;
> +}
> +
> /*
> * Inform the VMM of the guest's intent for this physical page: shared with
> * the VMM or private to the guest. The VMM is expected to change its mapping
> @@ -707,12 +765,7 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> end |= cc_mkdec(0);
> }
>
> - /*
> - * Notify the VMM about page mapping conversion. More info about ABI
> - * can be found in TDX Guest-Host-Communication Interface (GHCI),
> - * section "TDG.VP.VMCALL<MapGPA>"
> - */
> - if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> + if (!tdx_map_gpa(start, end, enc))
> return false;
>
> /* private->shared conversion requires only MapGPA call */
> --
> 2.25.1
>

--
Kiryl Shutsemau / Kirill A. Shutemov

Subject: Re: [PATCH 4/6] x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests



On 11/21/22 11:51 AM, Dexuan Cui wrote:
> No logic change to SNP/VBS guests.

Add some info on how and where you are going to use this function.

>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> arch/x86/hyperv/ivm.c | 7 +++++++
> arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
> arch/x86/include/asm/mshyperv.h | 3 +++
> arch/x86/kernel/cpu/mshyperv.c | 18 ++++++++++++++++--
> drivers/hv/hv_common.c | 6 ++++++
> 5 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 1dbcbd9da74d..0c219f163f71 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -269,6 +269,13 @@ bool hv_isolation_type_snp(void)
> return static_branch_unlikely(&isolation_type_snp);
> }
>
> +DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
> +
> +bool hv_isolation_type_tdx(void)
> +{
> + return static_branch_unlikely(&isolation_type_tdx);
> +}

Does it need #ifdef CONFIG_INTEL_TDX_GUEST? If not TDX, you can
live with weak reference.

> +
> /*
> * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
> *
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 6d9368ea3701..6c0a04d078f5 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -161,7 +161,8 @@
> enum hv_isolation_type {
> HV_ISOLATION_TYPE_NONE = 0,
> HV_ISOLATION_TYPE_VBS = 1,
> - HV_ISOLATION_TYPE_SNP = 2
> + HV_ISOLATION_TYPE_SNP = 2,
> + HV_ISOLATION_TYPE_TDX = 3
> };
>
> /* Hyper-V specific model specific registers (MSRs) */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index fc09b6739922..9d593ab2be26 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -14,6 +14,7 @@
> union hv_ghcb;
>
> DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
> +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx);
>
> typedef int (*hyperv_fill_flush_list_func)(
> struct hv_guest_mapping_flush_list *flush,
> @@ -32,6 +33,8 @@ extern u64 hv_current_partition_id;
>
> extern union hv_ghcb * __percpu *hv_ghcb_pg;
>
> +extern bool hv_isolation_type_tdx(void);
> +
> int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
> int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
> int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 831613959a92..9ad0b0abf0e0 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -338,9 +338,23 @@ static void __init ms_hyperv_init_platform(void)
> #endif
> }
> /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
> - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
> - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
> + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) ||
> + IS_ENABLED(CONFIG_INTEL_TDX_GUEST)) {
> +
> + switch (hv_get_isolation_type()) {
> + case HV_ISOLATION_TYPE_VBS:
> + case HV_ISOLATION_TYPE_SNP:
> cc_set_vendor(CC_VENDOR_HYPERV);
> + break;
> +
> + case HV_ISOLATION_TYPE_TDX:
> + static_branch_enable(&isolation_type_tdx);
> + break;
> +

It is not clear why you need special handling for TDX?

> + default:
> + WARN_ON(1);
> + break;
> + }
> }
> }
>
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index ae68298c0dca..a9a03ab04b97 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -268,6 +268,12 @@ bool __weak hv_isolation_type_snp(void)
> }
> EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
>
> +bool __weak hv_isolation_type_tdx(void)
> +{
> + return false;
> +}
> +EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
> +
> void __weak hv_setup_vmbus_handler(void (*handler)(void))
> {
> }

--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2022-11-22 00:51:51

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On Mon, Nov 21, 2022 at 12:38:36PM -0800, Dave Hansen wrote:
> On 11/21/22 11:51, Dexuan Cui wrote:
> > __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
> > because Hyper-V uses a different calling convention, so add the
> > new function __tdx_ms_hv_hypercall().
>
> Other than R10 being variable here and fixed for __tdx_hypercall(), this
> looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
> subset of what __tdx_hypercall() can do.
>
> Did I miss something?
>
> Another way of saying this: It seems like you could do this with a new
> version of _tdx_hypercall() (and all in C) instead of a new
> __tdx_hypercall().

+1. There should be a strong reason to add another asm helper.

--
Kiryl Shutsemau / Kirill A. Shutemov

2022-11-23 02:34:36

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

> From: Dave Hansen <[email protected]>
> Sent: Monday, November 21, 2022 12:39 PM
> [...]
> On 11/21/22 11:51, Dexuan Cui wrote:
> > __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
> > because Hyper-V uses a different calling convention, so add the
> > new function __tdx_ms_hv_hypercall().
>
> Other than R10 being variable here and fixed for __tdx_hypercall(), this
> looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
> subset of what __tdx_hypercall() can do.
>
> Did I miss something?

The existing asm code for __tdx_hypercall() passes through R10~R15
(see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.

Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?

> Another way of saying this: It seems like you could do this with a new
> version of _tdx_hypercall() (and all in C) instead of a new
> __tdx_hypercall().

I don't think the current TDVMCALL_EXPOSE_REGS_MASK allows me
to pass through RDX and R8 to Hyper-V.

PS, the comment before __tdx_hypercall() contains this line:

"* RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific
arguments."

But it looks like currently RBX an RBP are not used at all in
arch/x86/coco/tdx/tdcall.S ?

2022-11-23 02:55:33

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

> From: Dexuan Cui
> [...]
> The existing asm code for __tdx_hypercall() passes through R10~R15
> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
>
> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?

I'm checking with the Hyper-V team to see if it's possible for them
to not use RDX and R8, and use R12 and R13 instead. Will keep the
thread updated.

2022-11-23 03:33:18

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

> From: Kirill A. Shutemov <[email protected]>
> Sent: Monday, November 21, 2022 4:01 PM
> [...]
> On Mon, Nov 21, 2022 at 11:51:47AM -0800, Dexuan Cui wrote:
> [...]
> I'm not convinced it deserves a separate helper for one user.
> Does it look that ugly if tdx_map_gpa() uses __tdx_hypercall() directly?

Will use __tdx_hypercall() directly.

> > /* Called from __tdx_hypercall() for unrecoverable failure */
> > void __tdx_hypercall_failed(void)
> > {
> > @@ -691,6 +712,43 @@ static bool try_accept_one(phys_addr_t *start,
> unsigned long len,
> > return true;
> > }
> >
> > +/*
> > + * Notify the VMM about page mapping conversion. More info about ABI
> > + * can be found in TDX Guest-Host-Communication Interface (GHCI),
> > + * section "TDG.VP.VMCALL<MapGPA>"
> > + */
> > +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> > +{
> > + u64 ret, r11;
> > +
> > + while (1) {
>
> Endless? Maybe an upper limit if no progress?

I'll add a max count of 1000, which should be far more than enough.

> > + ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
> > + end - start, 0, 0, &r11);
> > + if (!ret)
> > + break;
> > +
> > + if (ret != TDVMCALL_STATUS_RETRY)
> > + break;
> > +
> > + /*
> > + * The guest must retry the operation for the pages in the
> > + * region starting at the GPA specified in R11. Make sure R11
> > + * contains a sane value.
> > + */
> > + if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
> > + (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
> > + return false;
>
> Emm. All of them suppose to have shared bit set, why not compare directly
> without cc_mkdec() dance?

The above code is unnecessary and will be removed.

So, I'll use the below in v2:

/*
* Notify the VMM about page mapping conversion. More info about ABI
* can be found in TDX Guest-Host-Communication Interface (GHCI),
* section "TDG.VP.VMCALL<MapGPA>"
*/
static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
{
int max_retry_cnt = 1000, retry_cnt = 0;
struct tdx_hypercall_args args;
u64 map_fail_paddr, ret;

while (1) {
args.r10 = TDX_HYPERCALL_STANDARD;
args.r11 = TDVMCALL_MAP_GPA;
args.r12 = start;
args.r13 = end - start;
args.r14 = 0;
args.r15 = 0;

ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
if (!ret)
break;

if (ret != TDVMCALL_STATUS_RETRY)
break;
/*
* The guest must retry the operation for the pages in the
* region starting at the GPA specified in R11. Make sure R11
* contains a sane value.
*/
map_fail_paddr = args.r11 ;
if (map_fail_paddr < start || map_fail_paddr >= end)
return false;

if (map_fail_paddr == start) {
retry_cnt++;
if (retry_cnt > max_retry_cnt)
return false;
} else {
retry_cnt = 0;;
start = map_fail_paddr;
}
}

return !ret;
}

2022-11-23 03:38:44

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

> Sent: Monday, November 21, 2022 12:56 PM
> On 11/21/22 11:51, Dexuan Cui wrote:
> > +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> > +{
> > + u64 ret, r11;
>
> 'r11' needs a real, logical name.

OK, will use "map_fail_paddr" (as you implied below).

> > + while (1) {
> > + ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
> > + end - start, 0, 0, &r11);
> > + if (!ret)
> > + break;
> > +
> > + if (ret != TDVMCALL_STATUS_RETRY)
> > + break;
> > +
> > + /*
> > + * The guest must retry the operation for the pages in the
> > + * region starting at the GPA specified in R11. Make sure R11
> > + * contains a sane value.
> > + */
> > + if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
> > + (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
> > + return false;
>
> This statement is, um, a wee bit ugly.
>
> First, it's not obvious at all why the address masking is necessary.

It turns out that the masking is completely unnecessary :-)

I incorrectly assumed that if the input 'start' has the bit 47, Hyper-V
always returns a physical address without bit 47. This is not the case.

I'll remove the masking code in v2.

> Second, it's utterly insane to do that mask to 'r11' twice. Third, it's
> silly do logically the same thing to start and end every time through
> the loop.
>
> This also seems to have built in the idea that cc_mkdec() *SETS* bits
> rather than clearing them. That's true for TDX today, but it's a
> horrible chunk of code to leave around because it'll confuse actual
> legitimate cc_enc/dec() users.
>
> The more I write about it, the more I dislike it.
>
> Why can't this just be:
>
> if ((map_fail_paddr < start) ||
> (map_fail_paddr >= end))
> return false;
>
> If the hypervisor returns some crazy address in r11 that isn't masked
> like the inputs, just fail.

Will use your example code in v2.

> > + start = r11;
> > +
> > + /* Set the shared (decrypted) bit. */
> > + if (!enc)
> > + start |= cc_mkdec(0);
>
> Why is only one side of this necessary? Shouldn't it need to be
> something like:
>
> if (enc)
> start = cc_mkenc(start);
> else
> start = cc_mkdec(start);
>
> ??
The code is unnecessary. Will remove it in v2.

Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V



On 11/22/22 5:37 PM, Dexuan Cui wrote:
>> From: Dave Hansen <[email protected]>
>> Sent: Monday, November 21, 2022 12:39 PM
>> [...]
>> On 11/21/22 11:51, Dexuan Cui wrote:
>>> __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
>>> because Hyper-V uses a different calling convention, so add the
>>> new function __tdx_ms_hv_hypercall().
>>
>> Other than R10 being variable here and fixed for __tdx_hypercall(), this
>> looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
>> subset of what __tdx_hypercall() can do.
>>
>> Did I miss something?
>
> The existing asm code for __tdx_hypercall() passes through R10~R15
> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
>
> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?
> >> Another way of saying this: It seems like you could do this with a new
>> version of _tdx_hypercall() (and all in C) instead of a new
>> __tdx_hypercall().
>
> I don't think the current TDVMCALL_EXPOSE_REGS_MASK allows me
> to pass through RDX and R8 to Hyper-V.

Because TDVMCALLs defined in the GHCI specification only use registers
R10-R15, only those registers are currently exposed. However, the TDVMCALL
ABI allows the use of input registers such as RBX, RBP, RDI, RSI, R8 or R9.
Instead of creating a new variant of __tdx_hypercall() to handle your use
case, perhaps you can add the registers you require to the
TDVMCALL EXPOSE REGS MASK. You just need to make sure they are zeroed out
for other users of __tdx_hypercall().

>
> PS, the comment before __tdx_hypercall() contains this line:
>
> "* RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific
> arguments."
>
> But it looks like currently RBX an RBP are not used at all in
> arch/x86/coco/tdx/tdcall.S ?
>

--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2022-11-23 13:51:42

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

From: Dexuan Cui <[email protected]> Sent: Tuesday, November 22, 2022 7:27 PM
>
> > From: Kirill A. Shutemov <[email protected]>
> > Sent: Monday, November 21, 2022 4:01 PM
> > [...]
> > On Mon, Nov 21, 2022 at 11:51:47AM -0800, Dexuan Cui wrote:
> > [...]
> > I'm not convinced it deserves a separate helper for one user.
> > Does it look that ugly if tdx_map_gpa() uses __tdx_hypercall() directly?
>
> Will use __tdx_hypercall() directly.
>
> > > /* Called from __tdx_hypercall() for unrecoverable failure */
> > > void __tdx_hypercall_failed(void)
> > > {
> > > @@ -691,6 +712,43 @@ static bool try_accept_one(phys_addr_t *start,
> > unsigned long len,
> > > return true;
> > > }
> > >
> > > +/*
> > > + * Notify the VMM about page mapping conversion. More info about ABI
> > > + * can be found in TDX Guest-Host-Communication Interface (GHCI),
> > > + * section "TDG.VP.VMCALL<MapGPA>"
> > > + */
> > > +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> > > +{
> > > + u64 ret, r11;
> > > +
> > > + while (1) {
> >
> > Endless? Maybe an upper limit if no progress?
>
> I'll add a max count of 1000, which should be far more than enough.
>
> > > + ret = _tdx_hypercall_output_r11(TDVMCALL_MAP_GPA, start,
> > > + end - start, 0, 0, &r11);
> > > + if (!ret)
> > > + break;
> > > +
> > > + if (ret != TDVMCALL_STATUS_RETRY)
> > > + break;
> > > +
> > > + /*
> > > + * The guest must retry the operation for the pages in the
> > > + * region starting at the GPA specified in R11. Make sure R11
> > > + * contains a sane value.
> > > + */
> > > + if ((r11 & ~cc_mkdec(0)) < (start & ~cc_mkdec(0)) ||
> > > + (r11 & ~cc_mkdec(0)) >= (end & ~cc_mkdec(0)))
> > > + return false;
> >
> > Emm. All of them suppose to have shared bit set, why not compare directly
> > without cc_mkdec() dance?
>
> The above code is unnecessary and will be removed.
>
> So, I'll use the below in v2:
>
> /*
> * Notify the VMM about page mapping conversion. More info about ABI
> * can be found in TDX Guest-Host-Communication Interface (GHCI),
> * section "TDG.VP.VMCALL<MapGPA>"
> */
> static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> {
> int max_retry_cnt = 1000, retry_cnt = 0;
> struct tdx_hypercall_args args;
> u64 map_fail_paddr, ret;
>
> while (1) {
> args.r10 = TDX_HYPERCALL_STANDARD;
> args.r11 = TDVMCALL_MAP_GPA;
> args.r12 = start;
> args.r13 = end - start;
> args.r14 = 0;
> args.r15 = 0;
>
> ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> if (!ret)
> break;

The above test is redundant and can be removed. The "success" case is
implicitly handled by the test below for != TDVMCALL_STATUS_RETRY.

>
> if (ret != TDVMCALL_STATUS_RETRY)
> break;
> /*
> * The guest must retry the operation for the pages in the
> * region starting at the GPA specified in R11. Make sure R11
> * contains a sane value.
> */
> map_fail_paddr = args.r11 ;
> if (map_fail_paddr < start || map_fail_paddr >= end)
> return false;
>
> if (map_fail_paddr == start) {
> retry_cnt++;
> if (retry_cnt > max_retry_cnt)
> return false;
> } else {
> retry_cnt = 0;;
> start = map_fail_paddr;

Just summarizing the code, we increment the retry count if the hypercall
returns STATUS_RETRY but did nothing (i.e., map_fail_paddr == start). But
if the hypercall returns STATUS_RETRY after making at least some progress,
then we reset the retry count. So in the worst case, for example, if the
hypercall processed only one page on each invocation, the loop will continue
until completion, without hitting any retry limits. That scenario seems
plausible and within the spec.

Do we have any indication about the likelihood of the "RETRY but did
nothing" case? The spec doesn't appear to disallow this case, but does
Hyper-V actually do this? It seems like a weird case.

Michael

> }
> }
>
> return !ret;
> }

2022-11-23 15:04:48

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On Wed, Nov 23, 2022 at 01:37:26AM +0000, Dexuan Cui wrote:
> > From: Dave Hansen <[email protected]>
> > Sent: Monday, November 21, 2022 12:39 PM
> > [...]
> > On 11/21/22 11:51, Dexuan Cui wrote:
> > > __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
> > > because Hyper-V uses a different calling convention, so add the
> > > new function __tdx_ms_hv_hypercall().
> >
> > Other than R10 being variable here and fixed for __tdx_hypercall(), this
> > looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
> > subset of what __tdx_hypercall() can do.
> >
> > Did I miss something?
>
> The existing asm code for __tdx_hypercall() passes through R10~R15
> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
>
> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?
>
> > Another way of saying this: It seems like you could do this with a new
> > version of _tdx_hypercall() (and all in C) instead of a new
> > __tdx_hypercall().
>
> I don't think the current TDVMCALL_EXPOSE_REGS_MASK allows me
> to pass through RDX and R8 to Hyper-V.
>
> PS, the comment before __tdx_hypercall() contains this line:
>
> "* RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific
> arguments."
>
> But it looks like currently RBX an RBP are not used at all in
> arch/x86/coco/tdx/tdcall.S ?

I have plan to expand __tdx_hypercall() to cover more registers.
See the patch below.

Is it enough for you?

---
arch/x86/coco/tdx/tdcall.S | 82 ++++++++++++++++++++++---------
arch/x86/include/asm/shared/tdx.h | 6 +++
arch/x86/kernel/asm-offsets.c | 6 +++
3 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index f9eb1134f22d..64e57739dc9d 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -13,6 +13,12 @@
/*
* Bitmasks of exposed registers (with VMM).
*/
+#define TDX_RDX BIT(2)
+#define TDX_RBX BIT(3)
+#define TDX_RSI BIT(6)
+#define TDX_RDI BIT(7)
+#define TDX_R8 BIT(8)
+#define TDX_R9 BIT(9)
#define TDX_R10 BIT(10)
#define TDX_R11 BIT(11)
#define TDX_R12 BIT(12)
@@ -27,9 +33,9 @@
* details can be found in TDX GHCI specification, section
* titled "TDCALL [TDG.VP.VMCALL] leaf".
*/
-#define TDVMCALL_EXPOSE_REGS_MASK ( TDX_R10 | TDX_R11 | \
- TDX_R12 | TDX_R13 | \
- TDX_R14 | TDX_R15 )
+#define TDVMCALL_EXPOSE_REGS_MASK \
+ ( TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \
+ TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15 )

/*
* __tdx_module_call() - Used by TDX guests to request services from
@@ -124,19 +130,32 @@ SYM_FUNC_START(__tdx_hypercall)
push %r14
push %r13
push %r12
+ push %rbx
+ push %rbp
+
+ movq %rdi, %rax
+ movq %rsi, %rbp
+
+ /* Copy hypercall registers from arg struct: */
+ movq TDX_HYPERCALL_r8(%rax), %r8
+ movq TDX_HYPERCALL_r9(%rax), %r9
+ movq TDX_HYPERCALL_r10(%rax), %r10
+ movq TDX_HYPERCALL_r11(%rax), %r11
+ movq TDX_HYPERCALL_r12(%rax), %r12
+ movq TDX_HYPERCALL_r13(%rax), %r13
+ movq TDX_HYPERCALL_r14(%rax), %r14
+ movq TDX_HYPERCALL_r15(%rax), %r15
+ movq TDX_HYPERCALL_rdi(%rax), %rdi
+ movq TDX_HYPERCALL_rsi(%rax), %rsi
+ movq TDX_HYPERCALL_rbx(%rax), %rbx
+ movq TDX_HYPERCALL_rdx(%rax), %rdx
+
+ push %rax

/* Mangle function call ABI into TDCALL ABI: */
/* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
xor %eax, %eax

- /* Copy hypercall registers from arg struct: */
- movq TDX_HYPERCALL_r10(%rdi), %r10
- movq TDX_HYPERCALL_r11(%rdi), %r11
- movq TDX_HYPERCALL_r12(%rdi), %r12
- movq TDX_HYPERCALL_r13(%rdi), %r13
- movq TDX_HYPERCALL_r14(%rdi), %r14
- movq TDX_HYPERCALL_r15(%rdi), %r15
-
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx

/*
@@ -148,14 +167,14 @@ SYM_FUNC_START(__tdx_hypercall)
* HLT operation indefinitely. Since this is the not the desired
* result, conditionally call STI before TDCALL.
*/
- testq $TDX_HCALL_ISSUE_STI, %rsi
+ testq $TDX_HCALL_ISSUE_STI, %rbp
jz .Lskip_sti
sti
.Lskip_sti:
tdcall

/*
- * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+ * RAX!=0 indicates a failure of the TDVMCALL mechanism itself and that
* something has gone horribly wrong with the TDX module.
*
* The return status of the hypercall operation is in a separate
@@ -165,30 +184,45 @@ SYM_FUNC_START(__tdx_hypercall)
testq %rax, %rax
jne .Lpanic

- /* TDVMCALL leaf return code is in R10 */
- movq %r10, %rax
+ pop %rax

/* Copy hypercall result registers to arg struct if needed */
- testq $TDX_HCALL_HAS_OUTPUT, %rsi
+ testq $TDX_HCALL_HAS_OUTPUT, %rbp
jz .Lout

- movq %r10, TDX_HYPERCALL_r10(%rdi)
- movq %r11, TDX_HYPERCALL_r11(%rdi)
- movq %r12, TDX_HYPERCALL_r12(%rdi)
- movq %r13, TDX_HYPERCALL_r13(%rdi)
- movq %r14, TDX_HYPERCALL_r14(%rdi)
- movq %r15, TDX_HYPERCALL_r15(%rdi)
+ movq %r8, TDX_HYPERCALL_r8(%rax)
+ movq %r9, TDX_HYPERCALL_r9(%rax)
+ movq %r10, TDX_HYPERCALL_r10(%rax)
+ movq %r11, TDX_HYPERCALL_r11(%rax)
+ movq %r12, TDX_HYPERCALL_r12(%rax)
+ movq %r13, TDX_HYPERCALL_r13(%rax)
+ movq %r14, TDX_HYPERCALL_r14(%rax)
+ movq %r15, TDX_HYPERCALL_r15(%rax)
+ movq %rdi, TDX_HYPERCALL_rdi(%rax)
+ movq %rsi, TDX_HYPERCALL_rsi(%rax)
+ movq %rbx, TDX_HYPERCALL_rbx(%rax)
+ movq %rdx, TDX_HYPERCALL_rdx(%rax)
.Lout:
+ /* TDVMCALL leaf return code is in R10 */
+ movq %r10, %rax
+
/*
* Zero out registers exposed to the VMM to avoid speculative execution
* with VMM-controlled values. This needs to include all registers
- * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15
- * context will be restored.
+ * present in TDVMCALL_EXPOSE_REGS_MASK, except RBX, and R12-R15 which
+ * will be restored.
*/
+ xor %r8d, %r8d
+ xor %r9d, %r9d
xor %r10d, %r10d
xor %r11d, %r11d
+ xor %rdi, %rdi
+ xor %rsi, %rsi
+ xor %rdx, %rdx

/* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+ pop %rbp
+ pop %rbx
pop %r12
pop %r13
pop %r14
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index e53f26228fbb..8068faa52de1 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -22,12 +22,18 @@
* This is a software only structure and not part of the TDX module/VMM ABI.
*/
struct tdx_hypercall_args {
+ u64 r8;
+ u64 r9;
u64 r10;
u64 r11;
u64 r12;
u64 r13;
u64 r14;
u64 r15;
+ u64 rdi;
+ u64 rsi;
+ u64 rbx;
+ u64 rdx;
};

/* Used to request services from the VMM */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 437308004ef2..cf819c5ed2de 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -81,6 +81,12 @@ static void __used common(void)
OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+ OFFSET(TDX_HYPERCALL_rbx, tdx_hypercall_args, rbx);
+ OFFSET(TDX_HYPERCALL_rdi, tdx_hypercall_args, rdi);
+ OFFSET(TDX_HYPERCALL_rsi, tdx_hypercall_args, rsi);
+ OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8);
+ OFFSET(TDX_HYPERCALL_r9, tdx_hypercall_args, r9);
+ OFFSET(TDX_HYPERCALL_rdx, tdx_hypercall_args, rdx);

BLANK();
OFFSET(BP_scratch, boot_params, scratch);
--
Kiryl Shutsemau / Kirill A. Shutemov

2022-11-23 15:22:13

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

From: Dexuan Cui <[email protected]> Sent: Monday, November 21, 2022 11:52 AM
>
> A TDX guest uses the GHCI call rather than hv_hypercall_pg.
>
> In hv_do_hypercall(), Hyper-V requires that the input/output addresses
> must have the vTOM bit set. With current Hyper-V, the bit for TDX is
> bit 47, which is saved into ms_hyperv.shared_gpa_boundary() in
> ms_hyperv_init_platform().
>
> arch/x86/include/asm/mshyperv.h: hv_do_hypercall() needs
> "struct ms_hyperv_info", which is defined in
> include/asm-generic/mshyperv.h, which can't be included in
> arch/x86/include/asm/mshyperv.h because include/asm-generic/mshyperv.h
> has vmbus_signal_eom() -> hv_set_register(), which is defined in
> arch/x86/include/asm/mshyperv.h.
>
> Break this circular dependency by introducing a new header file
> for "struct ms_hyperv_info".
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> MAINTAINERS | 1 +
> arch/x86/hyperv/hv_init.c | 8 ++++++++
> arch/x86/include/asm/mshyperv.h | 24 ++++++++++++++++++++++-
> arch/x86/kernel/cpu/mshyperv.c | 2 ++
> include/asm-generic/ms_hyperv_info.h | 29 ++++++++++++++++++++++++++++
> include/asm-generic/mshyperv.h | 24 +----------------------
> 6 files changed, 64 insertions(+), 24 deletions(-)
> create mode 100644 include/asm-generic/ms_hyperv_info.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 256f03904987..455ecaf188fe 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -9537,6 +9537,7 @@ F: drivers/scsi/storvsc_drv.c
> F: drivers/uio/uio_hv_generic.c
> F: drivers/video/fbdev/hyperv_fb.c
> F: include/asm-generic/hyperv-tlfs.h
> +F: include/asm-generic/ms_hyperv_info.h
> F: include/asm-generic/mshyperv.h
> F: include/clocksource/hyperv_timer.h
> F: include/linux/hyperv.h
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 89954490af93..05682c4e327f 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -432,6 +432,10 @@ void __init hyperv_init(void)
> /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
> hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>
> + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> + if (hv_isolation_type_tdx())
> + goto skip_hypercall_pg_init;
> +
> hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
> VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
> VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> @@ -471,6 +475,7 @@ void __init hyperv_init(void)
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> }
>
> +skip_hypercall_pg_init:
> /*
> * hyperv_init() is called before LAPIC is initialized: see
> * apic_intr_mode_init() -> x86_platform.apic_post_init() and
> @@ -606,6 +611,9 @@ bool hv_is_hyperv_initialized(void)
> if (x86_hyper_type != X86_HYPER_MS_HYPERV)
> return false;
>
> + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> + if (hv_isolation_type_tdx())
> + return true;
> /*
> * Verify that earlier initialization succeeded by checking
> * that the hypercall page is setup
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 9d593ab2be26..650b4fae2fd8 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -9,7 +9,7 @@
> #include <asm/hyperv-tlfs.h>
> #include <asm/nospec-branch.h>
> #include <asm/paravirt.h>
> -#include <asm/mshyperv.h>
> +#include <asm-generic/ms_hyperv_info.h>
>
> union hv_ghcb;
>
> @@ -48,6 +48,18 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> + if (hv_isolation_type_tdx()) {
> + if (input_address)
> + input_address += ms_hyperv.shared_gpa_boundary;
> +
> + if (output_address)
> + output_address += ms_hyperv.shared_gpa_boundary;
> +
> + return __tdx_ms_hv_hypercall(control, output_address,
> + input_address);
> + }
> +#endif

Two thoughts:

1) The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed entirely
with a tweak. hv_isolation_type_tdx() already doesn't need the #ifdef as there's
already a stub that returns 'false'. Then you just need a way to handle
__tdx_ms_hv_hypercall(), or whatever it becomes based on the other discussion.
As long as you can provide a stub that does nothing, the #ifdef won't be needed.

2) Assuming that we end up with some kind of Hyper-V specific version of
__tdx_hypercall(), and hopefully as a "C" function, could you move the handling
of ms_hyperv.shared_gpa_boundary into that function? Then you won't need
to break out a separate include file for struct ms_hyperv. The Hyper-V TDX
hypercall function must handle both normal and "fast" hypercalls, and the
shared_gpa_boundary adjustment is needed only for normal hypercalls,
but you can check the "fast" bit in the control word to decide.

I haven't coded these ideas, so maybe there are snags I haven't thought of.
But I'm really hoping we can avoid having to create a separate include
file for struct ms_hyperv.

Michael

> if (!hv_hypercall_pg)
> return U64_MAX;
>
> @@ -85,6 +97,11 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> + if (hv_isolation_type_tdx())
> + return __tdx_ms_hv_hypercall(control, 0, input1);
> +#endif
> +
> {
> __asm__ __volatile__(CALL_NOSPEC
> : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -116,6 +133,11 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1,
> u64 input2)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> +#if CONFIG_INTEL_TDX_GUEST
> + if (hv_isolation_type_tdx())
> + return __tdx_ms_hv_hypercall(control, input2, input1);
> +#endif
> +
> {
> __asm__ __volatile__("mov %4, %%r8\n"
> CALL_NOSPEC
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 9ad0b0abf0e0..dddccdbc5526 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -349,6 +349,8 @@ static void __init ms_hyperv_init_platform(void)
>
> case HV_ISOLATION_TYPE_TDX:
> static_branch_enable(&isolation_type_tdx);
> +
> + ms_hyperv.shared_gpa_boundary = cc_mkdec(0);
> break;
>
> default:
> diff --git a/include/asm-generic/ms_hyperv_info.h b/include/asm-
> generic/ms_hyperv_info.h
> new file mode 100644
> index 000000000000..734583dfea99
> --- /dev/null
> +++ b/include/asm-generic/ms_hyperv_info.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _ASM_GENERIC_MS_HYPERV_INFO_H
> +#define _ASM_GENERIC_MS_HYPERV_INFO_H
> +
> +struct ms_hyperv_info {
> + u32 features;
> + u32 priv_high;
> + u32 misc_features;
> + u32 hints;
> + u32 nested_features;
> + u32 max_vp_index;
> + u32 max_lp_index;
> + u32 isolation_config_a;
> + union {
> + u32 isolation_config_b;
> + struct {
> + u32 cvm_type : 4;
> + u32 reserved1 : 1;
> + u32 shared_gpa_boundary_active : 1;
> + u32 shared_gpa_boundary_bits : 6;
> + u32 reserved2 : 20;
> + };
> + };
> + u64 shared_gpa_boundary;
> +};
> +extern struct ms_hyperv_info ms_hyperv;
> +
> +#endif
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index bfb9eb9d7215..2ae3e4e4256b 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -25,29 +25,7 @@
> #include <linux/nmi.h>
> #include <asm/ptrace.h>
> #include <asm/hyperv-tlfs.h>
> -
> -struct ms_hyperv_info {
> - u32 features;
> - u32 priv_high;
> - u32 misc_features;
> - u32 hints;
> - u32 nested_features;
> - u32 max_vp_index;
> - u32 max_lp_index;
> - u32 isolation_config_a;
> - union {
> - u32 isolation_config_b;
> - struct {
> - u32 cvm_type : 4;
> - u32 reserved1 : 1;
> - u32 shared_gpa_boundary_active : 1;
> - u32 shared_gpa_boundary_bits : 6;
> - u32 reserved2 : 20;
> - };
> - };
> - u64 shared_gpa_boundary;
> -};
> -extern struct ms_hyperv_info ms_hyperv;
> +#include <asm-generic/ms_hyperv_info.h>
>
> extern void * __percpu *hyperv_pcpu_input_arg;
> extern void * __percpu *hyperv_pcpu_output_arg;
> --
> 2.25.1

2022-11-23 16:31:30

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On 11/22/22 17:56, Dexuan Cui wrote:
>> From: Dexuan Cui
>> [...]
>> The existing asm code for __tdx_hypercall() passes through R10~R15
>> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
>>
>> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
>> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?
> I'm checking with the Hyper-V team to see if it's possible for them
> to not use RDX and R8, and use R12 and R13 instead. Will keep the
> thread updated.

That would be nice. But, to be honest, I don't expect them to change
the ABI for one OS. It's not a big deal to just make the function a bit
more flexible.

2022-11-23 16:31:50

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On 11/22/22 17:37, Dexuan Cui wrote:
>> From: Dave Hansen <[email protected]>
>> Sent: Monday, November 21, 2022 12:39 PM
>> [...]
>> On 11/21/22 11:51, Dexuan Cui wrote:
>>> __tdx_hypercall() doesn't work for a TDX guest running on Hyper-V,
>>> because Hyper-V uses a different calling convention, so add the
>>> new function __tdx_ms_hv_hypercall().
>>
>> Other than R10 being variable here and fixed for __tdx_hypercall(), this
>> looks *EXACTLY* the same as __tdx_hypercall(), or at least a strict
>> subset of what __tdx_hypercall() can do.
>>
>> Did I miss something?
>
> The existing asm code for __tdx_hypercall() passes through R10~R15
> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
>
> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?

What's to prevent you from adding RDX and R8? You could make
TDVMCALL_EXPOSE_REGS_MASK a macro argument.

Look at 'has_erro_code', for instance in "idtentry_body"
arch/x86/entry/entry_64.S.

>> Another way of saying this: It seems like you could do this with a new
>> version of _tdx_hypercall() (and all in C) instead of a new
>> __tdx_hypercall().
>
> I don't think the current TDVMCALL_EXPOSE_REGS_MASK allows me
> to pass through RDX and R8 to Hyper-V.

Right. So pass it in.

> PS, the comment before __tdx_hypercall() contains this line:
>
> "* RBX, RBP, RDI, RSI - Used to pass VMCALL sub function specific
> arguments."
>
> But it looks like currently RBX an RBP are not used at all in
> arch/x86/coco/tdx/tdcall.S ?

Yeah, it looks like they are a part of the hypercall ABI but no existing
hypercall is using them. Patches to fix it accepted. :)

2022-11-23 19:30:21

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 4/6] x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests

> From: Sathyanarayanan Kuppuswamy
> <[email protected]>
>
> On 11/21/22 11:51 AM, Dexuan Cui wrote:
> > No logic change to SNP/VBS guests.
>
> Add some info on how and where you are going to use this function.

Will do.

> > +DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
> > +
> > +bool hv_isolation_type_tdx(void)
> > +{
> > + return static_branch_unlikely(&isolation_type_tdx);
> > +}
>
> Does it need #ifdef CONFIG_INTEL_TDX_GUEST? If not TDX, you can
> live with weak reference.

Will add the #ifdef.

> > - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
> > - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
> > + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) ||
> > + IS_ENABLED(CONFIG_INTEL_TDX_GUEST)) {
> > +
> > + switch (hv_get_isolation_type()) {
> > + case HV_ISOLATION_TYPE_VBS:
> > + case HV_ISOLATION_TYPE_SNP:
> > cc_set_vendor(CC_VENDOR_HYPERV);
> > + break;
> > +
> > + case HV_ISOLATION_TYPE_TDX:
> > + static_branch_enable(&isolation_type_tdx);
> > + break;
> > +
>
> It is not clear why you need special handling for TDX?

It's being discussed in another thread:
https://lwn.net/ml/linux-kernel/BYAPR21MB16886FF8B35F51964A515CD5D70C9@BYAPR21MB1688.namprd21.prod.outlook.com/

I'll wait for Michael Kelley's v4 and rebase my patches accordingly.

2022-11-23 19:46:31

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

> From: Kirill A. Shutemov <[email protected]>
> Sent: Wednesday, November 23, 2022 6:41 AM
> [...]
> I have plan to expand __tdx_hypercall() to cover more registers.
> See the patch below.

Great! Thank you!

> Is it enough for you?
Yes.

2022-11-23 19:51:00

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

> From: Dave Hansen <[email protected]>
> Sent: Wednesday, November 23, 2022 8:05 AM
> On 11/22/22 17:56, Dexuan Cui wrote:
> >> From: Dexuan Cui
> >> [...]
> >> The existing asm code for __tdx_hypercall() passes through R10~R15
> >> (see TDVMCALL_EXPOSE_REGS_MASK) to the (KVM) hypervisor.
> >>
> >> Unluckily, for Hyper-V, we need to pass through RDX, R8, R10 and R11
> >> to Hyper-V, so I don't think I can use the existing __tdx_hypercall() ?
> > I'm checking with the Hyper-V team to see if it's possible for them
> > to not use RDX and R8, and use R12 and R13 instead. Will keep the
> > thread updated.
>
> That would be nice. But, to be honest, I don't expect them to change
> the ABI for one OS. It's not a big deal to just make the function a bit
> more flexible.

Then I'll implement a C function using __tdx_hypercall() that will
be expaneed by Kirill. Thank you all for the suggestions!

2022-11-28 00:21:43

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 2/6] x86/tdx: Retry TDVMCALL_MAP_GPA() when needed

> From: Michael Kelley (LINUX) <[email protected]>
> Sent: Wednesday, November 23, 2022 5:30 AM
> > [...]
> > static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
> > {
> > int max_retry_cnt = 1000, retry_cnt = 0;
> > struct tdx_hypercall_args args;
> > u64 map_fail_paddr, ret;
> >
> > while (1) {
> > args.r10 = TDX_HYPERCALL_STANDARD;
> > args.r11 = TDVMCALL_MAP_GPA;
> > args.r12 = start;
> > args.r13 = end - start;
> > args.r14 = 0;
> > args.r15 = 0;
> >
> > ret = __tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> > if (!ret)
> > break;
>
> The above test is redundant and can be removed. The "success" case is
> implicitly handled by the test below for != TDVMCALL_STATUS_RETRY.

Good point. Will remove the redundant test.

> > if (ret != TDVMCALL_STATUS_RETRY)
> > break;
> > /*
> > * The guest must retry the operation for the pages in
> the
> > * region starting at the GPA specified in R11. Make sure
> R11
> > * contains a sane value.
> > */
> > map_fail_paddr = args.r11 ;
> > if (map_fail_paddr < start || map_fail_paddr >= end)
> > return false;
> >
> > if (map_fail_paddr == start) {
> > retry_cnt++;
> > if (retry_cnt > max_retry_cnt)
> > return false;
> > } else {
> > retry_cnt = 0;;
> > start = map_fail_paddr;
>
> Just summarizing the code, we increment the retry count if the hypercall
> returns STATUS_RETRY but did nothing (i.e., map_fail_paddr == start). But
> if the hypercall returns STATUS_RETRY after making at least some progress,
> then we reset the retry count. So in the worst case, for example, if the
> hypercall processed only one page on each invocation, the loop will continue
> until completion, without hitting any retry limits. That scenario seems
> plausible and within the spec.

Exactly.

> Do we have any indication about the likelihood of the "RETRY but did
> nothing" case? The spec doesn't appear to disallow this case, but does
> Hyper-V actually do this? It seems like a weird case.
>
> Michael

Yes, Hyper-V does do this, according to my test. It looks like this is not
because the operation is too time-consuming -- it looks like there is some
Hyper-V specific activity going on.

2022-11-28 01:07:21

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Michael Kelley (LINUX) <[email protected]>
> Sent: Wednesday, November 23, 2022 6:45 AM
> To: Dexuan Cui <[email protected]>; [email protected]; [email protected];
>
> Two thoughts:
>
> 1) The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed
> entirely
> with a tweak. hv_isolation_type_tdx() already doesn't need the #ifdef as
> there's
> already a stub that returns 'false'. Then you just need a way to handle
> __tdx_ms_hv_hypercall(), or whatever it becomes based on the other
> discussion.
> As long as you can provide a stub that does nothing, the #ifdef won't be
> needed.
>
> 2) Assuming that we end up with some kind of Hyper-V specific version of
> __tdx_hypercall(), and hopefully as a "C" function, could you move the
> handling
> of ms_hyperv.shared_gpa_boundary into that function? Then you won't
> need
> to break out a separate include file for struct ms_hyperv. The Hyper-V TDX
> hypercall function must handle both normal and "fast" hypercalls, and the
> shared_gpa_boundary adjustment is needed only for normal hypercalls,
> but you can check the "fast" bit in the control word to decide.
>
> I haven't coded these ideas, so maybe there are snags I haven't thought of.
> But I'm really hoping we can avoid having to create a separate include
> file for struct ms_hyperv.
>
> Michael

Thanks for the great suggestions! Now the code looks like this:
(the full list of v2 patches are still WIP:
https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 13ccb52eecd7..00e5c84e380b 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
{
return static_branch_unlikely(&isolation_type_tdx);
}
+
+u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
+{
+ struct tdx_hypercall_args args = { };
+
+ if (!(control & HV_HYPERCALL_FAST_BIT)) {
+ if (input_addr)
+ input_addr += ms_hyperv.shared_gpa_boundary;
+
+ if (output_addr)
+ output_addr += ms_hyperv.shared_gpa_boundary;
+ }
+
+ args.r10 = control;
+ args.rdx = input_addr;
+ args.r8 = output_addr;
+
+ (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+ return args.r11;
+}
#endif

/*

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 8a2cafec4675..1be7bcf0d7d1 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
+
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input_address, output_address);
+
if (!hv_hypercall_pg)
return U64_MAX;

@@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, 0);
+
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, input2);
+
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC

Subject: Re: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests



On 11/27/22 4:58 PM, Dexuan Cui wrote:
>> From: Michael Kelley (LINUX) <[email protected]>
>> Sent: Wednesday, November 23, 2022 6:45 AM
>> To: Dexuan Cui <[email protected]>; [email protected]; [email protected];
>>
>> Two thoughts:
>>
>> 1) The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed
>> entirely
>> with a tweak. hv_isolation_type_tdx() already doesn't need the #ifdef as
>> there's
>> already a stub that returns 'false'. Then you just need a way to handle
>> __tdx_ms_hv_hypercall(), or whatever it becomes based on the other
>> discussion.
>> As long as you can provide a stub that does nothing, the #ifdef won't be
>> needed.
>>
>> 2) Assuming that we end up with some kind of Hyper-V specific version of
>> __tdx_hypercall(), and hopefully as a "C" function, could you move the
>> handling
>> of ms_hyperv.shared_gpa_boundary into that function? Then you won't
>> need
>> to break out a separate include file for struct ms_hyperv. The Hyper-V TDX
>> hypercall function must handle both normal and "fast" hypercalls, and the
>> shared_gpa_boundary adjustment is needed only for normal hypercalls,
>> but you can check the "fast" bit in the control word to decide.
>>
>> I haven't coded these ideas, so maybe there are snags I haven't thought of.
>> But I'm really hoping we can avoid having to create a separate include
>> file for struct ms_hyperv.
>>
>> Michael
>
> Thanks for the great suggestions! Now the code looks like this:
> (the full list of v2 patches are still WIP:
> https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2)
>
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..00e5c84e380b 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
> {
> return static_branch_unlikely(&isolation_type_tdx);
> }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> + struct tdx_hypercall_args args = { };

I think you mean initialize to 0.

> +
> + if (!(control & HV_HYPERCALL_FAST_BIT)) {
> + if (input_addr)
> + input_addr += ms_hyperv.shared_gpa_boundary;
> +
> + if (output_addr)
> + output_addr += ms_hyperv.shared_gpa_boundary;
> + }
> +
> + args.r10 = control;
> + args.rdx = input_addr;
> + args.r8 = output_addr;
> +
> + (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> + return args.r11;
> +}
> #endif
>
> /*
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 8a2cafec4675..1be7bcf0d7d1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
> int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
> int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
>
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
> +
> static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> {
> u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input_address, output_address);
> +
> if (!hv_hypercall_pg)
> return U64_MAX;
>
> @@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, 0);
> +
> {
> __asm__ __volatile__(CALL_NOSPEC
> : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, input2);
> +
> {
> __asm__ __volatile__("mov %4, %%r8\n"
> CALL_NOSPEC

--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

2022-11-28 01:37:17

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

From: Dexuan Cui <[email protected]> Sent: Sunday, November 27, 2022 4:59 PM
>
> > From: Michael Kelley (LINUX) <[email protected]>
> > Sent: Wednesday, November 23, 2022 6:45 AM
> > To: Dexuan Cui <[email protected]>; [email protected]; [email protected];
> >
> > Two thoughts:
> >
> > 1) The #ifdef CONFIG_INTEL_TDX_GUEST could probably be removed entirely
> > with a tweak. hv_isolation_type_tdx() already doesn't need the #ifdef asthere's
> > already a stub that returns 'false'. Then you just need a way to handle
> > __tdx_ms_hv_hypercall(), or whatever it becomes based on the other discussion.
> > As long as you can provide a stub that does nothing, the #ifdef won't be needed.
> >
> > 2) Assuming that we end up with some kind of Hyper-V specific version of
> > __tdx_hypercall(), and hopefully as a "C" function, could you move the handling
> > of ms_hyperv.shared_gpa_boundary into that function? Then you won't need
> > to break out a separate include file for struct ms_hyperv. The Hyper-V TDX
> > hypercall function must handle both normal and "fast" hypercalls, and the
> > shared_gpa_boundary adjustment is needed only for normal hypercalls,
> > but you can check the "fast" bit in the control word to decide.
> >
> > I haven't coded these ideas, so maybe there are snags I haven't thought of.
> > But I'm really hoping we can avoid having to create a separate include
> > file for struct ms_hyperv.
> >
> > Michael
>
> Thanks for the great suggestions! Now the code looks like this:
> (the full list of v2 patches are still WIP:
>
> https://github.com/dcui/tdx/commits/decui/hyperv-next/2022-1121/v6.1-rc5/v2
>
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 13ccb52eecd7..00e5c84e380b 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -276,6 +276,27 @@ bool hv_isolation_type_tdx(void)
> {
> return static_branch_unlikely(&isolation_type_tdx);
> }
> +
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> + struct tdx_hypercall_args args = { };
> +
> + if (!(control & HV_HYPERCALL_FAST_BIT)) {
> + if (input_addr)
> + input_addr += ms_hyperv.shared_gpa_boundary;

At one point when working with the vTOM code, I realized that or'ing in
the shared_gpa_boundary is probably safer than add'ing it, just in case
the physical address already has vTOM set. I don't know if that possibility
exists here, but it's something to consider as being slightly more robust.

> +
> + if (output_addr)
> + output_addr += ms_hyperv.shared_gpa_boundary;

Same here.

> + }
> +
> + args.r10 = control;
> + args.rdx = input_addr;
> + args.r8 = output_addr;
> +
> + (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> +
> + return args.r11;
> +}
> #endif
>
> /*
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 8a2cafec4675..1be7bcf0d7d1 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -39,6 +39,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32
> num_pages);
> int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
> int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
>
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr);
> +
> static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> {
> u64 input_address = input ? virt_to_phys(input) : 0;
> @@ -46,6 +48,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input_address, output_address);
> +
> if (!hv_hypercall_pg)
> return U64_MAX;
>
> @@ -83,6 +88,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, 0);
> +
> {
> __asm__ __volatile__(CALL_NOSPEC
> : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> @@ -114,6 +122,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1,
> u64 input2)
> u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
>
> #ifdef CONFIG_X86_64
> + if (hv_isolation_type_tdx())
> + return hv_tdx_hypercall(control, input1, input2);
> +
> {
> __asm__ __volatile__("mov %4, %%r8\n"
> CALL_NOSPEC

Yes. This new structure LGTM.

Michael

2022-11-28 02:02:24

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Michael Kelley (LINUX) <[email protected]>
> Sent: Sunday, November 27, 2022 5:21 PM
> > [...]
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > + struct tdx_hypercall_args args = { };
> > +
> > + if (!(control & HV_HYPERCALL_FAST_BIT)) {
> > + if (input_addr)
> > + input_addr += ms_hyperv.shared_gpa_boundary;
>
> At one point when working with the vTOM code, I realized that or'ing in
> the shared_gpa_boundary is probably safer than add'ing it, just in case
> the physical address already has vTOM set. I don't know if that possibility
> exists here, but it's something to consider as being slightly more robust.
>
> > +
> > + if (output_addr)
> > + output_addr += ms_hyperv.shared_gpa_boundary;
>
> Same here.

Ok, will use |= in all the reference.

2022-11-28 02:08:52

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Sathyanarayanan Kuppuswamy
> <[email protected]>
> > +
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > + struct tdx_hypercall_args args = { };
>
> I think you mean initialize to 0.
Yes, it's the same as "struct tdx_hypercall_args args = { 0 };"

I was educated to use "{ }", which is said to be more preferred,
compared to "{ 0 } ":

https://lwn.net/ml/linux-kernel/YG1qF8lULn8lLJa/@unreal/
https://lwn.net/ml/linux-kernel/MW2PR2101MB0892AC106C360F2A209560A8BF759@MW2PR2101MB0892.namprd21.prod.outlook.com/

2022-11-28 15:41:44

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

On 11/27/22 16:58, Dexuan Cui wrote:
> +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> +{
> + struct tdx_hypercall_args args = { };
> +
> + if (!(control & HV_HYPERCALL_FAST_BIT)) {
> + if (input_addr)
> + input_addr += ms_hyperv.shared_gpa_boundary;
> +
> + if (output_addr)
> + output_addr += ms_hyperv.shared_gpa_boundary;
> + }

This:

>
https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface

makes it sound like HV_HYPERCALL_FAST_BIT says whether arguments go in
registers (fast) or memory (slow). But this hv_tdx_hypercall() function
looks like it takes addresses only.

*Is* there a register based calling convention to make Hyper-V
hypercalls when running under TDX?

Also, is this the output address manipulation fundamentally different from:

output_addr = cc_mkdec(output_addr);

? Decrypted addresses are the shared ones, right?

2022-11-28 19:53:01

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Dave Hansen <[email protected]>
> Sent: Monday, November 28, 2022 7:22 AM
> [...]
> On 11/27/22 16:58, Dexuan Cui wrote:
> > +u64 hv_tdx_hypercall(u64 control, u64 input_addr, u64 output_addr)
> > +{
> > + struct tdx_hypercall_args args = { };
> > +
> > + if (!(control & HV_HYPERCALL_FAST_BIT)) {
> > + if (input_addr)
> > + input_addr += ms_hyperv.shared_gpa_boundary;
> > +
> > + if (output_addr)
> > + output_addr += ms_hyperv.shared_gpa_boundary;
> > + }
>
> This:
> [...]
> makes it sound like HV_HYPERCALL_FAST_BIT says whether arguments go in
> registers (fast) or memory (slow). But this hv_tdx_hypercall() function
> looks like it takes addresses only.

Good point! When hv_tdx_hypercall() is called from hv_do_fast_hypercall8()
and hv_do_fast_hypercall16(), actually the two parameters are not memory
addresses. I'll rename the parameters to param1 and param2.

I also realized I need to export the function for drivers.

> *Is* there a register based calling convention to make Hyper-V
> hypercalls when running under TDX?

When hv_tdx_hypercall() is called from hv_do_fast_hypercall8()
and hv_do_fast_hypercall16(), the params are indeed passed via registers
rather than memory.

> Also, is this the output address manipulation fundamentally different from:
>
> output_addr = cc_mkdec(output_addr);
>
> ? Decrypted addresses are the shared ones, right?
Yes.

Good point -- I'll use the updated version:

u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
struct tdx_hypercall_args args = { };

if (!(control & HV_HYPERCALL_FAST_BIT)) {
if (param1)
param1 = cc_mkdec(param1);

if (param2)
param2 = cc_mkdec(param2);
}

args.r10 = control;
args.rdx = param1;
args.r8 = param2;

(void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);

return args.r11;
}
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);

2022-11-28 19:53:44

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

On 11/28/22 11:03, Dexuan Cui wrote:
...
> u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> {
> struct tdx_hypercall_args args = { };
>
> if (!(control & HV_HYPERCALL_FAST_BIT)) {
> if (param1)
> param1 = cc_mkdec(param1);
>
> if (param2)
> param2 = cc_mkdec(param2);
> }
>
> args.r10 = control;
> args.rdx = param1;
> args.r8 = param2;
>
> (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
>
> return args.r11;
> }

I still think this is problematic.

The cc_mkdec() should be done on the parameters when the code still
*knows* that they are addresses.

How do we know, for instance, that no hypercall using this interface
will *ever* take the 0x0 physical address as an argument?


2022-11-28 19:56:27

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

On 11/28/22 11:37, Dexuan Cui wrote:
>> From: Dave Hansen <[email protected]>
...
>> How do we know, for instance, that no hypercall using this interface
>> will *ever* take the 0x0 physical address as an argument?
>
> A 0x0 physical address as an argument still works: the 0 is passed
> to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
> an error (if the param is needed), and returns an "invalid parameter"
> error code to the guest.

I don't see any data in the public documentation to support the claim
that 0x0 is a special argument for either the input or output GPA
parameters.

This is despite some actual discussion on things like their alignment
requirements[1] and interactions with overlay pages.

So, either you are mistaken about that behavior, or it looks like the
documentation needs updating.

1.
https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/hypercall-interface


2022-11-28 20:16:01

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Dave Hansen <[email protected]>
> Sent: Monday, November 28, 2022 11:11 AM
> On 11/28/22 11:03, Dexuan Cui wrote:
> ...
> > u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
> > {
> > struct tdx_hypercall_args args = { };
> >
> > if (!(control & HV_HYPERCALL_FAST_BIT)) {
> > if (param1)
> > param1 = cc_mkdec(param1);
> >
> > if (param2)
> > param2 = cc_mkdec(param2);
> > }
> >
> > args.r10 = control;
> > args.rdx = param1;
> > args.r8 = param2;
> >
> > (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
> >
> > return args.r11;
> > }
>
> I still think this is problematic.
>
> The cc_mkdec() should be done on the parameters when the code still
> *knows* that they are addresses.

Makes sense.

> How do we know, for instance, that no hypercall using this interface
> will *ever* take the 0x0 physical address as an argument?

A 0x0 physical address as an argument still works: the 0 is passed
to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
an error (if the param is needed), and returns an "invalid parameter"
error code to the guest.

Here is the updated version:

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 70170049be2c..41395891d112 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -242,6 +242,20 @@ bool hv_isolation_type_tdx(void)
{
return static_branch_unlikely(&isolation_type_tdx);
}
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+ struct tdx_hypercall_args args = { };
+
+ args.r10 = control;
+ args.rdx = param1;
+ args.r8 = param2;
+
+ (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+ return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
#endif

/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9cc6db45c3bc..ea053af067a2 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -10,6 +10,7 @@
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/mshyperv.h>
+#include <asm/coco.h>

union hv_ghcb;

@@ -39,6 +40,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +49,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx()) {
+ u64 param1 = input_address ? cc_mkdec(input_address) : 0;
+ u64 param2 = output_address ? cc_mkdec(output_address) : 0;
+ return hv_tdx_hypercall(control, param1, param2);
+ }
+
if (!hv_hypercall_pg)
return U64_MAX;

@@ -83,6 +92,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, 0);
+
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +126,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, input2);
+
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC

2022-11-28 20:59:02

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Dave Hansen <[email protected]>
> Sent: Monday, November 28, 2022 11:48 AM
>
> On 11/28/22 11:37, Dexuan Cui wrote:
> >> From: Dave Hansen <[email protected]>
> ...
> >> How do we know, for instance, that no hypercall using this interface
> >> will *ever* take the 0x0 physical address as an argument?
> >
> > A 0x0 physical address as an argument still works: the 0 is passed
> > to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
> > an error (if the param is needed), and returns an "invalid parameter"
> > error code to the guest.
>
> I don't see any data in the public documentation to support the claim
> that 0x0 is a special argument for either the input or output GPA
> parameters.

Sorry, I didn't make it clear. I meant: for some hypercalls, Hyper-V
doesn't really need an "input" param or an "output" param, so Linux
passes 0 for such a "not needed" param. Maybe Linux can pass any
value for such a "not needed" param, if Hyper-V just ignores the
"not needed" param. Some examples:

arch/x86/hyperv/hv_init.c: hv_get_partition_id():
status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);

drivers/pci/controller/pci-hyperv.c:
res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
params, NULL);


If a param is needed and is supposed to be a non-zero memory address,
Linux running as a TDX guest must pass "cc_mkdec(address)" rather than
"address", otherwise I suspect the result is undefined, e.g. Hyper-V might
return an error to the guest, or Hyper-V might just terminate the guest,
especially if Linux passes 0 or cc_mkdec(0).

Currently all the users of hv_do_hypercall() pass valid arguments.

> This is despite some actual discussion on things like their alignment
> requirements[1] and interactions with overlay pages.
>
> So, either you are mistaken about that behavior, or it looks like the
> documentation needs updating.

The above is just my conjecture. I don't know how exactly Hyper-V works.

2022-11-28 22:17:24

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

On 11/28/22 12:36, Dexuan Cui wrote:
>> From: Dave Hansen <[email protected]>
>> Sent: Monday, November 28, 2022 11:48 AM
>>
>> On 11/28/22 11:37, Dexuan Cui wrote:
>>>> From: Dave Hansen <[email protected]>
>> ...
>>>> How do we know, for instance, that no hypercall using this interface
>>>> will *ever* take the 0x0 physical address as an argument?
>>>
>>> A 0x0 physical address as an argument still works: the 0 is passed
>>> to the hypervisor using GHCI. I believe Hyper-V interprets the 0 as
>>> an error (if the param is needed), and returns an "invalid parameter"
>>> error code to the guest.
>>
>> I don't see any data in the public documentation to support the claim
>> that 0x0 is a special argument for either the input or output GPA
>> parameters.
>
> Sorry, I didn't make it clear. I meant: for some hypercalls, Hyper-V
> doesn't really need an "input" param or an "output" param, so Linux
> passes 0 for such a "not needed" param. Maybe Linux can pass any
> value for such a "not needed" param, if Hyper-V just ignores the
> "not needed" param. Some examples:
>
> arch/x86/hyperv/hv_init.c: hv_get_partition_id():
> status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
>
> drivers/pci/controller/pci-hyperv.c:
> res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
> params, NULL);
>
>
> If a param is needed and is supposed to be a non-zero memory address,
> Linux running as a TDX guest must pass "cc_mkdec(address)" rather than
> "address", otherwise I suspect the result is undefined, e.g. Hyper-V might
> return an error to the guest, or Hyper-V might just terminate the guest,
> especially if Linux passes 0 or cc_mkdec(0).

This is the point where the maintainer gets a wee bit grumpy. The page
I just pointed you to (twice) has this nice quote:

If the hypercall involves no input or output parameters, the
hypervisor ignores the corresponding GPA pointer.

So, bravo to your colleagues who nicely documented this. But,
unfortunately, you didn't take advantage of their great documentation.
Instead, you made me search for it to provide actual facts to combat the
weak conjecture you offered above.

>> This is despite some actual discussion on things like their alignment
>> requirements[1] and interactions with overlay pages.
>>
>> So, either you are mistaken about that behavior, or it looks like the
>> documentation needs updating.
>
> The above is just my conjecture. I don't know how exactly Hyper-V works.

I do! I literally Googled "HV HYPERCALL FAST BIT" and the first page
that came up told me all I needed to know.

I could be merrily merging your patches. But, instead, I'm reading
documentation that can be trivially found and repeatedly regurgitating it.

Please, slow down. Take some time to answer emails and do it more
deliberately. This isn't a race.

2022-11-28 22:49:48

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 5/6] x86/hyperv: Support hypercalls for TDX guests

> From: Dave Hansen <[email protected]>
> Sent: Monday, November 28, 2022 1:15 PM
> [...]
> This is the point where the maintainer gets a wee bit grumpy. The page
> I just pointed you to (twice) has this nice quote:
>
> If the hypercall involves no input or output parameters, the
> hypervisor ignores the corresponding GPA pointer.
>
> So, bravo to your colleagues who nicely documented this. But,
> unfortunately, you didn't take advantage of their great documentation.
> Instead, you made me search for it to provide actual facts to combat the
> weak conjecture you offered above.

Sorry, I should really have read the document before starting to conjecture...

> > The above is just my conjecture. I don't know how exactly Hyper-V works.
>
> I do! I literally Googled "HV HYPERCALL FAST BIT" and the first page
> that came up told me all I needed to know.
>
> I could be merrily merging your patches. But, instead, I'm reading
> documentation that can be trivially found and repeatedly regurgitating it.
>
> Please, slow down. Take some time to answer emails and do it more
> deliberately. This isn't a race.

Thanks, I learn a lesson. Hopefully the below looks good enough.

Compared to the earlier version, the only changes are: 1) simplified the
change in hv_do_hypercall(); 2) added a comment before the function.

BTW, I can't post the v2 patchset right now, as I'm waiting for Kirill to
expand __tdx_hypercall() first:
https://lwn.net/ml/linux-kernel/SA1PR21MB1335EEEC1DE4CB42F6477A5EBF0C9@SA1PR21MB1335.namprd21.prod.outlook.com/
I'm also thinking about how to properly address the vmalloc
issue with tdx_enc_status_changed().

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 70170049be2c..41395891d112 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -242,6 +242,20 @@ bool hv_isolation_type_tdx(void)
{
return static_branch_unlikely(&isolation_type_tdx);
}
+
+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
+{
+ struct tdx_hypercall_args args = { };
+
+ args.r10 = control;
+ args.rdx = param1;
+ args.r8 = param2;
+
+ (void)__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);
+
+ return args.r11;
+}
+EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
#endif

/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 9cc6db45c3bc..055b6fb8941f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -10,6 +10,7 @@
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
#include <asm/mshyperv.h>
+#include <asm/coco.h>

union hv_ghcb;

@@ -39,6 +40,12 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);

+u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+/*
+ * If the hypercall involves no input or output parameters, the hypervisor
+ * ignores the corresponding GPA pointer.
+ */
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
u64 input_address = input ? virt_to_phys(input) : 0;
@@ -46,6 +53,10 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 hv_status;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control,
+ cc_mkdec(input_address),
+ cc_mkdec(output_address));
if (!hv_hypercall_pg)
return U64_MAX;

@@ -83,6 +94,9 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, 0);
+
{
__asm__ __volatile__(CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
@@ -114,6 +128,9 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;

#ifdef CONFIG_X86_64
+ if (hv_isolation_type_tdx())
+ return hv_tdx_hypercall(control, input1, input2);
+
{
__asm__ __volatile__("mov %4, %%r8\n"
CALL_NOSPEC

2022-11-30 19:22:06

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

> From: Dexuan Cui
> Sent: Wednesday, November 23, 2022 10:55 AM
> To: Kirill A. Shutemov <[email protected]>
>
> > From: Kirill A. Shutemov <[email protected]>
> > Sent: Wednesday, November 23, 2022 6:41 AM
> > [...]
> > I have plan to expand __tdx_hypercall() to cover more registers.
> > See the patch below.
>
> Great! Thank you!
>
> > Is it enough for you?
> Yes.

Hi Kirill, it would be great if you could post a formal patch so that
I can rebase my patchset accordingly.

2022-12-02 22:00:14

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [PATCH 1/6] x86/tdx: Support hypercalls for TDX guests on Hyper-V

On Wed, Nov 30, 2022 at 07:14:49PM +0000, Dexuan Cui wrote:
> > From: Dexuan Cui
> > Sent: Wednesday, November 23, 2022 10:55 AM
> > To: Kirill A. Shutemov <[email protected]>
> >
> > > From: Kirill A. Shutemov <[email protected]>
> > > Sent: Wednesday, November 23, 2022 6:41 AM
> > > [...]
> > > I have plan to expand __tdx_hypercall() to cover more registers.
> > > See the patch below.
> >
> > Great! Thank you!
> >
> > > Is it enough for you?
> > Yes.
>
> Hi Kirill, it would be great if you could post a formal patch so that
> I can rebase my patchset accordingly.

The patch doesn't make sense without a user. The use-case I wanted to use
it for awaits update of GHCI. It make take time.

Below is proper patch. Feel free to include it into your patchset.

From fdf892e8f84c98e4cb7f3f7a613f32c8da396bd7 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <[email protected]>
Date: Wed, 23 Nov 2022 07:31:05 +0300
Subject: [PATCH] x86/tdx: Expand __tdx_hypercall() to handle more arguments

So far __tdx_hypercall() only handles six arguments for VMCALL.
Expanding it to six more register would allow to cover more use-cases.

Using RDI and RSI as VMCALL arguments requires more register shuffling.
RAX is used to hold tdx_hypercall_args pointer and RBP stores flags.

While there, fix typo in the comment on panic branch.

Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/coco/tdx/tdcall.S | 82 ++++++++++++++++++++++---------
arch/x86/include/asm/shared/tdx.h | 6 +++
arch/x86/kernel/asm-offsets.c | 6 +++
3 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index f9eb1134f22d..64e57739dc9d 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -13,6 +13,12 @@
/*
* Bitmasks of exposed registers (with VMM).
*/
+#define TDX_RDX BIT(2)
+#define TDX_RBX BIT(3)
+#define TDX_RSI BIT(6)
+#define TDX_RDI BIT(7)
+#define TDX_R8 BIT(8)
+#define TDX_R9 BIT(9)
#define TDX_R10 BIT(10)
#define TDX_R11 BIT(11)
#define TDX_R12 BIT(12)
@@ -27,9 +33,9 @@
* details can be found in TDX GHCI specification, section
* titled "TDCALL [TDG.VP.VMCALL] leaf".
*/
-#define TDVMCALL_EXPOSE_REGS_MASK ( TDX_R10 | TDX_R11 | \
- TDX_R12 | TDX_R13 | \
- TDX_R14 | TDX_R15 )
+#define TDVMCALL_EXPOSE_REGS_MASK \
+ ( TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \
+ TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15 )

/*
* __tdx_module_call() - Used by TDX guests to request services from
@@ -124,19 +130,32 @@ SYM_FUNC_START(__tdx_hypercall)
push %r14
push %r13
push %r12
+ push %rbx
+ push %rbp
+
+ movq %rdi, %rax
+ movq %rsi, %rbp
+
+ /* Copy hypercall registers from arg struct: */
+ movq TDX_HYPERCALL_r8(%rax), %r8
+ movq TDX_HYPERCALL_r9(%rax), %r9
+ movq TDX_HYPERCALL_r10(%rax), %r10
+ movq TDX_HYPERCALL_r11(%rax), %r11
+ movq TDX_HYPERCALL_r12(%rax), %r12
+ movq TDX_HYPERCALL_r13(%rax), %r13
+ movq TDX_HYPERCALL_r14(%rax), %r14
+ movq TDX_HYPERCALL_r15(%rax), %r15
+ movq TDX_HYPERCALL_rdi(%rax), %rdi
+ movq TDX_HYPERCALL_rsi(%rax), %rsi
+ movq TDX_HYPERCALL_rbx(%rax), %rbx
+ movq TDX_HYPERCALL_rdx(%rax), %rdx
+
+ push %rax

/* Mangle function call ABI into TDCALL ABI: */
/* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
xor %eax, %eax

- /* Copy hypercall registers from arg struct: */
- movq TDX_HYPERCALL_r10(%rdi), %r10
- movq TDX_HYPERCALL_r11(%rdi), %r11
- movq TDX_HYPERCALL_r12(%rdi), %r12
- movq TDX_HYPERCALL_r13(%rdi), %r13
- movq TDX_HYPERCALL_r14(%rdi), %r14
- movq TDX_HYPERCALL_r15(%rdi), %r15
-
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx

/*
@@ -148,14 +167,14 @@ SYM_FUNC_START(__tdx_hypercall)
* HLT operation indefinitely. Since this is the not the desired
* result, conditionally call STI before TDCALL.
*/
- testq $TDX_HCALL_ISSUE_STI, %rsi
+ testq $TDX_HCALL_ISSUE_STI, %rbp
jz .Lskip_sti
sti
.Lskip_sti:
tdcall

/*
- * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+ * RAX!=0 indicates a failure of the TDVMCALL mechanism itself and that
* something has gone horribly wrong with the TDX module.
*
* The return status of the hypercall operation is in a separate
@@ -165,30 +184,45 @@ SYM_FUNC_START(__tdx_hypercall)
testq %rax, %rax
jne .Lpanic

- /* TDVMCALL leaf return code is in R10 */
- movq %r10, %rax
+ pop %rax

/* Copy hypercall result registers to arg struct if needed */
- testq $TDX_HCALL_HAS_OUTPUT, %rsi
+ testq $TDX_HCALL_HAS_OUTPUT, %rbp
jz .Lout

- movq %r10, TDX_HYPERCALL_r10(%rdi)
- movq %r11, TDX_HYPERCALL_r11(%rdi)
- movq %r12, TDX_HYPERCALL_r12(%rdi)
- movq %r13, TDX_HYPERCALL_r13(%rdi)
- movq %r14, TDX_HYPERCALL_r14(%rdi)
- movq %r15, TDX_HYPERCALL_r15(%rdi)
+ movq %r8, TDX_HYPERCALL_r8(%rax)
+ movq %r9, TDX_HYPERCALL_r9(%rax)
+ movq %r10, TDX_HYPERCALL_r10(%rax)
+ movq %r11, TDX_HYPERCALL_r11(%rax)
+ movq %r12, TDX_HYPERCALL_r12(%rax)
+ movq %r13, TDX_HYPERCALL_r13(%rax)
+ movq %r14, TDX_HYPERCALL_r14(%rax)
+ movq %r15, TDX_HYPERCALL_r15(%rax)
+ movq %rdi, TDX_HYPERCALL_rdi(%rax)
+ movq %rsi, TDX_HYPERCALL_rsi(%rax)
+ movq %rbx, TDX_HYPERCALL_rbx(%rax)
+ movq %rdx, TDX_HYPERCALL_rdx(%rax)
.Lout:
+ /* TDVMCALL leaf return code is in R10 */
+ movq %r10, %rax
+
/*
* Zero out registers exposed to the VMM to avoid speculative execution
* with VMM-controlled values. This needs to include all registers
- * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15
- * context will be restored.
+ * present in TDVMCALL_EXPOSE_REGS_MASK, except RBX, and R12-R15 which
+ * will be restored.
*/
+ xor %r8d, %r8d
+ xor %r9d, %r9d
xor %r10d, %r10d
xor %r11d, %r11d
+ xor %rdi, %rdi
+ xor %rsi, %rsi
+ xor %rdx, %rdx

/* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+ pop %rbp
+ pop %rbx
pop %r12
pop %r13
pop %r14
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index e53f26228fbb..8068faa52de1 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -22,12 +22,18 @@
* This is a software only structure and not part of the TDX module/VMM ABI.
*/
struct tdx_hypercall_args {
+ u64 r8;
+ u64 r9;
u64 r10;
u64 r11;
u64 r12;
u64 r13;
u64 r14;
u64 r15;
+ u64 rdi;
+ u64 rsi;
+ u64 rbx;
+ u64 rdx;
};

/* Used to request services from the VMM */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 437308004ef2..9f09947495e2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -75,12 +75,18 @@ static void __used common(void)
OFFSET(TDX_MODULE_r11, tdx_module_output, r11);

BLANK();
+ OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8);
+ OFFSET(TDX_HYPERCALL_r9, tdx_hypercall_args, r9);
OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+ OFFSET(TDX_HYPERCALL_rdi, tdx_hypercall_args, rdi);
+ OFFSET(TDX_HYPERCALL_rsi, tdx_hypercall_args, rsi);
+ OFFSET(TDX_HYPERCALL_rbx, tdx_hypercall_args, rbx);
+ OFFSET(TDX_HYPERCALL_rdx, tdx_hypercall_args, rdx);

BLANK();
OFFSET(BP_scratch, boot_params, scratch);
--
Kiryl Shutsemau / Kirill A. Shutemov

2023-01-06 11:19:48

by Zhi Wang

[permalink] [raw]
Subject: Re: [PATCH 6/6] Drivers: hv: vmbus: Support TDX guests

On Mon, 21 Nov 2022 11:51:51 -0800
Dexuan Cui <[email protected]> wrote:

> Intel folks added the generic code to support a TDX guest in April, 2022.
> This commit and some earlier commits from me add the Hyper-V specific
> code so that a TDX guest can run on Hyper-V.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> arch/x86/hyperv/hv_init.c | 19 +++++++++++++++----
> arch/x86/kernel/cpu/mshyperv.c | 10 ++++++++++
> arch/x86/mm/pat/set_memory.c | 2 +-
> drivers/hv/connection.c | 4 +++-
> drivers/hv/hv.c | 25 +++++++++++++++++++++++++
> drivers/hv/ring_buffer.c | 2 +-
> 6 files changed, 55 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 05682c4e327f..694f7fb04e5d 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void)
> static int hv_cpu_init(unsigned int cpu)
> {
> union hv_vp_assist_msr_contents msr = { 0 };
> - struct hv_vp_assist_page **hvp =
> &hv_vp_assist_page[smp_processor_id()];
> + struct hv_vp_assist_page **hvp;
> int ret;
>
> ret = hv_common_cpu_init(cpu);
> @@ -87,6 +87,7 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> + hvp = &hv_vp_assist_page[smp_processor_id()];
> if (!*hvp) {
> if (hv_root_partition) {
> /*
> @@ -398,11 +399,21 @@ void __init hyperv_init(void)
> if (hv_common_init())
> return;
>
> - hv_vp_assist_page = kcalloc(num_possible_cpus(),
> - sizeof(*hv_vp_assist_page),
> GFP_KERNEL);
> + /*
> + * The VP assist page is useless to a TDX guest: the only use we
> + * would have for it is lazy EOI, which can not be used with
> TDX.
> + */
> + if (hv_isolation_type_tdx())
> + hv_vp_assist_page = NULL;
> + else
> + hv_vp_assist_page = kcalloc(num_possible_cpus(),
> + sizeof(*hv_vp_assist_page),
> + GFP_KERNEL);
> if (!hv_vp_assist_page) {
> ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> - goto common_free;
> +
> + if (!hv_isolation_type_tdx())
> + goto common_free;
> }
>
> if (hv_isolation_type_snp()) {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c
> b/arch/x86/kernel/cpu/mshyperv.c index dddccdbc5526..6f597b23ad3e 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -350,7 +350,17 @@ static void __init ms_hyperv_init_platform(void)
> case HV_ISOLATION_TYPE_TDX:
> static_branch_enable(&isolation_type_tdx);
>
> + cc_set_vendor(CC_VENDOR_INTEL);
> +
> ms_hyperv.shared_gpa_boundary =
> cc_mkdec(0); +
> + /* Don't use the unsafe Hyper-V TSC
> page */
> + ms_hyperv.features &=
> + ~HV_MSR_REFERENCE_TSC_AVAILABLE;
> +
> + /* HV_REGISTER_CRASH_CTL is unsupported
> */
> + ms_hyperv.misc_features &=
> +
> ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; break;
>
> default:
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 2e5a045731de..bb44aaddb230 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long
> addr, int numpages, bool enc)
> static int __set_memory_enc_dec(unsigned long addr, int numpages, bool
> enc) {
> - if (hv_is_isolation_supported())
> + if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
> return hv_set_mem_host_visibility(addr, numpages, !enc);
>
> if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))

Let's say there will be four cases:

----
case a. SEV-SNP guest with paravisor

In the code, this case is represented by:

hv_is_isolation_supported() && hv_isolation_type_snp()
hv_is_isolation_supported() && !hv_isolation_type_tdx()

case b. TDX guest with paravisor
?

case c. SEV-SNP guest *without* paravisor
?

case d. TDX guest *without* paravisor

In the code, this case is represented by:

hv_is_isolation_supported() && hv_isolation_type_tdx()
----

1. It would be better to use "hv_is_isolation_supported() &&
hv_isolation_type_snp()" to represent case a to avoid confusion in the
above patch.

2. For now, hv_is_isolation_supported() only shows if the guest is a CC
guest or not. hv_isolation_type_*() only represent SNP or TDX but
not "w/ or w/o paravisor".

How are you going to represent case b and c in __set_memory_enc_dec()?

I think you are looking for something to show if the guest is running
with a paravisor or not here:

if (hv_is_isolation_supported() && hv_is_isolation_with_paravisor())
...

Thanks,
Zhi.


> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 9dc27e5d367a..1ecc3c29e3f7 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -250,12 +250,14 @@ int vmbus_connect(void)
> * Isolation VM with AMD SNP needs to access monitor
> page via
> * address space above shared gpa boundary.
> */
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_isolation_type_tdx())
> { vmbus_connection.monitor_pages_pa[0] +=
> ms_hyperv.shared_gpa_boundary;
> vmbus_connection.monitor_pages_pa[1] +=
> ms_hyperv.shared_gpa_boundary;
> + }
>
> + if (hv_isolation_type_snp()) {
> vmbus_connection.monitor_pages[0]
> =
> memremap(vmbus_connection.monitor_pages_pa[0], HV_HYP_PAGE_SIZE,
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..03b3257bc1ab 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -18,6 +18,7 @@
> #include <linux/clockchips.h>
> #include <linux/delay.h>
> #include <linux/interrupt.h>
> +#include <linux/set_memory.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> #include "hyperv_vmbus.h"
> @@ -119,6 +120,7 @@ int hv_synic_alloc(void)
> {
> int cpu;
> struct hv_per_cpu_context *hv_cpu;
> + int ret;
>
> /*
> * First, zero all per-cpu memory areas so hv_synic_free() can
> @@ -168,6 +170,21 @@ int hv_synic_alloc(void)
> pr_err("Unable to allocate post msg page\n");
> goto err;
> }
> +
> +
> + if (hv_isolation_type_tdx()) {
> + ret = set_memory_decrypted(
> + (unsigned
> long)hv_cpu->synic_message_page, 1);
> + BUG_ON(ret);
> +
> + ret = set_memory_decrypted(
> + (unsigned
> long)hv_cpu->synic_event_page, 1);
> + BUG_ON(ret);
> +
> + ret = set_memory_decrypted(
> + (unsigned long)hv_cpu->post_msg_page,
> 1);
> + BUG_ON(ret);
> + }
> }
>
> return 0;
> @@ -225,6 +242,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> } else {
> simp.base_simp_gpa =
> virt_to_phys(hv_cpu->synic_message_page)
> >> HV_HYP_PAGE_SHIFT;
> +
> + if (hv_isolation_type_tdx())
> + simp.base_simp_gpa +=
> ms_hyperv.shared_gpa_boundary
> + >> HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> @@ -243,6 +264,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> } else {
> siefp.base_siefp_gpa =
> virt_to_phys(hv_cpu->synic_event_page)
> >> HV_HYP_PAGE_SHIFT;
> +
> + if (hv_isolation_type_tdx())
> + siefp.base_siefp_gpa +=
> ms_hyperv.shared_gpa_boundary
> + >> HV_HYP_PAGE_SHIFT;
> }
>
> hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
> index c6692fd5ab15..a51da82316ce 100644
> --- a/drivers/hv/ring_buffer.c
> +++ b/drivers/hv/ring_buffer.c
> @@ -233,7 +233,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info
> *ring_info,
> ring_info->ring_buffer = (struct hv_ring_buffer *)
> vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
> - PAGE_KERNEL);
> + pgprot_decrypted(PAGE_KERNEL_NOENC));
>
> kfree(pages_wraparound);
> if (!ring_info->ring_buffer)

2023-01-09 07:21:48

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH 6/6] Drivers: hv: vmbus: Support TDX guests

> From: Zhi Wang <[email protected]>
> Sent: Friday, January 6, 2023 3:00 AM
> > diff --git a/arch/x86/mm/pat/set_memory.c
> > @@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned
> long
> > addr, int numpages, bool enc)
> > static int __set_memory_enc_dec(unsigned long addr, int numpages, bool
> > enc) {
> > - if (hv_is_isolation_supported())
> > + if (hv_is_isolation_supported() && !hv_isolation_type_tdx())
> > return hv_set_mem_host_visibility(addr, numpages, !enc);
> >
> > if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))

The change here is kind of a hack to not call hv_set_mem_host_visibility()
for TDX guests on Hyper-V. The original change was also a hack to me to
call hv_set_mem_host_visibility() for SNP guests with pavavisor on Hyper-V.

> Let's say there will be four cases:
> ----
> case a. SEV-SNP guest with paravisor
>
> In the code, this case is represented by:
>
> hv_is_isolation_supported() && hv_isolation_type_snp()
> hv_is_isolation_supported() && !hv_isolation_type_tdx()
These look bad to me...

> case b. TDX guest with paravisor
> ?
As of now, this is not supported yet. I'll need to figure out how exactly
this scenario will look like.

> case c. SEV-SNP guest *without* paravisor
> ?
Tianyu Lan is working on this:
https://lwn.net/ml/linux-kernel/[email protected]/
set_memory_decrypted() calls __set_memory_enc_dec() directly. This
is the same as a SNP guest running on KVM.

> case d. TDX guest *without* paravisor
>
> In the code, this case is represented by:
>
> hv_is_isolation_supported() && hv_isolation_type_tdx()
This looks bad to me...

> ----
>
> 1. It would be better to use "hv_is_isolation_supported() &&
> hv_isolation_type_snp()" to represent case a to avoid confusion in the
> above patch.
>
> 2. For now, hv_is_isolation_supported() only shows if the guest is a CC
> guest or not. hv_isolation_type_*() only represent SNP or TDX but
> not "w/ or w/o paravisor".
>
> How are you going to represent case b and c in __set_memory_enc_dec()?
>
> I think you are looking for something to show if the guest is running
> with a paravisor or not here:
>
> if (hv_is_isolation_supported() && hv_is_isolation_with_paravisor())
> ...
>
> Thanks,
> Zhi.

Michael's patchset removes the special path for SNP with pavavisor on Hyper-V:
https://lwn.net/ml/linux-kernel/1669951831-4180-7-git-send-email-mikelley%40microsoft.com/

With Michael's patchset, I don't need the change to __set_memory_enc_dec()
at all. The plan was that Michael's patchset would be merged into the upstream
first and I would rebase my TDX patchset accordingly, but Michael's patchset
has been pending for almost 2 months...

so I probably need to post v3 with the below version, which looks a little
better to me because it hides the Hyper-V specific logic in a Hyper-V specific
file arch/x86/hyperv/ivm.c, and if necessary we can change the implementation
of hv_set_memory_enc_dec_needed() in future, e.g. Tianyu can change
hv_set_memory_enc_dec_needed() to distinguish between SNP with pavavisor
and SNP without pavavisor. Of course, I still hope Michael's patchset would
be merged soon so I can avoid this kind of mess...

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 07e4253b5809..4398042f10d5 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -258,6 +258,11 @@ bool hv_is_isolation_supported(void)
return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}

+bool hv_set_memory_enc_dec_needed(void)
+{
+ return hv_is_isolation_supported() && !hv_isolation_type_tdx();
+}
+
DEFINE_STATIC_KEY_FALSE(isolation_type_snp);

/*

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731de..5892196f8ade 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2120,7 +2120,7 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
- if (hv_is_isolation_supported())
+ if (hv_set_memory_enc_dec_needed())
return hv_set_mem_host_visibility(addr, numpages, !enc);

if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index a9a03ab04b97..192dcf295dfc 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -262,6 +262,12 @@ bool __weak hv_is_isolation_supported(void)
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);

+bool __weak hv_set_memory_enc_dec_needed(void)
+{
+ return false;
+}
+EXPORT_SYMBOL_GPL(hv_set_memory_enc_dec_needed);
+
bool __weak hv_isolation_type_snp(void)
{
return false;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..b7b1b18c9854 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -262,6 +262,7 @@ bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
+bool hv_set_memory_enc_dec_needed(void);
bool hv_isolation_type_snp(void);
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
void hyperv_cleanup(void);
@@ -274,6 +275,7 @@ static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline bool hv_is_isolation_supported(void) { return false; }
+static inline bool hv_set_memory_enc_dec_needed(void) { return false; }
static inline enum hv_isolation_type hv_get_isolation_type(void)
{
return HV_ISOLATION_TYPE_NONE;