All preparations are complete. Hook up TDX-specific code to accept memory.
Accepting the memory is the same process as converting memory from
shared to private: the kernel notifies the VMM with the MAP_GPA hypercall
and then accepts the pages with the ACCEPT_PAGE module call.
The implementation in the core kernel uses tdx_enc_status_changed(). It
is already used for converting memory to shared and back for I/O
transactions.
The boot stub provides its own implementation of tdx_accept_memory(). It
is similar in structure to tdx_enc_status_changed(), but only cares about
converting memory to private.
Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/boot/compressed/mem.c | 24 ++++++++-
arch/x86/boot/compressed/tdx.c | 85 +++++++++++++++++++++++++++++++
arch/x86/coco/tdx/tdx.c | 31 +++++++----
arch/x86/include/asm/shared/tdx.h | 2 +
arch/x86/mm/unaccepted_memory.c | 9 +++-
6 files changed, 141 insertions(+), 11 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7021ec725dd3..e4c31dbea6d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -885,6 +885,7 @@ config INTEL_TDX_GUEST
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
+ select UNACCEPTED_MEMORY
help
Support running as a guest under Intel TDX. Without this support,
the guest kernel can not boot or run under TDX.
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index b5058c975d26..539fff27de49 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -5,6 +5,8 @@
#include "error.h"
#include "find.h"
#include "math.h"
+#include "tdx.h"
+#include <asm/shared/tdx.h>
#define PMD_SHIFT 21
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
@@ -12,10 +14,30 @@
extern struct boot_params *boot_params;
+static bool is_tdx_guest(void)
+{
+ static bool once;
+ static bool is_tdx;
+
+ if (!once) {
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
+ &sig[0], &sig[2], &sig[1]);
+ is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
+ once = true;
+ }
+
+ return is_tdx;
+}
+
static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
{
/* Platform-specific memory-acceptance call goes here */
- error("Cannot accept memory");
+ if (is_tdx_guest())
+ tdx_accept_memory(start, end);
+ else
+ error("Cannot accept memory");
}
/*
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
index 918a7606f53c..57fd2bf28484 100644
--- a/arch/x86/boot/compressed/tdx.c
+++ b/arch/x86/boot/compressed/tdx.c
@@ -3,12 +3,14 @@
#include "../cpuflags.h"
#include "../string.h"
#include "../io.h"
+#include "align.h"
#include "error.h"
#include <vdso/limits.h>
#include <uapi/asm/vmx.h>
#include <asm/shared/tdx.h>
+#include <asm/page_types.h>
/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void)
@@ -75,3 +77,86 @@ void early_tdx_detect(void)
pio_ops.f_outb = tdx_outb;
pio_ops.f_outw = tdx_outw;
}
+
+enum pg_level {
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+};
+
+#define PTE_SHIFT 9
+
+static bool try_accept_one(phys_addr_t *start, unsigned long len,
+ enum pg_level pg_level)
+{
+ unsigned long accept_size = PAGE_SIZE << (pg_level * PTE_SHIFT);
+ u64 tdcall_rcx;
+ u8 page_size;
+
+ if (!IS_ALIGNED(*start, accept_size))
+ return false;
+
+ if (len < accept_size)
+ return false;
+
+ /*
+ * Pass the page physical address to the TDX module to accept the
+ * pending, private page.
+ *
+ * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+ */
+ switch (pg_level) {
+ case PG_LEVEL_4K:
+ page_size = 0;
+ break;
+ case PG_LEVEL_2M:
+ page_size = 1;
+ break;
+ case PG_LEVEL_1G:
+ page_size = 2;
+ break;
+ default:
+ return false;
+ }
+
+ tdcall_rcx = *start | page_size;
+ if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+ return false;
+
+ *start += accept_size;
+ return true;
+}
+
+void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ /*
+ * Notify the VMM about page mapping conversion. More info about ABI
+ * can be found in TDX Guest-Host-Communication Interface (GHCI),
+ * section "TDG.VP.VMCALL<MapGPA>"
+ */
+ if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ error("Accepting memory failed\n");
+
+ /*
+ * For shared->private conversion, accept the page using
+ * TDX_ACCEPT_PAGE TDX module call.
+ */
+ while (start < end) {
+ unsigned long len = end - start;
+
+ /*
+ * Try larger accepts first. It gives chance to VMM to keep
+ * 1G/2M SEPT entries where possible and speeds up process by
+ * cutting number of hypercalls (if successful).
+ */
+
+ if (try_accept_one(&start, len, PG_LEVEL_1G))
+ continue;
+
+ if (try_accept_one(&start, len, PG_LEVEL_2M))
+ continue;
+
+ if (!try_accept_one(&start, len, PG_LEVEL_4K))
+ error("Accepting memory failed\n");
+ }
+}
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index ddb60a87b426..ab4deb897942 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -580,16 +580,9 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
return true;
}
-/*
- * Inform the VMM of the guest's intent for this physical page: shared with
- * the VMM or private to the guest. The VMM is expected to change its mapping
- * of the page in response.
- */
-static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
+ bool enc)
{
- phys_addr_t start = __pa(vaddr);
- phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
-
if (!enc) {
/* Set the shared (decrypted) bits: */
start |= cc_mkdec(0);
@@ -634,6 +627,25 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
return true;
}
+void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ if (!tdx_enc_status_changed_phys(start, end, true))
+ panic("Accepting memory failed\n");
+}
+
+/*
+ * Inform the VMM of the guest's intent for this physical page: shared with
+ * the VMM or private to the guest. The VMM is expected to change its mapping
+ * of the page in response.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+ phys_addr_t start = __pa(vaddr);
+ phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+ return tdx_enc_status_changed_phys(start, end, enc);
+}
+
void __init tdx_early_init(void)
{
u64 cc_mask;
@@ -645,6 +657,7 @@ void __init tdx_early_init(void)
return;
setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
+ setup_clear_cpu_cap(X86_FEATURE_MCE);
cc_set_vendor(CC_VENDOR_INTEL);
cc_mask = get_cc_mask();
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 956ced04c3be..97534c334473 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -81,5 +81,7 @@ struct tdx_module_output {
u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
struct tdx_module_output *out);
+void tdx_accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
index 1327f64d5205..de0790af1824 100644
--- a/arch/x86/mm/unaccepted_memory.c
+++ b/arch/x86/mm/unaccepted_memory.c
@@ -6,6 +6,7 @@
#include <asm/io.h>
#include <asm/setup.h>
+#include <asm/shared/tdx.h>
#include <asm/unaccepted_memory.h>
/* Protects unaccepted memory bitmap */
@@ -29,7 +30,13 @@ void accept_memory(phys_addr_t start, phys_addr_t end)
unsigned long len = range_end - range_start;
/* Platform-specific memory-acceptance call goes here */
- panic("Cannot accept memory");
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ tdx_accept_memory(range_start * PMD_SIZE,
+ range_end * PMD_SIZE);
+ } else {
+ panic("Cannot accept memory");
+ }
+
bitmap_clear(unaccepted_memory, range_start, len);
}
spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
--
2.35.1
On Thu, May 05, 2022 at 12:12:52PM +0200, Borislav Petkov wrote:
> On Mon, Apr 25, 2022 at 06:39:32AM +0300, Kirill A. Shutemov wrote:
> > Subject: [PATCHv5 10/12] x86/tdx: Unaccepted memory support
>
> Patch subject needs a verb:
>
> "Add ... "
>
> > All preparations are complete.
>
> Drop this sentence.
>
> > Hookup TDX-specific code to accept memory.
> >
> > Accepting the memory is the same process as converting memory from
> > shared to private: kernel notifies VMM with MAP_GPA hypercall and then
> > accept pages with ACCEPT_PAGE module call.
> >
> > The implementation in core kernel uses tdx_enc_status_changed(). It
> > already used for converting memory to shared and back for I/O
> > transactions.
> >
> > Boot stub provides own implementation of tdx_accept_memory(). It is
> > similar in structure to tdx_enc_status_changed(), but only cares about
> > converting memory to private.
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > ---
> > arch/x86/Kconfig | 1 +
> > arch/x86/boot/compressed/mem.c | 24 ++++++++-
> > arch/x86/boot/compressed/tdx.c | 85 +++++++++++++++++++++++++++++++
> > arch/x86/coco/tdx/tdx.c | 31 +++++++----
> > arch/x86/include/asm/shared/tdx.h | 2 +
> > arch/x86/mm/unaccepted_memory.c | 9 +++-
> > 6 files changed, 141 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 7021ec725dd3..e4c31dbea6d7 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -885,6 +885,7 @@ config INTEL_TDX_GUEST
> > select ARCH_HAS_CC_PLATFORM
> > select X86_MEM_ENCRYPT
> > select X86_MCE
> > + select UNACCEPTED_MEMORY
>
> WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
> Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
> Selected by [y]:
> - INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
>
> WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
> Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
> Selected by [y]:
> - INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
Ughh. Any ideas how to get around it? (Except for implementing kexec
support right away?)
>
>
> > diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
> > index b5058c975d26..539fff27de49 100644
> > --- a/arch/x86/boot/compressed/mem.c
> > +++ b/arch/x86/boot/compressed/mem.c
> > @@ -5,6 +5,8 @@
> > #include "error.h"
> > #include "find.h"
> > #include "math.h"
> > +#include "tdx.h"
> > +#include <asm/shared/tdx.h>
> >
> > #define PMD_SHIFT 21
> > #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
> > @@ -12,10 +14,30 @@
> >
> > extern struct boot_params *boot_params;
> >
> > +static bool is_tdx_guest(void)
>
> There is arch/x86/boot/compressed/tdx.c which already looks at that leaf
> and detects crap. Why is that hastily slapped here too?
I'm not happy with this either.
process_unaccepted_memory() is called from the EFI stub, which runs before
the decompression code.
I'm not sure how to structure the code so that it makes sense.
Call early_tdx_detect() from efi_main() in libstub/x86-stub.c?
It would require including tdx.h from the decompression code there, which
is nonsense.
I would appreciate an idea.
> > +{
> > + static bool once;
> > + static bool is_tdx;
> > +
> > + if (!once) {
> > + u32 eax, sig[3];
> > +
> > + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
> > + &sig[0], &sig[2], &sig[1]);
> > + is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
> > + once = true;
> > + }
> > +
> > + return is_tdx;
> > +}
> > +
> > static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
> > {
> > /* Platform-specific memory-acceptance call goes here */
> > - error("Cannot accept memory");
> > + if (is_tdx_guest())
> > + tdx_accept_memory(start, end);
> > + else
> > + error("Cannot accept memory");
>
> What is that supposed to catch?
Booting on a platform that uses unaccepted memory, but the kernel does not
support it.
> > diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
> > index 918a7606f53c..57fd2bf28484 100644
> > --- a/arch/x86/boot/compressed/tdx.c
> > +++ b/arch/x86/boot/compressed/tdx.c
> > @@ -3,12 +3,14 @@
> > #include "../cpuflags.h"
> > #include "../string.h"
> > #include "../io.h"
> > +#include "align.h"
> > #include "error.h"
> >
> > #include <vdso/limits.h>
> > #include <uapi/asm/vmx.h>
> >
> > #include <asm/shared/tdx.h>
> > +#include <asm/page_types.h>
> >
> > /* Called from __tdx_hypercall() for unrecoverable failure */
> > void __tdx_hypercall_failed(void)
> > @@ -75,3 +77,86 @@ void early_tdx_detect(void)
> > pio_ops.f_outb = tdx_outb;
> > pio_ops.f_outw = tdx_outw;
> > }
> > +
> > +enum pg_level {
> > + PG_LEVEL_4K,
> > + PG_LEVEL_2M,
> > + PG_LEVEL_1G,
> > +};
> > +
> > +#define PTE_SHIFT 9
>
> At least stick those in a header.
>
> > +static bool try_accept_one(phys_addr_t *start, unsigned long len,
> > + enum pg_level pg_level)
>
> No need to break that line.
>
> Also, it doesn't need to be bool - you can simply return accept_size on
> success and 0 on error so that you don't have an I/O argument.
So on the calling side it would look like:
accepted = try_accept_one(start, len, PG_LEVEL_1G)
if (accepted) {
start += accepted;
continue;
}
And similarly for the other levels. Is it really better?
>
> Ditto for the copy in coco/tdx/tdx.c
>
> > +{
> > + unsigned long accept_size = PAGE_SIZE << (pg_level * PTE_SHIFT);
> > + u64 tdcall_rcx;
> > + u8 page_size;
> > +
> > + if (!IS_ALIGNED(*start, accept_size))
> > + return false;
> > +
> > + if (len < accept_size)
> > + return false;
> > +
> > + /*
> > + * Pass the page physical address to the TDX module to accept the
> > + * pending, private page.
> > + *
> > + * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
> > + */
> > + switch (pg_level) {
> > + case PG_LEVEL_4K:
> > + page_size = 0;
> > + break;
> > + case PG_LEVEL_2M:
> > + page_size = 1;
> > + break;
> > + case PG_LEVEL_1G:
> > + page_size = 2;
> > + break;
> > + default:
> > + return false;
> > + }
> > +
> > + tdcall_rcx = *start | page_size;
> > + if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
> > + return false;
> > +
> > + *start += accept_size;
> > + return true;
> > +}
> > +
> > +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> > +{
> > + /*
> > + * Notify the VMM about page mapping conversion. More info about ABI
> > + * can be found in TDX Guest-Host-Communication Interface (GHCI),
> > + * section "TDG.VP.VMCALL<MapGPA>"
> > + */
> > + if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> > + error("Accepting memory failed\n");
> > + /*
> > + * For shared->private conversion, accept the page using
> > + * TDX_ACCEPT_PAGE TDX module call.
> > + */
> > + while (start < end) {
> > + unsigned long len = end - start;
> > +
> > + /*
> > + * Try larger accepts first. It gives chance to VMM to keep
> > + * 1G/2M SEPT entries where possible and speeds up process by
>
> "SEPT"?
Secure EPT. EPT for private memory in TDX.
> > + * cutting number of hypercalls (if successful).
> > + */
> > +
> > + if (try_accept_one(&start, len, PG_LEVEL_1G))
> > + continue;
> > +
> > + if (try_accept_one(&start, len, PG_LEVEL_2M))
> > + continue;
> > +
> > + if (!try_accept_one(&start, len, PG_LEVEL_4K))
> > + error("Accepting memory failed\n");
> > + }
> > +}
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index ddb60a87b426..ab4deb897942 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -580,16 +580,9 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
> > return true;
> > }
> >
> > -/*
> > - * Inform the VMM of the guest's intent for this physical page: shared with
> > - * the VMM or private to the guest. The VMM is expected to change its mapping
> > - * of the page in response.
> > - */
> > -static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> > +static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
>
> Why? is tdx_enc_status_changed_virt() coming too?
tdx_enc_status_changed() deals with virtual addresses.
> > + bool enc)
> > {
> > - phys_addr_t start = __pa(vaddr);
> > - phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> > -
> > if (!enc) {
> > /* Set the shared (decrypted) bits: */
> > start |= cc_mkdec(0);
> > @@ -634,6 +627,25 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> > return true;
> > }
> >
> > +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> > +{
> > + if (!tdx_enc_status_changed_phys(start, end, true))
> > + panic("Accepting memory failed\n");
> > +}
> > +
> > +/*
> > + * Inform the VMM of the guest's intent for this physical page: shared with
> > + * the VMM or private to the guest. The VMM is expected to change its mapping
> > + * of the page in response.
> > + */
> > +static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> > +{
> > + phys_addr_t start = __pa(vaddr);
> > + phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> > +
> > + return tdx_enc_status_changed_phys(start, end, enc);
> > +}
> > +
> > void __init tdx_early_init(void)
> > {
> > u64 cc_mask;
> > @@ -645,6 +657,7 @@ void __init tdx_early_init(void)
> > return;
> >
> > setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
> > + setup_clear_cpu_cap(X86_FEATURE_MCE);
>
> What, no comment? Why does TDX need to disable MCE?
It is not supposed to be here. Sorry.
>
> > cc_set_vendor(CC_VENDOR_INTEL);
> > cc_mask = get_cc_mask();
> > diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
> > index 956ced04c3be..97534c334473 100644
> > --- a/arch/x86/include/asm/shared/tdx.h
> > +++ b/arch/x86/include/asm/shared/tdx.h
> > @@ -81,5 +81,7 @@ struct tdx_module_output {
> > u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> > struct tdx_module_output *out);
> >
> > +void tdx_accept_memory(phys_addr_t start, phys_addr_t end);
> > +
> > #endif /* !__ASSEMBLY__ */
> > #endif /* _ASM_X86_SHARED_TDX_H */
> > diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
> > index 1327f64d5205..de0790af1824 100644
> > --- a/arch/x86/mm/unaccepted_memory.c
> > +++ b/arch/x86/mm/unaccepted_memory.c
> > @@ -6,6 +6,7 @@
> >
> > #include <asm/io.h>
> > #include <asm/setup.h>
> > +#include <asm/shared/tdx.h>
> > #include <asm/unaccepted_memory.h>
> >
> > /* Protects unaccepted memory bitmap */
> > @@ -29,7 +30,13 @@ void accept_memory(phys_addr_t start, phys_addr_t end)
> > unsigned long len = range_end - range_start;
> >
> > /* Platform-specific memory-acceptance call goes here */
> > - panic("Cannot accept memory");
> > + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
> > + tdx_accept_memory(range_start * PMD_SIZE,
> > + range_end * PMD_SIZE);
> > + } else {
> > + panic("Cannot accept memory");
>
> Why panic here? A WARN_ONCE() should suffice, methinks.
As I said before, memory accept failure is fatal.
--
Kirill A. Shutemov
On Mon, Apr 25, 2022 at 06:39:32AM +0300, Kirill A. Shutemov wrote:
> Subject: [PATCHv5 10/12] x86/tdx: Unaccepted memory support
Patch subject needs a verb:
"Add ... "
> All preparations are complete.
Drop this sentence.
> Hookup TDX-specific code to accept memory.
>
> Accepting the memory is the same process as converting memory from
> shared to private: kernel notifies VMM with MAP_GPA hypercall and then
> accept pages with ACCEPT_PAGE module call.
>
> The implementation in core kernel uses tdx_enc_status_changed(). It
> already used for converting memory to shared and back for I/O
> transactions.
>
> Boot stub provides own implementation of tdx_accept_memory(). It is
> similar in structure to tdx_enc_status_changed(), but only cares about
> converting memory to private.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/boot/compressed/mem.c | 24 ++++++++-
> arch/x86/boot/compressed/tdx.c | 85 +++++++++++++++++++++++++++++++
> arch/x86/coco/tdx/tdx.c | 31 +++++++----
> arch/x86/include/asm/shared/tdx.h | 2 +
> arch/x86/mm/unaccepted_memory.c | 9 +++-
> 6 files changed, 141 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 7021ec725dd3..e4c31dbea6d7 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -885,6 +885,7 @@ config INTEL_TDX_GUEST
> select ARCH_HAS_CC_PLATFORM
> select X86_MEM_ENCRYPT
> select X86_MCE
> + select UNACCEPTED_MEMORY
WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
Selected by [y]:
- INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
Selected by [y]:
- INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
> diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
> index b5058c975d26..539fff27de49 100644
> --- a/arch/x86/boot/compressed/mem.c
> +++ b/arch/x86/boot/compressed/mem.c
> @@ -5,6 +5,8 @@
> #include "error.h"
> #include "find.h"
> #include "math.h"
> +#include "tdx.h"
> +#include <asm/shared/tdx.h>
>
> #define PMD_SHIFT 21
> #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
> @@ -12,10 +14,30 @@
>
> extern struct boot_params *boot_params;
>
> +static bool is_tdx_guest(void)
There is arch/x86/boot/compressed/tdx.c which already looks at that leaf
and detects crap. Why is that hastily slapped here too?
> +{
> + static bool once;
> + static bool is_tdx;
> +
> + if (!once) {
> + u32 eax, sig[3];
> +
> + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
> + &sig[0], &sig[2], &sig[1]);
> + is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
> + once = true;
> + }
> +
> + return is_tdx;
> +}
> +
> static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
> {
> /* Platform-specific memory-acceptance call goes here */
> - error("Cannot accept memory");
> + if (is_tdx_guest())
> + tdx_accept_memory(start, end);
> + else
> + error("Cannot accept memory");
What is that supposed to catch?
> diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
> index 918a7606f53c..57fd2bf28484 100644
> --- a/arch/x86/boot/compressed/tdx.c
> +++ b/arch/x86/boot/compressed/tdx.c
> @@ -3,12 +3,14 @@
> #include "../cpuflags.h"
> #include "../string.h"
> #include "../io.h"
> +#include "align.h"
> #include "error.h"
>
> #include <vdso/limits.h>
> #include <uapi/asm/vmx.h>
>
> #include <asm/shared/tdx.h>
> +#include <asm/page_types.h>
>
> /* Called from __tdx_hypercall() for unrecoverable failure */
> void __tdx_hypercall_failed(void)
> @@ -75,3 +77,86 @@ void early_tdx_detect(void)
> pio_ops.f_outb = tdx_outb;
> pio_ops.f_outw = tdx_outw;
> }
> +
> +enum pg_level {
> + PG_LEVEL_4K,
> + PG_LEVEL_2M,
> + PG_LEVEL_1G,
> +};
> +
> +#define PTE_SHIFT 9
At least stick those in a header.
> +static bool try_accept_one(phys_addr_t *start, unsigned long len,
> + enum pg_level pg_level)
No need to break that line.
Also, it doesn't need to be bool - you can simply return accept_size on
success and 0 on error so that you don't have an I/O argument.
Ditto for the copy in coco/tdx/tdx.c
> +{
> + unsigned long accept_size = PAGE_SIZE << (pg_level * PTE_SHIFT);
> + u64 tdcall_rcx;
> + u8 page_size;
> +
> + if (!IS_ALIGNED(*start, accept_size))
> + return false;
> +
> + if (len < accept_size)
> + return false;
> +
> + /*
> + * Pass the page physical address to the TDX module to accept the
> + * pending, private page.
> + *
> + * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
> + */
> + switch (pg_level) {
> + case PG_LEVEL_4K:
> + page_size = 0;
> + break;
> + case PG_LEVEL_2M:
> + page_size = 1;
> + break;
> + case PG_LEVEL_1G:
> + page_size = 2;
> + break;
> + default:
> + return false;
> + }
> +
> + tdcall_rcx = *start | page_size;
> + if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
> + return false;
> +
> + *start += accept_size;
> + return true;
> +}
> +
> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> + /*
> + * Notify the VMM about page mapping conversion. More info about ABI
> + * can be found in TDX Guest-Host-Communication Interface (GHCI),
> + * section "TDG.VP.VMCALL<MapGPA>"
> + */
> + if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> + error("Accepting memory failed\n");
> + /*
> + * For shared->private conversion, accept the page using
> + * TDX_ACCEPT_PAGE TDX module call.
> + */
> + while (start < end) {
> + unsigned long len = end - start;
> +
> + /*
> + * Try larger accepts first. It gives chance to VMM to keep
> + * 1G/2M SEPT entries where possible and speeds up process by
"SEPT"?
> + * cutting number of hypercalls (if successful).
> + */
> +
> + if (try_accept_one(&start, len, PG_LEVEL_1G))
> + continue;
> +
> + if (try_accept_one(&start, len, PG_LEVEL_2M))
> + continue;
> +
> + if (!try_accept_one(&start, len, PG_LEVEL_4K))
> + error("Accepting memory failed\n");
> + }
> +}
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index ddb60a87b426..ab4deb897942 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -580,16 +580,9 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
> return true;
> }
>
> -/*
> - * Inform the VMM of the guest's intent for this physical page: shared with
> - * the VMM or private to the guest. The VMM is expected to change its mapping
> - * of the page in response.
> - */
> -static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> +static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
Why? is tdx_enc_status_changed_virt() coming too?
> + bool enc)
> {
> - phys_addr_t start = __pa(vaddr);
> - phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> -
> if (!enc) {
> /* Set the shared (decrypted) bits: */
> start |= cc_mkdec(0);
> @@ -634,6 +627,25 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> return true;
> }
>
> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> + if (!tdx_enc_status_changed_phys(start, end, true))
> + panic("Accepting memory failed\n");
> +}
> +
> +/*
> + * Inform the VMM of the guest's intent for this physical page: shared with
> + * the VMM or private to the guest. The VMM is expected to change its mapping
> + * of the page in response.
> + */
> +static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> +{
> + phys_addr_t start = __pa(vaddr);
> + phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> +
> + return tdx_enc_status_changed_phys(start, end, enc);
> +}
> +
> void __init tdx_early_init(void)
> {
> u64 cc_mask;
> @@ -645,6 +657,7 @@ void __init tdx_early_init(void)
> return;
>
> setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
> + setup_clear_cpu_cap(X86_FEATURE_MCE);
What, no comment? Why does TDX need to disable MCE?
> cc_set_vendor(CC_VENDOR_INTEL);
> cc_mask = get_cc_mask();
> diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
> index 956ced04c3be..97534c334473 100644
> --- a/arch/x86/include/asm/shared/tdx.h
> +++ b/arch/x86/include/asm/shared/tdx.h
> @@ -81,5 +81,7 @@ struct tdx_module_output {
> u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> struct tdx_module_output *out);
>
> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end);
> +
> #endif /* !__ASSEMBLY__ */
> #endif /* _ASM_X86_SHARED_TDX_H */
> diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
> index 1327f64d5205..de0790af1824 100644
> --- a/arch/x86/mm/unaccepted_memory.c
> +++ b/arch/x86/mm/unaccepted_memory.c
> @@ -6,6 +6,7 @@
>
> #include <asm/io.h>
> #include <asm/setup.h>
> +#include <asm/shared/tdx.h>
> #include <asm/unaccepted_memory.h>
>
> /* Protects unaccepted memory bitmap */
> @@ -29,7 +30,13 @@ void accept_memory(phys_addr_t start, phys_addr_t end)
> unsigned long len = range_end - range_start;
>
> /* Platform-specific memory-acceptance call goes here */
> - panic("Cannot accept memory");
> + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
> + tdx_accept_memory(range_start * PMD_SIZE,
> + range_end * PMD_SIZE);
> + } else {
> + panic("Cannot accept memory");
Why panic here? A WARN_ONCE() should suffice, methinks.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, May 06, 2022 at 11:44:23PM +0300, Kirill A. Shutemov wrote:
> > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > > index 7021ec725dd3..e4c31dbea6d7 100644
> > > --- a/arch/x86/Kconfig
> > > +++ b/arch/x86/Kconfig
> > > @@ -885,6 +885,7 @@ config INTEL_TDX_GUEST
> > > select ARCH_HAS_CC_PLATFORM
> > > select X86_MEM_ENCRYPT
> > > select X86_MCE
> > > + select UNACCEPTED_MEMORY
> >
> > WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
> > Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
> > Selected by [y]:
> > - INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
> >
> > WARNING: unmet direct dependencies detected for UNACCEPTED_MEMORY
> > Depends on [n]: EFI [=y] && EFI_STUB [=y] && !KEXEC_CORE [=y]
> > Selected by [y]:
> > - INTEL_TDX_GUEST [=y] && HYPERVISOR_GUEST [=y] && X86_64 [=y] && CPU_SUP_INTEL [=y] && X86_X2APIC [=y]
>
> Ughh. Any ideas how to get around it? (Except for implementing kexec
> support right away?)
I reworked this to disable kexec at boot time instead.
> > Also, it doesn't need to be bool - you can simply return accept_size on
> > success and 0 on error so that you don't have an I/O argument.
>
> So on the calling side it would look like:
>
> accepted = try_accept_one(start, len, PG_LEVEL_1G)
> if (accepted) {
> start += accepted;
> continue;
> }
>
> And the similar for other levels. Is it really better?
JFYI, I've reworked it as
accepted = try_accept_one(start, len, PG_LEVEL_1G);
if (!accepted)
accepted = try_accept_one(start, len, PG_LEVEL_2M);
if (!accepted)
accepted = try_accept_one(start, len, PG_LEVEL_4K);
if (!accepted)
return false;
start += accepted;
looks good to me.
--
Kirill A. Shutemov
On Wed, May 11, 2022 at 04:19:06AM +0300, Kirill A. Shutemov wrote:
> JFYI, I've reworked it as
>
> accepted = try_accept_one(start, len, PG_LEVEL_1G);
> if (!accepted)
> accepted = try_accept_one(start, len, PG_LEVEL_2M);
> if (!accepted)
> accepted = try_accept_one(start, len, PG_LEVEL_4K);
> if (!accepted)
> return false;
> start += accepted;
s/accepted/accpt_size/
and then it is perfectly clear what that variable contains.
But it seems you're preparing a new version, so I'll continue looking
there.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette