Memory access #VE's are hard for Linux to handle in contexts like the
entry code or NMIs. But other OSes need them for functionality.
There's a static (pre-guest-boot) way for a VMM to choose one or the
other. But VMMs don't always know which OS they are booting, so they
choose to deliver those #VE's so the "other" OSes will work. That,
unfortunately has left us in the lurch and exposed to these
hard-to-handle #VEs.
The TDX module has introduced a new feature. Even if the static
configuration is "send nasty #VE's", the kernel can dynamically request
that they be disabled.
Check if the feature is available and disable SEPT #VE if possible.
If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
attribute is no longer reliable. It reflects the initial state of the
control for the TD, but it will not be updated if someone (e.g. bootloader)
changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
determine if SEPT #VEs are enabled or disabled.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Fixes: 373e715e31bf ("x86/tdx: Panic on bad configs that #VE on "private" memory access")
Cc: [email protected]
---
arch/x86/coco/tdx/tdx.c | 88 +++++++++++++++++++++++++------
arch/x86/include/asm/shared/tdx.h | 11 +++-
2 files changed, 83 insertions(+), 16 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 1ff571cb9177..ba37f4306f4e 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -77,6 +77,20 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
+/* Read TD-scoped metadata */
+static inline u64 tdg_vm_rd(u64 field, u64 *value)
+{
+ struct tdx_module_args args = {
+ .rdx = field,
+ };
+ u64 ret;
+
+ ret = __tdcall_ret(TDG_VM_RD, &args);
+ *value = args.r8;
+
+ return ret;
+}
+
/* Write TD-scoped metadata */
static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
{
@@ -179,6 +193,62 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
+/*
+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
+ * that no #VE will be delivered for accesses to TD-private memory.
+ *
+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
+ * controls if the guest will receive such #VE with TD attribute
+ * ATTR_SEPT_VE_DISABLE.
+ *
+ * Newer TDX module allows the guest to control if it wants to receive SEPT
+ * violation #VEs.
+ *
+ * Check if the feature is available and disable SEPT #VE if possible.
+ *
+ * If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * attribute is no longer reliable. It reflects the initial state of the
+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
+ * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
+ * determine if SEPT #VEs are enabled or disabled.
+ */
+static void disable_sept_ve(u64 td_attr)
+{
+ const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
+ bool debug = td_attr & ATTR_DEBUG;
+ u64 config, controls;
+
+ /* Is this TD allowed to disable SEPT #VE */
+ tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
+ /* No SEPT #VE controls for the guest: check the attribute */
+ if (td_attr & ATTR_SEPT_VE_DISABLE)
+ return;
+
+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
+ if (debug)
+ pr_warn("%s\n", msg);
+ else
+ tdx_panic(msg);
+ return;
+ }
+
+ /* Check if SEPT #VE has been disabled before us */
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ if (controls & TD_CTLS_PENDING_VE_DISABLE)
+ return;
+
+ /* Keep #VEs enabled for splats in debugging environments */
+ if (debug)
+ return;
+
+ /* Disable SEPT #VEs */
+ tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
+ TD_CTLS_PENDING_VE_DISABLE);
+
+ return;
+}
+
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
@@ -204,24 +274,12 @@ static void tdx_setup(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
+ td_attr = args.rdx;
+
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
- /*
- * The kernel can not handle #VE's when accessing normal kernel
- * memory. Ensure that no #VE will be delivered for accesses to
- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
- */
- td_attr = args.rdx;
- if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
- const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
-
- /* Relax SEPT_VE_DISABLE check for debug TD. */
- if (td_attr & ATTR_DEBUG)
- pr_warn("%s\n", msg);
- else
- tdx_panic(msg);
- }
+ disable_sept_ve(td_attr);
}
/*
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fdfd41511b02..fecb2a6e864b 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -16,11 +16,20 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
+
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
--
2.43.0
On 12.05.24 г. 15:21 ч., Kirill A. Shutemov wrote:
> Memory access #VE's are hard for Linux to handle in contexts like the
> entry code or NMIs. But other OSes need them for functionality.
> There's a static (pre-guest-boot) way for a VMM to choose one or the
> other. But VMMs don't always know which OS they are booting, so they
> choose to deliver those #VE's so the "other" OSes will work. That,
> unfortunately has left us in the lurch and exposed to these
> hard-to-handle #VEs.
>
> The TDX module has introduced a new feature. Even if the static
> configuration is "send nasty #VE's", the kernel can dynamically request
> that they be disabled.
>
> Check if the feature is available and disable SEPT #VE if possible.
>
> If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
> attribute is no longer reliable. It reflects the initial state of the
> control for the TD, but it will not be updated if someone (e.g. bootloader)
> changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
> determine if SEPT #VEs are enabled or disabled.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Fixes: 373e715e31bf ("x86/tdx: Panic on bad configs that #VE on "private" memory access")
> Cc: [email protected]
Reviewed-by: Nikolay Borisov <[email protected]> though one nit below.
> ---
> arch/x86/coco/tdx/tdx.c | 88 +++++++++++++++++++++++++------
> arch/x86/include/asm/shared/tdx.h | 11 +++-
> 2 files changed, 83 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 1ff571cb9177..ba37f4306f4e 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -77,6 +77,20 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
> panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
> }
>
> +/* Read TD-scoped metadata */
> +static inline u64 tdg_vm_rd(u64 field, u64 *value)
> +{
> + struct tdx_module_args args = {
> + .rdx = field,
> + };
> + u64 ret;
> +
> + ret = __tdcall_ret(TDG_VM_RD, &args);
> + *value = args.r8;
> +
> + return ret;
> +}
nit: Perhaps this function can be put in the first patch and the
description there be made more generic, something along the lines of
"introduce functions for tdg_rd/tdg_wr" ?
> +
> /* Write TD-scoped metadata */
> static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
> {
> @@ -179,6 +193,62 @@ static void __noreturn tdx_panic(const char *msg)
> __tdx_hypercall(&args);
> }
>
> +/*
> + * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
> + * that no #VE will be delivered for accesses to TD-private memory.
> + *
> + * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
> + * controls if the guest will receive such #VE with TD attribute
> + * ATTR_SEPT_VE_DISABLE.
> + *
> + * Newer TDX module allows the guest to control if it wants to receive SEPT
> + * violation #VEs.
> + *
> + * Check if the feature is available and disable SEPT #VE if possible.
> + *
> + * If the TD allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
> + * attribute is no longer reliable. It reflects the initial state of the
> + * control for the TD, but it will not be updated if someone (e.g. bootloader)
> + * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
> + * determine if SEPT #VEs are enabled or disabled.
> + */
> +static void disable_sept_ve(u64 td_attr)
> +{
> + const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
> + bool debug = td_attr & ATTR_DEBUG;
> + u64 config, controls;
> +
> + /* Is this TD allowed to disable SEPT #VE */
> + tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
> + if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
> + /* No SEPT #VE controls for the guest: check the attribute */
> + if (td_attr & ATTR_SEPT_VE_DISABLE)
> + return;
> +
> + /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
> + if (debug)
> + pr_warn("%s\n", msg);
> + else
> + tdx_panic(msg);
> + return;
> + }
> +
> + /* Check if SEPT #VE has been disabled before us */
> + tdg_vm_rd(TDCS_TD_CTLS, &controls);
> + if (controls & TD_CTLS_PENDING_VE_DISABLE)
> + return;
> +
> + /* Keep #VEs enabled for splats in debugging environments */
> + if (debug)
> + return;
> +
> + /* Disable SEPT #VEs */
> + tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
> + TD_CTLS_PENDING_VE_DISABLE);
> +
> + return;
> +}
> +
> static void tdx_setup(u64 *cc_mask)
> {
> struct tdx_module_args args = {};
> @@ -204,24 +274,12 @@ static void tdx_setup(u64 *cc_mask)
> gpa_width = args.rcx & GENMASK(5, 0);
> *cc_mask = BIT_ULL(gpa_width - 1);
>
> + td_attr = args.rdx;
> +
> /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
> tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
>
> - /*
> - * The kernel can not handle #VE's when accessing normal kernel
> - * memory. Ensure that no #VE will be delivered for accesses to
> - * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
> - */
> - td_attr = args.rdx;
> - if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
> - const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
> -
> - /* Relax SEPT_VE_DISABLE check for debug TD. */
> - if (td_attr & ATTR_DEBUG)
> - pr_warn("%s\n", msg);
> - else
> - tdx_panic(msg);
> - }
> + disable_sept_ve(td_attr);
> }
>
> /*
> diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
> index fdfd41511b02..fecb2a6e864b 100644
> --- a/arch/x86/include/asm/shared/tdx.h
> +++ b/arch/x86/include/asm/shared/tdx.h
> @@ -16,11 +16,20 @@
> #define TDG_VP_VEINFO_GET 3
> #define TDG_MR_REPORT 4
> #define TDG_MEM_PAGE_ACCEPT 6
> +#define TDG_VM_RD 7
> #define TDG_VM_WR 8
>
> -/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
> +/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
> +#define TDCS_CONFIG_FLAGS 0x1110000300000016
> +#define TDCS_TD_CTLS 0x1110000300000017
> #define TDCS_NOTIFY_ENABLES 0x9100000000000010
>
> +/* TDCS_CONFIG_FLAGS bits */
> +#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
> +
> +/* TDCS_TD_CTLS bits */
> +#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
> +
> /* TDX hypercall Leaf IDs */
> #define TDVMCALL_MAP_GPA 0x10001
> #define TDVMCALL_GET_QUOTE 0x10002
On Tue, May 14, 2024 at 05:56:21PM +0300, Nikolay Borisov wrote:
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index 1ff571cb9177..ba37f4306f4e 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -77,6 +77,20 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
> > panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
> > }
> > +/* Read TD-scoped metadata */
> > +static inline u64 tdg_vm_rd(u64 field, u64 *value)
> > +{
> > + struct tdx_module_args args = {
> > + .rdx = field,
> > + };
> > + u64 ret;
> > +
> > + ret = __tdcall_ret(TDG_VM_RD, &args);
> > + *value = args.r8;
> > +
> > + return ret;
> > +}
>
> nit: Perhaps this function can be put in the first patch and the description
> there be made more generic, something along the lines of "introduce
> functions for tdg_rd/tdg_wr" ?
A static function without an user will generate a build warning. I don't
think it is good idea.
--
Kiryl Shutsemau / Kirill A. Shutemov
On Wed, May 15, 2024 at 04:14:18PM +0300, Nikolay Borisov wrote:
>
>
> On 15.05.24 г. 12:30 ч., Kirill A. Shutemov wrote:
> > On Tue, May 14, 2024 at 05:56:21PM +0300, Nikolay Borisov wrote:
> > > > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > > > index 1ff571cb9177..ba37f4306f4e 100644
> > > > --- a/arch/x86/coco/tdx/tdx.c
> > > > +++ b/arch/x86/coco/tdx/tdx.c
> > > > @@ -77,6 +77,20 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
> > > > panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
> > > > }
> > > > +/* Read TD-scoped metadata */
> > > > +static inline u64 tdg_vm_rd(u64 field, u64 *value)
> > > > +{
> > > > + struct tdx_module_args args = {
> > > > + .rdx = field,
> > > > + };
> > > > + u64 ret;
> > > > +
> > > > + ret = __tdcall_ret(TDG_VM_RD, &args);
> > > > + *value = args.r8;
> > > > +
> > > > + return ret;
> > > > +}
> > >
> > > nit: Perhaps this function can be put in the first patch and the description
> > > there be made more generic, something along the lines of "introduce
> > > functions for tdg_rd/tdg_wr" ?
> >
> > A static function without an user will generate a build warning. I don't
> > think it is good idea.
> >
>
> But are those 2 wrappers really static-worthy? Those two interfaces seem to
> be rather generic and could be used by more things in the future? OTOH when
> the time comes they can be exposed as needed.
Generally, functions have to static unless they used outside of the
translation unit.
--
Kiryl Shutsemau / Kirill A. Shutemov
On 15.05.24 г. 12:30 ч., Kirill A. Shutemov wrote:
> On Tue, May 14, 2024 at 05:56:21PM +0300, Nikolay Borisov wrote:
>>> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
>>> index 1ff571cb9177..ba37f4306f4e 100644
>>> --- a/arch/x86/coco/tdx/tdx.c
>>> +++ b/arch/x86/coco/tdx/tdx.c
>>> @@ -77,6 +77,20 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
>>> panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
>>> }
>>> +/* Read TD-scoped metadata */
>>> +static inline u64 tdg_vm_rd(u64 field, u64 *value)
>>> +{
>>> + struct tdx_module_args args = {
>>> + .rdx = field,
>>> + };
>>> + u64 ret;
>>> +
>>> + ret = __tdcall_ret(TDG_VM_RD, &args);
>>> + *value = args.r8;
>>> +
>>> + return ret;
>>> +}
>>
>> nit: Perhaps this function can be put in the first patch and the description
>> there be made more generic, something along the lines of "introduce
>> functions for tdg_rd/tdg_wr" ?
>
> A static function without an user will generate a build warning. I don't
> think it is good idea.
>
But are those 2 wrappers really static-worthy? Those two interfaces seem
to be rather generic and could be used by more things in the future?
OTOH when the time comes they can be exposed as needed.
Anyway that could be considered a minor thing.
On 5/15/24 02:30, Kirill A. Shutemov wrote:
>> nit: Perhaps this function can be put in the first patch and the description
>> there be made more generic, something along the lines of "introduce
>> functions for tdg_rd/tdg_wr" ?
> A static function without an user will generate a build warning. I don't
> think it is good idea.
OK, so stick a __maybe_unused on it when you define it and remove it on
first use.