During review of TDX guests on Hyper-V patchset Dave pointed to the
potential race between changing page private/shared status and
load_unaligned_zeropad().
Fix the issue.
v2:
- Add more info in commit message of the first patch.
- Move enc_status_change_finish_noop() into a separate patch.
- Fix typo in commit message and comment.
Kirill A. Shutemov (3):
x86/mm: Allow guest.enc_status_change_prepare() to fail
x86/tdx: Fix race between set_memory_encrypted() and
load_unaligned_zeropad()
x86/mm: Fix enc_status_change_finish_noop()
arch/x86/coco/tdx/tdx.c | 56 +++++++++++++++++++++++++++++++--
arch/x86/include/asm/x86_init.h | 2 +-
arch/x86/kernel/x86_init.c | 4 +--
arch/x86/mm/mem_encrypt_amd.c | 4 ++-
arch/x86/mm/pat/set_memory.c | 3 +-
5 files changed, 61 insertions(+), 8 deletions(-)
--
2.39.3
TDX code is going to provide guest.enc_status_change_prepare() that is
able to fail. TDX will use the call to convert the GPA range from shared
to private. This operation can fail.
Add a way to return an error from the callback.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: [email protected]
---
arch/x86/include/asm/x86_init.h | 2 +-
arch/x86/kernel/x86_init.c | 2 +-
arch/x86/mm/mem_encrypt_amd.c | 4 +++-
arch/x86/mm/pat/set_memory.c | 3 ++-
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 88085f369ff6..1ca9701917c5 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -150,7 +150,7 @@ struct x86_init_acpi {
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
*/
struct x86_guest {
- void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d82f4fa2f1bf..f230d4d7d8eb 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -130,7 +130,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
-static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
+static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c09109f..4f95c449a406 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
#endif
}
-static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
/*
* To maintain the security guarantees of SEV-SNP guests, make sure
@@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
*/
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
snp_set_memory_shared(vaddr, npages);
+
+ return true;
}
/* Return true unconditionally: return value doesn't matter for the SEV side */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..b8f48ebe753c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2151,7 +2151,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
/* Notify hypervisor that we are about to set/clr encryption attribute. */
- x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+ if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+ return -EIO;
ret = __change_page_attr_set_clr(&cpa, 1);
--
2.39.3
enc_status_change_finish_noop() defined as always-fail now which
doesn't make sense for noop.
The change doesn't have user-visible effect because it only gets
called if the platform has CC_ATTR_MEM_ENCRYPT. All platforms with
the attribute override the callback with own implementation.
Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/kernel/x86_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index f230d4d7d8eb..64664311ac2b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -131,7 +131,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
static bool is_private_mmio_noop(u64 addr) {return false; }
--
2.39.3
Touching privately mapped GPA that is not properly converted to private
with MapGPA and accepted leads to unrecoverable exit to VMM.
load_unaligned_zeropad() can touch memory that is not owned by the
caller, but just happened to next after the owned memory.
This load_unaligned_zeropad() behaviour makes it important when kernel
asks VMM to convert a GPA from shared to private or back. Kernel must
never have a page mapped into direct mapping (and aliases) as private
when the GPA is already converted to shared or when GPA is not yet
converted to private.
guest.enc_status_change_prepare() called before adjusting direct mapping
and therefore it is responsible for converting the memory to private.
guest.enc_status_change_finish() called after adjusting direct mapping
and it converts the memory to shared.
It is okay to have a shared mapping of memory that is not converted
properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
stepping on it.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory")
Cc: [email protected]
---
arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++---
1 file changed, 53 insertions(+), 3 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index e146b599260f..59cc13e41aa6 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -840,6 +840,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
return true;
}
+static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+ bool enc)
+{
+ /*
+ * Only handle shared->private conversion here.
+ * See the comment in tdx_early_init().
+ */
+ if (enc)
+ return tdx_enc_status_changed(vaddr, numpages, enc);
+ return true;
+}
+
+static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+ bool enc)
+{
+ /*
+ * Only handle private->shared conversion here.
+ * See the comment in tdx_early_init().
+ */
+ if (!enc)
+ return tdx_enc_status_changed(vaddr, numpages, enc);
+ return true;
+}
+
void __init tdx_early_init(void)
{
u64 cc_mask;
@@ -867,9 +891,35 @@ void __init tdx_early_init(void)
*/
physical_mask &= cc_mask - 1;
- x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
- x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
- x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+ /*
+ * Touching privately mapped GPA that is not properly converted to
+ * private with MapGPA and accepted leads to unrecoverable exit
+ * to VMM.
+ *
+ * load_unaligned_zeropad() can touch memory that is not owned by
+ * the caller, but just happened to next after the owned memory.
+ * This load_unaligned_zeropad() behaviour makes it important when
+ * kernel asks VMM to convert a GPA from shared to private or back.
+ * Kernel must never have a page mapped into direct mapping (and
+ * aliases) as private when the GPA is already converted to shared or
+ * when GPA is not yet converted to private.
+ *
+ * guest.enc_status_change_prepare() called before adjusting direct
+ * mapping and therefore it is responsible for converting the memory
+ * to private.
+ *
+ * guest.enc_status_change_finish() called after adjusting direct
+ * mapping and it converts the memory to shared.
+ *
+ * It is okay to have a shared mapping of memory that is not converted
+ * properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
+ * stepping on it.
+ */
+ x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
+ x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
+
+ x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+ x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
pr_info("Guest detected\n");
}
--
2.39.3
On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> TDX code is going to provide guest.enc_status_change_prepare() that is
> able to fail. TDX will use the call to convert the GPA range from shared
> to private. This operation can fail.
>
> Add a way to return an error from the callback.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: [email protected]
> ---
Looks good to me.
Reviewed-by: Kuppuswamy Sathyanarayanan <[email protected]>
> arch/x86/include/asm/x86_init.h | 2 +-
> arch/x86/kernel/x86_init.c | 2 +-
> arch/x86/mm/mem_encrypt_amd.c | 4 +++-
> arch/x86/mm/pat/set_memory.c | 3 ++-
> 4 files changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index 88085f369ff6..1ca9701917c5 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -150,7 +150,7 @@ struct x86_init_acpi {
> * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
> */
> struct x86_guest {
> - void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
> + bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
> bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
> bool (*enc_tlb_flush_required)(bool enc);
> bool (*enc_cache_flush_required)(void);
> diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
> index d82f4fa2f1bf..f230d4d7d8eb 100644
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -130,7 +130,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
>
> static void default_nmi_init(void) { };
>
> -static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
> +static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
> static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
> static bool enc_tlb_flush_required_noop(bool enc) { return false; }
> static bool enc_cache_flush_required_noop(void) { return false; }
> diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
> index e0b51c09109f..4f95c449a406 100644
> --- a/arch/x86/mm/mem_encrypt_amd.c
> +++ b/arch/x86/mm/mem_encrypt_amd.c
> @@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
> #endif
> }
>
> -static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
> +static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
> {
> /*
> * To maintain the security guarantees of SEV-SNP guests, make sure
> @@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
> */
> if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
> snp_set_memory_shared(vaddr, npages);
> +
> + return true;
> }>
> /* Return true unconditionally: return value doesn't matter for the SEV side */
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 7159cf787613..b8f48ebe753c 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -2151,7 +2151,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
> cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
>
> /* Notify hypervisor that we are about to set/clr encryption attribute. */
> - x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
> + if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
> + return -EIO;
>
> ret = __change_page_attr_set_clr(&cpa, 1);
>
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> Touching privately mapped GPA that is not properly converted to private
> with MapGPA and accepted leads to unrecoverable exit to VMM.
>
> load_unaligned_zeropad() can touch memory that is not owned by the
> caller, but just happened to next after the owned memory.
/s/to/to be ?
> This load_unaligned_zeropad() behaviour makes it important when kernel
> asks VMM to convert a GPA from shared to private or back. Kernel must
> never have a page mapped into direct mapping (and aliases) as private
> when the GPA is already converted to shared or when GPA is not yet
> converted to private.
I am wondering whether this issue exist in the AMD code?
IMO, you can add some info on the window in set_memory_encrypted()
where this race exists.
>
> guest.enc_status_change_prepare() called before adjusting direct mapping
> and therefore it is responsible for converting the memory to private.
>
> guest.enc_status_change_finish() called after adjusting direct mapping
> and it converts the memory to shared.
>
> It is okay to have a shared mapping of memory that is not converted
> properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
> stepping on it.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory")
> Cc: [email protected]
> ---
> arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index e146b599260f..59cc13e41aa6 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -840,6 +840,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> return true;
> }
>
> +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
> + bool enc)
> +{
> + /*
> + * Only handle shared->private conversion here.
> + * See the comment in tdx_early_init().
> + */
> + if (enc)
> + return tdx_enc_status_changed(vaddr, numpages, enc);
> + return true;
> +}
> +
> +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
> + bool enc)
> +{
> + /*
> + * Only handle private->shared conversion here.
> + * See the comment in tdx_early_init().
> + */
> + if (!enc)
> + return tdx_enc_status_changed(vaddr, numpages, enc);
> + return true;
> +}
> +
> void __init tdx_early_init(void)
> {
> u64 cc_mask;
> @@ -867,9 +891,35 @@ void __init tdx_early_init(void)
> */
> physical_mask &= cc_mask - 1;
>
> - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
> - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
I think you don't need to change the order here.
> - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
> + /*
> + * Touching privately mapped GPA that is not properly converted to
> + * private with MapGPA and accepted leads to unrecoverable exit
> + * to VMM.
> + *
> + * load_unaligned_zeropad() can touch memory that is not owned by
> + * the caller, but just happened to next after the owned memory.
> + * This load_unaligned_zeropad() behaviour makes it important when
> + * kernel asks VMM to convert a GPA from shared to private or back.
> + * Kernel must never have a page mapped into direct mapping (and
> + * aliases) as private when the GPA is already converted to shared or
> + * when GPA is not yet converted to private.
> + *
> + * guest.enc_status_change_prepare() called before adjusting direct
> + * mapping and therefore it is responsible for converting the memory
> + * to private.
> + *
> + * guest.enc_status_change_finish() called after adjusting direct
> + * mapping and it converts the memory to shared.
> + *
> + * It is okay to have a shared mapping of memory that is not converted
> + * properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
> + * stepping on it.
> + */
> + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
> + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
> +
> + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
> + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>
> pr_info("Guest detected\n");
> }
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy wrote:
>
>
> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> > Touching privately mapped GPA that is not properly converted to private
> > with MapGPA and accepted leads to unrecoverable exit to VMM.
> >
> > load_unaligned_zeropad() can touch memory that is not owned by the
> > caller, but just happened to next after the owned memory.
>
> /s/to/to be ?
Yep, my bad.
> > This load_unaligned_zeropad() behaviour makes it important when kernel
> > asks VMM to convert a GPA from shared to private or back. Kernel must
> > never have a page mapped into direct mapping (and aliases) as private
> > when the GPA is already converted to shared or when GPA is not yet
> > converted to private.
>
> I am wondering whether this issue exist in the AMD code?
>
> IMO, you can add some info on the window in set_memory_encrypted()
> where this race exists.
I don't think AMD affected by load_unaligned_zeropad() the same way as
Intel does. But I'm not sure.
Tom, do you have any comments?
>
> >
> > guest.enc_status_change_prepare() called before adjusting direct mapping
> > and therefore it is responsible for converting the memory to private.
> >
> > guest.enc_status_change_finish() called after adjusting direct mapping
> > and it converts the memory to shared.
> >
> > It is okay to have a shared mapping of memory that is not converted
> > properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
> > stepping on it.
>
> >
> > Signed-off-by: Kirill A. Shutemov <[email protected]>
> > Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory")
> > Cc: [email protected]
> > ---
> > arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++---
> > 1 file changed, 53 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index e146b599260f..59cc13e41aa6 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -840,6 +840,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> > return true;
> > }
> >
> > +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
> > + bool enc)
> > +{
> > + /*
> > + * Only handle shared->private conversion here.
> > + * See the comment in tdx_early_init().
> > + */
> > + if (enc)
> > + return tdx_enc_status_changed(vaddr, numpages, enc);
> > + return true;
> > +}
> > +
> > +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
> > + bool enc)
> > +{
> > + /*
> > + * Only handle private->shared conversion here.
> > + * See the comment in tdx_early_init().
> > + */
> > + if (!enc)
> > + return tdx_enc_status_changed(vaddr, numpages, enc);
> > + return true;
> > +}
> > +
> > void __init tdx_early_init(void)
> > {
> > u64 cc_mask;
> > @@ -867,9 +891,35 @@ void __init tdx_early_init(void)
> > */
> > physical_mask &= cc_mask - 1;
> >
> > - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
> > - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>
> I think you don't need to change the order here.
I wanted to emphasise that the comment is for _prepare/_finish callbacks
and I hoped re-order would help with this.
> > - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
> > + /*
> > + * Touching privately mapped GPA that is not properly converted to
> > + * private with MapGPA and accepted leads to unrecoverable exit
> > + * to VMM.
> > + *
> > + * load_unaligned_zeropad() can touch memory that is not owned by
> > + * the caller, but just happened to next after the owned memory.
> > + * This load_unaligned_zeropad() behaviour makes it important when
> > + * kernel asks VMM to convert a GPA from shared to private or back.
> > + * Kernel must never have a page mapped into direct mapping (and
> > + * aliases) as private when the GPA is already converted to shared or
> > + * when GPA is not yet converted to private.
> > + *
> > + * guest.enc_status_change_prepare() called before adjusting direct
> > + * mapping and therefore it is responsible for converting the memory
> > + * to private.
> > + *
> > + * guest.enc_status_change_finish() called after adjusting direct
> > + * mapping and it converts the memory to shared.
> > + *
> > + * It is okay to have a shared mapping of memory that is not converted
> > + * properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
> > + * stepping on it.
> > + */
> > + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
> > + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
> > +
> > + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
> > + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
> >
> > pr_info("Guest detected\n");
> > }
>
> --
> Sathyanarayanan Kuppuswamy
> Linux Kernel Developer
--
Kiryl Shutsemau / Kirill A. Shutemov
On 5/29/23 19:57, Kirill A. Shutemov wrote:
> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy wrote:
>>
>>
>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
>>> Touching privately mapped GPA that is not properly converted to private
>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
>>>
>>> load_unaligned_zeropad() can touch memory that is not owned by the
>>> caller, but just happened to next after the owned memory.
>>
>> /s/to/to be ?
>
> Yep, my bad.
>
>>> This load_unaligned_zeropad() behaviour makes it important when kernel
>>> asks VMM to convert a GPA from shared to private or back. Kernel must
>>> never have a page mapped into direct mapping (and aliases) as private
>>> when the GPA is already converted to shared or when GPA is not yet
>>> converted to private.
>>
>> I am wondering whether this issue exist in the AMD code?
>>
>> IMO, you can add some info on the window in set_memory_encrypted()
>> where this race exists.
>
> I don't think AMD affected by load_unaligned_zeropad() the same way as
> Intel does. But I'm not sure.
>
> Tom, do you have any comments?
Right, shouldn't be an issue for SNP.
Thanks,
Tom
>
>>
>>>
>>> guest.enc_status_change_prepare() called before adjusting direct mapping
>>> and therefore it is responsible for converting the memory to private.
>>>
>>> guest.enc_status_change_finish() called after adjusting direct mapping
>>> and it converts the memory to shared.
>>>
>>> It is okay to have a shared mapping of memory that is not converted
>>> properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
>>> stepping on it.
>>
>>>
>>> Signed-off-by: Kirill A. Shutemov <[email protected]>
>>> Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory")
>>> Cc: [email protected]
>>> ---
>>> arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++---
>>> 1 file changed, 53 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
>>> index e146b599260f..59cc13e41aa6 100644
>>> --- a/arch/x86/coco/tdx/tdx.c
>>> +++ b/arch/x86/coco/tdx/tdx.c
>>> @@ -840,6 +840,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
>>> return true;
>>> }
>>>
>>> +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
>>> + bool enc)
>>> +{
>>> + /*
>>> + * Only handle shared->private conversion here.
>>> + * See the comment in tdx_early_init().
>>> + */
>>> + if (enc)
>>> + return tdx_enc_status_changed(vaddr, numpages, enc);
>>> + return true;
>>> +}
>>> +
>>> +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
>>> + bool enc)
>>> +{
>>> + /*
>>> + * Only handle private->shared conversion here.
>>> + * See the comment in tdx_early_init().
>>> + */
>>> + if (!enc)
>>> + return tdx_enc_status_changed(vaddr, numpages, enc);
>>> + return true;
>>> +}
>>> +
>>> void __init tdx_early_init(void)
>>> {
>>> u64 cc_mask;
>>> @@ -867,9 +891,35 @@ void __init tdx_early_init(void)
>>> */
>>> physical_mask &= cc_mask - 1;
>>>
>>> - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
>>> - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>>
>> I think you don't need to change the order here.
>
> I wanted to emphasise that the comment is for _prepare/_finish callbacks
> and I hoped re-order would help with this.
>
>>> - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
>>> + /*
>>> + * Touching privately mapped GPA that is not properly converted to
>>> + * private with MapGPA and accepted leads to unrecoverable exit
>>> + * to VMM.
>>> + *
>>> + * load_unaligned_zeropad() can touch memory that is not owned by
>>> + * the caller, but just happened to next after the owned memory.
>>> + * This load_unaligned_zeropad() behaviour makes it important when
>>> + * kernel asks VMM to convert a GPA from shared to private or back.
>>> + * Kernel must never have a page mapped into direct mapping (and
>>> + * aliases) as private when the GPA is already converted to shared or
>>> + * when GPA is not yet converted to private.
>>> + *
>>> + * guest.enc_status_change_prepare() called before adjusting direct
>>> + * mapping and therefore it is responsible for converting the memory
>>> + * to private.
>>> + *
>>> + * guest.enc_status_change_finish() called after adjusting direct
>>> + * mapping and it converts the memory to shared.
>>> + *
>>> + * It is okay to have a shared mapping of memory that is not converted
>>> + * properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
>>> + * stepping on it.
>>> + */
>>> + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
>>> + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
>>> +
>>> + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
>>> + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>>>
>>> pr_info("Guest detected\n");
>>> }
>>
>> --
>> Sathyanarayanan Kuppuswamy
>> Linux Kernel Developer
>
Hi,
On 5/30/23 5:57 AM, Tom Lendacky wrote:
> On 5/29/23 19:57, Kirill A. Shutemov wrote:
>> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy wrote:
>>>
>>>
>>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
>>>> Touching privately mapped GPA that is not properly converted to private
>>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
>>>>
>>>> load_unaligned_zeropad() can touch memory that is not owned by the
>>>> caller, but just happened to next after the owned memory.
>>>
>>> /s/to/to be ?
>>
>> Yep, my bad.
>>
>>>> This load_unaligned_zeropad() behaviour makes it important when kernel
>>>> asks VMM to convert a GPA from shared to private or back. Kernel must
>>>> never have a page mapped into direct mapping (and aliases) as private
>>>> when the GPA is already converted to shared or when GPA is not yet
>>>> converted to private.
>>>
>>> I am wondering whether this issue exist in the AMD code?
>>>
>>> IMO, you can add some info on the window in set_memory_encrypted()
>>> where this race exists.
>>
>> I don't think AMD affected by load_unaligned_zeropad() the same way as
>> Intel does. But I'm not sure.
>>
>> Tom, do you have any comments?
>
> Right, shouldn't be an issue for SNP.
Thanks for confirming.
>
> Thanks,
> Tom
>
>>
>>>
>>>>
>>>> guest.enc_status_change_prepare() called before adjusting direct mapping
>>>> and therefore it is responsible for converting the memory to private.
>>>>
>>>> guest.enc_status_change_finish() called after adjusting direct mapping
>>>> and it converts the memory to shared.
>>>>
>>>> It is okay to have a shared mapping of memory that is not converted
>>>> properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
>>>> stepping on it.
>>>
>>>>
Rest looks good to me.
Reviewed-by: Kuppuswamy Sathyanarayanan <[email protected]>
>>>> Signed-off-by: Kirill A. Shutemov <[email protected]>
>>>> Fixes: 7dbde7631629 ("x86/mm/cpa: Add support for TDX shared memory")
>>>> Cc: [email protected]
>>>> ---
>>>> arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++++++++++++++++++++++++++---
>>>> 1 file changed, 53 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
>>>> index e146b599260f..59cc13e41aa6 100644
>>>> --- a/arch/x86/coco/tdx/tdx.c
>>>> +++ b/arch/x86/coco/tdx/tdx.c
>>>> @@ -840,6 +840,30 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
>>>> return true;
>>>> }
>>>> +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
>>>> + bool enc)
>>>> +{
>>>> + /*
>>>> + * Only handle shared->private conversion here.
>>>> + * See the comment in tdx_early_init().
>>>> + */
>>>> + if (enc)
>>>> + return tdx_enc_status_changed(vaddr, numpages, enc);
>>>> + return true;
>>>> +}
>>>> +
>>>> +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
>>>> + bool enc)
>>>> +{
>>>> + /*
>>>> + * Only handle private->shared conversion here.
>>>> + * See the comment in tdx_early_init().
>>>> + */
>>>> + if (!enc)
>>>> + return tdx_enc_status_changed(vaddr, numpages, enc);
>>>> + return true;
>>>> +}
>>>> +
>>>> void __init tdx_early_init(void)
>>>> {
>>>> u64 cc_mask;
>>>> @@ -867,9 +891,35 @@ void __init tdx_early_init(void)
>>>> */
>>>> physical_mask &= cc_mask - 1;
>>>> - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
>>>> - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>>>
>>> I think you don't need to change the order here.
>>
>> I wanted to emphasise that the comment is for _prepare/_finish callbacks
>> and I hoped re-order would help with this.
>>
>>>> - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
>>>> + /*
>>>> + * Touching privately mapped GPA that is not properly converted to
>>>> + * private with MapGPA and accepted leads to unrecoverable exit
>>>> + * to VMM.
>>>> + *
>>>> + * load_unaligned_zeropad() can touch memory that is not owned by
>>>> + * the caller, but just happened to next after the owned memory.
>>>> + * This load_unaligned_zeropad() behaviour makes it important when
>>>> + * kernel asks VMM to convert a GPA from shared to private or back.
>>>> + * Kernel must never have a page mapped into direct mapping (and
>>>> + * aliases) as private when the GPA is already converted to shared or
>>>> + * when GPA is not yet converted to private.
>>>> + *
>>>> + * guest.enc_status_change_prepare() called before adjusting direct
>>>> + * mapping and therefore it is responsible for converting the memory
>>>> + * to private.
>>>> + *
>>>> + * guest.enc_status_change_finish() called after adjusting direct
>>>> + * mapping and it converts the memory to shared.
>>>> + *
>>>> + * It is okay to have a shared mapping of memory that is not converted
>>>> + * properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
>>>> + * stepping on it.
>>>> + */
>>>> + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
>>>> + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
>>>> +
>>>> + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
>>>> + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
>>>> pr_info("Guest detected\n");
>>>> }
>>>
>>> --
>>> Sathyanarayanan Kuppuswamy
>>> Linux Kernel Developer
>>
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> enc_status_change_finish_noop() defined as always-fail now which
> doesn't make sense for noop.
>
> The change doesn't have user-visible effect because it only gets
> called if the platform has CC_ATTR_MEM_ENCRYPT. All platforms with
> the attribute override the callback with own implementation.
>
Reviewed-by: Kuppuswamy Sathyanarayanan <[email protected]>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> ---
> arch/x86/kernel/x86_init.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
> index f230d4d7d8eb..64664311ac2b 100644
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -131,7 +131,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
> static void default_nmi_init(void) { };
>
> static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
> -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
> +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
> static bool enc_tlb_flush_required_noop(bool enc) { return false; }
> static bool enc_cache_flush_required_noop(void) { return false; }
> static bool is_private_mmio_noop(u64 addr) {return false; }
--
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
From: Sathyanarayanan Kuppuswamy <[email protected]>
Sent: Tuesday, May 30, 2023 6:22 AM
>
> Hi,
>
> On 5/30/23 5:57 AM, Tom Lendacky wrote:
> > On 5/29/23 19:57, Kirill A. Shutemov wrote:
> >> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy wrote:
> >>>
> >>>
> >>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> >>>> Touching privately mapped GPA that is not properly converted to private
> >>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
> >>>>
> >>>> load_unaligned_zeropad() can touch memory that is not owned by the
> >>>> caller, but just happened to next after the owned memory.
> >>>
> >>> /s/to/to be ?
> >>
> >> Yep, my bad.
> >>
> >>>> This load_unaligned_zeropad() behaviour makes it important when kernel
> >>>> asks VMM to convert a GPA from shared to private or back. Kernel must
> >>>> never have a page mapped into direct mapping (and aliases) as private
> >>>> when the GPA is already converted to shared or when GPA is not yet
> >>>> converted to private.
> >>>
> >>> I am wondering whether this issue exist in the AMD code?
> >>>
> >>> IMO, you can add some info on the window in set_memory_encrypted()
> >>> where this race exists.
> >>
> >> I don't think AMD affected by load_unaligned_zeropad() the same way as
> >> Intel does. But I'm not sure.
> >>
> >> Tom, do you have any comments?
> >
> > Right, shouldn't be an issue for SNP.
>
> Thanks for confirming.
>
Tom -- For my education, could you elaborate on why this problem can't
occur in an SEV-SNP guest? There's still a window where the direct map
PTE and the RMP as maintained by the hypervisor are out-of-sync. If
load_unaligned_zeropad() does a read using the direct map PTE during
this out-of-sync window, isn't that going to trap to the hypervisor? How
is the scenario is handled from there to provide the zeros to
load_unaligned_zeropad()? I need to make sure Hyper-V is doing whatever
is needed. :-)
Thanks,
Michael
On 5/31/23 15:00, Michael Kelley (LINUX) wrote:
> From: Sathyanarayanan Kuppuswamy <[email protected]>
> Sent: Tuesday, May 30, 2023 6:22 AM
>>
>> Hi,
>>
>> On 5/30/23 5:57 AM, Tom Lendacky wrote:
>>> On 5/29/23 19:57, Kirill A. Shutemov wrote:
>>>> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy wrote:
>>>>>
>>>>>
>>>>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
>>>>>> Touching privately mapped GPA that is not properly converted to private
>>>>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
>>>>>>
>>>>>> load_unaligned_zeropad() can touch memory that is not owned by the
>>>>>> caller, but just happened to next after the owned memory.
>>>>>
>>>>> /s/to/to be ?
>>>>
>>>> Yep, my bad.
>>>>
>>>>>> This load_unaligned_zeropad() behaviour makes it important when kernel
>>>>>> asks VMM to convert a GPA from shared to private or back. Kernel must
>>>>>> never have a page mapped into direct mapping (and aliases) as private
>>>>>> when the GPA is already converted to shared or when GPA is not yet
>>>>>> converted to private.
>>>>>
>>>>> I am wondering whether this issue exist in the AMD code?
>>>>>
>>>>> IMO, you can add some info on the window in set_memory_encrypted()
>>>>> where this race exists.
>>>>
>>>> I don't think AMD affected by load_unaligned_zeropad() the same way as
>>>> Intel does. But I'm not sure.
>>>>
>>>> Tom, do you have any comments?
>>>
>>> Right, shouldn't be an issue for SNP.
>>
>> Thanks for confirming.
>>
>
> Tom -- For my education, could you elaborate on why this problem can't
> occur in an SEV-SNP guest? There's still a window where the direct map
> PTE and the RMP as maintained by the hypervisor are out-of-sync. If
> load_unaligned_zeropad() does a read using the direct map PTE during
> this out-of-sync window, isn't that going to trap to the hypervisor? How
> is the scenario is handled from there to provide the zeros to
> load_unaligned_zeropad()? I need to make sure Hyper-V is doing whatever
> is needed. :-)
Ah, I think I misunderstood this when it was being talked about. The issue
SNP would have would be between setting the c-bit but before the PVALIDATE
is issued. Prior to the RMP being updated, referencing the page will
generate an #NPF and automatically change the RMP over to private (in
KVM). However, after the guest is resumed, the page will not have been
validated resulting in a #VC with error code 0x404 being generated,
causing the guest to terminate itself.
I suppose, when a 0x404 error code is encountered by the #VC handler, it
could call search_exception_tables() and call ex_handler_zeropad() for the
EX_TYPE_ZEROPAD type (ex_handler_zeropad is currently static, though).
Thanks,
Tom
>
> Thanks,
>
> Michael
>
>
From: Tom Lendacky <[email protected]> Sent: Thursday, June 1, 2023 11:19 AM
>
> On 5/31/23 15:00, Michael Kelley (LINUX) wrote:
> > From: Sathyanarayanan Kuppuswamy
> <[email protected]>
> > Sent: Tuesday, May 30, 2023 6:22 AM
> >>
> >> Hi,
> >>
> >> On 5/30/23 5:57 AM, Tom Lendacky wrote:
> >>> On 5/29/23 19:57, Kirill A. Shutemov wrote:
> >>>> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy
> wrote:
> >>>>>
> >>>>>
> >>>>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
> >>>>>> Touching privately mapped GPA that is not properly converted to private
> >>>>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
> >>>>>>
> >>>>>> load_unaligned_zeropad() can touch memory that is not owned by the
> >>>>>> caller, but just happened to next after the owned memory.
> >>>>>
> >>>>> /s/to/to be ?
> >>>>
> >>>> Yep, my bad.
> >>>>
> >>>>>> This load_unaligned_zeropad() behaviour makes it important when kernel
> >>>>>> asks VMM to convert a GPA from shared to private or back. Kernel must
> >>>>>> never have a page mapped into direct mapping (and aliases) as private
> >>>>>> when the GPA is already converted to shared or when GPA is not yet
> >>>>>> converted to private.
> >>>>>
> >>>>> I am wondering whether this issue exist in the AMD code?
> >>>>>
> >>>>> IMO, you can add some info on the window in set_memory_encrypted()
> >>>>> where this race exists.
> >>>>
> >>>> I don't think AMD affected by load_unaligned_zeropad() the same way as
> >>>> Intel does. But I'm not sure.
> >>>>
> >>>> Tom, do you have any comments?
> >>>
> >>> Right, shouldn't be an issue for SNP.
> >>
> >> Thanks for confirming.
> >>
> >
> > Tom -- For my education, could you elaborate on why this problem can't
> > occur in an SEV-SNP guest? There's still a window where the direct map
> > PTE and the RMP as maintained by the hypervisor are out-of-sync. If
> > load_unaligned_zeropad() does a read using the direct map PTE during
> > this out-of-sync window, isn't that going to trap to the hypervisor? How
> > is the scenario is handled from there to provide the zeros to
> > load_unaligned_zeropad()? I need to make sure Hyper-V is doing whatever
> > is needed. :-)
>
> Ah, I think I misunderstood this when it was being talked about. The issue
> SNP would have would be between setting the c-bit but before the PVALIDATE
> is issued. Prior to the RMP being updated, referencing the page will
> generate an #NPF and automatically change the RMP over to private (in
> KVM). However, after the guest is resumed, the page will not have been
> validated resulting in a #VC with error code 0x404 being generated,
> causing the guest to terminate itself.
>
> I suppose, when a 0x404 error code is encountered by the #VC handler, it
> could call search_exception_tables() and call ex_handler_zeropad() for the
> EX_TYPE_ZEROPAD type (ex_handler_zeropad is currently static, though).
>
Tom -- Does the above sequence *depend* on the hypervisor doing anything
to make it work? I'm not clear on why KVM would automatically change the
page over to private. If there's a dependency on the hypervisor doing
something, then it seems like we'll need to standardize that "something"
across hypervisors, lest we end up with per-hypervisor code in Linux to handle
this scenario. And running SEV-SNP with multiple VMPLs probably makes it
even more complicated.
Kirill -- Same question about TDX. Does making load_unaligned_zeropad()
work in a TDX VM depend on the hypervisor doing anything? Or is the
behavior seen by the guest dependent only on architected behavior of
the TDX processor?
Looking at this problem from a slightly higher level, and thinking out loud
a bit, load_unaligned_zeropad() functionality is provided only for certain
architectures: x86/64, arm, arm64, and PowerPC 64 (little endian). There are
fallbacks for architectures that don't support it. With two minor tweaks to
Kconfig files, I've built x86 with load_unaligned_zeropad() disabled. Maybe
with today's processors the performance benefits are past their prime,
and running with it disabled in CoCo VMs is the better solution. Does
anyone have a sense of whether the perf impact would be measureable?
If doing the load_unaligned_zeropad() enable/disable at build time is too
limiting, maybe it could be runtime based on whether page private/shared
state is being enforced. I haven't looked at the details.
Thoughts?
Michael
On 6/2/23 11:11, Michael Kelley (LINUX) wrote:
> From: Tom Lendacky <[email protected]> Sent: Thursday, June 1, 2023 11:19 AM
>>
>> On 5/31/23 15:00, Michael Kelley (LINUX) wrote:
>>> From: Sathyanarayanan Kuppuswamy
>> <[email protected]>
>>> Sent: Tuesday, May 30, 2023 6:22 AM
>>>>
>>>> Hi,
>>>>
>>>> On 5/30/23 5:57 AM, Tom Lendacky wrote:
>>>>> On 5/29/23 19:57, Kirill A. Shutemov wrote:
>>>>>> On Fri, May 26, 2023 at 03:10:56PM -0700, Sathyanarayanan Kuppuswamy
>> wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 5/26/23 5:02 AM, Kirill A. Shutemov wrote:
>>>>>>>> Touching privately mapped GPA that is not properly converted to private
>>>>>>>> with MapGPA and accepted leads to unrecoverable exit to VMM.
>>>>>>>>
>>>>>>>> load_unaligned_zeropad() can touch memory that is not owned by the
>>>>>>>> caller, but just happened to next after the owned memory.
>>>>>>>
>>>>>>> /s/to/to be ?
>>>>>>
>>>>>> Yep, my bad.
>>>>>>
>>>>>>>> This load_unaligned_zeropad() behaviour makes it important when kernel
>>>>>>>> asks VMM to convert a GPA from shared to private or back. Kernel must
>>>>>>>> never have a page mapped into direct mapping (and aliases) as private
>>>>>>>> when the GPA is already converted to shared or when GPA is not yet
>>>>>>>> converted to private.
>>>>>>>
>>>>>>> I am wondering whether this issue exist in the AMD code?
>>>>>>>
>>>>>>> IMO, you can add some info on the window in set_memory_encrypted()
>>>>>>> where this race exists.
>>>>>>
>>>>>> I don't think AMD affected by load_unaligned_zeropad() the same way as
>>>>>> Intel does. But I'm not sure.
>>>>>>
>>>>>> Tom, do you have any comments?
>>>>>
>>>>> Right, shouldn't be an issue for SNP.
>>>>
>>>> Thanks for confirming.
>>>>
>>>
>>> Tom -- For my education, could you elaborate on why this problem can't
>>> occur in an SEV-SNP guest? There's still a window where the direct map
>>> PTE and the RMP as maintained by the hypervisor are out-of-sync. If
>>> load_unaligned_zeropad() does a read using the direct map PTE during
>>> this out-of-sync window, isn't that going to trap to the hypervisor? How
>>> is the scenario is handled from there to provide the zeros to
>>> load_unaligned_zeropad()? I need to make sure Hyper-V is doing whatever
>>> is needed. :-)
>>
>> Ah, I think I misunderstood this when it was being talked about. The issue
>> SNP would have would be between setting the c-bit but before the PVALIDATE
>> is issued. Prior to the RMP being updated, referencing the page will
>> generate an #NPF and automatically change the RMP over to private (in
>> KVM). However, after the guest is resumed, the page will not have been
>> validated resulting in a #VC with error code 0x404 being generated,
>> causing the guest to terminate itself.
>>
>> I suppose, when a 0x404 error code is encountered by the #VC handler, it
>> could call search_exception_tables() and call ex_handler_zeropad() for the
>> EX_TYPE_ZEROPAD type (ex_handler_zeropad is currently static, though).
>>
>
> Tom -- Does the above sequence *depend* on the hypervisor doing anything
> to make it work? I'm not clear on why KVM would automatically change the
> page over to private. If there's a dependency on the hypervisor doing
> something, then it seems like we'll need to standardize that "something"
> across hypervisors, lest we end up with per-hypervisor code in Linux to handle
> this scenario. And running SEV-SNP with multiple VMPLs probably makes it
> even more complicated.
No, it doesn't depend on the hypervisor doing anything. If the RMP hasn't
been updated, then a #VMEXIT(NPF) will be triggered (see APM vol 2, Table
15-39). The hypervisor is free to do what it wants with that, e.g. just
resume the guest (and immediately take another #VMEXIT(NPF), possibly).
Since it is a different thread/vCPU trying to access the memory than is
changing the state of the memory, eventually the guest kernel thread that
is changing the state of the memory will issue the page state change to
the hypervisor and the other thread can continue.
Thanks,
Tom
>
> Kirill -- Same question about TDX. Does making load_unaligned_zeropad()
> work in a TDX VM depend on the hypervisor doing anything? Or is the
> behavior seen by the guest dependent only on architected behavior of
> the TDX processor?
>
> Looking at this problem from a slightly higher level, and thinking out loud
> a bit, load_unaligned_zeropad() functionality is provided only for certain
> architectures: x86/64, arm, arm64, and PowerPC 64 (little endian). There are
> fallbacks for architectures that don't support it. With two minor tweaks to
> Kconfig files, I've built x86 with load_unaligned_zeropad() disabled. Maybe
> with today's processors the performance benefits are past their prime,
> and running with it disabled in CoCo VMs is the better solution. Does
> anyone have a sense of whether the perf impact would be measureable?
>
> If doing the load_unaligned_zeropad() enable/disable at build time is too
> limiting, maybe it could be runtime based on whether page private/shared
> state is being enforced. I haven't looked at the details.
>
> Thoughts?
>
> Michael
On 6/2/23 09:11, Michael Kelley (LINUX) wrote:
> Tom -- Does the above sequence *depend* on the hypervisor doing anything
> to make it work? I'm not clear on why KVM would automatically change the
> page over to private. If there's a dependency on the hypervisor doing
> something, then it seems like we'll need to standardize that "something"
> across hypervisors, lest we end up with per-hypervisor code in Linux to handle
> this scenario. And running SEV-SNP with multiple VMPLs probably makes it
> even more complicated.
>
> Kirill -- Same question about TDX. Does making load_unaligned_zeropad()
> work in a TDX VM depend on the hypervisor doing anything? Or is the
> behavior seen by the guest dependent only on architected behavior of
> the TDX processor?
No, there's no active help from the hypervisor here.
Also, fwiw, the "architected behavior" here is really just the TDX
module policy and _arguably_ the hardware Secure-EPT controlled by the
TDX module.
On Fri, Jun 02, 2023 at 10:42:33AM -0700, Dave Hansen wrote:
> On 6/2/23 09:11, Michael Kelley (LINUX) wrote:
> > Tom -- Does the above sequence *depend* on the hypervisor doing anything
> > to make it work? I'm not clear on why KVM would automatically change the
> > page over to private. If there's a dependency on the hypervisor doing
> > something, then it seems like we'll need to standardize that "something"
> > across hypervisors, lest we end up with per-hypervisor code in Linux to handle
> > this scenario. And running SEV-SNP with multiple VMPLs probably makes it
> > even more complicated.
> >
> > Kirill -- Same question about TDX. Does making load_unaligned_zeropad()
> > work in a TDX VM depend on the hypervisor doing anything? Or is the
> > behavior seen by the guest dependent only on architected behavior of
> > the TDX processor?
>
> No, there's no active help from the hypervisor here.
>
> Also, fwiw, the "architected behavior" here is really just the TDX
> module policy and _arguably_ the hardware Secure-EPT controlled by the
> TDX module.
Right. There's nothing VMM can do to change behaviour here. VMM can remove
private page, but it will lead to VM termination on access to the page,
but VMM controls VM lifecycle anyway.
--
Kiryl Shutsemau / Kirill A. Shutemov
On 5/26/23 05:02, Kirill A. Shutemov wrote:
> Touching privately mapped GPA that is not properly converted to private
> with MapGPA and accepted leads to unrecoverable exit to VMM.
>
> load_unaligned_zeropad() can touch memory that is not owned by the
> caller, but just happened to next after the owned memory.
> This load_unaligned_zeropad() behaviour makes it important when kernel
> asks VMM to convert a GPA from shared to private or back. Kernel must
> never have a page mapped into direct mapping (and aliases) as private
> when the GPA is already converted to shared or when GPA is not yet
> converted to private.
>
> guest.enc_status_change_prepare() called before adjusting direct mapping
> and therefore it is responsible for converting the memory to private.
>
> guest.enc_status_change_finish() called after adjusting direct mapping
> and it converts the memory to shared.
>
> It is okay to have a shared mapping of memory that is not converted
> properly. handle_mmio() knows how to deal with load_unaligned_zeropad()
> stepping on it.
Yeah, as other said, the changelog grammar here is a bit funky. Can you
fix it up and resend, please?