AMD SEV and Intel TDX guests allocate shared buffers for performing I/O.
This is done by allocating pages normally from the buddy allocator and
then converting them to shared using set_memory_decrypted().
On kexec, the second kernel is unaware of which memory has been
converted in this manner. It only sees E820_TYPE_RAM. Accessing shared
memory as private is fatal.
Therefore, the memory state must be reset to its original state before
starting the new kernel with kexec.
The process of converting shared memory back to private occurs in two
steps:
- enc_kexec_stop_conversion() stops new conversions.
- enc_kexec_unshare_mem() unshares all existing shared memory, reverting
it back to private.
Signed-off-by: Kirill A. Shutemov <[email protected]>
---
arch/x86/include/asm/x86_init.h | 2 ++
arch/x86/kernel/crash.c | 5 +++++
arch/x86/kernel/reboot.c | 12 ++++++++++++
arch/x86/kernel/x86_init.c | 4 ++++
4 files changed, 23 insertions(+)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c9503fe2d13a..3196ff20a29e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -154,6 +154,8 @@ struct x86_guest {
int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
+ void (*enc_kexec_stop_conversion)(bool crash);
+ void (*enc_kexec_unshare_mem)(void);
};
/**
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index b6b044356f1b..6585a5f2c2ba 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -107,6 +107,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+ x86_platform.guest.enc_kexec_stop_conversion(true);
+ x86_platform.guest.enc_kexec_unshare_mem();
+ }
+
cpu_emergency_disable_virtualization();
/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 830425e6d38e..0574d4ad6b41 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
#include <linux/delay.h>
#include <linux/objtool.h>
#include <linux/pgtable.h>
+#include <linux/kexec.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -716,6 +717,14 @@ static void native_machine_emergency_restart(void)
void native_machine_shutdown(void)
{
+ /*
+ * Call enc_kexec_stop_conversion() while all CPUs are still active and
+ * interrupts are enabled. This will allow all in-flight memory
+ * conversions to finish cleanly.
+ */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && kexec_in_progress)
+ x86_platform.guest.enc_kexec_stop_conversion(false);
+
/* Stop the cpus and apics */
#ifdef CONFIG_X86_IO_APIC
/*
@@ -752,6 +761,9 @@ void native_machine_shutdown(void)
#ifdef CONFIG_X86_64
x86_platform.iommu_shutdown();
#endif
+
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) && kexec_in_progress)
+ x86_platform.guest.enc_kexec_unshare_mem();
}
static void __machine_emergency_restart(int emergency)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index f0f54e109eb9..b95206ebc621 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -135,6 +135,8 @@ static int enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool
static int enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return 0; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
+static void enc_kexec_stop_conversion_noop(bool crash) {}
+static void enc_kexec_unshare_mem_noop(void) {}
static bool is_private_mmio_noop(u64 addr) {return false; }
struct x86_platform_ops x86_platform __ro_after_init = {
@@ -158,6 +160,8 @@ struct x86_platform_ops x86_platform __ro_after_init = {
.enc_status_change_finish = enc_status_change_finish_noop,
.enc_tlb_flush_required = enc_tlb_flush_required_noop,
.enc_cache_flush_required = enc_cache_flush_required_noop,
+ .enc_kexec_stop_conversion = enc_kexec_stop_conversion_noop,
+ .enc_kexec_unshare_mem = enc_kexec_unshare_mem_noop,
},
};
--
2.43.0
On 24.01.24 г. 14:55 ч., Kirill A. Shutemov wrote:
> AMD SEV and Intel TDX guests allocate shared buffers for performing I/O.
> This is done by allocating pages normally from the buddy allocator and
> then converting them to shared using set_memory_decrypted().
>
> On kexec, the second kernel is unaware of which memory has been
> converted in this manner. It only sees E820_TYPE_RAM. Accessing shared
> memory as private is fatal.
>
> Therefore, the memory state must be reset to its original state before
> starting the new kernel with kexec.
>
> The process of converting shared memory back to private occurs in two
> steps:
>
> - enc_kexec_stop_conversion() stops new conversions.
>
> - enc_kexec_unshare_mem() unshares all existing shared memory, reverting
> it back to private.
>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Nikolay Borisov <[email protected]>