From: Ashish Kalra <[email protected]>
Handle cases where the RMP table placement by the BIOS is not 2MB
aligned, in which case the kexec kernel could try to allocate from
within that chunk, causing a fatal RMP fault.
Check whether the RMP table start and end physical addresses are
aligned to 2MB and, if they are not, mark the affected range as
reserved in all three e820 tables.
The callback to apply these RMP table fixups needs to be called
after the e820 tables have been set up/populated and before the e820
map is converted to the standard Linux memory resources, after which
the e820 map is no longer used and modifying it has no effect.
v3:
- Added new e820__range_update_table() helper and updated all
internal and external modifiers of e820_table_kexec and
e820_table_firmware to call this new helper function.
- Updated and restructured the commit message for patch 2/2 to
explain the issue in detail.
- Added, merged and cleaned up inline comments in patch 2/2.
- Added new __snp_e820_tables_fixup() function to avoid
duplication of code for fixing both RMP table start and end
physical ranges.
v2:
- Remove overriding e820__memory_setup_default() to invoke
snp_rmptable_e820_fixup() to apply the RMP table fixups.
- This callback snp_rmptable_e820_fixup() is now invoked
after e820__memory_setup() and before e820__reserve_resources().
- Expose e820 API interfaces to update e820_table_kexec and
e820_table_firmware externally.
- snp_rmptable_e820_fixup() now calls these new external API
interfaces to update e820_table_kexec and e820_table_firmware.
Ashish Kalra (2):
x86/e820: Expose API to update e820 kexec and firmware tables
externally.
x86/sev: Add callback to apply RMP table fixups for kexec
arch/x86/include/asm/e820/api.h | 2 ++
arch/x86/include/asm/sev.h | 2 ++
arch/x86/kernel/e820.c | 6 ++---
arch/x86/mm/mem_encrypt.c | 13 ++++++++++
arch/x86/virt/svm/sev.c | 42 +++++++++++++++++++++++++++++++++
5 files changed, 62 insertions(+), 3 deletions(-)
--
2.34.1
From: Ashish Kalra <[email protected]>
Export a new API helper function e820__range_update_table() to update both
e820_table_kexec and e820_table_firmware. Move all current users of
e820__range_update_kexec() to use this new helper function.
Signed-off-by: Ashish Kalra <[email protected]>
---
arch/x86/include/asm/e820/api.h | 2 ++
arch/x86/kernel/e820.c | 6 +++---
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index e8f58ddd06d9..eeb44e71aa66 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -18,6 +18,8 @@ extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
+
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
extern void e820__update_table_print(void);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6f1b379e3b38..872e133d2718 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -532,9 +532,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
return __e820__range_update(e820_table, start, size, old_type, new_type);
}
-static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
+u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
{
- return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
+ return __e820__range_update(t, start, size, old_type, new_type);
}
/* Remove a range of memory from the E820 table: */
@@ -806,7 +806,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = memblock_phys_alloc(size, align);
if (addr) {
- e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
--
2.34.1
From: Ashish Kalra <[email protected]>
Handle cases where the RMP table placement by the BIOS is not 2MB
aligned, in which case the kexec kernel could try to allocate from
within that chunk, causing a fatal RMP fault.
The kexec failure is illustrated below from the kernel logs:
[ 0.000000] SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff]
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable
[ 0.000000] BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x0000000000090000-0x000000000009ffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000005a14afff] usable
[ 0.000000] BIOS-e820: [mem 0x000000005a14b000-0x000000005a34afff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000005a34b000-0x0000000067acefff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000067acf000-0x000000006dfcefff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000006dfcf000-0x000000006edfefff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006edff000-0x000000006effefff] ACPI data
[ 0.000000] BIOS-e820: [mem 0x000000006efff000-0x000000006effffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000006f000000-0x000000006f00afff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x000000006f00b000-0x000000006fffffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000070000000-0x000000008fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000aa000000-0x00000000aaffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000c5000000-0x00000000c5ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000e0000000-0x00000000e0ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fd000000-0x00000000ffffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000407fcfffff] usable
[ 0.000000] BIOS-e820: [mem 0x000000407fd00000-0x000000407fffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved
[ 0.000000] BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable
As seen here in the e820 memory map, the end of the RMP table is not aligned to
2MB, and the remainder of that 2MB chunk is not reserved but is usable as RAM.
Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads its purgatory code and
boot_param, command line and other setup data into this RAM region as seen in the
kexec logs below, which leads to a fatal RMP fault during kexec boot.
[ 173.113085] Loaded purgatory at 0x807f1fa000
[ 173.113099] Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000
[ 173.113107] Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000
[ 173.113291] Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014
[ 173.113296] E820 memmap:
[ 173.113298] 0000000000000000-000000000008efff (1)
[ 173.113300] 000000000008f000-000000000008ffff (4)
[ 173.113302] 0000000000090000-000000000009ffff (1)
[ 173.113303] 0000000000100000-000000005a14afff (1)
[ 173.113305] 000000005a14b000-000000005a34afff (2)
[ 173.113306] 000000005a34b000-0000000067acefff (1)
[ 173.113308] 0000000067acf000-000000006dfcefff (2)
[ 173.113309] 000000006dfcf000-000000006edfefff (4)
[ 173.113311] 000000006edff000-000000006effefff (3)
[ 173.113312] 000000006efff000-000000006effffff (1)
[ 173.113314] 000000006f000000-000000006f00afff (4)
[ 173.113315] 000000006f00b000-000000006fffffff (1)
[ 173.113317] 0000000070000000-000000008fffffff (2)
[ 173.113318] 00000000aa000000-00000000aaffffff (2)
[ 173.113319] 00000000c5000000-00000000c5ffffff (2)
[ 173.113321] 00000000e0000000-00000000e0ffffff (2)
[ 173.113322] 00000000fd000000-00000000ffffffff (2)
[ 173.113324] 0000000100000000-000000407fcfffff (1)
[ 173.113325] 000000407fd00000-000000407fffffff (2)
[ 173.113327] 0000004080000000-0000007ffe7fffff (1)
[ 173.113328] 0000007ffe800000-000000807f0fffff (2)
[ 173.113330] 000000807f100000-000000807f1fefff (1)
[ 173.113331] 000000807f1ff000-000000807fffffff (2)
[ 173.690528] nr_segments = 4
[ 173.690533] segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000
[ 173.690546] segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000
[ 173.690552] segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000
[ 173.697994] segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000
[ 173.708672] kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0
Check whether the RMP table start and end physical addresses are
aligned to 2MB and, if they are not, mark the affected range as reserved
in all three e820 tables.
Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Signed-off-by: Ashish Kalra <[email protected]>
---
arch/x86/include/asm/sev.h | 2 ++
arch/x86/mm/mem_encrypt.c | 13 ++++++++++++
arch/x86/virt/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7f57382afee4..24300927a476 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
+void snp_rmptable_e820_fixup(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
+static inline void snp_rmptable_e820_fixup(void) {}
#endif
#endif
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6f3b3e028718..d88c942dd311 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -102,6 +102,19 @@ void __init mem_encrypt_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;
+ /*
+ * Invoke the callback to apply RMP table fixups. It needs to be called
+ * during setup_arch() after the e820 tables have been set up in
+ * e820__memory_setup(), and this function is the appropriate place
+ * to apply any memory encryption platform specific quirks.
+ * The callback to do RMP table fixups cannot be invoked from
+ * snp_init() as snp_init() is called from sme_enable() in
+ * startup_64(), which runs before setup_arch(), when the e820
+ * tables have not yet been set up.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ snp_rmptable_e820_fixup();
+
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index ab0e8448bb6e..1b4b99b26bec 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -163,6 +163,48 @@ bool snp_probe_rmptable_info(void)
return true;
}
+static void __init __snp_e820_tables_fixup(u64 pa)
+{
+ if (IS_ALIGNED(pa, PMD_SIZE))
+ return;
+
+ /*
+ * The RMP table start or end physical address passed in is
+ * not aligned to 2MB, so mark the 2MB chunk that contains it
+ * as reserved in all three e820 tables.
+ * The e820_table needs to be updated as it is converted to
+ * kernel memory resources and used by KEXEC_FILE_LOAD syscall
+ * to load kexec segments. The e820_table_firmware needs to be
+ * updated as it is exposed to sysfs and used by KEXEC_LOAD
+ * syscall to load kexec segments and e820_table_kexec needs
+ * to be updated as it is passed to the kexec-ed kernel.
+ */
+ pa = ALIGN_DOWN(pa, PMD_SIZE);
+ if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
+ pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
+ e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ }
+}
+
+/*
+ * Callback to apply RMP table fixups to the e820 tables. It must
+ * be called after e820__memory_setup(), once the e820 tables are
+ * set up/populated, and before e820__reserve_resources(), i.e.
+ * before the e820 map is converted to the standard Linux memory
+ * resources, after which the e820 map is no longer used and
+ * modifying it has no effect. Handles cases where the RMP table
+ * placement by the BIOS is not 2MB aligned, where the kexec kernel
+ * could try to allocate from within that chunk, causing a fatal
+ * RMP fault. Both the table start and end addresses are checked.
+ */
+void __init snp_rmptable_e820_fixup(void)
+{
+ __snp_e820_tables_fixup(probed_rmp_base);
+ __snp_e820_tables_fixup(probed_rmp_base + probed_rmp_size);
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
--
2.34.1