This patch series addresses an issue when SME is active and the BSP
is attempting to check for and load microcode during load_ucode_bsp().
Since the initrd has not been encrypted (yet) and the virtual address
of the initrd treats the memory as encrypted, the CPIO archive parsing
fails to locate the microcode.
This series moves the encryption of the initrd into the early boot code
and encrypts it at the same time that the kernel is encrypted. Since
the initrd is now encrypted, the CPIO archive parsing succeeds in
properly locating the microcode.
The following patches are included in this fix:
- Cleanup register saving in arch/x86/mm/mem_encrypt_boot.S
- Reduce parameters and complexity for creating the SME PGD mappings
- Centralize the use of the PMD flags used in sme_encrypt_kernel() in
preparation for using PTE flags also.
- Prepare sme_encrypt_kernel() to handle PAGE aligned encryption, not
just 2MB large page aligned encryption.
- Encrypt the initrd in sme_encrypt_kernel() when the kernel is being
encrypted.
This patch series is based on tip/master.
---
Changes from v1:
- Additional patch to clean up the register saving performed in
arch/x86/mm/mem_encrypt_boot.S in prep for changes made in the
remainder of the patchset.
- Additional patch to reduce parameters and complexity for creating the
SME PGD mappings by introducing and using a structure for referencing
the PGD to populate, the pagetable allocation area, the virtual/physical
addresses being mapped and the pagetable flags to be used.
- Consolidate PMD/PTE mapping code to reduce duplication.
Tom Lendacky (5):
x86/mm: Cleanup register saving in mem_encrypt_boot.S
x86/mm: Use a struct to reduce parameters for SME PGD mapping
x86/mm: Centralize PMD flags in sme_encrypt_kernel()
x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
x86/mm: Encrypt the initrd earlier for BSP microcode update
arch/x86/include/asm/mem_encrypt.h | 4
arch/x86/kernel/head64.c | 4
arch/x86/kernel/setup.c | 10 -
arch/x86/mm/mem_encrypt.c | 355 ++++++++++++++++++++++++++----------
arch/x86/mm/mem_encrypt_boot.S | 80 ++++----
5 files changed, 309 insertions(+), 144 deletions(-)
--
Tom Lendacky
Clean up the use of push and pop, and the points at which registers are
saved, in the mem_encrypt_boot.S file in order to improve the readability
of the code.
Move parameter register saving into general purpose registers earlier
in the code and move all the pushes to the beginning of the function
with corresponding pops at the end.
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt_boot.S | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d5..de36884 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -103,20 +103,19 @@ ENTRY(__enc_copy)
orq $X86_CR4_PGE, %rdx
mov %rdx, %cr4
+ push %r15
+
+ movq %rcx, %r9 /* Save kernel length */
+ movq %rdi, %r10 /* Save encrypted kernel address */
+ movq %rsi, %r11 /* Save decrypted kernel address */
+
/* Set the PAT register PA5 entry to write-protect */
- push %rcx
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- push %rdx /* Save original PAT value */
+ mov %rdx, %r15 /* Save original PAT value */
andl $0xffff00ff, %edx /* Clear PA5 */
orl $0x00000500, %edx /* Set PA5 to WP */
wrmsr
- pop %rdx /* RDX contains original PAT value */
- pop %rcx
-
- movq %rcx, %r9 /* Save kernel length */
- movq %rdi, %r10 /* Save encrypted kernel address */
- movq %rsi, %r11 /* Save decrypted kernel address */
wbinvd /* Invalidate any cache entries */
@@ -138,12 +137,13 @@ ENTRY(__enc_copy)
jnz 1b /* Kernel length not zero? */
/* Restore PAT register */
- push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- pop %rdx /* Restore original PAT value */
+ mov %r15, %rdx /* Restore original PAT value */
wrmsr
+ pop %r15
+
ret
.L__enc_copy_end:
ENDPROC(__enc_copy)
In preparation for follow-on patches, combine the PGD mapping parameters
into a struct to reduce the number of function arguments and allow for
direct updating of the next pagetable mapping area pointer.
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 90 +++++++++++++++++++++++----------------------
1 file changed, 46 insertions(+), 44 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b134..5a20696 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,6 +464,14 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}
+struct sme_populate_pgd_data {
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ pmdval_t pmd_val;
+ unsigned long vaddr;
+};
+
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
unsigned long end)
{
@@ -486,15 +494,14 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
#define PUD_FLAGS _KERNPG_TABLE_NOENC
#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
- unsigned long vaddr, pmdval_t pmd_val)
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
pud_t *pud_p;
pmd_t *pmd_p;
- pgd_p = pgd_base + pgd_index(vaddr);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
if (native_pgd_val(*pgd_p)) {
if (IS_ENABLED(CONFIG_X86_5LEVEL))
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +511,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
pgd_t pgd;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p = pgtable_area;
+ p4d_p = ppd->pgtable_area;
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
- pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+ ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
} else {
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
}
@@ -520,44 +527,41 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
}
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p += p4d_index(vaddr);
+ p4d_p += p4d_index(ppd->vaddr);
if (native_p4d_val(*p4d_p)) {
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
} else {
p4d_t p4d;
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
native_set_p4d(p4d_p, p4d);
}
}
- pud_p += pud_index(vaddr);
+ pud_p += pud_index(ppd->vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- goto out;
+ return;
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
pud_t pud;
- pmd_p = pgtable_area;
+ pmd_p = ppd->pgtable_area;
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
- pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+ ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
native_set_pud(pud_p, pud);
}
- pmd_p += pmd_index(vaddr);
+ pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(pmd_val));
-
-out:
- return pgtable_area;
+ native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val));
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
@@ -615,11 +619,10 @@ void __init sme_encrypt_kernel(void)
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
+ struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
unsigned long paddr, pmd_flags;
unsigned long decrypted_base;
- void *pgtable_area;
- pgd_t *pgd;
if (!sme_active())
return;
@@ -683,18 +686,18 @@ void __init sme_encrypt_kernel(void)
* pagetables and when the new encrypted and decrypted kernel
* mappings are populated.
*/
- pgtable_area = (void *)execute_end;
+ ppd.pgtable_area = (void *)execute_end;
/*
* Make sure the current pagetable structure has entries for
* addressing the workarea.
*/
- pgd = (pgd_t *)native_read_cr3_pa();
+ ppd.pgd = (pgd_t *)native_read_cr3_pa();
paddr = workarea_start;
while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
+ ppd.pmd_val = paddr + PMD_FLAGS;
+ ppd.vaddr = paddr;
+ sme_populate_pgd_large(&ppd);
paddr += PMD_PAGE_SIZE;
}
@@ -708,17 +711,17 @@ void __init sme_encrypt_kernel(void)
* populated with new PUDs and PMDs as the encrypted and decrypted
* kernel mappings are created.
*/
- pgd = pgtable_area;
- memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
- pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
+ ppd.pgd = ppd.pgtable_area;
+ memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
/* Add encrypted kernel (identity) mappings */
pmd_flags = PMD_FLAGS | _PAGE_ENC;
paddr = kernel_start;
while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + pmd_flags);
+ ppd.pmd_val = paddr + pmd_flags;
+ ppd.vaddr = paddr;
+ sme_populate_pgd_large(&ppd);
paddr += PMD_PAGE_SIZE;
}
@@ -736,9 +739,9 @@ void __init sme_encrypt_kernel(void)
pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
paddr = kernel_start;
while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + pmd_flags);
+ ppd.pmd_val = paddr + pmd_flags;
+ ppd.vaddr = paddr + decrypted_base;
+ sme_populate_pgd_large(&ppd);
paddr += PMD_PAGE_SIZE;
}
@@ -746,30 +749,29 @@ void __init sme_encrypt_kernel(void)
/* Add decrypted workarea mappings to both kernel mappings */
paddr = workarea_start;
while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
+ ppd.pmd_val = paddr + PMD_FLAGS;
+ ppd.vaddr = paddr;
+ sme_populate_pgd_large(&ppd);
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + PMD_FLAGS);
+ ppd.vaddr = paddr + decrypted_base;
+ sme_populate_pgd_large(&ppd);
paddr += PMD_PAGE_SIZE;
}
/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
- kernel_len, workarea_start, (unsigned long)pgd);
+ kernel_len, workarea_start, (unsigned long)ppd.pgd);
/*
* At this point we are running encrypted. Remove the mappings for
* the decrypted areas - all that is needed for this is to remove
* the PGD entry/entries.
*/
- sme_clear_pgd(pgd, kernel_start + decrypted_base,
+ sme_clear_pgd(ppd.pgd, kernel_start + decrypted_base,
kernel_end + decrypted_base);
- sme_clear_pgd(pgd, workarea_start + decrypted_base,
+ sme_clear_pgd(ppd.pgd, workarea_start + decrypted_base,
workarea_end + decrypted_base);
/* Flush the TLB - no globals so cr3 is enough */
In preparation for encrypting more than just the kernel during early
boot processing, centralize the use of the PMD flag settings based
on the type of mapping desired. When 4KB aligned encryption is added,
this will allow either PTE flags or large page PMD flags to be used
without requiring the caller to adjust.
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 131 ++++++++++++++++++++++++++-------------------
1 file changed, 77 insertions(+), 54 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 5a20696..9b180f8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -468,31 +468,40 @@ struct sme_populate_pgd_data {
void *pgtable_area;
pgd_t *pgd;
- pmdval_t pmd_val;
+ pmdval_t pmd_flags;
+ unsigned long paddr;
+
unsigned long vaddr;
+ unsigned long vaddr_end;
};
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
- unsigned long end)
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
- pgd_start = start & PGDIR_MASK;
- pgd_end = end & PGDIR_MASK;
+ pgd_start = ppd->vaddr & PGDIR_MASK;
+ pgd_end = ppd->vaddr_end & PGDIR_MASK;
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
pgd_size *= sizeof(pgd_t);
- pgd_p = pgd_base + pgd_index(start);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
memset(pgd_p, 0, pgd_size);
}
-#define PGD_FLAGS _KERNPG_TABLE_NOENC
-#define P4D_FLAGS _KERNPG_TABLE_NOENC
-#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
@@ -561,7 +570,36 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val));
+ native_set_pmd(pmd_p,
+ native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags)
+{
+ ppd->pmd_flags = pmd_flags;
+
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+
+ ppd->vaddr += PMD_PAGE_SIZE;
+ ppd->paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP);
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
@@ -621,7 +659,6 @@ void __init sme_encrypt_kernel(void)
unsigned long kernel_start, kernel_end, kernel_len;
struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
- unsigned long paddr, pmd_flags;
unsigned long decrypted_base;
if (!sme_active())
@@ -693,14 +730,10 @@ void __init sme_encrypt_kernel(void)
* addressing the workarea.
*/
ppd.pgd = (pgd_t *)native_read_cr3_pa();
- paddr = workarea_start;
- while (paddr < workarea_end) {
- ppd.pmd_val = paddr + PMD_FLAGS;
- ppd.vaddr = paddr;
- sme_populate_pgd_large(&ppd);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
@@ -715,17 +748,6 @@ void __init sme_encrypt_kernel(void)
memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
- /* Add encrypted kernel (identity) mappings */
- pmd_flags = PMD_FLAGS | _PAGE_ENC;
- paddr = kernel_start;
- while (paddr < kernel_end) {
- ppd.pmd_val = paddr + pmd_flags;
- ppd.vaddr = paddr;
- sme_populate_pgd_large(&ppd);
-
- paddr += PMD_PAGE_SIZE;
- }
-
/*
* A different PGD index/entry must be used to get different
* pagetable entries for the decrypted mapping. Choose the next
@@ -735,29 +757,28 @@ void __init sme_encrypt_kernel(void)
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
decrypted_base <<= PGDIR_SHIFT;
+ /* Add encrypted kernel (identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start;
+ ppd.vaddr_end = kernel_end;
+ sme_map_range_encrypted(&ppd);
+
/* Add decrypted, write-protected kernel (non-identity) mappings */
- pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
- paddr = kernel_start;
- while (paddr < kernel_end) {
- ppd.pmd_val = paddr + pmd_flags;
- ppd.vaddr = paddr + decrypted_base;
- sme_populate_pgd_large(&ppd);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
/* Add decrypted workarea mappings to both kernel mappings */
- paddr = workarea_start;
- while (paddr < workarea_end) {
- ppd.pmd_val = paddr + PMD_FLAGS;
- ppd.vaddr = paddr;
- sme_populate_pgd_large(&ppd);
-
- ppd.vaddr = paddr + decrypted_base;
- sme_populate_pgd_large(&ppd);
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(&ppd);
/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
@@ -768,11 +789,13 @@ void __init sme_encrypt_kernel(void)
* the decrypted areas - all that is needed for this is to remove
* the PGD entry/entries.
*/
- sme_clear_pgd(ppd.pgd, kernel_start + decrypted_base,
- kernel_end + decrypted_base);
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_clear_pgd(&ppd);
- sme_clear_pgd(ppd.pgd, workarea_start + decrypted_base,
- workarea_end + decrypted_base);
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_clear_pgd(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
Currently the BSP microcode update code examines the initrd very early
in the boot process. If SME is active, the initrd is treated as being
encrypted but it has not been encrypted (in place) yet. Update the
early boot code that encrypts the kernel to also encrypt the initrd so
that early BSP microcode updates work.
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/include/asm/mem_encrypt.h | 4 +-
arch/x86/kernel/head64.c | 4 +-
arch/x86/kernel/setup.c | 10 -----
arch/x86/mm/mem_encrypt.c | 66 ++++++++++++++++++++++++++++++++----
arch/x86/mm/mem_encrypt_boot.S | 46 +++++++++++++------------
5 files changed, 85 insertions(+), 45 deletions(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4..22c5f3e 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_early_decrypt(resource_size_t paddr,
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757..7ba5d81 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();
- /* Encrypt the kernel (if SME is active) */
- sme_encrypt_kernel();
+ /* Encrypt the kernel and related (if SME is active) */
+ sme_encrypt_kernel(bp);
/*
* Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c8e0447..a9d8b41 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -363,16 +363,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
- /*
- * If SME is active, this memory will be marked encrypted by the
- * kernel when it is accessed (including relocation). However, the
- * ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it. For SEV the
- * ramdisk will already be encrypted, so only do this for SME.
- */
- if (sme_active())
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ea61170..94015ed 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -741,11 +741,12 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
unsigned long decrypted_base;
@@ -754,14 +755,15 @@ void __init sme_encrypt_kernel(void)
return;
/*
- * Prepare for encrypting the kernel by building new pagetables with
- * the necessary attributes needed to encrypt the kernel in place.
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
*
* One range of virtual addresses will map the memory occupied
- * by the kernel as encrypted.
+ * by the kernel and initrd as encrypted.
*
* Another range of virtual addresses will map the memory occupied
- * by the kernel as decrypted and write-protected.
+ * by the kernel and initrd as decrypted and write-protected.
*
* The use of write-protect attribute will prevent any of the
* memory from being cached.
@@ -772,6 +774,20 @@ void __init sme_encrypt_kernel(void)
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
kernel_len = kernel_end - kernel_start;
+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
/* Set the encryption workarea to be immediately after the kernel */
workarea_start = kernel_end;
@@ -794,6 +810,8 @@ void __init sme_encrypt_kernel(void)
*/
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
/* PUDs and PMDs needed in the current pagetables for the workarea */
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
@@ -832,9 +850,9 @@ void __init sme_encrypt_kernel(void)
/*
* A new pagetable structure is being built to allow for the kernel
- * to be encrypted. It starts with an empty PGD that will then be
- * populated with new PUDs and PMDs as the encrypted and decrypted
- * kernel mappings are created.
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
*/
ppd.pgd = ppd.pgtable_area;
memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
@@ -847,6 +865,12 @@ void __init sme_encrypt_kernel(void)
* the base of the mapping.
*/
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
decrypted_base <<= PGDIR_SHIFT;
/* Add encrypted kernel (identity) mappings */
@@ -861,6 +885,21 @@ void __init sme_encrypt_kernel(void)
ppd.vaddr_end = kernel_end + decrypted_base;
sme_map_range_decrypted_wp(&ppd);
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start;
+ ppd.vaddr_end = initrd_end;
+ sme_map_range_encrypted(&ppd);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+ }
+
/* Add decrypted workarea mappings to both kernel mappings */
ppd.paddr = workarea_start;
ppd.vaddr = workarea_start;
@@ -876,6 +915,11 @@ void __init sme_encrypt_kernel(void)
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
kernel_len, workarea_start, (unsigned long)ppd.pgd);
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)ppd.pgd);
+
/*
* At this point we are running encrypted. Remove the mappings for
* the decrypted areas - all that is needed for this is to remove
@@ -885,6 +929,12 @@ void __init sme_encrypt_kernel(void)
ppd.vaddr_end = kernel_end + decrypted_base;
sme_clear_pgd(&ppd);
+ if (initrd_len) {
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+ }
+
ppd.vaddr = workarea_start + decrypted_base;
ppd.vaddr_end = workarea_end + decrypted_base;
sme_clear_pgd(&ppd);
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 23a8a9e..01f682c 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
/*
* Entry parameters:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
- * RDX - length of kernel
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
+ * RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
push %r12
- movq %rdi, %r10 /* Encrypted kernel */
- movq %rsi, %r11 /* Decrypted kernel */
- movq %rdx, %r12 /* Kernel length */
+ movq %rdi, %r10 /* Encrypted area */
+ movq %rsi, %r11 /* Decrypted area */
+ movq %rdx, %r12 /* Area length */
/* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb
/* Setup registers for call */
- movq %r10, %rdi /* Encrypted kernel */
- movq %r11, %rsi /* Decrypted kernel */
+ movq %r10, %rdi /* Encrypted area */
+ movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */
- movq %r12, %rcx /* Kernel length */
+ movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
ENTRY(__enc_copy)
/*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution.
*
* On entry the registers must be:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption
- * RCX - length of kernel
+ * RCX - length of area
* R8 - intermediate copy buffer
*
* RAX - points to this routine
*
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
*/
/* Enable the new page tables */
mov %rdx, %cr3
@@ -106,9 +106,9 @@ ENTRY(__enc_copy)
push %r15
push %r12
- movq %rcx, %r9 /* Save kernel length */
- movq %rdi, %r10 /* Save encrypted kernel address */
- movq %rsi, %r11 /* Save decrypted kernel address */
+ movq %rcx, %r9 /* Save area length */
+ movq %rdi, %r10 /* Save encrypted area address */
+ movq %rsi, %r11 /* Save decrypted area address */
/* Set the PAT register PA5 entry to write-protect */
movl $MSR_IA32_CR_PAT, %ecx
@@ -128,13 +128,13 @@ ENTRY(__enc_copy)
movq %r9, %r12
2:
- movq %r11, %rsi /* Source - decrypted kernel */
+ movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */
movq %r12, %rcx
rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */
- movq %r10, %rdi /* Dest - encrypted kernel */
+ movq %r10, %rdi /* Dest - encrypted area */
movq %r12, %rcx
rep movsb
In preparation for encrypting more than just the kernel, the encryption
support in sme_encrypt_kernel() needs to support 4KB page aligned
encryption instead of just 2MB large page aligned encryption.
Update the routines that populate the PGD to support non-2MB aligned
addresses. This is done by creating PTE page tables for the start
and end portion of the address range that fall outside of the 2MB
alignment. This results in, at most, two extra pages to hold the
PTE entries for each mapping of a range.
Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 124 +++++++++++++++++++++++++++++++++++-----
arch/x86/mm/mem_encrypt_boot.S | 20 +++++-
2 files changed, 122 insertions(+), 22 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 9b180f8..ea61170 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -469,6 +469,7 @@ struct sme_populate_pgd_data {
pgd_t *pgd;
pmdval_t pmd_flags;
+ pteval_t pte_flags;
unsigned long paddr;
unsigned long vaddr;
@@ -494,6 +495,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
#define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
@@ -503,7 +505,15 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
@@ -554,7 +564,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
pud_p += pud_index(ppd->vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- return;
+ return NULL;
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
@@ -568,17 +578,57 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
native_set_pud(pud_p, pud);
}
+ return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+ pmd_t *pmd_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
native_set_pmd(pmd_p,
native_make_pmd(ppd->paddr | ppd->pmd_flags));
}
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
- pmdval_t pmd_flags)
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
- ppd->pmd_flags = pmd_flags;
+ pmd_t *pmd_p;
+ pte_t *pte_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(ppd->vaddr);
+ if (native_pmd_val(*pmd_p)) {
+ if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+ return;
+
+ pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pmd_t pmd;
+ pte_p = ppd->pgtable_area;
+ memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+ pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+ native_set_pmd(pmd_p, pmd);
+ }
+
+ pte_p += pte_index(ppd->vaddr);
+ if (!native_pte_val(*pte_p))
+ native_set_pte(pte_p,
+ native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
@@ -587,33 +637,71 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
}
}
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+
+ ppd->vaddr += PAGE_SIZE;
+ ppd->paddr += PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+{
+ unsigned long vaddr_end;
+
+ ppd->pmd_flags = pmd_flags;
+ ppd->pte_flags = pte_flags;
+
+ /* Save original end value since we modify the struct value */
+ vaddr_end = ppd->vaddr_end;
+
+ /* If start is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+ __sme_map_range_pte(ppd);
+
+ /* Create PMD entries */
+ ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+ __sme_map_range_pmd(ppd);
+
+ /* If end is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = vaddr_end;
+ __sme_map_range_pte(ppd);
+}
+
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
- __sme_map_range(ppd, PMD_FLAGS_ENC);
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
- __sme_map_range(ppd, PMD_FLAGS_DEC);
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
- __sme_map_range(ppd, PMD_FLAGS_DEC_WP);
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
{
- unsigned long p4d_size, pud_size, pmd_size;
+ unsigned long p4d_size, pud_size, pmd_size, pte_size;
unsigned long total;
/*
* Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. That mappings will be covered by 2MB
- * PMD entries so we can conservatively calculate the required
+ * entries that are needed. Those mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
* number of P4D, PUD and PMD structures needed to perform the
- * mappings. Incrementing the count for each covers the case where
- * the addresses cross entries.
+ * mappings. For mappings that are not 2MB aligned, PTE mappings
+ * are needed for the start and end portions of the address range
+ * that fall outside of the 2MB alignment. This results in, at most,
+ * two extra pages to hold PTE entries for each range that is mapped.
+ * Incrementing the count for each covers the case where the addresses
+ * cross entries.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -627,8 +715,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
}
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+ pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
- total = p4d_size + pud_size + pmd_size;
+ total = p4d_size + pud_size + pmd_size + pte_size;
/*
* Now calculate the added pagetable structures needed to populate
@@ -711,10 +800,13 @@ void __init sme_encrypt_kernel(void)
/*
* The total workarea includes the executable encryption area and
- * the pagetable area.
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned; align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
*/
workarea_len = execute_len + pgtable_area_len;
- workarea_end = workarea_start + workarea_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
/*
* Set the address to the start of where newly created pagetable
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index de36884..23a8a9e 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -104,6 +104,7 @@ ENTRY(__enc_copy)
mov %rdx, %cr4
push %r15
+ push %r12
movq %rcx, %r9 /* Save kernel length */
movq %rdi, %r10 /* Save encrypted kernel address */
@@ -119,21 +120,27 @@ ENTRY(__enc_copy)
wbinvd /* Invalidate any cache entries */
- /* Copy/encrypt 2MB at a time */
+ /* Copy/encrypt up to 2MB at a time */
+ movq $PMD_PAGE_SIZE, %r12
1:
+ cmpq %r12, %r9
+ jnb 2f
+ movq %r9, %r12
+
+2:
movq %r11, %rsi /* Source - decrypted kernel */
movq %r8, %rdi /* Dest - intermediate copy buffer */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */
movq %r10, %rdi /* Dest - encrypted kernel */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb
- addq $PMD_PAGE_SIZE, %r11
- addq $PMD_PAGE_SIZE, %r10
- subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
+ addq %r12, %r11
+ addq %r12, %r10
+ subq %r12, %r9 /* Kernel length decrement */
jnz 1b /* Kernel length not zero? */
/* Restore PAT register */
@@ -142,6 +149,7 @@ ENTRY(__enc_copy)
mov %r15, %rdx /* Restore original PAT value */
wrmsr
+ pop %r12
pop %r15
ret
On Thu, Dec 21, 2017 at 04:03:12PM -0600, Tom Lendacky wrote:
> In preparation for encrypting more than just the kernel during early
> boot processing, centralize the use of the PMD flag settings based
> on the type of mapping desired. When 4KB aligned encryption is added,
> this will allow either PTE flags or large page PMD flags to be used
> without requiring the caller to adjust.
>
> Signed-off-by: Tom Lendacky <[email protected]>
> ---
> arch/x86/mm/mem_encrypt.c | 131 ++++++++++++++++++++++++++-------------------
> 1 file changed, 77 insertions(+), 54 deletions(-)
>
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 5a20696..9b180f8 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -468,31 +468,40 @@ struct sme_populate_pgd_data {
> void *pgtable_area;
> pgd_t *pgd;
>
> - pmdval_t pmd_val;
> + pmdval_t pmd_flags;
> + unsigned long paddr;
> +
> unsigned long vaddr;
> + unsigned long vaddr_end;
> };
>
> -static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
> - unsigned long end)
> +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
> {
> unsigned long pgd_start, pgd_end, pgd_size;
> pgd_t *pgd_p;
>
> - pgd_start = start & PGDIR_MASK;
> - pgd_end = end & PGDIR_MASK;
> + pgd_start = ppd->vaddr & PGDIR_MASK;
> + pgd_end = ppd->vaddr_end & PGDIR_MASK;
>
> pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
> pgd_size *= sizeof(pgd_t);
This is a strange way of writing this. I'd expect:
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
>
> - pgd_p = pgd_base + pgd_index(start);
> + pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
>
> memset(pgd_p, 0, pgd_size);
> }
>
> -#define PGD_FLAGS _KERNPG_TABLE_NOENC
> -#define P4D_FLAGS _KERNPG_TABLE_NOENC
> -#define PUD_FLAGS _KERNPG_TABLE_NOENC
> -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
> +#define PGD_FLAGS _KERNPG_TABLE_NOENC
> +#define P4D_FLAGS _KERNPG_TABLE_NOENC
> +#define PUD_FLAGS _KERNPG_TABLE_NOENC
> +
> +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
> +
> +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
> +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
> + (_PAGE_PAT | _PAGE_PWT))
> +
> +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
>
> static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
> {
> @@ -561,7 +570,36 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
>
> pmd_p += pmd_index(ppd->vaddr);
> if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
> - native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val));
> + native_set_pmd(pmd_p,
> + native_make_pmd(ppd->paddr | ppd->pmd_flags));
Never do those ugly line breaks. Just let it stick out.
Otherwise, sme_encrypt_kernel() is starting to look quite readable :)
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Thu, Dec 21, 2017 at 04:03:21PM -0600, Tom Lendacky wrote:
> @@ -568,17 +578,57 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
> native_set_pud(pud_p, pud);
> }
>
> + return pmd_p;
> +}
> +
> +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
> +{
> + pmd_t *pmd_p;
> +
> + pmd_p = sme_prepare_pgd(ppd);
> + if (!pmd_p)
> + return;
> +
> pmd_p += pmd_index(ppd->vaddr);
> if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
> native_set_pmd(pmd_p,
> native_make_pmd(ppd->paddr | ppd->pmd_flags));
Ugly linebreak.
> }
>
> -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
> - pmdval_t pmd_flags)
> +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
> {
> - ppd->pmd_flags = pmd_flags;
> + pmd_t *pmd_p;
> + pte_t *pte_p;
> +
> + pmd_p = sme_prepare_pgd(ppd);
> + if (!pmd_p)
> + return;
> +
> + pmd_p += pmd_index(ppd->vaddr);
> + if (native_pmd_val(*pmd_p)) {
> + if (native_pmd_val(*pmd_p) & _PAGE_PSE)
> + return;
> +
> + pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
> + } else {
> + pmd_t pmd;
>
> + pte_p = ppd->pgtable_area;
> + memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
> + ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
> +
> + pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
> + native_set_pmd(pmd_p, pmd);
> + }
> +
> + pte_p += pte_index(ppd->vaddr);
> + if (!native_pte_val(*pte_p))
> + native_set_pte(pte_p,
> + native_make_pte(ppd->paddr | ppd->pte_flags));
Ditto.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On 1/7/2018 10:28 AM, Borislav Petkov wrote:
> On Thu, Dec 21, 2017 at 04:03:12PM -0600, Tom Lendacky wrote:
>> In preparation for encrypting more than just the kernel during early
>> boot processing, centralize the use of the PMD flag settings based
>> on the type of mapping desired. When 4KB aligned encryption is added,
>> this will allow either PTE flags or large page PMD flags to be used
>> without requiring the caller to adjust.
>>
>> Signed-off-by: Tom Lendacky <[email protected]>
>> ---
>> arch/x86/mm/mem_encrypt.c | 131 ++++++++++++++++++++++++++-------------------
>> 1 file changed, 77 insertions(+), 54 deletions(-)
>>
>> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
>> index 5a20696..9b180f8 100644
>> --- a/arch/x86/mm/mem_encrypt.c
>> +++ b/arch/x86/mm/mem_encrypt.c
>> @@ -468,31 +468,40 @@ struct sme_populate_pgd_data {
>> void *pgtable_area;
>> pgd_t *pgd;
>>
>> - pmdval_t pmd_val;
>> + pmdval_t pmd_flags;
>> + unsigned long paddr;
>> +
>> unsigned long vaddr;
>> + unsigned long vaddr_end;
>> };
>>
>> -static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
>> - unsigned long end)
>> +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
>> {
>> unsigned long pgd_start, pgd_end, pgd_size;
>> pgd_t *pgd_p;
>>
>> - pgd_start = start & PGDIR_MASK;
>> - pgd_end = end & PGDIR_MASK;
>> + pgd_start = ppd->vaddr & PGDIR_MASK;
>> + pgd_end = ppd->vaddr_end & PGDIR_MASK;
>>
>> pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
>> pgd_size *= sizeof(pgd_t);
>
> This is a strange way of writing this. I'd expect:
>
> pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
Yup, clearer as one line. I can fix that up.
>
>>
>> - pgd_p = pgd_base + pgd_index(start);
>> + pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
>>
>> memset(pgd_p, 0, pgd_size);
>> }
>>
>> -#define PGD_FLAGS _KERNPG_TABLE_NOENC
>> -#define P4D_FLAGS _KERNPG_TABLE_NOENC
>> -#define PUD_FLAGS _KERNPG_TABLE_NOENC
>> -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
>> +#define PGD_FLAGS _KERNPG_TABLE_NOENC
>> +#define P4D_FLAGS _KERNPG_TABLE_NOENC
>> +#define PUD_FLAGS _KERNPG_TABLE_NOENC
>> +
>> +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
>> +
>> +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
>> +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
>> + (_PAGE_PAT | _PAGE_PWT))
>> +
>> +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
>>
>> static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
>> {
>> @@ -561,7 +570,36 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
>>
>> pmd_p += pmd_index(ppd->vaddr);
>> if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
>> - native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val));
>> + native_set_pmd(pmd_p,
>> + native_make_pmd(ppd->paddr | ppd->pmd_flags));
>
> Never do those ugly line breaks. Just let it stick out.
Will do.
Thanks,
Tom
>
> Otherwise, sme_encrypt_kernel() is starting to look quite readable :)
>
On 1/7/2018 12:25 PM, Borislav Petkov wrote:
> On Thu, Dec 21, 2017 at 04:03:21PM -0600, Tom Lendacky wrote:
>> @@ -568,17 +578,57 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
>> native_set_pud(pud_p, pud);
>> }
>>
>> + return pmd_p;
>> +}
>> +
>> +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
>> +{
>> + pmd_t *pmd_p;
>> +
>> + pmd_p = sme_prepare_pgd(ppd);
>> + if (!pmd_p)
>> + return;
>> +
>> pmd_p += pmd_index(ppd->vaddr);
>> if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
>> native_set_pmd(pmd_p,
>> native_make_pmd(ppd->paddr | ppd->pmd_flags));
>
> Ugly linebreak.
That one will be fixed in the previous patch.
>
>> }
>>
>> -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
>> - pmdval_t pmd_flags)
>> +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
>> {
>> - ppd->pmd_flags = pmd_flags;
>> + pmd_t *pmd_p;
>> + pte_t *pte_p;
>> +
>> + pmd_p = sme_prepare_pgd(ppd);
>> + if (!pmd_p)
>> + return;
>> +
>> + pmd_p += pmd_index(ppd->vaddr);
>> + if (native_pmd_val(*pmd_p)) {
>> + if (native_pmd_val(*pmd_p) & _PAGE_PSE)
>> + return;
>> +
>> + pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
>> + } else {
>> + pmd_t pmd;
>>
>> + pte_p = ppd->pgtable_area;
>> + memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
>> + ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
>> +
>> + pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
>> + native_set_pmd(pmd_p, pmd);
>> + }
>> +
>> + pte_p += pte_index(ppd->vaddr);
>> + if (!native_pte_val(*pte_p))
>> + native_set_pte(pte_p,
>> + native_make_pte(ppd->paddr | ppd->pte_flags));
>
> Ditto.
And this one will be fixed here.
Thanks,
Tom
>