2017-12-07 23:33:50

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH v1 0/3] x86: SME: BSP/SME microcode update fix

This patch series addresses an issue when SME is active and the BSP
is attempting to check for and load microcode during load_ucode_bsp().
Since the initrd has not been encrypted in place (yet) and the virtual
address of the initrd treats the memory as encrypted, the CPIO archive
parsing fails to locate the microcode.

This series moves the encryption of the initrd into the early boot code
and encrypts it at the same time that the kernel is encrypted. Since
the initrd is now encrypted, the CPIO archive parsing succeeds in
properly locating the microcode.

The following patches are included in this fix:
- Centralize the use of the PMD flags used in sme_encrypt_kernel() in
preparation for using PTE flags also.
- Prepare sme_encrypt_kernel() to handle PAGE aligned encryption, not
just 2MB large page aligned encryption.
- Encrypt the initrd in sme_encrypt_kernel() when the kernel is being
encrypted.

This patch series is based on tip/master.

---

Tom Lendacky (3):
x86/mm: Centralize PMD flags in sme_encrypt_kernel()
x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
x86/mm: Encrypt the initrd earlier for BSP microcode update


arch/x86/include/asm/mem_encrypt.h | 4 -
arch/x86/kernel/head64.c | 4 -
arch/x86/kernel/setup.c | 10 -
arch/x86/mm/mem_encrypt.c | 264 +++++++++++++++++++++++++++---------
arch/x86/mm/mem_encrypt_boot.S | 66 +++++----
5 files changed, 243 insertions(+), 105 deletions(-)

--
Tom Lendacky


2017-12-07 23:34:00

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH v1 1/3] x86/mm: Centralize PMD flags in sme_encrypt_kernel()

In preparation for encrypting more than just the kernel during early
boot processing, centralize the use of the PMD flag settings based
on the type of mapping desired. When 4KB aligned encryption is added,
this will allow either PTE flags or large page PMD flags to be used
without requiring the caller to adjust.

Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 109 +++++++++++++++++++++++++--------------------
1 file changed, 60 insertions(+), 49 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9f..2d8404b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,6 +464,8 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

+static void *pgtable_area;
+
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
unsigned long end)
{
@@ -484,10 +486,16 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
#define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
- unsigned long vaddr, pmdval_t pmd_val)
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+
+static void __init sme_populate_pgd(pgd_t *pgd_base, unsigned long vaddr,
+ unsigned long paddr, pmdval_t pmd_flags)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
@@ -538,7 +546,7 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
pud_p += pud_index(vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- goto out;
+ return;

pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
@@ -554,10 +562,43 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,

pmd_p += pmd_index(vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+ native_set_pmd(pmd_p, native_make_pmd(paddr | pmd_flags));
+}

-out:
- return pgtable_area;
+static void __init __sme_map_range(pgd_t *pgd, unsigned long vaddr,
+ unsigned long vaddr_end,
+ unsigned long paddr, pmdval_t pmd_flags)
+{
+ while (vaddr < vaddr_end) {
+ sme_populate_pgd(pgd, vaddr, paddr, pmd_flags);
+
+ vaddr += PMD_PAGE_SIZE;
+ paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init sme_map_range_encrypted(pgd_t *pgd,
+ unsigned long vaddr,
+ unsigned long vaddr_end,
+ unsigned long paddr)
+{
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(pgd_t *pgd,
+ unsigned long vaddr,
+ unsigned long vaddr_end,
+ unsigned long paddr)
+{
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
+ unsigned long vaddr,
+ unsigned long vaddr_end,
+ unsigned long paddr)
+{
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC_WP);
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
@@ -616,9 +657,7 @@ void __init sme_encrypt_kernel(void)
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
unsigned long pgtable_area_len;
- unsigned long paddr, pmd_flags;
unsigned long decrypted_base;
- void *pgtable_area;
pgd_t *pgd;

if (!sme_active())
@@ -690,14 +729,8 @@ void __init sme_encrypt_kernel(void)
* addressing the workarea.
*/
pgd = (pgd_t *)native_read_cr3_pa();
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ sme_map_range_decrypted(pgd, workarea_start, workarea_end,
+ workarea_start);

/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
@@ -712,17 +745,6 @@ void __init sme_encrypt_kernel(void)
memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

- /* Add encrypted kernel (identity) mappings */
- pmd_flags = PMD_FLAGS | _PAGE_ENC;
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + pmd_flags);
-
- paddr += PMD_PAGE_SIZE;
- }
-
/*
* A different PGD index/entry must be used to get different
* pagetable entries for the decrypted mapping. Choose the next
@@ -732,30 +754,19 @@ void __init sme_encrypt_kernel(void)
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
decrypted_base <<= PGDIR_SHIFT;

- /* Add decrypted, write-protected kernel (non-identity) mappings */
- pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + pmd_flags);
+ /* Add encrypted kernel (identity) mappings */
+ sme_map_range_encrypted(pgd, kernel_start, kernel_end, kernel_start);

- paddr += PMD_PAGE_SIZE;
- }
+ /* Add decrypted, write-protected kernel (non-identity) mappings */
+ sme_map_range_decrypted_wp(pgd, kernel_start + decrypted_base,
+ kernel_end + decrypted_base, kernel_start);

/* Add decrypted workarea mappings to both kernel mappings */
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
-
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ sme_map_range_decrypted(pgd, workarea_start, workarea_end,
+ workarea_start);
+ sme_map_range_decrypted(pgd, workarea_start + decrypted_base,
+ workarea_end + decrypted_base,
+ workarea_start);

/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,

2017-12-07 23:34:11

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH v1 2/3] x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption

In preparation for encrypting more than just the kernel, the encryption
support in sme_encrypt_kernel() needs to support 4KB page aligned
encryption instead of just 2MB large page aligned encryption.

Update the routines that populate the PGD to support non-2MB aligned
addresses. This is done by creating PTE page tables for the start
and end portion of the address range that fall outside of the 2MB
alignment. This results in, at most, two extra pages to hold the
PTE entries for each mapping of a range.

Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/mm/mem_encrypt.c | 115 ++++++++++++++++++++++++++++++++++------
arch/x86/mm/mem_encrypt_boot.S | 20 +++++--
2 files changed, 114 insertions(+), 21 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 2d8404b..1f0efb8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -486,6 +486,7 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
#define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC

#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

@@ -494,8 +495,14 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
(_PAGE_PAT | _PAGE_PWT))
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)

-static void __init sme_populate_pgd(pgd_t *pgd_base, unsigned long vaddr,
- unsigned long paddr, pmdval_t pmd_flags)
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+
+static pmd_t __init *sme_prepare_pgd(pgd_t *pgd_base, unsigned long vaddr)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
@@ -546,7 +553,7 @@ static void __init sme_populate_pgd(pgd_t *pgd_base, unsigned long vaddr,
pud_p += pud_index(vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- return;
+ return NULL;

pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
@@ -560,21 +567,90 @@ static void __init sme_populate_pgd(pgd_t *pgd_base, unsigned long vaddr,
native_set_pud(pud_p, pud);
}

+ return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(pgd_t *pgd, unsigned long vaddr,
+ unsigned long paddr,
+ pmdval_t pmd_flags)
+{
+ pmd_t *pmd_p;
+
+ pmd_p = sme_prepare_pgd(pgd, vaddr);
+ if (!pmd_p)
+ return;
+
pmd_p += pmd_index(vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
native_set_pmd(pmd_p, native_make_pmd(paddr | pmd_flags));
}

+static void __init sme_populate_pgd(pgd_t *pgd, unsigned long vaddr,
+ unsigned long paddr,
+ pteval_t pte_flags)
+{
+ pmd_t *pmd_p;
+ pte_t *pte_p;
+
+ pmd_p = sme_prepare_pgd(pgd, vaddr);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(vaddr);
+ if (native_pmd_val(*pmd_p)) {
+ if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+ return;
+
+ pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pmd_t pmd;
+
+ pte_p = pgtable_area;
+ memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+ pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+ pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+ native_set_pmd(pmd_p, pmd);
+ }
+
+ pte_p += pte_index(vaddr);
+ if (!native_pte_val(*pte_p))
+ native_set_pte(pte_p, native_make_pte(paddr | pte_flags));
+}
+
static void __init __sme_map_range(pgd_t *pgd, unsigned long vaddr,
unsigned long vaddr_end,
- unsigned long paddr, pmdval_t pmd_flags)
+ unsigned long paddr,
+ pmdval_t pmd_flags, pteval_t pte_flags)
{
- while (vaddr < vaddr_end) {
- sme_populate_pgd(pgd, vaddr, paddr, pmd_flags);
+ if (vaddr & ~PMD_PAGE_MASK) {
+ /* Start is not 2MB aligned, create PTE entries */
+ unsigned long pmd_start = ALIGN(vaddr, PMD_PAGE_SIZE);
+
+ while (vaddr < pmd_start) {
+ sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
+
+ vaddr += PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ }
+ }
+
+ while (vaddr < (vaddr_end & PMD_PAGE_MASK)) {
+ sme_populate_pgd_large(pgd, vaddr, paddr, pmd_flags);

vaddr += PMD_PAGE_SIZE;
paddr += PMD_PAGE_SIZE;
}
+
+ if (vaddr_end & ~PMD_PAGE_MASK) {
+ /* End is not 2MB aligned, create PTE entries */
+ while (vaddr < vaddr_end) {
+ sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
+
+ vaddr += PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ }
+ }
}

static void __init sme_map_range_encrypted(pgd_t *pgd,
@@ -582,7 +658,8 @@ static void __init sme_map_range_encrypted(pgd_t *pgd,
unsigned long vaddr_end,
unsigned long paddr)
{
- __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_ENC);
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr,
+ PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}

static void __init sme_map_range_decrypted(pgd_t *pgd,
@@ -590,7 +667,8 @@ static void __init sme_map_range_decrypted(pgd_t *pgd,
unsigned long vaddr_end,
unsigned long paddr)
{
- __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC);
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr,
+ PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}

static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
@@ -598,19 +676,20 @@ static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
unsigned long vaddr_end,
unsigned long paddr)
{
- __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC_WP);
+ __sme_map_range(pgd, vaddr, vaddr_end, paddr,
+ PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
{
- unsigned long p4d_size, pud_size, pmd_size;
+ unsigned long p4d_size, pud_size, pmd_size, pte_size;
unsigned long total;

/*
* Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. That mappings will be covered by 2MB
- * PMD entries so we can conservatively calculate the required
- * number of P4D, PUD and PMD structures needed to perform the
+ * entries that are needed. That mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
+ * number of P4D, PUD, PMD and PTE structures needed to perform the
* mappings. Incrementing the count for each covers the case where
* the addresses cross entries.
*/
@@ -626,8 +705,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
}
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+ pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;

- total = p4d_size + pud_size + pmd_size;
+ total = p4d_size + pud_size + pmd_size + pte_size;

/*
* Now calculate the added pagetable structures needed to populate
@@ -710,10 +790,13 @@ void __init sme_encrypt_kernel(void)

/*
* The total workarea includes the executable encryption area and
- * the pagetable area.
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned, align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
*/
workarea_len = execute_len + pgtable_area_len;
- workarea_end = workarea_start + workarea_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);

/*
* Set the address to the start of where newly created pagetable
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d5..20cca86 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -120,23 +120,33 @@ ENTRY(__enc_copy)

wbinvd /* Invalidate any cache entries */

+ push %r12
+
/* Copy/encrypt 2MB at a time */
+ movq $PMD_PAGE_SIZE, %r12
1:
+ cmpq %r12, %r9
+ jnb 2f
+ movq %r9, %r12
+
+2:
movq %r11, %rsi /* Source - decrypted kernel */
movq %r8, %rdi /* Dest - intermediate copy buffer */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb

movq %r8, %rsi /* Source - intermediate copy buffer */
movq %r10, %rdi /* Dest - encrypted kernel */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb

- addq $PMD_PAGE_SIZE, %r11
- addq $PMD_PAGE_SIZE, %r10
- subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
+ addq %r12, %r11
+ addq %r12, %r10
+ subq %r12, %r9 /* Kernel length decrement */
jnz 1b /* Kernel length not zero? */

+ pop %r12
+
/* Restore PAT register */
push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx

2017-12-07 23:34:21

by Tom Lendacky

[permalink] [raw]
Subject: [PATCH v1 3/3] x86/mm: Encrypt the initrd earlier for BSP microcode update

Currently the BSP microcode update code examines the initrd very early
in the boot process. If SME is active, the initrd is treated as being
encrypted but it has not been encrypted (in place) yet. Update the
early boot code that encrypts the kernel to also encrypt the initrd so
that early BSP microcode updates work.

Signed-off-by: Tom Lendacky <[email protected]>
---
arch/x86/include/asm/mem_encrypt.h | 4 +-
arch/x86/kernel/head64.c | 4 +-
arch/x86/kernel/setup.c | 10 ------
arch/x86/mm/mem_encrypt.c | 62 +++++++++++++++++++++++++++++++-----
arch/x86/mm/mem_encrypt_boot.S | 46 +++++++++++++--------------
5 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4..22c5f3e 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_early_decrypt(resource_size_t paddr,

void __init sme_early_init(void);

-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }

static inline void __init sme_early_init(void) { }

-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }

static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757..7ba5d81 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();

- /* Encrypt the kernel (if SME is active) */
- sme_encrypt_kernel();
+ /* Encrypt the kernel and related (if SME is active) */
+ sme_encrypt_kernel(bp);

/*
* Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8af2e8d..8f2af5d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */

- /*
- * If SME is active, this memory will be marked encrypted by the
- * kernel when it is accessed (including relocation). However, the
- * ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it. For SEV the
- * ramdisk will already be encrypted, so only do this for SME.
- */
- if (sme_active())
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
initrd_start = 0;

mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1f0efb8..60df247 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -731,11 +731,12 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}

-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
unsigned long pgtable_area_len;
unsigned long decrypted_base;
pgd_t *pgd;
@@ -744,14 +745,15 @@ void __init sme_encrypt_kernel(void)
return;

/*
- * Prepare for encrypting the kernel by building new pagetables with
- * the necessary attributes needed to encrypt the kernel in place.
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
*
* One range of virtual addresses will map the memory occupied
- * by the kernel as encrypted.
+ * by the kernel and initrd as encrypted.
*
* Another range of virtual addresses will map the memory occupied
- * by the kernel as decrypted and write-protected.
+ * by the kernel and initrd as decrypted and write-protected.
*
* The use of write-protect attribute will prevent any of the
* memory from being cached.
@@ -762,6 +764,20 @@ void __init sme_encrypt_kernel(void)
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
kernel_len = kernel_end - kernel_start;

+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
/* Set the encryption workarea to be immediately after the kernel */
workarea_start = kernel_end;

@@ -784,6 +800,8 @@ void __init sme_encrypt_kernel(void)
*/
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;

/* PUDs and PMDs needed in the current pagetables for the workarea */
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
@@ -820,9 +838,9 @@ void __init sme_encrypt_kernel(void)

/*
* A new pagetable structure is being built to allow for the kernel
- * to be encrypted. It starts with an empty PGD that will then be
- * populated with new PUDs and PMDs as the encrypted and decrypted
- * kernel mappings are created.
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
*/
pgd = pgtable_area;
memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
@@ -835,6 +853,12 @@ void __init sme_encrypt_kernel(void)
* the base of the mapping.
*/
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
decrypted_base <<= PGDIR_SHIFT;

/* Add encrypted kernel (identity) mappings */
@@ -844,7 +868,19 @@ void __init sme_encrypt_kernel(void)
sme_map_range_decrypted_wp(pgd, kernel_start + decrypted_base,
kernel_end + decrypted_base, kernel_start);

- /* Add decrypted workarea mappings to both kernel mappings */
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ sme_map_range_encrypted(pgd, initrd_start, initrd_end,
+ initrd_start);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ sme_map_range_decrypted_wp(pgd, initrd_start + decrypted_base,
+ initrd_end + decrypted_base,
+ initrd_start);
+ }
+
+ /* Add decrypted workarea mappings to both mappings */
sme_map_range_decrypted(pgd, workarea_start, workarea_end,
workarea_start);
sme_map_range_decrypted(pgd, workarea_start + decrypted_base,
@@ -854,6 +890,10 @@ void __init sme_encrypt_kernel(void)
/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
kernel_len, workarea_start, (unsigned long)pgd);
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)pgd);

/*
* At this point we are running encrypted. Remove the mappings for
@@ -863,6 +903,10 @@ void __init sme_encrypt_kernel(void)
sme_clear_pgd(pgd, kernel_start + decrypted_base,
kernel_end + decrypted_base);

+ if (initrd_len)
+ sme_clear_pgd(pgd, initrd_start + decrypted_base,
+ initrd_end + decrypted_base);
+
sme_clear_pgd(pgd, workarea_start + decrypted_base,
workarea_end + decrypted_base);

diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 20cca86..6132015 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)

/*
* Entry parameters:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
- * RDX - length of kernel
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
+ * RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */

push %r12
- movq %rdi, %r10 /* Encrypted kernel */
- movq %rsi, %r11 /* Decrypted kernel */
- movq %rdx, %r12 /* Kernel length */
+ movq %rdi, %r10 /* Encrypted area */
+ movq %rsi, %r11 /* Decrypted area */
+ movq %rdx, %r12 /* Area length */

/* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb

/* Setup registers for call */
- movq %r10, %rdi /* Encrypted kernel */
- movq %r11, %rsi /* Decrypted kernel */
+ movq %r10, %rdi /* Encrypted area */
+ movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */
- movq %r12, %rcx /* Kernel length */
+ movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */

@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)

ENTRY(__enc_copy)
/*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution.
*
* On entry the registers must be:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption
- * RCX - length of kernel
+ * RCX - length of area
* R8 - intermediate copy buffer
*
* RAX - points to this routine
*
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
*/
/* Enable the new page tables */
mov %rdx, %cr3
@@ -114,9 +114,9 @@ ENTRY(__enc_copy)
pop %rdx /* RDX contains original PAT value */
pop %rcx

- movq %rcx, %r9 /* Save kernel length */
- movq %rdi, %r10 /* Save encrypted kernel address */
- movq %rsi, %r11 /* Save decrypted kernel address */
+ movq %rcx, %r9 /* Save area length */
+ movq %rdi, %r10 /* Save encrypted area address */
+ movq %rsi, %r11 /* Save decrypted area address */

wbinvd /* Invalidate any cache entries */

@@ -130,13 +130,13 @@ ENTRY(__enc_copy)
movq %r9, %r12

2:
- movq %r11, %rsi /* Source - decrypted kernel */
+ movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */
movq %r12, %rcx
rep movsb

movq %r8, %rsi /* Source - intermediate copy buffer */
- movq %r10, %rdi /* Dest - encrypted kernel */
+ movq %r10, %rdi /* Dest - encrypted area */
movq %r12, %rcx
rep movsb


2017-12-20 19:13:14

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] x86/mm: Centralize PMD flags in sme_encrypt_kernel()

On Thu, Dec 07, 2017 at 05:33:52PM -0600, Tom Lendacky wrote:
> In preparation for encrypting more than just the kernel during early
> boot processing, centralize the use of the PMD flag settings based
> on the type of mapping desired. When 4KB aligned encryption is added,
> this will allow either PTE flags or large page PMD flags to be used
> without requiring the caller to adjust.
>
> Signed-off-by: Tom Lendacky <[email protected]>
> ---
> arch/x86/mm/mem_encrypt.c | 109 +++++++++++++++++++++++++--------------------
> 1 file changed, 60 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index d9a9e9f..2d8404b 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -464,6 +464,8 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
> set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
> }
>
> +static void *pgtable_area;

Ewww, a global variable which gets manipulated by functions. Can we not
do that pls?

sme_populate_pgd() used to return it. Why change that?

> +
> static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
> unsigned long end)
> {
> @@ -484,10 +486,16 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
> #define PGD_FLAGS _KERNPG_TABLE_NOENC
> #define P4D_FLAGS _KERNPG_TABLE_NOENC
> #define PUD_FLAGS _KERNPG_TABLE_NOENC
> -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
>
> -static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
> - unsigned long vaddr, pmdval_t pmd_val)
> +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
> +
> +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
> +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
> + (_PAGE_PAT | _PAGE_PWT))
> +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)

Align vertically.

Rest looks ok.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2017-12-20 19:59:28

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH v1 1/3] x86/mm: Centralize PMD flags in sme_encrypt_kernel()

On 12/20/2017 1:13 PM, Borislav Petkov wrote:
> On Thu, Dec 07, 2017 at 05:33:52PM -0600, Tom Lendacky wrote:
>> In preparation for encrypting more than just the kernel during early
>> boot processing, centralize the use of the PMD flag settings based
>> on the type of mapping desired. When 4KB aligned encryption is added,
>> this will allow either PTE flags or large page PMD flags to be used
>> without requiring the caller to adjust.
>>
>> Signed-off-by: Tom Lendacky <[email protected]>
>> ---
>> arch/x86/mm/mem_encrypt.c | 109 +++++++++++++++++++++++++--------------------
>> 1 file changed, 60 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
>> index d9a9e9f..2d8404b 100644
>> --- a/arch/x86/mm/mem_encrypt.c
>> +++ b/arch/x86/mm/mem_encrypt.c
>> @@ -464,6 +464,8 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
>> set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
>> }
>>
>> +static void *pgtable_area;
>
> Ewww, a global variable which gets manipulated by functions. Can we not
> do that pls?
>
> sme_populate_pgd() used to return it. Why change that?

It was starting to get pretty hairy with all the parameters and what
needed to be returned when the second patch was introduced. I'll look at
what I can do to avoid the global, maybe pass in the address of the
variable for updating within the function or combine the parameters into
a struct.

>
>> +
>> static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
>> unsigned long end)
>> {
>> @@ -484,10 +486,16 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
>> #define PGD_FLAGS _KERNPG_TABLE_NOENC
>> #define P4D_FLAGS _KERNPG_TABLE_NOENC
>> #define PUD_FLAGS _KERNPG_TABLE_NOENC
>> -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
>>
>> -static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
>> - unsigned long vaddr, pmdval_t pmd_val)
>> +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
>> +
>> +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
>> +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
>> + (_PAGE_PAT | _PAGE_PWT))
>> +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
>
> Align vertically.

Ok

Thanks,
Tom

>
> Rest looks ok.
>

2017-12-21 12:58:14

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption

On Thu, Dec 07, 2017 at 05:34:02PM -0600, Tom Lendacky wrote:
> In preparation for encrypting more than just the kernel, the encryption
> support in sme_encrypt_kernel() needs to support 4KB page aligned
> encryption instead of just 2MB large page aligned encryption.

...

> static void __init __sme_map_range(pgd_t *pgd, unsigned long vaddr,
> unsigned long vaddr_end,
> - unsigned long paddr, pmdval_t pmd_flags)
> + unsigned long paddr,
> + pmdval_t pmd_flags, pteval_t pte_flags)
> {
> - while (vaddr < vaddr_end) {
> - sme_populate_pgd(pgd, vaddr, paddr, pmd_flags);
> + if (vaddr & ~PMD_PAGE_MASK) {
> + /* Start is not 2MB aligned, create PTE entries */
> + unsigned long pmd_start = ALIGN(vaddr, PMD_PAGE_SIZE);
> +
> + while (vaddr < pmd_start) {
> + sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
> +
> + vaddr += PAGE_SIZE;
> + paddr += PAGE_SIZE;
> + }
> + }

This chunk...

> +
> + while (vaddr < (vaddr_end & PMD_PAGE_MASK)) {
> + sme_populate_pgd_large(pgd, vaddr, paddr, pmd_flags);
>
> vaddr += PMD_PAGE_SIZE;
> paddr += PMD_PAGE_SIZE;
> }
> +
> + if (vaddr_end & ~PMD_PAGE_MASK) {
> + /* End is not 2MB aligned, create PTE entries */
> + while (vaddr < vaddr_end) {
> + sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
> +
> + vaddr += PAGE_SIZE;
> + paddr += PAGE_SIZE;
> + }
> + }

... and this chunk look like could be extracted in a wrapper as they're
pretty similar.

> static void __init sme_map_range_encrypted(pgd_t *pgd,
> @@ -582,7 +658,8 @@ static void __init sme_map_range_encrypted(pgd_t *pgd,
> unsigned long vaddr_end,
> unsigned long paddr)
> {
> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_ENC);
> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
> + PMD_FLAGS_ENC, PTE_FLAGS_ENC);
> }
>
> static void __init sme_map_range_decrypted(pgd_t *pgd,
> @@ -590,7 +667,8 @@ static void __init sme_map_range_decrypted(pgd_t *pgd,
> unsigned long vaddr_end,
> unsigned long paddr)
> {
> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC);
> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
> + PMD_FLAGS_DEC, PTE_FLAGS_DEC);
> }
>
> static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
> @@ -598,19 +676,20 @@ static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
> unsigned long vaddr_end,
> unsigned long paddr)
> {
> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC_WP);
> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
> + PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
> }
>
> static unsigned long __init sme_pgtable_calc(unsigned long len)
> {
> - unsigned long p4d_size, pud_size, pmd_size;
> + unsigned long p4d_size, pud_size, pmd_size, pte_size;
> unsigned long total;
>
> /*
> * Perform a relatively simplistic calculation of the pagetable
> - * entries that are needed. That mappings will be covered by 2MB
> - * PMD entries so we can conservatively calculate the required
> - * number of P4D, PUD and PMD structures needed to perform the
> + * entries that are needed. That mappings will be covered mostly

Those

> + * by 2MB PMD entries so we can conservatively calculate the required
> + * number of P4D, PUD, PMD and PTE structures needed to perform the
> * mappings. Incrementing the count for each covers the case where
> * the addresses cross entries.
> */
> @@ -626,8 +705,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
> }
> pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
> pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
> + pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;

This needs the explanation from the commit message about the 2 extra
pages, as a comment above it.

> - total = p4d_size + pud_size + pmd_size;
> + total = p4d_size + pud_size + pmd_size + pte_size;
>
> /*
> * Now calculate the added pagetable structures needed to populate
> @@ -710,10 +790,13 @@ void __init sme_encrypt_kernel(void)
>
> /*
> * The total workarea includes the executable encryption area and
> - * the pagetable area.
> + * the pagetable area. The start of the workarea is already 2MB
> + * aligned, align the end of the workarea on a 2MB boundary so that
> + * we don't try to create/allocate PTE entries from the workarea
> + * before it is mapped.
> */
> workarea_len = execute_len + pgtable_area_len;
> - workarea_end = workarea_start + workarea_len;
> + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
>
> /*
> * Set the address to the start of where newly created pagetable
> diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
> index 730e6d5..20cca86 100644
> --- a/arch/x86/mm/mem_encrypt_boot.S
> +++ b/arch/x86/mm/mem_encrypt_boot.S
> @@ -120,23 +120,33 @@ ENTRY(__enc_copy)
>
> wbinvd /* Invalidate any cache entries */
>
> + push %r12
> +
> /* Copy/encrypt 2MB at a time */
> + movq $PMD_PAGE_SIZE, %r12
> 1:
> + cmpq %r12, %r9
> + jnb 2f
> + movq %r9, %r12
> +
> +2:
> movq %r11, %rsi /* Source - decrypted kernel */
> movq %r8, %rdi /* Dest - intermediate copy buffer */
> - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
> + movq %r12, %rcx
> rep movsb
>
> movq %r8, %rsi /* Source - intermediate copy buffer */
> movq %r10, %rdi /* Dest - encrypted kernel */
> - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
> + movq %r12, %rcx
> rep movsb
>
> - addq $PMD_PAGE_SIZE, %r11
> - addq $PMD_PAGE_SIZE, %r10
> - subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
> + addq %r12, %r11
> + addq %r12, %r10
> + subq %r12, %r9 /* Kernel length decrement */
> jnz 1b /* Kernel length not zero? */
>
> + pop %r12
> +
> /* Restore PAT register */
> push %rdx /* Save original PAT value */
> movl $MSR_IA32_CR_PAT, %ecx

Right, for this here can you pls do a cleanup pre-patch which does

push
push
push

/* meat of the function */

pop
pop
pop

as now those pushes and pops are interspersed with the rest of the insns
and that makes following through it harder. F17h has a stack engine so
those streams of pushes and pops won't hurt perf and besides, this is
not even perf-sensitive.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2017-12-21 14:49:38

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v1 3/3] x86/mm: Encrypt the initrd earlier for BSP microcode update

On Thu, Dec 07, 2017 at 05:34:10PM -0600, Tom Lendacky wrote:
> Currently the BSP microcode update code examines the initrd very early
> in the boot process. If SME is active, the initrd is treated as being
> encrypted but it has not been encrypted (in place) yet. Update the
> early boot code that encrypts the kernel to also encrypt the initrd so
> that early BSP microcode updates work.

...

> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 1f0efb8..60df247 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -731,11 +731,12 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
> return total;
> }
>
> -void __init sme_encrypt_kernel(void)
> +void __init sme_encrypt_kernel(struct boot_params *bp)
> {
> unsigned long workarea_start, workarea_end, workarea_len;
> unsigned long execute_start, execute_end, execute_len;
> unsigned long kernel_start, kernel_end, kernel_len;
> + unsigned long initrd_start, initrd_end, initrd_len;
> unsigned long pgtable_area_len;
> unsigned long decrypted_base;
> pgd_t *pgd;
> @@ -744,14 +745,15 @@ void __init sme_encrypt_kernel(void)
> return;
>
> /*
> - * Prepare for encrypting the kernel by building new pagetables with
> - * the necessary attributes needed to encrypt the kernel in place.
> + * Prepare for encrypting the kernel and initrd by building new
> + * pagetables with the necessary attributes needed to encrypt the
> + * kernel in place.
> *
> * One range of virtual addresses will map the memory occupied
> - * by the kernel as encrypted.
> + * by the kernel and initrd as encrypted.
> *
> * Another range of virtual addresses will map the memory occupied
> - * by the kernel as decrypted and write-protected.
> + * by the kernel and initrd as decrypted and write-protected.
> *
> * The use of write-protect attribute will prevent any of the
> * memory from being cached.
> @@ -762,6 +764,20 @@ void __init sme_encrypt_kernel(void)
> kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
> kernel_len = kernel_end - kernel_start;
>
> + initrd_start = 0;
> + initrd_end = 0;
> + initrd_len = 0;
> +#ifdef CONFIG_BLK_DEV_INITRD
> + initrd_len = (unsigned long)bp->hdr.ramdisk_size |
> + ((unsigned long)bp->ext_ramdisk_size << 32);
> + if (initrd_len) {
> + initrd_start = (unsigned long)bp->hdr.ramdisk_image |
> + ((unsigned long)bp->ext_ramdisk_image << 32);
> + initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
> + initrd_len = initrd_end - initrd_start;
> + }
> +#endif

In a prepatch, pls make get_ramdisk_image() and get_ramdisk_size() from
arch/x86/kernel/setup.c accessible to this code too. Also, add dummies
for the !CONFIG_BLK_DEV_INITRD case so that you can simply call them
here, regardless of the CONFIG_BLK_DEV_INITRD setting.

Then you won't need boot_params ptr either and that would simplify the
diff a bit.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2017-12-21 16:35:45

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption

On 12/21/2017 6:58 AM, Borislav Petkov wrote:
> On Thu, Dec 07, 2017 at 05:34:02PM -0600, Tom Lendacky wrote:
>> In preparation for encrypting more than just the kernel, the encryption
>> support in sme_encrypt_kernel() needs to support 4KB page aligned
>> encryption instead of just 2MB large page aligned encryption.
>
> ...
>
>> static void __init __sme_map_range(pgd_t *pgd, unsigned long vaddr,
>> unsigned long vaddr_end,
>> - unsigned long paddr, pmdval_t pmd_flags)
>> + unsigned long paddr,
>> + pmdval_t pmd_flags, pteval_t pte_flags)
>> {
>> - while (vaddr < vaddr_end) {
>> - sme_populate_pgd(pgd, vaddr, paddr, pmd_flags);
>> + if (vaddr & ~PMD_PAGE_MASK) {
>> + /* Start is not 2MB aligned, create PTE entries */
>> + unsigned long pmd_start = ALIGN(vaddr, PMD_PAGE_SIZE);
>> +
>> + while (vaddr < pmd_start) {
>> + sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
>> +
>> + vaddr += PAGE_SIZE;
>> + paddr += PAGE_SIZE;
>> + }
>> + }
>
> This chunk...
>
>> +
>> + while (vaddr < (vaddr_end & PMD_PAGE_MASK)) {
>> + sme_populate_pgd_large(pgd, vaddr, paddr, pmd_flags);
>>
>> vaddr += PMD_PAGE_SIZE;
>> paddr += PMD_PAGE_SIZE;
>> }
>> +
>> + if (vaddr_end & ~PMD_PAGE_MASK) {
>> + /* End is not 2MB aligned, create PTE entries */
>> + while (vaddr < vaddr_end) {
>> + sme_populate_pgd(pgd, vaddr, paddr, pte_flags);
>> +
>> + vaddr += PAGE_SIZE;
>> + paddr += PAGE_SIZE;
>> + }
>> + }
>
> ... and this chunk look like could be extracted in a wrapper as they're
> pretty similar.
>

Should be doable, I'll take care of it.

>> static void __init sme_map_range_encrypted(pgd_t *pgd,
>> @@ -582,7 +658,8 @@ static void __init sme_map_range_encrypted(pgd_t *pgd,
>> unsigned long vaddr_end,
>> unsigned long paddr)
>> {
>> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_ENC);
>> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
>> + PMD_FLAGS_ENC, PTE_FLAGS_ENC);
>> }
>>
>> static void __init sme_map_range_decrypted(pgd_t *pgd,
>> @@ -590,7 +667,8 @@ static void __init sme_map_range_decrypted(pgd_t *pgd,
>> unsigned long vaddr_end,
>> unsigned long paddr)
>> {
>> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC);
>> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
>> + PMD_FLAGS_DEC, PTE_FLAGS_DEC);
>> }
>>
>> static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
>> @@ -598,19 +676,20 @@ static void __init sme_map_range_decrypted_wp(pgd_t *pgd,
>> unsigned long vaddr_end,
>> unsigned long paddr)
>> {
>> - __sme_map_range(pgd, vaddr, vaddr_end, paddr, PMD_FLAGS_DEC_WP);
>> + __sme_map_range(pgd, vaddr, vaddr_end, paddr,
>> + PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
>> }
>>
>> static unsigned long __init sme_pgtable_calc(unsigned long len)
>> {
>> - unsigned long p4d_size, pud_size, pmd_size;
>> + unsigned long p4d_size, pud_size, pmd_size, pte_size;
>> unsigned long total;
>>
>> /*
>> * Perform a relatively simplistic calculation of the pagetable
>> - * entries that are needed. That mappings will be covered by 2MB
>> - * PMD entries so we can conservatively calculate the required
>> - * number of P4D, PUD and PMD structures needed to perform the
>> + * entries that are needed. That mappings will be covered mostly
>
> Those
>

Got it.

>> + * by 2MB PMD entries so we can conservatively calculate the required
>> + * number of P4D, PUD, PMD and PTE structures needed to perform the
>> * mappings. Incrementing the count for each covers the case where
>> * the addresses cross entries.
>> */
>> @@ -626,8 +705,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
>> }
>> pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
>> pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
>> + pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
>
> This needs the explanation from the commit message about the 2 extra
> pages, as a comment above it.

Yup, I'll include the info about the (possible) extra PTE entries.

>
>> - total = p4d_size + pud_size + pmd_size;
>> + total = p4d_size + pud_size + pmd_size + pte_size;
>>
>> /*
>> * Now calculate the added pagetable structures needed to populate
>> @@ -710,10 +790,13 @@ void __init sme_encrypt_kernel(void)
>>
>> /*
>> * The total workarea includes the executable encryption area and
>> - * the pagetable area.
>> + * the pagetable area. The start of the workarea is already 2MB
>> + * aligned, align the end of the workarea on a 2MB boundary so that
>> + * we don't try to create/allocate PTE entries from the workarea
>> + * before it is mapped.
>> */
>> workarea_len = execute_len + pgtable_area_len;
>> - workarea_end = workarea_start + workarea_len;
>> + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
>>
>> /*
>> * Set the address to the start of where newly created pagetable
>> diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
>> index 730e6d5..20cca86 100644
>> --- a/arch/x86/mm/mem_encrypt_boot.S
>> +++ b/arch/x86/mm/mem_encrypt_boot.S
>> @@ -120,23 +120,33 @@ ENTRY(__enc_copy)
>>
>> wbinvd /* Invalidate any cache entries */
>>
>> + push %r12
>> +
>> /* Copy/encrypt 2MB at a time */
>> + movq $PMD_PAGE_SIZE, %r12
>> 1:
>> + cmpq %r12, %r9
>> + jnb 2f
>> + movq %r9, %r12
>> +
>> +2:
>> movq %r11, %rsi /* Source - decrypted kernel */
>> movq %r8, %rdi /* Dest - intermediate copy buffer */
>> - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
>> + movq %r12, %rcx
>> rep movsb
>>
>> movq %r8, %rsi /* Source - intermediate copy buffer */
>> movq %r10, %rdi /* Dest - encrypted kernel */
>> - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
>> + movq %r12, %rcx
>> rep movsb
>>
>> - addq $PMD_PAGE_SIZE, %r11
>> - addq $PMD_PAGE_SIZE, %r10
>> - subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
>> + addq %r12, %r11
>> + addq %r12, %r10
>> + subq %r12, %r9 /* Kernel length decrement */
>> jnz 1b /* Kernel length not zero? */
>>
>> + pop %r12
>> +
>> /* Restore PAT register */
>> push %rdx /* Save original PAT value */
>> movl $MSR_IA32_CR_PAT, %ecx
>
> Right, for this here can you pls do a cleanup pre-patch which does
>
> push
> push
> push
>
> /* meat of the function */
>
> pop
> pop
> pop
>
> as now those pushes and pops are interspersed with the rest of the insns
> and that makes following through it harder. F17h has a stack engine so
> those streams of pushes and pops won't hurt perf and besides, this is
> not even perf-sensitive.
>

Ok, I'll clean that up to move any pushes/pops to the beginning/end of
the function, as well as moving some register saving earlier which may
eliminate some push/pop instructions.

Thanks,
Tom

> Thx.
>

2017-12-21 16:48:44

by Tom Lendacky

[permalink] [raw]
Subject: Re: [PATCH v1 3/3] x86/mm: Encrypt the initrd earlier for BSP microcode update

On 12/21/2017 8:49 AM, Borislav Petkov wrote:
> On Thu, Dec 07, 2017 at 05:34:10PM -0600, Tom Lendacky wrote:
>> Currently the BSP microcode update code examines the initrd very early
>> in the boot process. If SME is active, the initrd is treated as being
>> encrypted but it has not been encrypted (in place) yet. Update the
>> early boot code that encrypts the kernel to also encrypt the initrd so
>> that early BSP microcode updates work.
>
> ...
>
>> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
>> index 1f0efb8..60df247 100644
>> --- a/arch/x86/mm/mem_encrypt.c
>> +++ b/arch/x86/mm/mem_encrypt.c
>> @@ -731,11 +731,12 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
>> return total;
>> }
>>
>> -void __init sme_encrypt_kernel(void)
>> +void __init sme_encrypt_kernel(struct boot_params *bp)
>> {
>> unsigned long workarea_start, workarea_end, workarea_len;
>> unsigned long execute_start, execute_end, execute_len;
>> unsigned long kernel_start, kernel_end, kernel_len;
>> + unsigned long initrd_start, initrd_end, initrd_len;
>> unsigned long pgtable_area_len;
>> unsigned long decrypted_base;
>> pgd_t *pgd;
>> @@ -744,14 +745,15 @@ void __init sme_encrypt_kernel(void)
>> return;
>>
>> /*
>> - * Prepare for encrypting the kernel by building new pagetables with
>> - * the necessary attributes needed to encrypt the kernel in place.
>> + * Prepare for encrypting the kernel and initrd by building new
>> + * pagetables with the necessary attributes needed to encrypt the
>> + * kernel in place.
>> *
>> * One range of virtual addresses will map the memory occupied
>> - * by the kernel as encrypted.
>> + * by the kernel and initrd as encrypted.
>> *
>> * Another range of virtual addresses will map the memory occupied
>> - * by the kernel as decrypted and write-protected.
>> + * by the kernel and initrd as decrypted and write-protected.
>> *
>> * The use of write-protect attribute will prevent any of the
>> * memory from being cached.
>> @@ -762,6 +764,20 @@ void __init sme_encrypt_kernel(void)
>> kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
>> kernel_len = kernel_end - kernel_start;
>>
>> + initrd_start = 0;
>> + initrd_end = 0;
>> + initrd_len = 0;
>> +#ifdef CONFIG_BLK_DEV_INITRD
>> + initrd_len = (unsigned long)bp->hdr.ramdisk_size |
>> + ((unsigned long)bp->ext_ramdisk_size << 32);
>> + if (initrd_len) {
>> + initrd_start = (unsigned long)bp->hdr.ramdisk_image |
>> + ((unsigned long)bp->ext_ramdisk_image << 32);
>> + initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
>> + initrd_len = initrd_end - initrd_start;
>> + }
>> +#endif
>
> In a prepatch, pls make get_ramdisk_image() and get_ramdisk_size() from
> arch/x86/kernel/setup.c accessible to this code too. Also, add dummies
> for the !CONFIG_BLK_DEV_INITRD case so that you can simply call them
> here, regardless of the CONFIG_BLK_DEV_INITRD setting.
>
> Then you won't need boot_params ptr either and that would simplify the
> diff a bit.

This is very early in the boot and the boot parameters have not been
copied to boot_params yet, so I need the pointer. And since the routines
in arch/x86/kernel/setup.c also use boot_params, those would have to be
modified to accept a pointer rather than automatically using boot_params.
I'm not sure it's worth all that.

Thanks,
Tom

>
> Thx.
>

2017-12-21 17:13:17

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v1 3/3] x86/mm: Encrypt the initrd earlier for BSP microcode update

On Thu, Dec 21, 2017 at 10:48:29AM -0600, Tom Lendacky wrote:
> This is very early in the boot and the boot parameters have not been
> copied to boot_params yet, so I need the pointer. And since the routines
> in arch/x86/kernel/setup.c also use boot_params, those would have to be
> modified to accept a pointer rather than automatically using boot_params.
> I'm not sure it's worth all that.

I fear that this computation of the ramdisk address and size keeps
getting duplicated around the tree. And you could pass in struct boot_params *
to those functions but ok, it is simple enough, so whatever. We can
always clean it up later if it grows unwieldy...

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.