From: Andy Lutomirski
To: X86 ML
Cc: Borislav Petkov, "linux-kernel@vger.kernel.org", Brian Gerst,
	Dave Hansen, Linus Torvalds, Josh Poimboeuf, Andy Lutomirski
Subject: [PATCH 2/2] x86/kpti: Reference all cpu_entry_area pagetables in the usermode tables
Date: Fri, 1 Dec 2017 22:20:11 -0800
Message-Id: <6b7540535dccc043cee1a2d2e399e1eebf9eb332.1512195450.git.luto@kernel.org>
X-Mailer: git-send-email 2.13.6

We were manually configuring cpu_entry_area in the usermode tables.  This
was error-prone and wasted memory.  (Not much memory, but still.)

Instead, just reference the same pagetables.  This avoids needing to keep
the KPTI code and the normal cpu_entry_area code in sync, since the KPTI
code no longer cares what's in cpu_entry_area.

[This does *not* work on the current KPTI series.  It requires that all
 the kernelmode cpu_entry_tables are pre-allocated.  That happens in the
 series as I submitted it, but tglx changed it for reasons that I haven't
 figured out.]

Signed-off-by: Andy Lutomirski
---
 arch/x86/include/asm/fixmap.h | 14 +++++---
 arch/x86/include/asm/kpti.h   |  8 +++--
 arch/x86/kernel/cpu/common.c  |  3 --
 arch/x86/mm/kpti.c            | 82 ++++++++++++++++++++++++++-----------------
 4 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 839addd1eaec..a630cd2861f7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -142,16 +142,20 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 #ifdef CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
-	/* Fixmap entries to remap the GDTs, one per processor. */
-	FIX_CPU_ENTRY_AREA_TOP,
+	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
+	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
+
+	/*
+	 * Fixmap entries to remap the GDTs, one per processor.  Align
+	 * to a PMD boundary.
+	 */
+	FIX_CPU_ENTRY_AREA_TOP = round_up(FIX_TEXT_POKE0 + 1, PTRS_PER_PMD),
 	FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
 
-	__end_of_permanent_fixed_addresses,
+	__end_of_permanent_fixed_addresses = round_up(FIX_CPU_ENTRY_AREA_BOTTOM + 1, PTRS_PER_PMD),
 
 	/*
 	 * 512 temporary boot-time mappings, used by early_ioremap(),
diff --git a/arch/x86/include/asm/kpti.h b/arch/x86/include/asm/kpti.h
index 0c10e86ae3f8..df52cec2a53b 100644
--- a/arch/x86/include/asm/kpti.h
+++ b/arch/x86/include/asm/kpti.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_X86_KPTI_H
 #define _ASM_X86_KPTI_H
+
+#include
+
 /*
  * Copyright(c) 2017 Intel Corporation. All rights reserved.
  *
@@ -34,10 +37,9 @@ extern int kpti_add_mapping(unsigned long addr, unsigned long size,
 			    unsigned long flags);
 
 /**
- * kpti_add_mapping_cpu_entry - map the cpu entry area
- * @cpu: the CPU for which the entry area is being mapped
+ * kpti_clone_cpu_entry_areas - clone cpu_entry_areas to the usermode tables
  */
-extern void kpti_add_mapping_cpu_entry(int cpu);
+extern void __init kpti_clone_cpu_entry_areas(void);
 
 /**
  * kpti_remove_mapping - remove a kernel mapping from the userpage tables
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 00697119f983..3dc814519c92 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -606,9 +606,6 @@ void __init setup_cpu_entry_area(int cpu)
 			     sizeof(struct debug_store) / PAGE_SIZE,
 			     PAGE_KERNEL);
 #endif
-	/* CPU 0's mapping is done in kpti_init() */
-	if (cpu)
-		kpti_add_mapping_cpu_entry(cpu);
 }
 
 /* Load the original GDT from the per-cpu structure */
diff --git a/arch/x86/mm/kpti.c b/arch/x86/mm/kpti.c
index 52fd833845ba..cd81a7432f49 100644
--- a/arch/x86/mm/kpti.c
+++ b/arch/x86/mm/kpti.c
@@ -240,7 +240,7 @@ static pmd_t *kpti_shadow_pagetable_walk_pmd(unsigned long address,
  * Returns a pointer to a PTE on success, or NULL on failure.
  */
 static pte_t *kpti_shadow_pagetable_walk(unsigned long address,
-					  unsigned long flags)
+					 unsigned long flags)
 {
 	pmd_t *pmd = kpti_shadow_pagetable_walk_pmd(address, flags);
 	pte_t *pte;
@@ -401,28 +401,55 @@ static void __init kpti_init_all_pgds(void)
 		WARN_ON(__ret);						\
 	} while (0)
 
-void kpti_add_mapping_cpu_entry(int cpu)
+void __init kpti_clone_cpu_entry_areas(void)
 {
-	kpti_add_user_map_early(get_cpu_gdt_ro(cpu), PAGE_SIZE,
-				__PAGE_KERNEL_RO);
-
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->tss,
-				sizeof(get_cpu_entry_area(cpu)->tss),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
-
-	/* entry stack */
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->SYSENTER_stack_page,
-				sizeof(get_cpu_entry_area(cpu)->SYSENTER_stack_page),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
-
-	/* Entry code, so needs to be EXEC */
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->entry_trampoline,
-				sizeof(get_cpu_entry_area(cpu)->entry_trampoline),
-				__PAGE_KERNEL_RX | _PAGE_GLOBAL);
-
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->exception_stacks,
-				sizeof(get_cpu_entry_area(cpu)->exception_stacks),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
+	int cpu;
+	unsigned long last_pmd_addr = 0;
+
+	/* The top of the cpu_entry_area block is meant to be PMD-aligned. */
+	WARN_ON((unsigned long)(get_cpu_entry_area(NR_CPUS-1) + 1) & ~PMD_MASK);
+
+	/*
+	 * Iterate over possible CPUs, not addresses: it's possible that
+	 * NR_CPUS is enough larger than the actual number of possible CPUs
+	 * that we have unpopulated PMDs in the cpu_entry_area range.
+	 */
+	for_each_possible_cpu(cpu) {
+		pgd_t *pgd;
+		p4d_t *p4d;
+		pud_t *pud;
+		pmd_t *pmd, *target_pmd;
+		unsigned long addr =
+			(unsigned long)get_cpu_entry_area(cpu) & PMD_MASK;
+
+		if (addr == last_pmd_addr)
+			continue;
+		last_pmd_addr = addr;
+
+		pgd = pgd_offset_k(addr);
+		if (WARN_ON(pgd_none(*pgd)))
+			return;
+		p4d = p4d_offset(pgd, addr);
+		if (WARN_ON(p4d_none(*p4d)))
+			return;
+		pud = pud_offset(p4d, addr);
+		if (WARN_ON(pud_none(*pud)))
+			return;
+		pmd = pmd_offset(pud, addr);
+		if (WARN_ON(pmd_none(*pmd)))
+			return;
+
+		target_pmd = kpti_shadow_pagetable_walk_pmd(addr, 0);
+		if (WARN_ON(!target_pmd))
+			return;
+
+		/*
+		 * Copy the PMD.  That is, the kernelmode and usermode tables
+		 * will share all last-level page tables containing
+		 * cpu_entry_area mappings.
+		 */
+		*target_pmd = *pmd;
+	}
 }
 
 /*
@@ -459,16 +486,7 @@ void __init kpti_init(void)
 			       sizeof(gate_desc) * NR_VECTORS,
 			       __PAGE_KERNEL_RO | _PAGE_GLOBAL);
 
-	/*
-	 * We delay CPU 0's mappings because these structures are created
-	 * before the page allocator is up.  Deferring it until here lets
-	 * us use the plain page allocator unconditionally in the page
-	 * table code above.
-	 *
-	 * This is OK because kpti_init() is called long before we ever run
-	 * userspace and need the KERNEL_PAGE_TABLE_ISOLATION mappings.
-	 */
-	kpti_add_mapping_cpu_entry(0);
+	kpti_clone_cpu_entry_areas();
 }
 
 int kpti_add_mapping(unsigned long addr, unsigned long size,
-- 
2.13.6
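
For readers skimming the diff: the heart of the new approach is the single
PMD-entry copy at the end of kpti_clone_cpu_entry_areas().  A minimal sketch
of that step for one CPU, with the WARN_ON() checks and the deduplication of
CPUs sharing a PMD elided (clone_one_cpu_entry_pmd() is an illustrative name,
not part of the patch):

	/*
	 * Illustrative sketch only: the essence of kpti_clone_cpu_entry_areas()
	 * above, for a single CPU, without error handling.
	 */
	static void __init clone_one_cpu_entry_pmd(int cpu)
	{
		/* PMD-aligned base of this CPU's cpu_entry_area */
		unsigned long addr = (unsigned long)get_cpu_entry_area(cpu) & PMD_MASK;

		/* Walk the kernelmode tables down to the PMD entry. */
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d = p4d_offset(pgd, addr);
		pud_t *pud = pud_offset(p4d, addr);
		pmd_t *pmd = pmd_offset(pud, addr);

		/* Corresponding PMD slot in the shadow (usermode) tables. */
		pmd_t *target_pmd = kpti_shadow_pagetable_walk_pmd(addr, 0);

		/*
		 * Aliasing the PMD entry makes both hierarchies share the same
		 * last-level page table page for this range.
		 */
		*target_pmd = *pmd;
	}

Because only the PMD entry is aliased, any later change to the cpu_entry_area
PTEs in the kernel tables is automatically visible through the usermode
tables, which is what lets the KPTI code stop tracking what cpu_entry_area
contains.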