Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932140AbeAAOkg (ORCPT + 1 other); Mon, 1 Jan 2018 09:40:36 -0500 Received: from mail.linuxfoundation.org ([140.211.169.12]:45132 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752996AbeAAOkd (ORCPT ); Mon, 1 Jan 2018 09:40:33 -0500 From: Greg Kroah-Hartman To: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman , stable@vger.kernel.org, Dave Hansen , Thomas Gleixner , Andy Lutomirski , Boris Ostrovsky , Borislav Petkov , Brian Gerst , David Laight , Denys Vlasenko , Eduardo Valentin , "H. Peter Anvin" , Josh Poimboeuf , Juergen Gross , Linus Torvalds , Peter Zijlstra , Will Deacon , aliguori@amazon.com, daniel.gruss@iaik.tugraz.at, hughd@google.com, keescook@google.com, Ingo Molnar Subject: [PATCH 4.14 009/146] x86/mm/pti: Add mapping helper functions Date: Mon, 1 Jan 2018 15:36:40 +0100 Message-Id: <20180101140125.134317544@linuxfoundation.org> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20180101140123.743014891@linuxfoundation.org> References: <20180101140123.743014891@linuxfoundation.org> User-Agent: quilt/0.65 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: 4.14-stable review patch. If anyone has any objections, please let me know. ------------------ From: Dave Hansen commit 61e9b3671007a5da8127955a1a3bda7e0d5f42e8 upstream. Add the pagetable helper functions do manage the separate user space page tables. [ tglx: Split out from the big combo kaiser patch. Folded Andys simplification and made it out of line as Boris suggested ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable.h | 6 ++ arch/x86/include/asm/pgtable_64.h | 92 ++++++++++++++++++++++++++++++++++++++ arch/x86/mm/pti.c | 41 ++++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -909,7 +909,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_ #endif } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. + */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); +} + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgd(pgdp, pgd); +} +#else +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif + static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); +#else *p4dp = p4d; +#endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + *pgdp = pti_set_user_pgd(pgdp, pgd); +#else *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -96,6 +96,47 @@ enable: setup_force_cpu_cap(X86_FEATURE_PTI); } +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Changes to the high (kernel) portion of the kernelmode page + * tables are not automatically propagated to the usermode tables. + * + * Users should keep in mind that, unlike the kernelmode tables, + * there is no vmalloc_fault equivalent for the usermode tables. + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. + */ + if (!pgdp_maps_userspace(pgdp)) + return pgd; + + /* + * The user page tables get the full PGD, accessible from + * userspace: + */ + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + + /* + * If this is normal user memory, make it NX in the kernel + * pagetables so that, if we somehow screw up and return to + * usermode with the kernel CR3 loaded, we'll get a page fault + * instead of allowing user code to execute with the wrong CR3. + * + * As exceptions, we don't set NX if: + * - _PAGE_USER is not set. This could be an executable + * EFI runtime mapping or something similar, and the kernel + * may execute from it + * - we don't have NX support + * - we're clearing the PGD (i.e. the new pgd is not present). + */ + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) + pgd.pgd |= _PAGE_NX; + + /* return the copy of the PGD we want the kernel to use: */ + return pgd; +} + /* * Initialize kernel page table isolation */