Received: by 2002:a25:1985:0:0:0:0:0 with SMTP id 127csp2344004ybz; Thu, 23 Apr 2020 16:24:39 -0700 (PDT) X-Google-Smtp-Source: APiQypJgzc3+sMns3XO/xkeQXoEg329dkvv5gUCqcYvXVNCwGsAzvxgQLIMz8KRxxOkFSJVHYwY/ X-Received: by 2002:a05:6402:c84:: with SMTP id cm4mr4662689edb.316.1587684279413; Thu, 23 Apr 2020 16:24:39 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1587684279; cv=none; d=google.com; s=arc-20160816; b=cgJ/xta8pKgX0Vj4YZXSZlU9ar+91gKUsbwXdQsnc3RUmLl+9V91R9nI4ZyEMxxyOO mwWV4tp14ZQTD+Ak9UZadfjPUVfn28ST1ZcaBjoEu11As4toDKsQoUJgeu3F2DjbNZm/ r+lvU2P3WtomVBDrbjYX9IV5dYzjaWuVi+VRSV6OosaxzaFSDfLk78tWqooGpEnIMoOg Cv+ZERqR1NUTN31/w8ma77Y3sHcNwdX8LFkU32Xq41ZryHh3p3u0Jii3eB8IxBeNRCci lyhJbSakKscFcAmqqcBglXhikVyTp4RY8tt+KAbRhMTc2/5tltjFNCiR6cBXcTLrwLHX FUKg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=list-id:precedence:sender:in-reply-to:subject:message-id:date:cc:to :from:mime-version:content-transfer-encoding:content-disposition; bh=fbtWfYK/xCkA0SYVvke1pNX6/tAc0rZ+xfS8WJbJVtE=; b=ozcrDI6k1c4xoDmVgbehXcZgOnNw++Ox6jE96AFcfk+iAzZ3g0j1U3Tc/e3g/czdkU wvOKjTCQIzcdfdRq93bapZoPYOXqDogCG5kLx4a9/RZefrK6tib1VrE6GaFtirwX8mnE AJjDwPB+uXAfb0bRq/8TugfnG2k7+RLRLE/IErGhovniavnyoTQ5nZ/rvqcVUXBpdZ/l N0HA/flwUfOHSSGAeDreadY1D2ITSz6vmClMJ8H1WJAnWeIvbcxYRKrQczoMHlFNS0xs eo9hAvE/KYaCnGpVvp2gaF5h+vb4knSwaFaFzYSX0W8sFJ4rm+WXdHoWRUTp5DHsqy22 r39w== ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Return-Path: Received: from vger.kernel.org (vger.kernel.org. 
[23.128.96.18]) by mx.google.com with ESMTP id o6si2082183ejb.97.2020.04.23.16.24.16; Thu, 23 Apr 2020 16:24:39 -0700 (PDT) Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) client-ip=23.128.96.18; Authentication-Results: mx.google.com; spf=pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.18 as permitted sender) smtp.mailfrom=linux-kernel-owner@vger.kernel.org Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729687AbgDWXU7 (ORCPT + 99 others); Thu, 23 Apr 2020 19:20:59 -0400 Received: from shadbolt.e.decadent.org.uk ([88.96.1.126]:48550 "EHLO shadbolt.e.decadent.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728201AbgDWXGd (ORCPT ); Thu, 23 Apr 2020 19:06:33 -0400 Received: from [192.168.4.242] (helo=deadeye) by shadbolt.decadent.org.uk with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.89) (envelope-from ) id 1jRkvM-0004bu-1I; Fri, 24 Apr 2020 00:06:28 +0100 Received: from ben by deadeye with local (Exim 4.93) (envelope-from ) id 1jRkvK-00E6iw-8R; Fri, 24 Apr 2020 00:06:26 +0100 Content-Type: text/plain; charset="UTF-8" Content-Disposition: inline Content-Transfer-Encoding: 8bit MIME-Version: 1.0 From: Ben Hutchings To: linux-kernel@vger.kernel.org, stable@vger.kernel.org CC: akpm@linux-foundation.org, Denis Kirjanov , "Greg Kroah-Hartman" , "Steven Sistare" , "Pavel Tatashin" Date: Fri, 24 Apr 2020 00:04:47 +0100 Message-ID: X-Mailer: LinuxStableQueue (scripts by bwh) X-Patchwork-Hint: ignore Subject: [PATCH 3.16 060/245] x86/pti/efi: broken conversion from efi to kernel page table In-Reply-To: X-SA-Exim-Connect-IP: 192.168.4.242 X-SA-Exim-Mail-From: ben@decadent.org.uk X-SA-Exim-Scanned: No (on shadbolt.decadent.org.uk); SAEximRunCond expanded to false Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org 3.16.83-rc1 review patch. 
If anyone has any objections, please let me know. ------------------ From: Pavel Tatashin In entry_64.S we have code like this: /* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax movq %rax, %cr3 2: /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ call do_nmi With this instruction: andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax we unconditionally switch from whatever our CR3 was to the kernel page table. But in arch/x86/platform/efi/efi_64.c we temporarily set a different page table that does not have the kernel page table at a 0x1000 offset from it. Look in efi_thunk() and efi_thunk_set_virtual_address_map(). So, while CR3 points to the other page table, we get an NMI interrupt, and clear 0x1000 from CR3, resulting in a bogus CR3 if the 0x1000 bit was set. The EFI page table comes from realmode/rm/trampoline_64.S: arch/x86/realmode/rm/trampoline_64.S 141 .bss 142 .balign PAGE_SIZE 143 GLOBAL(trampoline_pgd) .space PAGE_SIZE Notice: alignment is PAGE_SIZE, so after applying the KAISER_SHADOW_PGD_OFFSET mask, which is equal to PAGE_SIZE, we can get a different page table. But even if we fix the alignment here, the trampoline binary is later copied into dynamically allocated memory in reserve_real_mode(), so we need to fix that place as well. 
Fixes: f9a1666f97b3 ("KAISER: Kernel Address Isolation") Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Signed-off-by: Greg Kroah-Hartman [bwh: Adjust the Fixes field for 3.16] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/kaiser.h | 10 ++++++++++ arch/x86/realmode/init.c | 4 +++- arch/x86/realmode/rm/trampoline_64.S | 3 ++- 3 files changed, 15 insertions(+), 2 deletions(-) --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -19,6 +19,16 @@ #define KAISER_SHADOW_PGD_OFFSET 0x1000 +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * A page table address must have this alignment to stay the same when + * KAISER_SHADOW_PGD_OFFSET mask is applied + */ +#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1) +#else +#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE +#endif + #ifdef __ASSEMBLY__ #ifdef CONFIG_PAGE_TABLE_ISOLATION --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -4,6 +4,7 @@ #include #include #include +#include struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; @@ -15,7 +16,8 @@ void __init reserve_real_mode(void) size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); /* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + mem = memblock_find_in_range(0, 1 << 20, size, + KAISER_KERNEL_PGD_ALIGNMENT); if (!mem) panic("Cannot allocate trampoline\n"); --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -30,6 +30,7 @@ #include #include #include +#include #include "realmode.h" .text @@ -139,7 +140,7 @@ tr_gdt: tr_gdt_end: .bss - .balign PAGE_SIZE + .balign KAISER_KERNEL_PGD_ALIGNMENT GLOBAL(trampoline_pgd) .space PAGE_SIZE .balign 8