Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S965481AbeAOPPE (ORCPT + 1 other); Mon, 15 Jan 2018 10:15:04 -0500 Received: from mail-ot0-f195.google.com ([74.125.82.195]:39045 "EHLO mail-ot0-f195.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S934742AbeAOPPC (ORCPT ); Mon, 15 Jan 2018 10:15:02 -0500 X-Google-Smtp-Source: ACJfBouuWCq+mXzIgeDW968KeupF6OlEg6I9rjZkJ4qWKMWWzRTCG/zDgdxpmFc9A4nyaXa2wFjuqA== Subject: Re: [PATCH] x86/pti: Fix !PCID and sanitize defines To: Thomas Gleixner , Andy Lutomirski Cc: Willy Tarreau , Peter Zijlstra , Borislav Petkov , X86 ML , Linux Kernel Mailing List , David Woodhouse , stable References: From: Laura Abbott Message-ID: <2e58f6f0-008f-397f-8eca-f40d3d3d2e4b@redhat.com> Date: Mon, 15 Jan 2018 07:14:59 -0800 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.5.0 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: On 01/13/2018 03:23 PM, Thomas Gleixner wrote: > The switch to the user space page tables in the low level ASM code sets > unconditionally bit 12 and bit 11 of CR3. Bit 12 is switching the base > address of the page directory to the user part, bit 11 is switching the > PCID to the PCID associated with the user page tables. > > This fails on a machine which lacks PCID support because bit 11 is set in > CR3. Bit 11 is reserved when PCID is inactive. > > While the Intel SDM claims that the reserved bits are ignored when PCID is > disabled, the AMD APM states that they should be cleared. > > This went unnoticed as the AMD APM was not checked when the code was > developed and reviewed and test systems with Intel CPUs never failed to > boot. The report is against a Centos 6 host where the guest fails to boot, > so it's not yet clear whether this is a virt issue or can happen on real > hardware too, but thats irrelevant as the AMD APM clearly ask for clearing > the reserved bits. > > Make sure that on non PCID machines bit 11 is not set by the page table > switching code. > > Andy suggested to rename the related bits and masks so they are clearly > describing what they should be used for, which is done as well for clarity. > > That split could have been done with alternatives but the macro hell is > horrible and ugly. This can be done on top if someone cares to remove the > extra orq. For now it's a straight forward fix. > Original reporter confirmed it fixes the problem. Thanks for the prompt response. Laura > Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") > Reported-by: Laura Abbott > Signed-off-by: Thomas Gleixner > Cc: Andy Lutomirski > Cc: Willy Tarreau > Cc: Peter Zijlstra > Cc: Borislav Petkov > Cc: stable@vger.kernel.org > > --- > arch/x86/entry/calling.h | 36 +++++++++++++++++---------------- > arch/x86/include/asm/processor-flags.h | 2 - > arch/x86/include/asm/tlbflush.h | 6 ++--- > 3 files changed, 23 insertions(+), 21 deletions(-) > > --- a/arch/x86/entry/calling.h > +++ b/arch/x86/entry/calling.h > @@ -198,8 +198,11 @@ For 32-bit we have the following convent > * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two > * halves: > */ > -#define PTI_SWITCH_PGTABLES_MASK (1< -#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1< +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT > +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) > +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT > +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) > +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) > > .macro SET_NOFLUSH_BIT reg:req > bts $X86_CR3_PCID_NOFLUSH_BIT, \reg > @@ -208,7 +211,7 @@ For 32-bit we have the following convent > .macro ADJUST_KERNEL_CR3 reg:req > ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID > /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ > - andq $(~PTI_SWITCH_MASK), \reg > + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg > .endm > > .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req > @@ -239,15 +242,19 @@ For 32-bit we have the following convent > /* Flush needed, clear the bit */ > btr \scratch_reg, THIS_CPU_user_pcid_flush_mask > movq \scratch_reg2, \scratch_reg > - jmp .Lwrcr3_\@ > + jmp .Lwrcr3_pcid_\@ > > .Lnoflush_\@: > movq \scratch_reg2, \scratch_reg > SET_NOFLUSH_BIT \scratch_reg > > +.Lwrcr3_pcid_\@: > + /* Flip the ASID to the user version */ > + orq $(PTI_USER_PCID_MASK), \scratch_reg > + > .Lwrcr3_\@: > - /* Flip the PGD and ASID to the user version */ > - orq $(PTI_SWITCH_MASK), \scratch_reg > + /* Flip the PGD to the user version */ > + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg > mov \scratch_reg, %cr3 > .Lend_\@: > .endm > @@ -263,17 +270,12 @@ For 32-bit we have the following convent > movq %cr3, \scratch_reg > movq \scratch_reg, \save_reg > /* > - * Is the "switch mask" all zero? That means that both of > - * these are zero: > - * > - * 1. The user/kernel PCID bit, and > - * 2. The user/kernel "bit" that points CR3 to the > - * bottom half of the 8k PGD > - * > - * That indicates a kernel CR3 value, not a user CR3. > + * Test the user pagetable bit. If set, then the user page tables > + * are active. If clear CR3 already has the kernel page table > + * active. > */ > - testq $(PTI_SWITCH_MASK), \scratch_reg > - jz .Ldone_\@ > + bt $PTI_USER_PGTABLE_BIT, \scratch_reg > + jnc .Ldone_\@ > > ADJUST_KERNEL_CR3 \scratch_reg > movq \scratch_reg, %cr3 > @@ -290,7 +292,7 @@ For 32-bit we have the following convent > * KERNEL pages can always resume with NOFLUSH as we do > * explicit flushes. > */ > - bt $X86_CR3_PTI_SWITCH_BIT, \save_reg > + bt $PTI_USER_PGTABLE_BIT, \save_reg > jnc .Lnoflush_\@ > > /* > --- a/arch/x86/include/asm/processor-flags.h > +++ b/arch/x86/include/asm/processor-flags.h > @@ -40,7 +40,7 @@ > #define CR3_NOFLUSH BIT_ULL(63) > > #ifdef CONFIG_PAGE_TABLE_ISOLATION > -# define X86_CR3_PTI_SWITCH_BIT 11 > +# define X86_CR3_PTI_PCID_USER_BIT 11 > #endif > > #else > --- a/arch/x86/include/asm/tlbflush.h > +++ b/arch/x86/include/asm/tlbflush.h > @@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid) > * Make sure that the dynamic ASID space does not confict with the > * bit we are using to switch between user and kernel ASIDs. > */ > - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT)); > + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); > > /* > * The ASID being passed in here should have respected the > * MAX_ASID_AVAILABLE and thus never have the switch bit set. > */ > - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); > + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); > #endif > /* > * The dynamically-assigned ASIDs that get passed in are small > @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid) > { > u16 ret = kern_pcid(asid); > #ifdef CONFIG_PAGE_TABLE_ISOLATION > - ret |= 1 << X86_CR3_PTI_SWITCH_BIT; > + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; > #endif > return ret; > } >