Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753790AbeAMUwY (ORCPT + 1 other); Sat, 13 Jan 2018 15:52:24 -0500 Received: from mail.kernel.org ([198.145.29.99]:51392 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751372AbeAMUwW (ORCPT ); Sat, 13 Jan 2018 15:52:22 -0500 DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 31D8B2177A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kernel.org Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=luto@kernel.org X-Google-Smtp-Source: ACJfBovDUVowQbRKT/1W2KvqepGD3YPlxNe29p8l2nggd/beDfthaAg8VOUvAf2NE6poql4SDWjPXZJTFFTAIzJBsNQ= MIME-Version: 1.0 In-Reply-To: References: <9eb15489-da09-7a4c-0700-7b6eb99e6f7b@redhat.com> <20180113063320.GC16821@1wt.eu> From: Andy Lutomirski Date: Sat, 13 Jan 2018 12:52:01 -0800 X-Gmail-Original-Message-ID: Message-ID: Subject: Re: Yet another KPTI regression with 4.14.x series in a VM To: Thomas Gleixner Cc: Andy Lutomirski , Willy Tarreau , Peter Zijlstra , Borislav Petkov , Laura Abbott , X86 ML , Linux Kernel Mailing List , stable Content-Type: text/plain; charset="UTF-8" Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: On Sat, Jan 13, 2018 at 12:45 PM, Thomas Gleixner wrote: > On Sat, 13 Jan 2018, Andy Lutomirski wrote: >> Trying to inventory this stuff scattered all over the place: >> >> #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT >> #define PTI_SWITCH_PGTABLES_MASK (1<> # define X86_CR3_PTI_SWITCH_BIT 11 >> #define PTI_SWITCH_MASK >> (PTI_SWITCH_PGTABLES_MASK|(1<> >> Blech. I wouldn't be terribly surprised if I missed a few as well. How about: >> >> PTI_USER_PGTABLE_BIT = PAGE_SHIFT >> PTI_USER_PGTABLE_MASK = 1 << PTI_USER_PGTABLE_BIT >> PTI_USER_PCID_BIT = 11 >> PTI_USER_PCID_MASK = 1 << PTI_USER_PCID_BIT >> PTI_USER_PGTABLE_AND_PCID_MASK = PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK >> >> This naming would make the apparently buggy code look fishy, as it >> should. I will give this a shot some time soon if no one beats me to >> it. > > Well, the thing we tripped over is that we trusted the SDM that bit 11 is > ignored. Seems its not and the AMD APM says that reserved bit should be > cleared. Next time I surely stare into both.... > > So something like the below should make it clear. I've not done the > alternatives thing yet... > Looks generally sane to me. > > 8<------------------- > --- a/arch/x86/entry/calling.h > +++ b/arch/x86/entry/calling.h > @@ -198,8 +198,11 @@ For 32-bit we have the following convent > * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two > * halves: > */ > -#define PTI_SWITCH_PGTABLES_MASK (1< -#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1< +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT > +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) > +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT > +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) > +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) > > .macro SET_NOFLUSH_BIT reg:req > bts $X86_CR3_PCID_NOFLUSH_BIT, \reg > @@ -208,7 +211,7 @@ For 32-bit we have the following convent > .macro ADJUST_KERNEL_CR3 reg:req > ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID > /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ > - andq $(~PTI_SWITCH_MASK), \reg > + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg > .endm > > .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req > @@ -239,15 +242,18 @@ For 32-bit we have the following convent > /* Flush needed, clear the bit */ > btr \scratch_reg, THIS_CPU_user_pcid_flush_mask > movq \scratch_reg2, \scratch_reg > - jmp .Lwrcr3_\@ > + jmp .Lwrcr3_pcid_\@ > > .Lnoflush_\@: > movq \scratch_reg2, \scratch_reg > SET_NOFLUSH_BIT \scratch_reg > > +.Lwcr3_pcid_\@: > + orq $(PTI_USER_PCID_MASK), \scratch_reg > + > .Lwrcr3_\@: > /* Flip the PGD and ASID to the user version */ > - orq $(PTI_SWITCH_MASK), \scratch_reg > + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg > mov \scratch_reg, %cr3 > .Lend_\@: > .endm > @@ -272,7 +278,7 @@ For 32-bit we have the following convent > * > * That indicates a kernel CR3 value, not a user CR3. > */ > - testq $(PTI_SWITCH_MASK), \scratch_reg > + testq $(PTI_USER_PGTABLE_MASK), \scratch_reg > jz .Ldone_\@ > > ADJUST_KERNEL_CR3 \scratch_reg > @@ -290,7 +296,7 @@ For 32-bit we have the following convent > * KERNEL pages can always resume with NOFLUSH as we do > * explicit flushes. > */ > - bt $X86_CR3_PTI_SWITCH_BIT, \save_reg > + bt $PTI_USER_PGTABLE_BIT, \save_reg > jnc .Lnoflush_\@ > > /* > --- a/arch/x86/include/asm/processor-flags.h > +++ b/arch/x86/include/asm/processor-flags.h > @@ -40,7 +40,7 @@ > #define CR3_NOFLUSH BIT_ULL(63) > > #ifdef CONFIG_PAGE_TABLE_ISOLATION > -# define X86_CR3_PTI_SWITCH_BIT 11 > +# define X86_CR3_PTI_PCID_USER_BIT 11 > #endif > > #else > --- a/arch/x86/include/asm/tlbflush.h > +++ b/arch/x86/include/asm/tlbflush.h > @@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid) > * Make sure that the dynamic ASID space does not confict with the > * bit we are using to switch between user and kernel ASIDs. > */ > - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT)); > + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); > > /* > * The ASID being passed in here should have respected the > * MAX_ASID_AVAILABLE and thus never have the switch bit set. > */ > - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); > + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); > #endif > /* > * The dynamically-assigned ASIDs that get passed in are small > @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid) > { > u16 ret = kern_pcid(asid); > #ifdef CONFIG_PAGE_TABLE_ISOLATION > - ret |= 1 << X86_CR3_PTI_SWITCH_BIT; > + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; > #endif > return ret; > } > > >