Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756239AbZICTxr (ORCPT ); Thu, 3 Sep 2009 15:53:47 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756196AbZICTxr (ORCPT ); Thu, 3 Sep 2009 15:53:47 -0400 Received: from gw1.cosmosbay.com ([212.99.114.194]:47766 "EHLO gw1.cosmosbay.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756187AbZICTxq (ORCPT ); Thu, 3 Sep 2009 15:53:46 -0400 Message-ID: <4AA01D4F.1080707@gmail.com> Date: Thu, 03 Sep 2009 21:47:27 +0200 From: Eric Dumazet User-Agent: Thunderbird 2.0.0.23 (Windows/20090812) MIME-Version: 1.0 To: Jeremy Fitzhardinge CC: "H. Peter Anvin" , the arch/x86 maintainers , Linux Kernel Mailing List Subject: Re: [PATCH] x86/i386: make sure stack-protector segment base is cache aligned References: <4AA01893.6000507@goop.org> In-Reply-To: <4AA01893.6000507@goop.org> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-1.6 (gw1.cosmosbay.com [0.0.0.0]); Thu, 03 Sep 2009 21:47:28 +0200 (CEST) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4667 Lines: 120 Jeremy Fitzhardinge a écrit : > The Intel Optimization Reference Guide says: > > In Intel Atom microarchitecture, the address generation unit > assumes that the segment base will be 0 by default. Non-zero > segment base will cause load and store operations to experience > a delay. > - If the segment base isn't aligned to a cache line > boundary, the max throughput of memory operations is > reduced to one [e]very 9 cycles. > [...] > Assembly/Compiler Coding Rule 15. (H impact, ML generality) > For Intel Atom processors, use segments with base set to 0 > whenever possible; avoid non-zero segment base address that is > not aligned to cache line boundary at all cost. 
> > We can't avoid having a non-zero base for the stack-protector segment, but > we can make it cache-aligned. > > Signed-off-by: Jeremy Fitzhardinge > > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h > index 0bfcf7e..f7d2c8f 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -403,7 +403,17 @@ extern unsigned long kernel_eflags; > extern asmlinkage void ignore_sysret(void); > #else /* X86_64 */ > #ifdef CONFIG_CC_STACKPROTECTOR > -DECLARE_PER_CPU(unsigned long, stack_canary); > +/* > + * Make sure stack canary segment base is cached-aligned: > + * "For Intel Atom processors, avoid non zero segment base address > + * that is not aligned to cache line boundary at all cost." > + * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) > + */ > +struct stack_canary { > + char __pad[20]; /* canary at %gs:20 */ > + unsigned long canary; > +}; > +DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; DECLARE_PER_CPU_SHARED_ALIGNED() Or else, we'll have many holes in percpu section, because of linker encapsulation > #endif > #endif /* X86_64 */ > > diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h > index c2d742c..decad97 100644 > --- a/arch/x86/include/asm/stackprotector.h > +++ b/arch/x86/include/asm/stackprotector.h > @@ -78,14 +78,14 @@ static __always_inline void boot_init_stack_canary(void) > #ifdef CONFIG_X86_64 > percpu_write(irq_stack_union.stack_canary, canary); > #else > - percpu_write(stack_canary, canary); > + percpu_write(stack_canary.canary, canary); > #endif > } > > static inline void setup_stack_canary_segment(int cpu) > { > #ifdef CONFIG_X86_32 > - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; > + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); > struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); > struct desc_struct desc; > > diff --git a/arch/x86/include/asm/system.h 
b/arch/x86/include/asm/system.h > index 643c59b..5bd119b 100644 > --- a/arch/x86/include/asm/system.h > +++ b/arch/x86/include/asm/system.h > @@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, > "movl %P[task_canary](%[next]), %%ebx\n\t" \ > "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" > #define __switch_canary_oparam \ > - , [stack_canary] "=m" (per_cpu_var(stack_canary)) > + , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) > #define __switch_canary_iparam \ > , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) > #else /* CC_STACKPROTECTOR */ > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c > index 5ce60a8..e338b5c 100644 > --- a/arch/x86/kernel/cpu/common.c > +++ b/arch/x86/kernel/cpu/common.c > @@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); > #else /* CONFIG_X86_64 */ > > #ifdef CONFIG_CC_STACKPROTECTOR > -DEFINE_PER_CPU(unsigned long, stack_canary); > +DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; same here, DEFINE_PER_CPU_SHARED_ALIGNED > #endif > > /* Make sure %fs and %gs are initialized properly in idle threads */ > diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S > index cc827ac..7ffec6b 100644 > --- a/arch/x86/kernel/head_32.S > +++ b/arch/x86/kernel/head_32.S > @@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP > jne 1f > movl $per_cpu__gdt_page,%eax > movl $per_cpu__stack_canary,%ecx > - subl $20, %ecx > movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) > shrl $16, %ecx > movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) > > > -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/