From: Denys Vlasenko
To: Ingo Molnar
Cc: Denys Vlasenko, Steven Rostedt, Borislav Petkov, "H. Peter Anvin",
    Andy Lutomirski, Frederic Weisbecker, Alexei Starovoitov, Will Drewry,
    Kees Cook, x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] x86: force inlining of atomic ops
Date: Fri, 8 May 2015 12:26:02 +0200
Message-Id: <1431080762-17797-1-git-send-email-dvlasenk@redhat.com>

With both gcc 4.7.2 and 4.9.2, gcc sometimes mysteriously doesn't
inline very small functions we expect to be inlined:

$ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn
    473 000000000000000b t spin_unlock_irqrestore
    449 000000000000005f t rcu_read_unlock
    355 0000000000000009 t atomic_inc                <== THIS
    353 000000000000006e t rcu_read_lock
    350 0000000000000075 t rcu_read_lock_sched_held
    291 000000000000000b t spin_unlock
    266 0000000000000019 t arch_local_irq_restore
    215 000000000000000b t spin_lock
    180 0000000000000011 t kzalloc
    165 0000000000000012 t list_add_tail
    161 0000000000000019 t arch_local_save_flags
    153 0000000000000016 t test_and_set_bit
    134 000000000000000b t spin_unlock_irq
    134 0000000000000009 t atomic_dec                <== THIS
    130 000000000000000b t spin_unlock_bh
    122 0000000000000010 t brelse
    120 0000000000000016 t test_and_clear_bit
    120 000000000000000b t spin_lock_irq
    119 000000000000001e t get_dma_ops
    117 0000000000000053 t cpumask_next
    116 0000000000000036 t kref_get
    114 000000000000001a t schedule_work
    106 000000000000000b t spin_lock_bh
    103 0000000000000019 t arch_local_irq_disable
    ...

Note the sizes of the marked functions. They are merely 9 bytes long!
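For scale, 9 bytes is just enough for a frame-pointer prologue and
epilogue wrapped around the single locked instruction. A plausible,
hand-reconstructed body of one such out-of-line atomic_inc copy
(assuming a frame-pointer build; not disassembled from this vmlinux)
would be:

    55          push   %rbp
    48 89 e5    mov    %rsp,%rbp
    f0 ff 07    lock incl (%rdi)
    5d          pop    %rbp
    c3          ret

So every call site pays a call plus this 9-byte wrapper for what is
really a single 3-byte instruction, on top of the hundreds of
duplicated copies counted above.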
Selecting functions with 'atomic' in their names:

    355 0000000000000009 t atomic_inc
    134 0000000000000009 t atomic_dec
     98 0000000000000014 t atomic_dec_and_test
     31 000000000000000e t atomic_add_return
     27 000000000000000a t atomic64_inc
     26 000000000000002f t kmap_atomic
     24 0000000000000009 t atomic_add
     12 0000000000000009 t atomic_sub
     10 0000000000000021 t __atomic_add_unless
     10 000000000000000a t atomic64_add
      5 000000000000001f t __atomic_add_unless.constprop.7
      5 000000000000000a t atomic64_dec
      4 000000000000001f t __atomic_add_unless.constprop.18
      4 000000000000001f t __atomic_add_unless.constprop.12
      4 000000000000001f t __atomic_add_unless.constprop.10
      3 000000000000001f t __atomic_add_unless.constprop.13
      3 0000000000000011 t atomic64_add_return
      2 000000000000001f t __atomic_add_unless.constprop.9
      2 000000000000001f t __atomic_add_unless.constprop.8
      2 000000000000001f t __atomic_add_unless.constprop.6
      2 000000000000001f t __atomic_add_unless.constprop.5
      2 000000000000001f t __atomic_add_unless.constprop.3
      2 000000000000001f t __atomic_add_unless.constprop.22
      2 000000000000001f t __atomic_add_unless.constprop.14
      2 000000000000001f t __atomic_add_unless.constprop.11
      2 000000000000001e t atomic_dec_if_positive
      2 0000000000000014 t atomic_inc_and_test
      2 0000000000000011 t atomic_add_return.constprop.4
      2 0000000000000011 t atomic_add_return.constprop.17
      2 0000000000000011 t atomic_add_return.constprop.16
      2 000000000000000d t atomic_inc.constprop.4
      2 000000000000000c t atomic_cmpxchg

This patch fixes this for x86 atomic ops via
s/inline/__always_inline/. This decreases the allyesconfig kernel by
about 25k:

    text     data      bss       dec     hex filename
82399481 22255416 20627456 125282353 777a831 vmlinux.before
82375570 22255544 20627456 125258570 7774b4a vmlinux
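For reference, __always_inline on gcc expands to the always_inline
function attribute (see include/linux/compiler-gcc.h), which turns the
inlining hint into a requirement. A minimal standalone sketch of the
difference, with hypothetical function names, not part of this patch:

/* Plain "inline" is only a hint: gcc may still emit an
 * out-of-line copy per translation unit, which is what the
 * duplicated-symbol counts above are showing. */
static inline void hinted_inc(int *p)
{
	asm volatile("lock; incl %0" : "+m" (*p));
}

/* always_inline forces the body into every caller, so no
 * out-of-line copy should be emitted for a static function
 * whose address is never taken. */
static inline __attribute__((always_inline)) void forced_inc(int *p)
{
	asm volatile("lock; incl %0" : "+m" (*p));
}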
Signed-off-by: Denys Vlasenko
CC: Steven Rostedt
CC: Ingo Molnar
CC: Borislav Petkov
CC: "H. Peter Anvin"
CC: Andy Lutomirski
CC: Frederic Weisbecker
CC: Alexei Starovoitov
CC: Will Drewry
CC: Kees Cook
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/atomic.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e5cd12..e9ddabc 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -22,7 +22,7 @@
  *
  * Atomically reads the value of @v.
  */
-static inline int atomic_read(const atomic_t *v)
+static __always_inline int atomic_read(const atomic_t *v)
 {
 	return ACCESS_ONCE((v)->counter);
 }
@@ -34,7 +34,7 @@ static inline int atomic_read(const atomic_t *v)
  *
  * Atomically sets the value of @v to @i.
  */
-static inline void atomic_set(atomic_t *v, int i)
+static __always_inline void atomic_set(atomic_t *v, int i)
 {
 	v->counter = i;
 }
@@ -46,7 +46,7 @@ static inline void atomic_set(atomic_t *v, int i)
  *
  * Atomically adds @i to @v.
  */
-static inline void atomic_add(int i, atomic_t *v)
+static __always_inline void atomic_add(int i, atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "addl %1,%0"
 		     : "+m" (v->counter)
@@ -60,7 +60,7 @@ static inline void atomic_add(int i, atomic_t *v)
  *
  * Atomically subtracts @i from @v.
  */
-static inline void atomic_sub(int i, atomic_t *v)
+static __always_inline void atomic_sub(int i, atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "subl %1,%0"
 		     : "+m" (v->counter)
@@ -76,7 +76,7 @@ static inline void atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline int atomic_sub_and_test(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
 }
@@ -87,7 +87,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
  *
  * Atomically increments @v by 1.
  */
-static inline void atomic_inc(atomic_t *v)
+static __always_inline void atomic_inc(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "incl %0"
 		     : "+m" (v->counter));
@@ -99,7 +99,7 @@ static inline void atomic_inc(atomic_t *v)
  *
  * Atomically decrements @v by 1.
  */
-static inline void atomic_dec(atomic_t *v)
+static __always_inline void atomic_dec(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "decl %0"
 		     : "+m" (v->counter));
@@ -113,7 +113,7 @@ static inline void atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline int atomic_dec_and_test(atomic_t *v)
+static __always_inline int atomic_dec_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
@@ -126,7 +126,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline int atomic_inc_and_test(atomic_t *v)
+static __always_inline int atomic_inc_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
@@ -140,7 +140,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline int atomic_add_negative(int i, atomic_t *v)
+static __always_inline int atomic_add_negative(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
 }
@@ -152,7 +152,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
  *
  * Atomically adds @i to @v and returns @i + @v
  */
-static inline int atomic_add_return(int i, atomic_t *v)
+static __always_inline int atomic_add_return(int i, atomic_t *v)
 {
 	return i + xadd(&v->counter, i);
 }
@@ -164,7 +164,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
  *
  * Atomically subtracts @i from @v and returns @v - @i
  */
-static inline int atomic_sub_return(int i, atomic_t *v)
+static __always_inline int atomic_sub_return(int i, atomic_t *v)
 {
 	return atomic_add_return(-i, v);
 }
@@ -172,7 +172,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	return cmpxchg(&v->counter, old, new);
 }
@@ -213,7 +213,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
  * Atomically adds 1 to @v
  * Returns the new value of @u
  */
-static inline short int atomic_inc_short(short int *v)
+static __always_inline short int atomic_inc_short(short int *v)
 {
 	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
 	return *v;
-- 
1.8.1.4