Date:	Sat, 25 Mar 2017 08:51:56 +0100
From:	Peter Zijlstra
To:	Linus Torvalds
Cc:	Andy Lutomirski, Dmitry Vyukov, Andrew Morton, Borislav Petkov,
	Brian Gerst, Denys Vlasenko, "H. Peter Anvin", Josh Poimboeuf,
	Paul McKenney, Thomas Gleixner, Ingo Molnar, LKML
Subject: Re: locking/atomic: Introduce atomic_try_cmpxchg()
Message-ID: <20170325075156.GF32474@worktop>
In-Reply-To: <20170324212329.GC5680@worktop>

On Fri, Mar 24, 2017 at 10:23:29PM +0100, Peter Zijlstra wrote:
> I'll try and redo the patches that landed in tip and see what it does
> for total vmlinux size somewhere tomorrow.

    text     data    bss      dec      hex  filename
10726413  4540256  843776  16110445  f5d36d  defconfig-build/vmlinux.pre
10730509  4540256  843776  16114541  f5e36d  defconfig-build/vmlinux.post

:-(

---
 arch/x86/include/asm/atomic.h      | 18 ++++++--
 arch/x86/include/asm/atomic64_64.h | 24 ++++++++++--------
 arch/x86/include/asm/cmpxchg.h     | 50 ++++++++++++++++++++----------------
 include/linux/atomic.h             | 52 +++++++++++++++++++++++++-------------
 lib/refcount.c                     | 35 ++++++++++++++-----------
 5 files changed, 104 insertions(+), 75 deletions(-)
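For reference, the calling-convention change at a usage site, as a sketch
with a made-up inc_if_positive() helper (illustrative only; the in-tree
conversions below follow the same pattern):

/* Old form: atomic_try_cmpxchg() returns bool and, on failure, writes
 * the value it observed back through the 'old' pointer.
 */
static inline bool inc_if_positive_old(atomic_t *v)
{
	int val = atomic_read(v);

	do {
		if (val <= 0)
			return false;
	} while (!atomic_try_cmpxchg(v, &val, val + 1));

	return true;
}

/* New form: atomic_try_cmpxchg() jumps to the given label on success
 * and otherwise evaluates to the value it observed, which becomes the
 * next expected value.
 */
static inline bool inc_if_positive_new(atomic_t *v)
{
	int val = atomic_read(v);

	for (;;) {
		if (val <= 0)
			return false;
		val = atomic_try_cmpxchg(v, val, val + 1, success);
	}
success:
	return true;
}
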
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798..c80d4914 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,11 +186,8 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
-	return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic_try_cmpxchg(_ptr, _old, _new, _label) \
+	try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline int atomic_xchg(atomic_t *v, int new)
 {
@@ -210,8 +207,9 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 {									\
 	int val = atomic_read(v);					\
-	do {								\
-	} while (!atomic_try_cmpxchg(v, &val, val c_op i));		\
+	for (;;)							\
+		val = atomic_try_cmpxchg(v, val, val c_op i, success);	\
+success:								\
 	return val;							\
 }
 
@@ -239,10 +237,12 @@ ATOMIC_OPS(xor, ^)
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c = atomic_read(v);
-	do {
+	for (;;) {
 		if (unlikely(c == u))
 			break;
-	} while (!atomic_try_cmpxchg(v, &c, c + a));
+		c = atomic_try_cmpxchg(v, c, c + a, success);
+	}
+success:
 	return c;
 }
 
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a43..489c3e2 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,11 +176,8 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
 	return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
-{
-	return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic64_try_cmpxchg(_ptr, _old, _new, _label) \
+	try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
@@ -199,10 +196,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
 	long c = atomic64_read(v);
-	do {
+	for (;;) {
 		if (unlikely(c == u))
 			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+		c = atomic64_try_cmpxchg(v, c, c + a, success);
+	}
+success:
 	return true;
 }
 
@@ -218,11 +217,13 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long dec, c = atomic64_read(v);
-	do {
+	for (;;) {
 		dec = c - 1;
 		if (unlikely(dec < 0))
 			break;
-	} while (!atomic64_try_cmpxchg(v, &c, dec));
+		c = atomic64_try_cmpxchg(v, c, dec, success);
+	}
+success:
 	return dec;
 }
 
@@ -239,8 +240,9 @@ static inline void atomic64_##op(long i, atomic64_t *v)		\
 static inline long atomic64_fetch_##op(long i, atomic64_t *v)		\
 {									\
 	long val = atomic64_read(v);					\
-	do {								\
-	} while (!atomic64_try_cmpxchg(v, &val, val c_op i));		\
+	for (;;)							\
+		val = atomic64_try_cmpxchg(v, val, val c_op i, success);\
+success:								\
 	return val;							\
 }
 
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index fb961db..e6b8a8f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -154,22 +154,24 @@ extern void __add_wrong_size(void)
 	__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
 
-#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
+#define __raw_try_cmpxchg(_ptr, _old, _new, success_label, size, lock)	\
 ({									\
-	bool success;							\
-	__typeof__(_ptr) _old = (_pold);				\
-	__typeof__(*(_ptr)) __old = *_old;				\
+	__typeof__(*(_ptr)) __old = (_old);				\
 	__typeof__(*(_ptr)) __new = (_new);				\
+	__typeof__(*(_ptr)) __ret;					\
+	bool __success;							\
+									\
 	switch (size) {							\
 	case __X86_CASE_B:						\
 	{								\
 		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
 		asm volatile(lock "cmpxchgb %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "q" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "q" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -178,10 +180,11 @@ extern void __add_wrong_size(void)
 		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
 		asm volatile(lock "cmpxchgw %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -190,10 +193,11 @@ extern void __add_wrong_size(void)
 		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
 		asm volatile(lock "cmpxchgl %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
@@ -202,25 +206,27 @@ extern void __add_wrong_size(void)
 		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
 		asm volatile(lock "cmpxchgq %[new], %[ptr]"		\
 			     CC_SET(z)					\
-			     : CC_OUT(z) (success),			\
+			     : CC_OUT(z) (__success),			\
 			       [ptr] "+m" (*__ptr),			\
-			       [old] "+a" (__old)			\
-			     : [new] "r" (__new)			\
+			       [old] "=a" (__ret)			\
+			     : [new] "r" (__new),			\
+			       "2" (__old)				\
 			     : "memory");				\
 		break;							\
 	}								\
 	default:							\
 		__cmpxchg_wrong_size();					\
 	}								\
-	*_old = __old;							\
-	success;							\
+									\
+	if (likely(__success)) goto success_label;			\
+	__ret;								\
 })
 
-#define __try_cmpxchg(ptr, pold, new, size)				\
-	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __try_cmpxchg(ptr, pold, new, success_label, size)		\
+	__raw_try_cmpxchg((ptr), (pold), (new), success_label, (size), LOCK_PREFIX)
 
-#define try_cmpxchg(ptr, pold, new)					\
-	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define try_cmpxchg(ptr, pold, new, success_label)			\
+	__try_cmpxchg((ptr), (pold), (new), success_label, sizeof(*(ptr)))
 
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
"r" (__new) \ + [old] "=a" (__ret) \ + : [new] "r" (__new), \ + "2" (__old) \ : "memory"); \ break; \ } \ default: \ __cmpxchg_wrong_size(); \ } \ - *_old = __old; \ - success; \ + \ + if (likely(__success)) goto success_label; \ + __ret; \ }) -#define __try_cmpxchg(ptr, pold, new, size) \ - __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) +#define __try_cmpxchg(ptr, pold, new, success_label, size) \ + __raw_try_cmpxchg((ptr), (pold), (new), success_label, (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ - __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) +#define try_cmpxchg(ptr, pold, new, success_label) \ + __try_cmpxchg((ptr), (pold), (new), success_label, sizeof(*(ptr))) /* * xadd() adds "inc" to "*ptr" and atomically returns the previous diff --git a/include/linux/atomic.h b/include/linux/atomic.h index aae5953..13a6eac 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -425,18 +425,26 @@ #ifndef atomic_try_cmpxchg -#define __atomic_try_cmpxchg(type, _p, _po, _n) \ +#define __atomic_try_cmpxchg(type, _p, _o, _n, _label) \ ({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_p)) __r; \ + typeof(*(_p)) __o = (_o); \ + __r = atomic_cmpxchg##type((_p), __o, (_n)); \ + if (__r == __o) goto _label; \ + __r; \ }) -#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) -#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) +#define atomic_try_cmpxchg(_p, _o, _n, _l) \ + __atomic_try_cmpxchg(, _p, _o, _n, _l) + +#define atomic_try_cmpxchg_relaxed(_p, _o, _n, _l) \ + __atomic_try_cmpxchg(_relaxed, _p, _o, _n, _l) + +#define atomic_try_cmpxchg_acquire(_p, _o, _n, _l) \ + __atomic_try_cmpxchg(_acquire, _p, _o, _n, _l) + +#define atomic_try_cmpxchg_release(_p, _o, _n, _l) \ + __atomic_try_cmpxchg(_release, _p, _o, _n, _l) #else /* atomic_try_cmpxchg */ #define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg @@ -1019,18 +1027,26 @@ static inline int atomic_dec_if_positive(atomic_t *v) #ifndef atomic64_try_cmpxchg -#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ +#define __atomic64_try_cmpxchg(type, _p, _o, _n, _label) \ ({ \ - typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_p)) __r; \ + typeof(*(_p)) __o = (_o); \ + __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ + if (__r == __o) goto _label; \ + __r; \ }) -#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) -#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) -#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) +#define atomic64_try_cmpxchg(_p, _o, _n, _l) \ + __atomic64_try_cmpxchg(, _p, _o, _n, _l) + +#define atomic64_try_cmpxchg_relaxed(_p, _o, _n, _l) \ + __atomic64_try_cmpxchg(_relaxed, _p, _o, _n, _l) + +#define atomic64_try_cmpxchg_acquire(_p, _o, _n, _l) \ + __atomic64_try_cmpxchg(_acquire, _p, _o, _n, _l) + +#define atomic64_try_cmpxchg_release(_p, _o, _n, _l) \ + __atomic64_try_cmpxchg(_release, _p, _o, _n, _l) #else /* atomic64_try_cmpxchg */ #define 
 #define atomic64_try_cmpxchg_relaxed	atomic64_try_cmpxchg
 
diff --git a/lib/refcount.c b/lib/refcount.c
index f42124c..18b8926 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -59,7 +59,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (!val)
 			return false;
 
@@ -70,8 +70,9 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 		if (new < val)
 			new = UINT_MAX;
 
-	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+	}
+success:
 	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
@@ -116,7 +117,7 @@ bool refcount_inc_not_zero(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		new = val + 1;
 
 		if (!val)
@@ -125,8 +126,9 @@ bool refcount_inc_not_zero(refcount_t *r)
 		if (unlikely(!new))
 			return true;
 
-	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+	}
+success:
 	WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
 	return true;
@@ -175,7 +177,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (unlikely(val == UINT_MAX))
 			return false;
 
@@ -185,8 +187,9 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 			return false;
 		}
 
-	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+	}
+success:
 	return !new;
 }
 EXPORT_SYMBOL_GPL(refcount_sub_and_test);
@@ -244,9 +247,10 @@ EXPORT_SYMBOL_GPL(refcount_dec);
  */
 bool refcount_dec_if_one(refcount_t *r)
 {
-	int val = 1;
-
-	return atomic_try_cmpxchg_release(&r->refs, &val, 0);
+	atomic_try_cmpxchg_release(&r->refs, 1, 0, success);
+	return false;
+success:
+	return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_if_one);
@@ -265,7 +269,7 @@ bool refcount_dec_not_one(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
-	do {
+	for (;;) {
 		if (unlikely(val == UINT_MAX))
 			return true;
 
@@ -278,8 +282,9 @@ bool refcount_dec_not_one(refcount_t *r)
 			return true;
 		}
 
-	} while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+		val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+	}
+success:
 	return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_not_one);
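
For completeness, not part of the patch: the macros above lean on the GNU C
extension that permits jumping out of a statement expression; when the goto
fires, the assignment wrapped around the expression is simply abandoned. A
minimal userspace sketch of just that mechanism, with made-up names:

#include <stdio.h>

/* check_or_jump(): evaluates to 'x' when it differs from 'expect',
 * and jumps to 'label' when they match -- the same shape as the new
 * try_cmpxchg(). GNU C only.
 */
#define check_or_jump(x, expect, label)		\
({						\
	int __v = (x);				\
	if (__v == (expect))			\
		goto label;			\
	__v;					\
})

int main(void)
{
	int cur = 0;

	for (;;) {
		/* when the goto fires, this assignment never completes */
		cur = check_or_jump(cur + 1, 5, done);
	}
done:
	printf("jumped out with cur == %d\n", cur); /* prints 4 */
	return 0;
}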