Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753271AbbHCKEF (ORCPT ); Mon, 3 Aug 2015 06:04:05 -0400 Received: from us01smtprelay-2.synopsys.com ([198.182.47.9]:47132 "EHLO smtprelay.synopsys.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1752513AbbHCKEA (ORCPT ); Mon, 3 Aug 2015 06:04:00 -0400 From: Vineet Gupta To: "Peter Zijlstra (Intel)" CC: lkml , , Vineet Gupta Subject: [PATCH 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff Date: Mon, 3 Aug 2015 15:33:07 +0530 Message-ID: <1438596188-10875-6-git-send-email-vgupta@synopsys.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1438596188-10875-1-git-send-email-vgupta@synopsys.com> References: <1438596188-10875-1-git-send-email-vgupta@synopsys.com> MIME-Version: 1.0 Content-Type: text/plain X-Originating-IP: [10.12.197.191] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12200 Lines: 459 This is to workaround the hardware livelock when multiple cores are trying to SCOND to same location (see code comments for details) Cc: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 + arch/arc/include/asm/atomic.h | 73 ++++++++++ arch/arc/include/asm/spinlock.h | 292 ++++++++++++++++++++++++++++++++++++++++ arch/arc/kernel/setup.c | 4 + 4 files changed, 374 insertions(+) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index a5fccdfbfc8f..bd4670d1b89b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -365,6 +365,11 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC +config ARC_STAR_9000923308 + bool "Workaround for llock/scond livelock" + default y + depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC + config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 3dd36c1efee1..f59d2cfc00fc 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -23,6 +23,8 @@ #define atomic_set(v, i) (((v)->counter) = (i)) +#ifndef CONFIG_ARC_STAR_9000923308 + #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -65,6 +67,74 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#else /* CONFIG_ARC_STAR_9000923308 */ + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay = 1, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " bz 4f \n" \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " asl %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + "4: ; --- success --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, delay = 1, tmp; \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned int val, delay = 1, tmp; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return val; \ +} + +#endif /* CONFIG_ARC_STAR_9000923308 */ + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -142,6 +212,9 @@ ATOMIC_OP(and, &=, and) #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS /** * __atomic_add_unless - add unless the number is a given value diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 9a7a7293f127..297c54484013 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -20,6 +20,11 @@ #ifdef CONFIG_ARC_HAS_LLSC +/* + * A normal LLOCK/SCOND based system, w/o need for livelock workaround + */ +#ifndef CONFIG_ARC_STAR_9000923308 + static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; @@ -246,6 +251,293 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) smp_mb(); } +#else /* CONFIG_ARC_STAR_9000923308 */ + +/* + * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping + * coherency transactions in the SCU. The exclusive line state keeps rotating + * among contenting cores leading to a never ending cycle. So break the cycle + * by deferring the retry of failed exclusive access (SCOND). The actual delay + * needed is function of number of contending cores as well as the unrelated + * coherency traffic from other cores. To keep the code simple, start off with + * small delay of 1 which would suffice most cases and in case of contention + * double the delay. Eventually the delay is sufficient such that the coherency + * pipeline is drained, thus a subsequent exclusive access would succeed. + */ + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " asl %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + " \n" \ + "4: ; --- done --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); + + smp_mb(); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " scond %[UNLOCKED], [%[rwlock]]\n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS + +#endif /* CONFIG_ARC_STAR_9000923308 */ + #else /* !CONFIG_ARC_HAS_LLSC */ static inline void arch_spin_lock(arch_spinlock_t *lock) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 18cc01591c96..1f3d9b3160fa 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -330,6 +330,10 @@ static void arc_chk_core_config(void) pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n"); else if (!cpu->extn.fpu_dp && fpu_enabled) panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n"); + + if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic && + !IS_ENABLED(CONFIG_ARC_STAR_9000923308)) + panic("llock/scond livelock workaround missing\n"); } /* -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/