Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752474AbbGOCN7 (ORCPT ); Tue, 14 Jul 2015 22:13:59 -0400 Received: from g4t3426.houston.hp.com ([15.201.208.54]:37487 "EHLO g4t3426.houston.hp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752193AbbGOCN5 (ORCPT ); Tue, 14 Jul 2015 22:13:57 -0400 From: Waiman Long To: Peter Zijlstra , Ingo Molnar , Thomas Gleixner , "H. Peter Anvin" Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Scott J Norton , Douglas Hatch , Davidlohr Bueso , Waiman Long Subject: [PATCH v2 2/6] locking/pvqspinlock: Add pending bit support Date: Tue, 14 Jul 2015 22:13:33 -0400 Message-Id: <1436926417-20256-3-git-send-email-Waiman.Long@hp.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1436926417-20256-1-git-send-email-Waiman.Long@hp.com> References: <1436926417-20256-1-git-send-email-Waiman.Long@hp.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5384 Lines: 191 Like the native qspinlock, using the pending bit when it is lightly loaded to acquire the lock is faster than going through the PV queuing process which is even slower than the native queuing process. It also avoids loading two additional cachelines (the MCS and PV nodes). This patch adds the pending bit support for PV qspinlock. The pending bit code has a smaller spin threshold (1<<10). It will default back to the queuing method if it cannot acquire the lock within a certain time limit. 
On a VM with 32 vCPUs on a 32-core Westmere-EX box, the kernel build times on 4.2-rc1 based kernels were: Kernel Build Time Sys Time ------ ---------- -------- w/o patch 3m28.5s 28m17.5s with patch 3m19.3s 23m55.7s Using a locking microbenchmark on the same system, the locking rates in (kops/s) were: Threads Rate w/o patch Rate with patch ------- -------------- --------------- 2 (same socket) 6,515,265 7,077,476 2 (diff sockets) 2,967,145 4,353,851 Signed-off-by: Waiman Long --- kernel/locking/qspinlock.c | 27 ++++++++++++++- kernel/locking/qspinlock_paravirt.h | 66 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 38c4920..6518ee9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -162,6 +162,17 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); } +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->pending, 0); +} + /* * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure @@ -193,6 +204,15 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) } /** + * clear_pending - clear the pending bit. 
+ * @lock: Pointer to queued spinlock structure + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_add(-_Q_PENDING_VAL, &lock->val); +} + +/** * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure * @tail : The new queue tail code word @@ -245,6 +265,7 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) { } #define pv_enabled() false +#define pv_pending_lock(l, v) false #define pv_init_node __pv_init_node #define pv_wait_node __pv_wait_node @@ -286,8 +307,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); - if (pv_enabled()) + if (pv_enabled()) { + if (pv_pending_lock(lock, val)) + return; /* Got the lock via pending bit */ goto queue; + } if (virt_queued_spin_lock(lock)) return; @@ -463,6 +487,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath); #undef pv_wait_node #undef pv_kick_node #undef pv_wait_head +#undef pv_pending_lock #undef queued_spin_lock_slowpath #define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index f2f4807..c43dec7 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -21,6 +21,14 @@ #define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) +/* + * Queued Spinlock Spin Threshold + * + * The vCPU will spin a relatively short time in pending mode before falling + * back to queuing. 
+ */ +#define PENDING_SPIN_THRESHOLD (SPIN_THRESHOLD >> 5) + enum vcpu_state { vcpu_running = 0, vcpu_halted, @@ -151,6 +159,64 @@ static void pv_init_node(struct mcs_spinlock *node) } /* + * Try to acquire the lock and wait using the pending bit + */ +static int pv_pending_lock(struct qspinlock *lock, u32 val) +{ + int loop = PENDING_SPIN_THRESHOLD; + u32 new, old; + + /* + * wait for in-progress pending->locked hand-overs + */ + if (val == _Q_PENDING_VAL) { + while (((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) && + loop--) + cpu_relax(); + } + + /* + * trylock || pending + */ + for (;;) { + if (val & ~_Q_LOCKED_MASK) + goto queue; + new = _Q_LOCKED_VAL; + if (val == new) + new |= _Q_PENDING_VAL; + old = atomic_cmpxchg(&lock->val, val, new); + if (old == val) + break; + if (loop-- <= 0) + goto queue; + } + + if (new == _Q_LOCKED_VAL) + goto gotlock; + /* + * We are pending, wait for the owner to go away. + */ + while (((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK) + && (loop-- > 0)) + cpu_relax(); + + if (!(val & _Q_LOCKED_MASK)) { + clear_pending_set_locked(lock); + goto gotlock; + } + /* + * Clear the pending bit and fall back to queuing + */ + clear_pending(lock); + +queue: + return 0; + +gotlock: + return 1; +} + +/* * Wait for node->locked to become true, halt the vcpu after a short spin. * pv_kick_node() is used to wake the vcpu again. */ -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/