From: Waiman Long <Waiman.Long@hp.com>
To: Peter Zijlstra, Ingo Molnar, Thomas Gleixner, "H. Peter Anvin"
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, Scott J Norton, Douglas Hatch, Waiman Long
Subject: [PATCH 7/7] locking/qspinlock: Collect queued unfair lock slowpath statistics
Date: Sat, 11 Jul 2015 16:36:58 -0400
Message-Id: <1436647018-49734-8-git-send-email-Waiman.Long@hp.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <1436647018-49734-1-git-send-email-Waiman.Long@hp.com>
References: <1436647018-49734-1-git-send-email-Waiman.Long@hp.com>

This patch enables the accumulation of unfair qspinlock statistics
when the CONFIG_QUEUED_LOCK_STAT configuration parameter is set.

The accumulated lock statistics will be reported in debugfs under the
unfair-qspinlock directory.

On a KVM guest with 32 vCPUs, the statistics counts after bootup were:

  lsteal_cnts = 172219 2377 425 118 33 8 5 12 14 0 0 0
  trylock_cnt = 1495372

So most of the lock stealing happened in the initial trylock before
entering the queue. Once a vCPU is in the queue, the chance of getting
the lock drops off significantly the further it is away from the queue
head.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
---
 arch/x86/Kconfig                  |    7 ++-
 kernel/locking/qspinlock.c        |    2 +-
 kernel/locking/qspinlock_unfair.h |   89 +++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 299a1c4..aee6236 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -680,11 +680,12 @@ config PARAVIRT_SPINLOCKS
 	  If you are unsure how to answer this question, answer Y.
 
 config QUEUED_LOCK_STAT
-	bool "Paravirt queued lock statistics"
-	depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+	bool "Paravirt/Unfair queued lock statistics"
+	depends on DEBUG_FS && QUEUED_SPINLOCKS
 	---help---
 	  Enable the collection of statistical data on the behavior of
-	  paravirtualized queued spinlocks and report them on debugfs.
+	  paravirtualized and unfair queued spinlocks and report them
+	  on debugfs.
 
 source "arch/x86/xen/Kconfig"
 
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65dead9..12e2e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -538,7 +538,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #ifdef queued_spin_trylock
 #undef queued_spin_trylock
 #endif
-#define queued_spin_trylock	queued_spin_trylock_unfair
+#define queued_spin_trylock	__queued_spin_trylock_unfair
 
 /*
  * The unfair lock code is used internally and so don't need to be exported
diff --git a/kernel/locking/qspinlock_unfair.h b/kernel/locking/qspinlock_unfair.h
index 0e8a40f..fc94578 100644
--- a/kernel/locking/qspinlock_unfair.h
+++ b/kernel/locking/qspinlock_unfair.h
@@ -44,6 +44,93 @@ struct uf_node {
 	u32		prev_tail;	/* Previous node tail code */
 };
 
+#ifdef CONFIG_QUEUED_LOCK_STAT
+
+#include <linux/debugfs.h>
+
+/*
+ * Unfair qspinlock statistics
+ *
+ * All spinning CPUs are grouped into buckets depending on the most
+ * significant bit in their lock stealing period. The first entry in
+ * the array is for the queue head.
+ */
+#define NR_LPERIOD_CNTS	(LPERIOD_THRESHOLD_SHIFT - LPERIOD_MIN_SHIFT + 6)
+static atomic_t lsteal_cnts[NR_LPERIOD_CNTS];
+
+/*
+ * # of successful trylocks at beginning of slowpath
+ */
+static atomic_t trylock_cnt;
+
+/*
+ * Counts reset flag
+ */
+static bool reset_cnts __read_mostly;
+
+/*
+ * Initialize debugfs for the unfair qspinlock statistics
+ */
+static int __init unfair_qspinlock_debugfs(void)
+{
+	struct dentry *d_ufqlock = debugfs_create_dir("unfair-qspinlock", NULL);
+
+	if (!d_ufqlock)
+		printk(KERN_WARNING
+		       "Could not create 'unfair-qspinlock' debugfs directory\n");
+
+	debugfs_create_u32_array("lsteal_cnts", 0444, d_ufqlock,
+				 (u32 *)lsteal_cnts, NR_LPERIOD_CNTS);
+	debugfs_create_u32("trylock_cnt", 0444, d_ufqlock, (u32 *)&trylock_cnt);
+	debugfs_create_bool("reset_cnts", 0644, d_ufqlock, (u32 *)&reset_cnts);
+	return 0;
+}
+fs_initcall(unfair_qspinlock_debugfs);
+
+/*
+ * Reset all the statistics counts
+ */
+static noinline void reset_counts(void)
+{
+	int idx;
+
+	reset_cnts = false;
+	atomic_set(&trylock_cnt, 0);
+	for (idx = 0 ; idx < NR_LPERIOD_CNTS; idx++)
+		atomic_set(&lsteal_cnts[idx], 0);
+}
+
+/*
+ * Increment the unfair qspinlock statistic count
+ */
+static inline void ustat_inc(struct uf_node *pn)
+{
+	/*
+	 * fls() returns the most significant 1 bit position + 1
+	 */
+	int idx = fls(pn->lsteal_period) - LPERIOD_MIN_SHIFT;
+
+	if (idx >= NR_LPERIOD_CNTS)
+		idx = NR_LPERIOD_CNTS - 1;
+	atomic_inc(&lsteal_cnts[idx]);
+	if (unlikely(reset_cnts))
+		reset_counts();
+}
+
+static inline bool __queued_spin_trylock_unfair(struct qspinlock *lock)
+{
+	bool ret = queued_spin_trylock_unfair(lock);
+
+	if (ret)
+		atomic_inc(&trylock_cnt);
+	return ret;
+}
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+static inline void ustat_inc(struct uf_node *pn)	{ }
+#define __queued_spin_trylock_unfair	queued_spin_trylock_unfair
+#endif /* CONFIG_QUEUED_LOCK_STAT */
+
 /**
  * cmpxchg_tail - Put in the new tail code if it matches the old one
  * @lock : Pointer to queue spinlock structure
@@ -125,6 +212,7 @@ static inline bool unfair_wait_node(struct qspinlock *lock,
 		if (queued_spin_trylock_unfair(lock))
 			break;	/* Got the lock */
 	}
+	ustat_inc(pn);
 
 	/*
 	 * Have stolen the lock, need to remove itself from the wait queue.
@@ -220,6 +308,7 @@ unfair_wait_head(struct qspinlock *lock, struct mcs_spinlock *node, u32 tail)
 	pn->lsteal_period = LPERIOD_QHEAD;
 	while (!queued_spin_trylock_unfair(lock))
 		cpu_relax();
+	ustat_inc(pn);
 
 	/*
 	 * Remove tail code in the lock if it is the only one in the queue
-- 
1.7.1
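
A minimal user-space sketch for reading and resetting these counters is
shown below. The directory and file names (unfair-qspinlock, lsteal_cnts,
trylock_cnt, reset_cnts) are the ones created by the patch; the
/sys/kernel/debug mount point, the program name and the helper function are
illustrative assumptions, not part of the patch:

/*
 * ufqstat.c - illustrative reader for the unfair qspinlock debugfs counters.
 * Assumes debugfs is mounted at /sys/kernel/debug and the kernel was built
 * with CONFIG_QUEUED_LOCK_STAT=y. Not part of the patch itself.
 */
#include <stdio.h>
#include <string.h>

#define UFQ_DIR	"/sys/kernel/debug/unfair-qspinlock"

static void dump_file(const char *name)
{
	char path[128], buf[256];
	FILE *fp;

	snprintf(path, sizeof(path), UFQ_DIR "/%s", name);
	fp = fopen(path, "r");
	if (!fp) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), fp)) {
		buf[strcspn(buf, "\n")] = '\0';
		printf("%s = %s\n", name, buf);
	}
	fclose(fp);
}

int main(int argc, char *argv[])
{
	dump_file("lsteal_cnts");	/* one bucket per lock stealing period */
	dump_file("trylock_cnt");	/* successful trylocks in the slowpath */

	/*
	 * "ufqstat reset" arms the reset flag; the counts are actually
	 * cleared by reset_counts() on the next ustat_inc() in the kernel.
	 */
	if (argc > 1 && !strcmp(argv[1], "reset")) {
		FILE *fp = fopen(UFQ_DIR "/reset_cnts", "w");

		if (fp) {
			fputs("1", fp);
			fclose(fp);
		}
	}
	return 0;
}

Note that, as in reset_counts() above, clearing is deferred until the next
statistics update rather than performed synchronously by the debugfs write.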