Date: Thu, 14 Apr 2016 22:34:26 +0800
From: Pan Xinhui <xinhui@linux.vnet.ibm.com>
To: Waiman Long, Peter Zijlstra, Ingo Molnar
Cc: linux-kernel@vger.kernel.org, Scott J Norton, Douglas Hatch
Subject: Re: [PATCH] locking/pvqspinlock: Add lock holder CPU argument to pv_wait()
Message-ID: <570FAA72.5090302@linux.vnet.ibm.com>
In-Reply-To: <570F6487.8070706@linux.vnet.ibm.com>
References: <1460581160-46187-1-git-send-email-Waiman.Long@hpe.com> <570F6487.8070706@linux.vnet.ibm.com>

Hello, Waiman

I tried your patch, thanks! I also made some improvements. The code diff
below has been tested; it works for me. :)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e..99f31e4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -248,7 +248,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
  */
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+static __always_inline void __pv_wait_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node,
 					   struct mcs_spinlock *prev) { }
 static __always_inline void __pv_kick_node(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
@@ -407,7 +408,7 @@ queue:
 		prev = decode_tail(old);
 		WRITE_ONCE(prev->next, node);
 
-		pv_wait_node(node, prev);
+		pv_wait_node(lock, node, prev);
 		arch_mcs_spin_lock_contended(&node->locked);
 
 		/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 01a6d16..75ccfd3 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -255,6 +257,19 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
 	BUG();
 }
 
+static struct pv_node *pv_hash_lookup(struct qspinlock *lock)
+{
+	unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
+	struct pv_hash_entry *he;
+
+	for_each_hash_entry(he, offset, hash) {
+		if (READ_ONCE(he->lock) == lock) {
+			return he->node;
+		}
+	}
+	return NULL;
+}
+
 /*
  * Return true if when it is time to check the previous node which is not
  * in a running state.
@@ -287,14 +303,17 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+static void pv_wait_node(struct qspinlock *lock, struct mcs_spinlock *node,
+			 struct mcs_spinlock *prev)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 	struct pv_node *pp = (struct pv_node *)prev;
+	struct pv_node *ph;
 	int waitcnt = 0;
 	int loop;
 	bool wait_early;
 
+	pn->prev_cpu = pp->cpu;
 	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
 	for (;; waitcnt++) {
 		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
@@ -322,7 +341,10 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 			qstat_inc(qstat_pv_wait_node, true);
 			qstat_inc(qstat_pv_wait_again, waitcnt);
 			qstat_inc(qstat_pv_wait_early, wait_early);
-			pv_wait(&pn->state, vcpu_halted);
+			ph = pv_hash_lookup(lock);
+			if (!ph)
+				ph = pp;
+			pv_wait(&pn->state, vcpu_halted, ph->prev_cpu);
 		}
 
 		/*

Any comments are welcome. I put my patch here just for simplicity, or do I
need to send it out in a new thread?

thanks
xinhui

On 2016-04-14 17:36, Pan Xinhui wrote:
>
>
> On 2016-04-14 04:59, Waiman Long wrote:
>> Pan Xinhui was asking for a lock holder cpu argument in pv_wait()
>> to help the porting of pvqspinlock to PPC. The new argument can
>> potentially help the hypervisor expedite the execution of the critical
>> section so that the lock holder vCPU can release the lock sooner.
>>
>> This patch does just that by storing the previous node vCPU number.
>> In pv_wait_head_or_lock(), pv_wait() will be called with that vCPU
>> number as it is likely to be the lock holder. In pv_wait_node(),
>> -1 will be passed to pv_wait() instead to indicate that it doesn't
>> know what the current lock holder is.
>>
>> This patch introduces negligible overhead to the current pvqspinlock
>> code. The extra lockcpu argument isn't currently used in the x86
>> architecture.
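
Just to illustrate what a hypervisor-facing wait routine could do with the
extra lockcpu hint, here is a rough standalone sketch. hv_halt() and
hv_yield_to_cpu() below are made-up stand-ins for whatever halt /
directed-yield primitives a particular hypervisor exposes; they are not
real kernel, KVM, Xen or PPC APIs.

typedef unsigned char u8;

/* Hypothetical stand-ins for hypervisor primitives (assumption, not real APIs). */
static void hv_halt(void)            { /* halt this vCPU until it is kicked */ }
static void hv_yield_to_cpu(int cpu) { (void)cpu; /* directed yield to cpu  */ }

static void example_pv_wait(u8 *ptr, u8 val, int lockcpu)
{
	/* Re-check the wait condition; the kicker may already have run. */
	if (*(volatile u8 *)ptr != val)
		return;

	if (lockcpu >= 0)
		/* Hint available: let the likely lock holder run first. */
		hv_yield_to_cpu(lockcpu);
	else
		/* No hint (e.g. -1 from pv_wait_node()): just halt until kicked. */
		hv_halt();
}

A real implementation would of course loop and re-halt until it is actually
kicked, as kvm_wait() and xen_qlock_wait() do.
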
>>
>> Signed-off-by: Waiman Long
>> ---
>>  arch/x86/include/asm/paravirt.h       |  4 ++--
>>  arch/x86/include/asm/paravirt_types.h |  2 +-
>>  arch/x86/kernel/kvm.c                 |  2 +-
>>  arch/x86/xen/spinlock.c               |  2 +-
>>  kernel/locking/qspinlock_paravirt.h   | 19 +++++++++++++++----
>>  kernel/locking/qspinlock_stat.h       |  8 ++++----
>>  6 files changed, 24 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
>> index 601f1b8..b89eccf 100644
>> --- a/arch/x86/include/asm/paravirt.h
>> +++ b/arch/x86/include/asm/paravirt.h
>> @@ -676,9 +676,9 @@ static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
>>  	PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
>>  }
>>
>> -static __always_inline void pv_wait(u8 *ptr, u8 val)
>> +static __always_inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>> -	PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
>> +	PVOP_VCALL3(pv_lock_ops.wait, ptr, val, lockcpu);
>>  }
>>
>>  static __always_inline void pv_kick(int cpu)
>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
>> index e8c2326..2fc26c1 100644
>> --- a/arch/x86/include/asm/paravirt_types.h
>> +++ b/arch/x86/include/asm/paravirt_types.h
>> @@ -312,7 +312,7 @@ struct pv_lock_ops {
>>  	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
>>  	struct paravirt_callee_save queued_spin_unlock;
>>
>> -	void (*wait)(u8 *ptr, u8 val);
>> +	void (*wait)(u8 *ptr, u8 val, int lockcpu);
>>  	void (*kick)(int cpu);
>>  #else /* !CONFIG_QUEUED_SPINLOCKS */
>>  	struct paravirt_callee_save lock_spinning;
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index dc1207e..47ab4e1 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -590,7 +590,7 @@ static void kvm_kick_cpu(int cpu)
>>
>>  #include
>>
>> -static void kvm_wait(u8 *ptr, u8 val)
>> +static void kvm_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>>  	unsigned long flags;
>>
>> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
>> index 9e2ba5c..6f78c41 100644
>> --- a/arch/x86/xen/spinlock.c
>> +++ b/arch/x86/xen/spinlock.c
>> @@ -33,7 +33,7 @@ static void xen_qlock_kick(int cpu)
>>  /*
>>   * Halt the current CPU & release it back to the host
>>   */
>> -static void xen_qlock_wait(u8 *byte, u8 val)
>> +static void xen_qlock_wait(u8 *byte, u8 val, int lockcpu)
>>  {
>>  	int irq = __this_cpu_read(lock_kicker_irq);
>>
>> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
>> index 21ede57..4bec98b 100644
>> --- a/kernel/locking/qspinlock_paravirt.h
>> +++ b/kernel/locking/qspinlock_paravirt.h
>> @@ -51,6 +51,7 @@ struct pv_node {
>>  	struct mcs_spinlock	__res[3];
>>
>>  	int			cpu;
>> +	int			prev_cpu;	/* Previous node cpu */
>>  	u8			state;
>>  };
>>
>> @@ -156,8 +157,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
>>   * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
>>   *
>>   * Since we should not be holding locks from NMI context (very rare indeed) the
>> - * max load factor is 0.75, which is around the point where open addressing
>> - * breaks down.
>> + * max load factor is 0.75.
>>   *
>>   */
>>  struct pv_hash_entry {
>> @@ -275,6 +275,7 @@ static void pv_init_node(struct mcs_spinlock *node)
>>
>>  	pn->cpu = smp_processor_id();
>>  	pn->state = vcpu_running;
>> +	pn->prev_cpu = -1;
>>  }
>>
>>  /*
>> @@ -290,6 +291,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>>  	int loop;
>>  	bool wait_early;
>>
>> +	pn->prev_cpu = pp->cpu;	/* Save previous node vCPU */
>> +
>>  	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>>  	for (;; waitcnt++) {
>>  		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
>> @@ -317,7 +320,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>>  			qstat_inc(qstat_pv_wait_node, true);
>>  			qstat_inc(qstat_pv_wait_again, waitcnt);
>>  			qstat_inc(qstat_pv_wait_early, wait_early);
>> -			pv_wait(&pn->state, vcpu_halted);
>> +			pv_wait(&pn->state, vcpu_halted, -1);
> If the contention is high, we might run here, and we do need the lock
> holder in that scenario. How about this: we store the lock in the
> pv_node, then look the lock up in the hash table. The code might look
> like:
> 	node = pv_hash_lookup(pn->lock);
> 	pv_wait(..., node->holder);
> (a standalone toy sketch of this lookup idea is appended at the end of
> this mail)
> thanks
> xinhui
>>  		}
>>
>>  		/*
>> @@ -453,7 +456,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
>>  		WRITE_ONCE(pn->state, vcpu_halted);
>>  		qstat_inc(qstat_pv_wait_head, true);
>>  		qstat_inc(qstat_pv_wait_again, waitcnt);
>> -		pv_wait(&l->locked, _Q_SLOW_VAL);
>> +
>> +		/*
>> +		 * Pass in the previous node vCPU number which is likely to be
>> +		 * the lock holder vCPU. This additional information may help
>> +		 * the hypervisor to give more resources to that vCPU so that
>> +		 * it can release the lock faster. With lock stealing,
>> +		 * however, that vCPU may not be the actual lock holder.
>> +		 */
>> +		pv_wait(&l->locked, _Q_SLOW_VAL, pn->prev_cpu);
>>
>>  		/*
>>  		 * The unlocker should have freed the lock before kicking the
>> diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
>> index eb2a2c9..8728348 100644
>> --- a/kernel/locking/qspinlock_stat.h
>> +++ b/kernel/locking/qspinlock_stat.h
>> @@ -266,12 +266,12 @@ static inline void __pv_kick(int cpu)
>>  /*
>>   * Replacement function for pv_wait()
>>   */
>> -static inline void __pv_wait(u8 *ptr, u8 val)
>> +static inline void __pv_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>>  	u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
>>
>>  	*pkick_time = 0;
>> -	pv_wait(ptr, val);
>> +	pv_wait(ptr, val, lockcpu);
>>  	if (*pkick_time) {
>>  		this_cpu_add(qstats[qstat_pv_latency_wake],
>>  			     sched_clock() - *pkick_time);
>> @@ -279,8 +279,8 @@ static inline void __pv_wait(u8 *ptr, u8 val)
>>  	}
>>  }
>>
>> -#define pv_kick(c)	__pv_kick(c)
>> -#define pv_wait(p, v)	__pv_wait(p, v)
>> +#define pv_kick(c)		__pv_kick(c)
>> +#define pv_wait(p, v, c)	__pv_wait(p, v, c)
>>
>>  #else /* CONFIG_QUEUED_LOCK_STAT */
>>
>
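
As mentioned in my inline comment above, here is a purely illustrative,
standalone toy model of the lock-to-holder hash lookup idea (ordinary
userspace C; all names and sizes are made up for the example, while the
real kernel code uses pv_hash()/pv_hash_lookup() over pv_hash_entry
buckets).

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define NR_BUCKETS 16			/* arbitrary power of two for the toy */

struct toy_entry {
	void *lock;			/* key: lock address                  */
	int   holder_cpu;		/* value: the holder's vCPU number    */
};

static struct toy_entry table[NR_BUCKETS];

static size_t toy_hash(void *lock)
{
	return ((uintptr_t)lock >> 4) & (NR_BUCKETS - 1);
}

/* Insert with linear probing, roughly what pv_hash() does per bucket. */
static void toy_hash_insert(void *lock, int cpu)
{
	size_t i = toy_hash(lock);

	while (table[i].lock)
		i = (i + 1) & (NR_BUCKETS - 1);
	table[i].lock = lock;
	table[i].holder_cpu = cpu;
}

/* Lookup analogous to the proposed pv_hash_lookup(). */
static int toy_hash_lookup(void *lock)
{
	size_t i = toy_hash(lock);
	size_t n;

	for (n = 0; n < NR_BUCKETS; n++, i = (i + 1) & (NR_BUCKETS - 1)) {
		if (table[i].lock == lock)
			return table[i].holder_cpu;
	}
	return -1;	/* not hashed yet: caller falls back to another hint */
}

int main(void)
{
	int lock_a, lock_b;

	toy_hash_insert(&lock_a, 3);
	printf("holder of lock_a: %d\n", toy_hash_lookup(&lock_a));	/* 3  */
	printf("holder of lock_b: %d\n", toy_hash_lookup(&lock_b));	/* -1 */
	return 0;
}

The -1 fallback plays the same role as the "ph = pp" fallback in my patch
at the top of this mail: if the lock has not been hashed yet, the waiter
falls back to the previous node's CPU as its best guess.
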