From: Pan Xinhui <xinhui@linux.vnet.ibm.com>
Date: Thu, 14 Apr 2016 17:36:07 +0800
To: Waiman Long, Peter Zijlstra, Ingo Molnar
Cc: linux-kernel@vger.kernel.org, Scott J Norton, Douglas Hatch
Subject: Re: [PATCH] locking/pvqspinlock: Add lock holder CPU argument to pv_wait()
Message-ID: <570F6487.8070706@linux.vnet.ibm.com>
In-Reply-To: <1460581160-46187-1-git-send-email-Waiman.Long@hpe.com>
References: <1460581160-46187-1-git-send-email-Waiman.Long@hpe.com>

On 04/14/2016 04:59, Waiman Long wrote:
> Pan Xinhui was asking for a lock holder cpu argument in pv_wait()
> to help the porting of pvqspinlock to PPC. The new argument can
> potentially help the hypervisor expedite the execution of the critical
> section so that the lock holder vCPU can release the lock sooner.
>
> This patch does just that by storing the previous node vCPU number.
> In pv_wait_head_or_lock(), pv_wait() will be called with that vCPU
> number as it is likely to be the lock holder. In pv_wait_node(),
> -1 will be passed to pv_wait() instead to indicate that it doesn't
> know who the current lock holder is.
>
> This patch introduces negligible overhead to the current pvqspinlock
> code. The extra lockcpu argument isn't currently used on the x86
> architecture.
>
> Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
> ---
>  arch/x86/include/asm/paravirt.h       |  4 ++--
>  arch/x86/include/asm/paravirt_types.h |  2 +-
>  arch/x86/kernel/kvm.c                 |  2 +-
>  arch/x86/xen/spinlock.c               |  2 +-
>  kernel/locking/qspinlock_paravirt.h   | 19 +++++++++++++++----
>  kernel/locking/qspinlock_stat.h       |  8 ++++----
>  6 files changed, 24 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 601f1b8..b89eccf 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -676,9 +676,9 @@ static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
>  	PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
>  }
>
> -static __always_inline void pv_wait(u8 *ptr, u8 val)
> +static __always_inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
>  {
> -	PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
> +	PVOP_VCALL3(pv_lock_ops.wait, ptr, val, lockcpu);
>  }
>
>  static __always_inline void pv_kick(int cpu)
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index e8c2326..2fc26c1 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -312,7 +312,7 @@ struct pv_lock_ops {
>  	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
>  	struct paravirt_callee_save queued_spin_unlock;
>
> -	void (*wait)(u8 *ptr, u8 val);
> +	void (*wait)(u8 *ptr, u8 val, int lockcpu);
>  	void (*kick)(int cpu);
>  #else /* !CONFIG_QUEUED_SPINLOCKS */
>  	struct paravirt_callee_save lock_spinning;
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index dc1207e..47ab4e1 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -590,7 +590,7 @@ static void kvm_kick_cpu(int cpu)
>
>  #include <asm/qspinlock.h>
>
> -static void kvm_wait(u8 *ptr, u8 val)
> +static void kvm_wait(u8 *ptr, u8 val, int lockcpu)
>  {
>  	unsigned long flags;
>
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index 9e2ba5c..6f78c41 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -33,7 +33,7 @@ static void xen_qlock_kick(int cpu)
>  /*
>   * Halt the current CPU & release it back to the host
>   */
> -static void xen_qlock_wait(u8 *byte, u8 val)
> +static void xen_qlock_wait(u8 *byte, u8 val, int lockcpu)
>  {
>  	int irq = __this_cpu_read(lock_kicker_irq);
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 21ede57..4bec98b 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -51,6 +51,7 @@ struct pv_node {
>  	struct mcs_spinlock	__res[3];
>
>  	int			cpu;
> +	int			prev_cpu;	/* Previous node cpu */
>  	u8			state;
>  };
>
> @@ -156,8 +157,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
>   * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
>   *
>   * Since we should not be holding locks from NMI context (very rare indeed) the
> - * max load factor is 0.75, which is around the point where open addressing
> - * breaks down.
> + * max load factor is 0.75.
>   *
>   */
>  struct pv_hash_entry {
> @@ -275,6 +275,7 @@ static void pv_init_node(struct mcs_spinlock *node)
>
>  	pn->cpu = smp_processor_id();
>  	pn->state = vcpu_running;
> +	pn->prev_cpu = -1;
>  }
>
>  /*
> @@ -290,6 +291,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>  	int loop;
>  	bool wait_early;
>
> +	pn->prev_cpu = pp->cpu;	/* Save previous node vCPU */
> +
>  	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>  	for (;; waitcnt++) {
>  		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> @@ -317,7 +320,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>  			qstat_inc(qstat_pv_wait_node, true);
>  			qstat_inc(qstat_pv_wait_again, waitcnt);
>  			qstat_inc(qstat_pv_wait_early, wait_early);
> -			pv_wait(&pn->state, vcpu_halted);
> +			pv_wait(&pn->state, vcpu_halted, -1);

If the contention is high, we can end up here, and in that scenario we do
want the lock holder as well. How about storing the lock pointer in the
pv_node and then looking it up in the hash table? The code might look like:

	node = pv_hash_lookup(pn->lock);
	pv_wait(..., node->holder);

(rough sketches of this idea, and of how a hypervisor-side pv_wait() might
use the new lockcpu argument, are appended below the quoted patch)

thanks
xinhui

>  		}
>
>  		/*
> @@ -453,7 +456,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
>  		WRITE_ONCE(pn->state, vcpu_halted);
>  		qstat_inc(qstat_pv_wait_head, true);
>  		qstat_inc(qstat_pv_wait_again, waitcnt);
> +
> +		/*
> +		 * Pass in the previous node vCPU number which is likely to be
> +		 * the lock holder vCPU. This additional information may help
> +		 * the hypervisor to give more resource to that vCPU so that
> +		 * it can release the lock faster. With lock stealing,
> +		 * however, that vCPU may not be the actual lock holder.
> +		 */
> -		pv_wait(&l->locked, _Q_SLOW_VAL);
> +		pv_wait(&l->locked, _Q_SLOW_VAL, pn->prev_cpu);
>
>  		/*
>  		 * The unlocker should have freed the lock before kicking the
> diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
> index eb2a2c9..8728348 100644
> --- a/kernel/locking/qspinlock_stat.h
> +++ b/kernel/locking/qspinlock_stat.h
> @@ -266,12 +266,12 @@ static inline void __pv_kick(int cpu)
>  /*
>   * Replacement function for pv_wait()
>   */
> -static inline void __pv_wait(u8 *ptr, u8 val)
> +static inline void __pv_wait(u8 *ptr, u8 val, int lockcpu)
>  {
>  	u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
>
>  	*pkick_time = 0;
> -	pv_wait(ptr, val);
> +	pv_wait(ptr, val, lockcpu);
>  	if (*pkick_time) {
>  		this_cpu_add(qstats[qstat_pv_latency_wake],
>  			     sched_clock() - *pkick_time);
> @@ -279,8 +279,8 @@ static inline void __pv_wait(u8 *ptr, u8 val)
>  	}
>  }
>
> -#define pv_kick(c)	__pv_kick(c)
> -#define pv_wait(p, v)	__pv_wait(p, v)
> +#define pv_kick(c)		__pv_kick(c)
> +#define pv_wait(p, v, c)	__pv_wait(p, v, c)
>
>  #else /* CONFIG_QUEUED_LOCK_STAT */
>
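As promised above, here is a minimal, untested sketch of the pv_hash_lookup()
idea for pv_wait_node(). It assumes a new pv_node.lock field recorded when
the node is queued, and a lookup helper built from the existing pv_lock_hash
machinery in qspinlock_paravirt.h; neither exists in the patch as posted.
Note the entry found is the one hashed by the queue head, so whether its CPU
is a good enough approximation of the holder is an open question:

	/* Hypothetical helper -- mirrors pv_unhash() but leaves the entry in place */
	static struct pv_node *pv_hash_lookup(struct qspinlock *lock)
	{
		unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
		struct pv_hash_entry *he;

		for_each_hash_entry(he, offset, hash) {
			if (READ_ONCE(he->lock) == lock)
				return READ_ONCE(he->node);
		}
		return NULL;	/* lock not hashed (yet); caller falls back to -1 */
	}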
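And, for reference, a rough sketch of how a pseries-side pv_wait() might
consume the new lockcpu argument, modelled on the existing __spin_yield()
code: confer our time slice to the probable holder vCPU when it has been
preempted. The name pseries_pv_wait() is made up for illustration, the real
wait/kick plumbing is omitted, and whether conferring here actually wins on
PPC is exactly what would need measuring:

	/* Hypothetical sketch only -- not part of this patch */
	static void pseries_pv_wait(u8 *ptr, u8 val, int lockcpu)
	{
		u32 yield_count;

		if (READ_ONCE(*ptr) != val)
			return;		/* value already changed, don't wait */

		if (lockcpu < 0)
			return;		/* holder unknown, let the caller keep spinning */

		yield_count = be32_to_cpu(lppaca_of(lockcpu).yield_count);
		if (!(yield_count & 1))
			return;		/* even: holder vCPU is currently running */

		/* Give our time slice to the (probable) lock holder vCPU */
		plpar_hcall_norets(H_CONFER,
				   get_hard_smp_processor_id(lockcpu), yield_count);
	}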