This optional patch improves yielding behavior in that the acquire
function now checks whether the vCPU owning the lock is actually
running, yielding immediately if it isn't.
The (only) additional overhead this introduces for native execution is
the writing of the owning CPU in the lock acquire paths. If this is
considered a problem but the patch otherwise is deemed useful, even
that code could be eliminated for native execution (by further
alternative instruction patching).
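
Purely for illustration, the contended acquire path then behaves roughly
like the standalone sketch below. None of the names in it are the actual
kernel symbols (sketch_spin_acquire, vcpu_is_running, yield_to_hypervisor
and this_cpu_id are made-up stand-ins), and the Xen runstate check and
SCHEDOP_poll yield are stubbed out:

#include <stdbool.h>

#define SPIN_COUNT 1000

struct sketch_lock {
	volatile unsigned short cur;	/* ticket currently being served */
	unsigned short owner;		/* CPU that last acquired the lock */
};

/* Stand-in for the Xen runstate check on the owning vCPU. */
static bool vcpu_is_running(unsigned int cpu) { (void)cpu; return true; }
/* Stand-in for the SCHEDOP_poll style block/yield operation. */
static void yield_to_hypervisor(struct sketch_lock *l, unsigned int t)
{ (void)l; (void)t; }
static void relax(void) { }
static unsigned int this_cpu_id(void) { return 0; }

static void sketch_spin_acquire(struct sketch_lock *lock, unsigned int token)
{
	unsigned int count = SPIN_COUNT;

	while (lock->cur != token) {
		/*
		 * Yield immediately if the owning vCPU is not running;
		 * otherwise yield only once the spin budget runs out.
		 */
		if (!vcpu_is_running(lock->owner) || --count == 0) {
			yield_to_hypervisor(lock, token);
			count = SPIN_COUNT;
		} else {
			relax();
		}
	}

	/* Acquired: record the owner so later waiters can check it. */
	lock->owner = this_cpu_id();
}

int main(void)
{
	struct sketch_lock lock = { .cur = 0, .owner = 0 };

	sketch_spin_acquire(&lock, 0);	/* token 0 is already being served */
	return 0;
}
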
Signed-off-by: Jan Beulich <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: KY Srinivasan <[email protected]>
---
arch/x86/include/asm/spinlock.h | 13 +++++++++++++
arch/x86/include/asm/spinlock_types.h | 5 +++++
arch/x86/kernel/cpu/xen.c | 5 ++++-
3 files changed, 22 insertions(+), 1 deletion(-)
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
@@ -85,6 +85,15 @@ extern void virt_spin_unlock_stub(void);
# define UNLOCK_LOCK_PREFIX
#endif
+static __always_inline void __ticket_spin_set_owner(arch_spinlock_t *lock,
+ int owned)
+{
+#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
+ if (owned)
+ lock->owner = percpu_read(cpu_number);
+#endif
+}
+
/*
* Ticket locks are conceptually two parts, one indicating the current head of
* the queue, and the other indicating the current tail. The lock is acquired
@@ -124,6 +133,7 @@ static __always_inline void __ticket_spi
ASM_OUTPUT2("+Q" (inc), "+m" (lock->slock)),
[stub] "i" (virt_spin_lock_stub)
: "memory", "cc");
+ __ticket_spin_set_owner(lock, true);
}
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -141,6 +151,7 @@ static __always_inline int __ticket_spin
: "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
:
: "memory", "cc");
+ __ticket_spin_set_owner(lock, tmp);
return tmp;
}
@@ -192,6 +203,7 @@ static __always_inline void __ticket_spi
ASM_OUTPUT2("+r" (inc), "+m" (lock->slock), "=&r" (tmp)),
[stub] "i" (virt_spin_lock_stub)
: "memory", "cc");
+ __ticket_spin_set_owner(lock, true);
}
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -212,6 +224,7 @@ static __always_inline int __ticket_spin
: "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
:
: "memory", "cc");
+ __ticket_spin_set_owner(lock, tmp);
return tmp;
}
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock_types.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock_types.h
@@ -17,6 +17,11 @@ typedef struct arch_spinlock {
# else
u16 cur, seq;
# endif
+# if CONFIG_NR_CPUS <= 256
+ u8 owner;
+# else
+ u16 owner;
+# endif
};
#endif
};
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/xen.c
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/xen.c
@@ -79,7 +79,8 @@ static void xen_spin_lock(struct arch_sp
for (count = spin_count; ({ barrier(); lock->cur != token; }); )
if (likely(cpu_online(raw_smp_processor_id()))
- && unlikely(!--count)) {
+ && (per_cpu(runstate.state, lock->owner) != RUNSTATE_running
+ || unlikely(!--count))) {
struct sched_poll sched_poll;
set_xen_guest_handle(sched_poll.ports,
@@ -91,6 +92,8 @@ static void xen_spin_lock(struct arch_sp
} else
cpu_relax();
+ lock->owner = raw_smp_processor_id();
+
/*
* If we interrupted another spinlock while it was blocking, make
* sure it doesn't block (again) without re-checking the lock.
On Tue, 2010-06-29 at 15:35 +0100, Jan Beulich wrote:
>
> The (only) additional overhead this introduces for native execution is
> the writing of the owning CPU in the lock acquire paths.
Uhm, and growing the size of spinlock_t to 6 (or 8 bytes when aligned)
bytes when NR_CPUS>256.
>>> On 30.06.10 at 10:11, Peter Zijlstra <[email protected]> wrote:
> On Tue, 2010-06-29 at 15:35 +0100, Jan Beulich wrote:
>>
>> The (only) additional overhead this introduces for native execution is
>> the writing of the owning CPU in the lock acquire paths.
>
> Uhm, and growing the size of spinlock_t to 6 (or 8 bytes when aligned)
> bytes when NR_CPUS>256.
Indeed, I should have mentioned that. Will do so in an eventual
next version.
Jan
On Wed, 2010-06-30 at 09:49 +0100, Jan Beulich wrote:
> >>> On 30.06.10 at 10:11, Peter Zijlstra <[email protected]> wrote:
> > On Tue, 2010-06-29 at 15:35 +0100, Jan Beulich wrote:
> >>
> >> The (only) additional overhead this introduces for native execution is
> >> the writing of the owning CPU in the lock acquire paths.
> >
> > Uhm, and growing the size of spinlock_t to 6 (or 8 bytes when aligned)
> > bytes when NR_CPUS>256.
>
> Indeed, I should have mentioned that. Will do so in an eventual
> next version.
It would be good to also get a measure of the data structure bloat caused
by this; not sure .data section size is representative there, but it's
something easy to provide.
Something like: pahole -s build/vmlinux | awk '{t+=$2} END {print t}'
from before and after might also be interesting.
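
For reference, the 6-vs-8-byte numbers above can be reproduced with a
small standalone C program; the layouts below merely mirror the patched
fields for the NR_CPUS > 256 case and are not the kernel definitions:

#include <stdio.h>
#include <stdint.h>

struct ticket_pair {			/* current layout: two u16 tickets */
	uint16_t cur, seq;
};

struct ticket_pair_owner {		/* patched layout with a u16 owner */
	uint16_t cur, seq;
	uint16_t owner;
};

struct container {			/* lock followed by a 4-byte-aligned member */
	struct ticket_pair_owner lock;
	uint32_t other;
};

int main(void)
{
	printf("without owner: %zu bytes\n", sizeof(struct ticket_pair));		/* 4 */
	printf("with owner:    %zu bytes\n", sizeof(struct ticket_pair_owner));	/* 6 */
	printf("padded slot:   %zu bytes\n",
	       sizeof(struct container) - sizeof(uint32_t));			/* typically 8 */
	return 0;
}
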
On 06/29/2010 04:35 PM, Jan Beulich wrote:
> This optional patch improves yielding behavior in that the acquire
> function now checks whether the vCPU owning the lock is actually
> running, yielding immediately if it isn't.
>
> The (only) additional overhead this introduces for native execution is
> the writing of the owning CPU in the lock acquire paths. If this is
> considered a problem but the patch otherwise is deemed useful, even
> that code could be eliminated for native execution (by further
> alternative instruction patching).
>
> Signed-off-by: Jan Beulich <[email protected]>
> Cc: Jeremy Fitzhardinge <[email protected]>
> Cc: KY Srinivasan <[email protected]>
>
> ---
> arch/x86/include/asm/spinlock.h | 13 +++++++++++++
> arch/x86/include/asm/spinlock_types.h | 5 +++++
> arch/x86/kernel/cpu/xen.c | 5 ++++-
> 3 files changed, 22 insertions(+), 1 deletion(-)
>
> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
> @@ -85,6 +85,15 @@ extern void virt_spin_unlock_stub(void);
> # define UNLOCK_LOCK_PREFIX
> #endif
>
> +static __always_inline void __ticket_spin_set_owner(arch_spinlock_t *lock,
> + int owned)
> +{
> +#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
> + if (owned)
> + lock->owner = percpu_read(cpu_number);
>
Why not smp_processor_id()? Is this different in some way?
> +#endif
> +}
> +
> /*
> * Ticket locks are conceptually two parts, one indicating the current head of
> * the queue, and the other indicating the current tail. The lock is acquired
> @@ -124,6 +133,7 @@ static __always_inline void __ticket_spi
> ASM_OUTPUT2("+Q" (inc), "+m" (lock->slock)),
> [stub] "i" (virt_spin_lock_stub)
> : "memory", "cc");
> + __ticket_spin_set_owner(lock, true);
> }
>
> static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> @@ -141,6 +151,7 @@ static __always_inline int __ticket_spin
> : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
> :
> : "memory", "cc");
> + __ticket_spin_set_owner(lock, tmp);
>
> return tmp;
> }
> @@ -192,6 +203,7 @@ static __always_inline void __ticket_spi
> ASM_OUTPUT2("+r" (inc), "+m" (lock->slock), "=&r" (tmp)),
> [stub] "i" (virt_spin_lock_stub)
> : "memory", "cc");
> + __ticket_spin_set_owner(lock, true);
> }
>
> static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> @@ -212,6 +224,7 @@ static __always_inline int __ticket_spin
> : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
> :
> : "memory", "cc");
> + __ticket_spin_set_owner(lock, tmp);
>
> return tmp;
> }
> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock_types.h
> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock_types.h
> @@ -17,6 +17,11 @@ typedef struct arch_spinlock {
> # else
> u16 cur, seq;
> # endif
> +# if CONFIG_NR_CPUS <= 256
> + u8 owner;
> +# else
> + u16 owner;
> +# endif
> };
> #endif
> };
> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/xen.c
> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/xen.c
> @@ -79,7 +79,8 @@ static void xen_spin_lock(struct arch_sp
>
> for (count = spin_count; ({ barrier(); lock->cur != token; }); )
> if (likely(cpu_online(raw_smp_processor_id()))
> - && unlikely(!--count)) {
> + && (per_cpu(runstate.state, lock->owner) != RUNSTATE_running
> + || unlikely(!--count))) {
> struct sched_poll sched_poll;
>
> set_xen_guest_handle(sched_poll.ports,
> @@ -91,6 +92,8 @@ static void xen_spin_lock(struct arch_sp
> } else
> cpu_relax();
>
> + lock->owner = raw_smp_processor_id();
> +
> /*
> * If we interrupted another spinlock while it was blocking, make
> * sure it doesn't block (again) without re-checking the lock.
>
On 06/30/2010 10:49 AM, Jan Beulich wrote:
>>>> On 30.06.10 at 10:11, Peter Zijlstra <[email protected]> wrote:
>>>>
>> On Tue, 2010-06-29 at 15:35 +0100, Jan Beulich wrote:
>>
>>> The (only) additional overhead this introduces for native execution is
>>> the writing of the owning CPU in the lock acquire paths.
>>>
>> Uhm, and growing the size of spinlock_t to 6 (or 8 bytes when aligned)
>> bytes when NR_CPUS>256.
>>
> Indeed, I should have mentioned that. Will do so in an eventual
> next version.
>
Rather than increasing the lock size, why not just disable the
enlightenment if the number of (possible) cpus is > 256? I don't think
a VM will ever have that many cpus, so it will only apply in the case of
booting the kernel on a large physical machine.
J
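
For concreteness, a size-neutral variant along those lines might look
roughly like the fragment below; this is purely hypothetical, not part of
the posted patch, and it elides the existing ticket fields:

/* Hypothetical alternative to the spinlock_types.h hunk (not posted). */
typedef struct arch_spinlock {
	/* ... existing ticket fields ... */
#if defined(CONFIG_ENLIGHTEN_SPINLOCKS) && CONFIG_NR_CPUS <= 256
	u8 owner;	/* recorded in the acquire paths as in the patch */
#endif
	/*
	 * With NR_CPUS > 256 there is no owner field, the runstate
	 * check is compiled out, and the lock keeps its current size.
	 */
} arch_spinlock_t;
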
>>> On 30.06.10 at 12:08, Jeremy Fitzhardinge <[email protected]> wrote:
> On 06/29/2010 04:35 PM, Jan Beulich wrote:
>> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
>> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
>> @@ -85,6 +85,15 @@ extern void virt_spin_unlock_stub(void);
>> # define UNLOCK_LOCK_PREFIX
>> #endif
>>
>> +static __always_inline void __ticket_spin_set_owner(arch_spinlock_t *lock,
>> + int owned)
>> +{
>> +#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
>> + if (owned)
>> + lock->owner = percpu_read(cpu_number);
>>
>
> Why not smp_processor_id()? Is this different in some way?
Including the respective header here just doesn't work due to
resulting cyclic dependencies.
Jan
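
For context, the two spellings end up reading the same per-cpu variable;
roughly (quoted from memory of the 2.6.35-era headers, so treat the exact
lines as approximate), the difference is only which headers must be
included:

/* arch/x86/include/asm/smp.h (approximate) */
#define raw_smp_processor_id()	(percpu_read(cpu_number))

/* include/linux/smp.h, !CONFIG_DEBUG_PREEMPT (approximate) */
#define smp_processor_id()	raw_smp_processor_id()
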
>>> On 30.06.10 at 12:10, Jeremy Fitzhardinge <[email protected]> wrote:
> On 06/30/2010 10:49 AM, Jan Beulich wrote:
>>>>> On 30.06.10 at 10:11, Peter Zijlstra <[email protected]> wrote:
>>>>>
>>> On Tue, 2010-06-29 at 15:35 +0100, Jan Beulich wrote:
>>>
>>>> The (only) additional overhead this introduces for native execution is
>>>> the writing of the owning CPU in the lock acquire paths.
>>>>
>>> Uhm, and growing the size of spinlock_t to 6 (or 8 bytes when aligned)
>>> bytes when NR_CPUS>256.
>>>
>> Indeed, I should have mentioned that. Will do so in an eventual
>> next version.
>>
>
> Rather than increasing the lock size, why not just disable the
> enlightenment if the number of (possible) cpus is > 256? I don't think
> a VM will ever have that many cpus, so it will only apply in the case of
> booting the kernel on a large physical machine.
While that would be an option, I think the decision to go either
way should really be left to the user (after all that's why there
is a config option in the first place).
Jan