Under the assumption that the nop-s added by the base ticket spinlock
enlightenment patch might be considered undesirable (or worse), here
is an optional patch to eliminate these nop-s again. This is done
through extending the memory operands of the inc instructions used for
unlocking ticket locks to the necessary size, using assembler and
linker features.
Signed-off-by: Jan Beulich <[email protected]>
Cc: Jeremy Fitzhardinge <[email protected]>
Cc: KY Srinivasan <[email protected]>
---
arch/x86/Makefile | 3 +
arch/x86/include/asm/alternative-asm.h | 59 +++++++++++++++++++++++++++++++++
arch/x86/include/asm/alternative.h | 5 ++
arch/x86/include/asm/spinlock.h | 7 +--
arch/x86/kernel/symdefs.lds | 1
arch/x86/kernel/vmlinux.lds.S | 2 +
6 files changed, 72 insertions(+), 5 deletions(-)
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/Makefile
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/Makefile
@@ -87,6 +87,9 @@ ifeq ($(CONFIG_KMEMCHECK),y)
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
endif
+KBUILD_CFLAGS += -Wa,-I$(srctree)/arch/x86/include
+LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/symdefs.lds
+
# Stackpointer is addressed different for 32 bit and 64 bit x86
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/alternative.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/alternative.h
@@ -6,6 +6,11 @@
#include <linux/stringify.h>
#include <asm/asm.h>
+#if !defined(__ASSEMBLY__) && !defined(__PIC__)
+#include <asm/alternative-asm.h> /* just for tracking the build dependency */
+__asm__(".include \"asm/alternative-asm.h\"");
+#endif
+
/*
* Alternative inline assembly for SMP.
*
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/alternative-asm.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/alternative-asm.h
@@ -1,3 +1,7 @@
+#if 0 /* Hide this from compiler. */
+ .if 0 # Hide assembly source stuff when assembling compiler output.
+#endif
+
#ifdef __ASSEMBLY__
#include <asm/asm.h>
@@ -16,3 +20,58 @@
#endif
#endif /* __ASSEMBLY__ */
+
+#if 0 /* Hide this from compiler. */
+ .else # Code to be used in compiler output:
+
+ .weak _$.zero
+
+ .macro unary opc arg1 arg2 arg3
+ .Lempty=2
+ .irpc c,"\arg2"
+ .Lempty=3
+ .endr
+ .irpc c,"\arg3"
+ .Lempty=0
+ .endr
+ .Lsym=1
+ .Lnum=0
+ .irpc c,"\arg1"
+ .irpc m,"(123456789-0"
+ .ifeqs "\c","\m"
+ .Lsym=0
+ .exitm
+ .endif
+ .Lnum=1
+ .endr
+ .exitm
+ .endr
+ .if .Lempty == 2
+ .if .Lsym
+ \opc \arg1
+ .elseif .Lnum
+ \opc _$.zero+\arg1
+ .else
+ \opc _$.zero\arg1
+ .endif
+ .elseif .Lempty == 3
+ .if .Lsym
+ \opc \arg1,\arg2
+ .elseif .Lnum
+ \opc _$.zero+\arg1,\arg2
+ .else
+ \opc _$.zero\arg1,\arg2
+ .endif
+ .else
+ .if .Lsym
+ \opc \arg1,\arg2,\arg3
+ .elseif .Lnum
+ \opc _$.zero+\arg1,\arg2,\arg3
+ .else
+ \opc _$.zero\arg1,\arg2,\arg3
+ .endif
+ .endif
+ .endm
+
+ .endif
+#endif
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
@@ -10,7 +10,6 @@
#ifdef CONFIG_ENLIGHTEN_SPINLOCKS
#include <asm/alternative.h>
-#include <asm/nops.h>
/* Including asm/smp.h here causes a cyclic include dependency. */
#include <asm/percpu.h>
DECLARE_PER_CPU(int, cpu_number);
@@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
#else
unsigned int token;
- alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
- ASM_NOP3,
+ alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
ALTERNATIVE_TICKET_UNLOCK_HEAD
UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
"movzwl %[lock], %[token]\n\t"
@@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
#else
unsigned int token, tmp;
- alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
- ASM_NOP2,
+ alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
ALTERNATIVE_TICKET_UNLOCK_HEAD
UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
"movl %[lock], %[token]\n\t"
--- /dev/null
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/symdefs.lds
@@ -0,0 +1 @@
+_$.zero = 0;
--- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/vmlinux.lds.S
+++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/vmlinux.lds.S
@@ -27,6 +27,8 @@
#include <asm/cache.h>
#include <asm/boot.h>
+#include "symdefs.lds"
+
#undef i386 /* in case the preprocessor is a 32bit one */
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
On 06/29/2010 07:33 AM, Jan Beulich wrote:
> Under the assumption that the nop-s added by the base ticket spinlock
> enlightenment patch might be considered undesirable (or worse), here
> is an optional patch to eliminate these nop-s again. This is done
> through extending the memory operands of the inc instructions used for
> unlocking ticket locks to the necessary size, using assembler and
> linker features.
>
> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
> @@ -10,7 +10,6 @@
>
> #ifdef CONFIG_ENLIGHTEN_SPINLOCKS
> #include <asm/alternative.h>
> -#include <asm/nops.h>
> /* Including asm/smp.h here causes a cyclic include dependency. */
> #include <asm/percpu.h>
> DECLARE_PER_CPU(int, cpu_number);
> @@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
> #else
> unsigned int token;
>
> - alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
> - ASM_NOP3,
> + alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
> ALTERNATIVE_TICKET_UNLOCK_HEAD
> UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
> "movzwl %[lock], %[token]\n\t"
> @@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
> #else
> unsigned int token, tmp;
>
> - alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
> - ASM_NOP2,
> + alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
> ALTERNATIVE_TICKET_UNLOCK_HEAD
> UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
> "movl %[lock], %[token]\n\t"
If you're stretching (bloating) them anyway, perhaps we should be using
"add" instructions instead, with their better EFLAGS behavior?
-hpa
>>> On 30.06.10 at 03:13, "H. Peter Anvin" <[email protected]> wrote:
> On 06/29/2010 07:33 AM, Jan Beulich wrote:
>> Under the assumption that the nop-s added by the base ticket spinlock
>> enlightenment patch might be considered undesirable (or worse), here
>> is an optional patch to eliminate these nop-s again. This is done
>> through extending the memory operands of the inc instructions used for
>> unlocking ticket locks to the necessary size, using assembler and
>> linker features.
>>
>> --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock.h
>> +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock.h
>> @@ -10,7 +10,6 @@
>>
>> #ifdef CONFIG_ENLIGHTEN_SPINLOCKS
>> #include <asm/alternative.h>
>> -#include <asm/nops.h>
>> /* Including asm/smp.h here causes a cyclic include dependency. */
>> #include <asm/percpu.h>
>> DECLARE_PER_CPU(int, cpu_number);
>> @@ -156,8 +155,7 @@ static __always_inline void __ticket_spi
>> #else
>> unsigned int token;
>>
>> - alternative_io(UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
>> - ASM_NOP3,
>> + alternative_io(UNLOCK_LOCK_PREFIX "unary incb %[lock]\n\t",
>> ALTERNATIVE_TICKET_UNLOCK_HEAD
>> UNLOCK_LOCK_PREFIX "incb %[lock]\n\t"
>> "movzwl %[lock], %[token]\n\t"
>> @@ -228,8 +226,7 @@ static __always_inline void __ticket_spi
>> #else
>> unsigned int token, tmp;
>>
>> - alternative_io(UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
>> - ASM_NOP2,
>> + alternative_io(UNLOCK_LOCK_PREFIX "unary incw %[lock]\n\t",
>> ALTERNATIVE_TICKET_UNLOCK_HEAD
>> UNLOCK_LOCK_PREFIX "incw %[lock]\n\t"
>> "movl %[lock], %[token]\n\t"
>
> If you're stretching (bloating) them anyway, perhaps we should be using
> "add" instructions instead, with their better EFLAGS behavior?
Hmm, yes, that possibility I didn't even consider. Would have
the potential to get away without that admittedly ugly "unary"
assembler macro altogether, though at the price of growing all
instructions rather than just those that have a non-symbolic
and small displacement. Since unlock generally gets inlined, I'm
not certain this additional growth in code size would be
acceptable...
Please let me know, though before submitting an eventual third
version I'd appreciate knowing whether the first two patches in
particular need further changes in order to get accepted.
Thanks, Jan
On 06/30/2010 12:07 AM, Jan Beulich wrote:
>>
>> If you're stretching (bloating) them anyway, perhaps we should be using
>> "add" instructions instead, with their better EFLAGS behavior?
>
> Hmm, yes, that possibility I didn't even consider. Would have
> the potential to get away without that admittedly ugly "unary"
> assembler macro altogether, though at the price of growing all
> instructions rather than just those that have a non-symbolic
> and small displacement. Since unlock generally gets inlined, I'm
> not certain this additional growth in code size would be
> acceptable...
>
> Please let me know, though before submitting an eventual third
> version I'd appreciate knowing whether the first two patches in
> particular need further changes in order to get accepted.
>
Will look at it today, hopefully. The Syslinux 4.00 release has
unfortunately occupied me over the last week-plus.
As far as the "unary" macro is concerned... I have to admit I couldn't
even figure out what it was supposed to do. It could definitely use a
better comment.
-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.