2010-06-10 00:11:07

by H. Peter Anvin

Subject: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

Commit-ID: f88731e3068f9d1392ba71cc9f50f035d26a0d4f
Gitweb: http://git.kernel.org/tip/f88731e3068f9d1392ba71cc9f50f035d26a0d4f
Author: H. Peter Anvin <[email protected]>
AuthorDate: Wed, 9 Jun 2010 16:03:09 -0700
Committer: H. Peter Anvin <[email protected]>
CommitDate: Wed, 9 Jun 2010 16:03:09 -0700

x86, alternatives: Use 16-bit numbers for cpufeature index

We already have cpufeature indices above 255, so use a 16-bit number
for the alternatives index. This consumes a padding field and so
doesn't add any size, but it means that abusing the padding field to
create assembly errors on overflow no longer works. We can retain the
test simply by redirecting it to the .discard section, however.

Signed-off-by: H. Peter Anvin <[email protected]>
LKML-Reference: <tip-*@git.kernel.org>
---
arch/x86/include/asm/alternative.h | 7 ++++---
arch/x86/include/asm/cpufeature.h | 10 ++++++----
2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 03b6bb5..bc6abb7 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,10 +45,9 @@
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
+ u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad1;
#ifdef CONFIG_X86_64
u32 pad2;
#endif
@@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
_ASM_ALIGN "\n" \
_ASM_PTR "661b\n" /* label */ \
_ASM_PTR "663f\n" /* new instruction */ \
- " .byte " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
" .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
".previous\n" \
".section .altinstr_replacement, \"ax\"\n" \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4681459..66878c5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -300,11 +300,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "0\n" /* no replacement */
- " .byte %P0\n" /* feature bit */
+ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
- " .byte 0xff + 0 - (2b-1b)\n" /* padding */
".previous\n"
+ /* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
return true;
t_no:
@@ -318,10 +318,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "3f\n"
- " .byte %P1\n" /* feature bit */
+ " .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $1,%0\n"

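The commit message's overflow trick, restated: the assembler rejects a .byte operand outside 0..255, so "0xff + (replacement length) - (source length)" assembles only while the replacement is no longer than the original, and moving that byte into .discard keeps the build-time check without storing anything in the live kernel image. A minimal standalone sketch in GNU C (labels and lengths are illustrative only):

/* sizecheck.c - build with "gcc -c sizecheck.c"; assembles as-is,
 * fails if the "replacement" below is made longer than the "original" */
asm("661:	.skip 3, 0x90\n"	/* 3-byte "original" (nops) */
    "662:\n"
    "663:	.skip 2, 0x90\n"	/* 2-byte "replacement" */
    "664:\n"
    ".section .discard,\"aw\",@progbits\n"
    /* 0xff + 2 - 3 = 0xfe fits in a byte; a 4-byte replacement gives
     * 0x102, overflows .byte, and aborts the build. The byte lands in
     * .discard, so the check costs nothing in the final image. */
    ".byte 0xff + (664b-663b) - (662b-661b)\n"
    ".previous\n");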

2010-06-11 18:24:59

by H. Peter Anvin

Subject: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

Commit-ID: 884c4f15783495e42a628b7f31d2dff60ec9cd17
Gitweb: http://git.kernel.org/tip/884c4f15783495e42a628b7f31d2dff60ec9cd17
Author: H. Peter Anvin <[email protected]>
AuthorDate: Wed, 9 Jun 2010 16:03:09 -0700
Committer: H. Peter Anvin <[email protected]>
CommitDate: Fri, 11 Jun 2010 11:22:39 -0700

x86, alternatives: Use 16-bit numbers for cpufeature index

We already have cpufeature indices above 255, so use a 16-bit number
for the alternatives index. This consumes a padding field and so
doesn't add any size, but it means that abusing the padding field to
create assembly errors on overflow no longer works. We can retain the
test simply by redirecting it to the .discard section, however.

[ v2: fix open-coded alternatives sites ]

Signed-off-by: H. Peter Anvin <[email protected]>
LKML-Reference: <[email protected]>
---
arch/x86/include/asm/alternative.h | 7 ++++---
arch/x86/include/asm/cpufeature.h | 10 ++++++----
arch/x86/kernel/entry_32.S | 2 +-
arch/x86/lib/clear_page_64.S | 2 +-
arch/x86/lib/copy_page_64.S | 2 +-
arch/x86/lib/copy_user_64.S | 2 +-
arch/x86/lib/memcpy_64.S | 2 +-
arch/x86/lib/memset_64.S | 2 +-
8 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 03b6bb5..bc6abb7 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,10 +45,9 @@
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
+ u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad1;
#ifdef CONFIG_X86_64
u32 pad2;
#endif
@@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
_ASM_ALIGN "\n" \
_ASM_PTR "661b\n" /* label */ \
_ASM_PTR "663f\n" /* new instruction */ \
- " .byte " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
" .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
".previous\n" \
".section .altinstr_replacement, \"ax\"\n" \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4681459..66878c5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -300,11 +300,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "0\n" /* no replacement */
- " .byte %P0\n" /* feature bit */
+ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
- " .byte 0xff + 0 - (2b-1b)\n" /* padding */
".previous\n"
+ /* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
return true;
t_no:
@@ -318,10 +318,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "3f\n"
- " .byte %P1\n" /* feature bit */
+ " .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $1,%0\n"
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cd49141..7862cf5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -914,7 +914,7 @@ ENTRY(simd_coprocessor_error)
.balign 4
.long 661b
.long 663f
- .byte X86_FEATURE_XMM
+ .word X86_FEATURE_XMM
.byte 662b-661b
.byte 664f-663f
.previous
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index ebeafcc..aa4326b 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -52,7 +52,7 @@ ENDPROC(clear_page)
.align 8
.quad clear_page
.quad 1b
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 727a5d4..6fec2d1 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -113,7 +113,7 @@ ENDPROC(copy_page)
.align 8
.quad copy_page
.quad 1b
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 71100c9..a460158 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -29,7 +29,7 @@
.align 8
.quad 0b
.quad 2b
- .byte \feature /* when feature is set */
+ .word \feature /* when feature is set */
.byte 5
.byte 5
.previous
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index f82e884..bcbcd1e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -131,7 +131,7 @@ ENDPROC(__memcpy)
.align 8
.quad memcpy
.quad .Lmemcpy_c
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD

/*
* Replace only beginning, memcpy is used to apply alternatives,
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index e88d3b8..09d3442 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -121,7 +121,7 @@ ENDPROC(__memset)
.align 8
.quad memset
.quad .Lmemset_c
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
.byte .Lmemset_e - .Lmemset_c
.previous
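
Why v2 has to touch every open-coded site: each hand-written .altinstructions record must match struct alt_instr field for field, so widening cpuid to 16 bits shifts everything after it. A plain-C mirror of the layout (sketch; the x86-64 offsets in the comments assume no surprise padding):

#include <stdint.h>

struct alt_instr_sketch {		/* mirrors struct alt_instr above */
	uint8_t *instr;			/* .quad clear_page           (offset  0) */
	uint8_t *replacement;		/* .quad 1b                   (offset  8) */
	uint16_t cpuid;			/* .word X86_FEATURE_REP_GOOD (offset 16) */
	uint8_t instrlen;		/* .byte .Lclear_page_end - clear_page    */
	uint8_t replacementlen;		/* .byte 2b - 1b                          */
	uint32_t pad2;			/* x86-64 only                            */
};

/* A site still emitting ".byte X86_FEATURE_..." would leave its record
 * one byte short, shifting every later field and corrupting the whole
 * table; hence the conversions in entry_32.S and lib/*.S above. */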

2010-06-25 09:20:54

by Lai Jiangshan

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

tip-bot for H. Peter Anvin wrote:
> Commit-ID: f88731e3068f9d1392ba71cc9f50f035d26a0d4f
> Gitweb: http://git.kernel.org/tip/f88731e3068f9d1392ba71cc9f50f035d26a0d4f
> Author: H. Peter Anvin <[email protected]>
> AuthorDate: Wed, 9 Jun 2010 16:03:09 -0700
> Committer: H. Peter Anvin <[email protected]>
> CommitDate: Wed, 9 Jun 2010 16:03:09 -0700
>
> x86, alternatives: Use 16-bit numbers for cpufeature index
>
> We already have cpufeature indices above 255, so use a 16-bit number
> for the alternatives index. This consumes a padding field and so
> doesn't add any size, but it means that abusing the padding field to
> create assembly errors on overflow no longer works. We can retain the
> test simply by redirecting it to the .discard section, however.
>

My machine hits "invalid opcode" at *prepare_to_copy+0x79,
and it can't boot up.

(gdb) l *prepare_to_copy+0x79
0xc0101789 is in prepare_to_copy (/home/njubee/work/linux-2.6-tip/arch/x86/include/asm/xsave.h:118).
113
114	static inline void fpu_xsave(struct fpu *fpu)
115	{
116		/* This, however, we can work around by forcing the compiler to select
117		   an addressing mode that doesn't require extended registers. */
118		__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
119				     : : "D" (&(fpu->state->xsave)),
120				     "a" (-1), "d"(-1) : "memory");
121	}
122	#endif

It can boot up if this patch is reverted.
Does this patch change the return value of "use_xsave()"?

cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 15
model : 4
model name : Intel(R) Pentium(R) D CPU 2.80GHz
stepping : 8
cpu MHz : 2800.120
cache size : 1024 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 5
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss constant_tsc pebs bts tsc_reliable pni ds_cpl
bogomips : 5600.24
clflush size : 64
cache_alignment : 128
address sizes : 36 bits physical, 48 bits virtual
power management:

processor : 1
vendor_id : GenuineIntel
cpu family : 15
model : 4
model name : Intel(R) Pentium(R) D CPU 2.80GHz
stepping : 8
cpu MHz : 2800.120
cache size : 1024 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 5
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss constant_tsc pebs bts tsc_reliable pni ds_cpl
bogomips : 5574.36
clflush size : 64
cache_alignment : 128
address sizes : 36 bits physical, 48 bits virtual
power management:

2010-06-25 15:35:49

by H. Peter Anvin

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

On 06/25/2010 02:20 AM, Lai Jiangshan wrote:
>>
>> x86, alternatives: Use 16-bit numbers for cpufeature index
>>
>> We already have cpufeature indices above 255, so use a 16-bit number
>> for the alternatives index. This consumes a padding field and so
>> doesn't add any size, but it means that abusing the padding field to
>> create assembly errors on overflow no longer works. We can retain the
>> test simply by redirecting it to the .discard section, however.
>>
>
> My machine hits "invalid opcode" at *prepare_to_copy+0x79,
> and it can't boot up.
>
> (gdb) l *prepare_to_copy+0x79
> 0xc0101789 is in prepare_to_copy (/home/njubee/work/linux-2.6-tip/arch/x86/include/asm/xsave.h:118).
> 113
> 114	static inline void fpu_xsave(struct fpu *fpu)
> 115	{
> 116		/* This, however, we can work around by forcing the compiler to select
> 117		   an addressing mode that doesn't require extended registers. */
> 118		__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
> 119				     : : "D" (&(fpu->state->xsave)),
> 120				     "a" (-1), "d"(-1) : "memory");
> 121	}
> 122	#endif
>

There are no alternatives in that code, at all... so it makes me really
wonder what is going on. One possibility, of course, is that one
alternative has ended up with the wrong address. Will look...

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.

2010-06-28 07:58:47

by Lai Jiangshan

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

H. Peter Anvin wrote:
> On 06/25/2010 02:20 AM, Lai Jiangshan wrote:
>>> x86, alternatives: Use 16-bit numbers for cpufeature index
>>>
>>> We already have cpufeature indices above 255, so use a 16-bit number
>>> for the alternatives index. This consumes a padding field and so
>>> doesn't add any size, but it means that abusing the padding field to
>>> create assembly errors on overflow no longer works. We can retain the
>>> test simply by redirecting it to the .discard section, however.
>>>
>> My machine hits "invalid opcode" at *prepare_to_copy+0x79,
>> and it can't boot up.
>>
>> (gdb) l *prepare_to_copy+0x79
>> 0xc0101789 is in prepare_to_copy (/home/njubee/work/linux-2.6-tip/arch/x86/include/asm/xsave.h:118).
>> 113
>> 114	static inline void fpu_xsave(struct fpu *fpu)
>> 115	{
>> 116		/* This, however, we can work around by forcing the compiler to select
>> 117		   an addressing mode that doesn't require extended registers. */
>> 118		__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
>> 119				     : : "D" (&(fpu->state->xsave)),
>> 120				     "a" (-1), "d"(-1) : "memory");
>> 121	}
>> 122	#endif
>>
>
> There are no alternatives in that code, at all... so it makes me really
> wonder what is going on. One possibility, of course, is that one
> alternative has ended up with the wrong address. Will look...
>

There is an alternative in use_xsave().
use_xsave() should return false on my system, but it returns true after this patch is applied.

>> Does this patch change the return value of "use_xsave()"?
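
For reference, the connection being made here: in kernels of this vintage use_xsave() is a thin wrapper around static_cpu_has(), roughly as in this sketch (treat the exact signature as an assumption):

/* sketch of the call chain in question (kernel context) */
static __always_inline __pure bool use_xsave(void)
{
	return static_cpu_has(X86_FEATURE_XSAVE);
}

X86_FEATURE_XSAVE lives in capability word 4, bit 26, i.e. index 4*32 + 26 = 154; an index above 127 turns out to be exactly what trips the u8 handling fixed further down the thread.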

2010-06-28 18:58:41

by H. Peter Anvin

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

On 06/28/2010 12:58 AM, Lai Jiangshan wrote:
>
> There is an alternative in use_xsave().
> use_xsave() should return false on my system, but it returns true after this patch is applied.
>

OK, so a false substitution... strange.

-hpa

2010-06-28 19:06:13

by H. Peter Anvin

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

On 06/28/2010 12:58 AM, Lai Jiangshan wrote:
>
> There is an alternative in use_xsave().
> use_xsave() should return false on my system, but it returns true after this patch is applied.
>

Does this patch solve your problem?

-hpa


Attachments:
0001-x86-alternatives-correct-obsolete-use-of-u8-in-stati.patch (1.53 kB)

2010-06-29 04:58:01

by Lai Jiangshan

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

H. Peter Anvin wrote:
> On 06/28/2010 12:58 AM, Lai Jiangshan wrote:
>> There is an alternative in use_xsave().
>> use_xsave() should return false on my system, but it returns true after this patch is applied.
>>
>
> Does this patch solve your problem?
>
> -hpa
>
>

Yeah, it works, thanks a lot!

Lai

2010-06-29 07:07:08

by H. Peter Anvin

Subject: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

Commit-ID: 5dc71d49a7c209b77cd257049a2cdb99ed1008c0
Gitweb: http://git.kernel.org/tip/5dc71d49a7c209b77cd257049a2cdb99ed1008c0
Author: tip-bot for H. Peter Anvin <[email protected]>
AuthorDate: Thu, 10 Jun 2010 00:10:43 +0000
Committer: H. Peter Anvin <[email protected]>
CommitDate: Thu, 10 Jun 2010 23:20:34 -0700

x86, alternatives: Use 16-bit numbers for cpufeature index

We already have cpufeature indices above 255, so use a 16-bit number
for the alternatives index. This consumes a padding field and so
doesn't add any size, but it means that abusing the padding field to
create assembly errors on overflow no longer works. We can retain the
test simply by redirecting it to the .discard section, however.

Signed-off-by: H. Peter Anvin <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/include/asm/alternative.h | 7 ++++---
arch/x86/include/asm/cpufeature.h | 10 ++++++----
2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 03b6bb5..bc6abb7 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,10 +45,9 @@
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
+ u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad1;
#ifdef CONFIG_X86_64
u32 pad2;
#endif
@@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
_ASM_ALIGN "\n" \
_ASM_PTR "661b\n" /* label */ \
_ASM_PTR "663f\n" /* new instruction */ \
- " .byte " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
" .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
".previous\n" \
".section .altinstr_replacement, \"ax\"\n" \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4681459..66878c5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -300,11 +300,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "0\n" /* no replacement */
- " .byte %P0\n" /* feature bit */
+ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
- " .byte 0xff + 0 - (2b-1b)\n" /* padding */
".previous\n"
+ /* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
return true;
t_no:
@@ -318,10 +318,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "3f\n"
- " .byte %P1\n" /* feature bit */
+ " .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $1,%0\n"

2010-06-29 07:07:39

by H. Peter Anvin

Subject: [tip:x86/alternatives] x86, alternatives: correct obsolete use of "u8" in static_cpu_has()

Commit-ID: a3d2d12f275fcd33d7edc34e6b4112b5a3c9d35d
Gitweb: http://git.kernel.org/tip/a3d2d12f275fcd33d7edc34e6b4112b5a3c9d35d
Author: H. Peter Anvin <[email protected]>
AuthorDate: Mon, 28 Jun 2010 12:00:18 -0700
Committer: H. Peter Anvin <[email protected]>
CommitDate: Mon, 28 Jun 2010 22:04:47 -0700

x86, alternatives: correct obsolete use of "u8" in static_cpu_has()

gcc seems to pass unsigned words on as signed to the assembler, at
least with the %P format. As such, it is critical to get the correct
type.

Furthermore we are no longer limited to features < 256.

Reported-and-tested-by: Lai Jiangshan <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 66878c5..e8b8896 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -291,7 +291,7 @@ extern const char * const x86_power_flags[32];
* patch the target code for additional performance.
*
*/
-static __always_inline __pure bool __static_cpu_has(u8 bit)
+static __always_inline __pure bool __static_cpu_has(u16 bit)
{
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
asm goto("1: jmp %l[t_no]\n"
@@ -339,7 +339,7 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
- (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \
+ __builtin_constant_p(bit) ? \
__static_cpu_has(bit) : \
boot_cpu_has(bit) \
)
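
To make the sign-extension hazard concrete, a small userspace demo (154 is X86_FEATURE_XSAVE's index per this era's headers; the assembler-side behavior is as described in the commit message):

#include <stdio.h>
#include <stdint.h>

/* With a u8 parameter, a feature index >= 128 can reach the assembler
 * as a negative constant; ".word" then stores its 16-bit
 * two's-complement image instead of the intended index. */
int main(void)
{
	uint8_t bit = 154;			/* X86_FEATURE_XSAVE */
	int8_t as_signed = (int8_t)bit;		/* -102 */
	uint16_t stored = (uint16_t)as_signed;	/* 0xff9a == 65434 */

	printf("intended feature index: %u\n", (unsigned)bit);
	printf(".word would store:      %u (0x%04x)\n",
	       (unsigned)stored, (unsigned)stored);
	/* apply_alternatives() would then test capability bit 65434,
	 * reading far past the real cpufeature bitmap, which is
	 * consistent with use_xsave() wrongly returning true above. */
	return 0;
}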

2010-06-29 09:16:00

by Ingo Molnar

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index


* tip-bot for H. Peter Anvin <[email protected]> wrote:

> Commit-ID: 5dc71d49a7c209b77cd257049a2cdb99ed1008c0
> Gitweb: http://git.kernel.org/tip/5dc71d49a7c209b77cd257049a2cdb99ed1008c0
> Author: tip-bot for H. Peter Anvin <[email protected]>
> AuthorDate: Thu, 10 Jun 2010 00:10:43 +0000
> Committer: H. Peter Anvin <[email protected]>
> CommitDate: Thu, 10 Jun 2010 23:20:34 -0700
>
> x86, alternatives: Use 16-bit numbers for cpufeature index
>
> We already have cpufeature indices above 255, so use a 16-bit number
> for the alternatives index. This consumes a padding field and so
> doesn't add any size, but it means that abusing the padding field to
> create assembly errors on overflow no longer works. We can retain the
> test simply by redirecting it to the .discard section, however.
>
> Signed-off-by: H. Peter Anvin <[email protected]>
> LKML-Reference: <[email protected]>
> Signed-off-by: H. Peter Anvin <[email protected]>
> ---
> arch/x86/include/asm/alternative.h | 7 ++++---
> arch/x86/include/asm/cpufeature.h | 10 ++++++----
> 2 files changed, 10 insertions(+), 7 deletions(-)

Hm, this patch is causing trouble in -tip testing again - it's triggering a
colorful boot crash:

[ 2.220002] calling inet_init+0x0/0x23d @ 1
[ 2.223343] NET: Registered protocol family 2
[ 2.226727] IP route cache hash table entries: 32768 (order: 6, 262144 bytes)
[ 2.233492] ------------[ cut here ]------------
[ 2.236671] WARNING: at mm/vmalloc.c:107 vmap_page_range_noflush+0x309/0x3a0()
[ 2.240001] Modules linked in:
...
[ 3.090002] Kernel panic - not syncing: Failed to allocate TCP established hash table

So I've zapped them again. We really need to get to the bottom of this. Config
and bootlog attached.

The crash looks very weird - and it's consistent with possible effects of some
sort of code patching failure/mismatch.

It goes away if i revert these two:

a3d2d12: x86, alternatives: correct obsolete use of "u8" in static_cpu_has()
5dc71d4: x86, alternatives: Use 16-bit numbers for cpufeature index

I reproduced the crash twice before testing the revert.

Thanks,

Ingo


Attachments:
config-Tue_Jun_29_12_55_45_CEST_2010.bad (69.53 kB)
crash.log (56.69 kB)

2010-06-29 15:34:18

by H. Peter Anvin

Subject: Re: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

On 06/29/2010 02:15 AM, Ingo Molnar wrote:
>
> Hm, this patch is causing trouble in -tip testing again - it's triggering a
> colorful boot crash:
>
> [ 2.220002] calling inet_init+0x0/0x23d @ 1
> [ 2.223343] NET: Registered protocol family 2
> [ 2.226727] IP route cache hash table entries: 32768 (order: 6, 262144 bytes)
> [ 2.233492] ------------[ cut here ]------------
> [ 2.236671] WARNING: at mm/vmalloc.c:107 vmap_page_range_noflush+0x309/0x3a0()
> [ 2.240001] Modules linked in:
> ...
> [ 3.090002] Kernel panic - not syncing: Failed to allocate TCP established hash table
>
> So I've zapped them again. We really need to get to the bottom of this. Config
> and bootlog attached.
>
> The crash looks very weird - and it's consistent with possible effects of some
> sort of code patching failure/mismatch.
>
> It goes away if i revert these two:
>
> a3d2d12: x86, alternatives: correct obsolete use of "u8" in static_cpu_has()
> 5dc71d4: x86, alternatives: Use 16-bit numbers for cpufeature index
>
> I reproduced the crash twice before testing the revert.
>

Hi Ingo,

I'm pretty sure that these are related to gcc and/or binutils
differences, so it would be nice to get the .o and .s files of the
failing locations (in this case mm/vmalloc.[so]) *as built on the
failing machines*.

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.

2010-07-07 17:45:58

by H. Peter Anvin

Subject: [tip:x86/alternatives] x86, alternatives: Use 16-bit numbers for cpufeature index

Commit-ID: 83a7a2ad2a9173dcabc05df0f01d1d85b7ba1c2c
Gitweb: http://git.kernel.org/tip/83a7a2ad2a9173dcabc05df0f01d1d85b7ba1c2c
Author: H. Peter Anvin <[email protected]>
AuthorDate: Thu, 10 Jun 2010 00:10:43 +0000
Committer: H. Peter Anvin <[email protected]>
CommitDate: Wed, 7 Jul 2010 10:36:28 -0700

x86, alternatives: Use 16-bit numbers for cpufeature index

We already have cpufeature indices above 255, so use a 16-bit number
for the alternatives index. This consumes a padding field and so
doesn't add any size, but it means that abusing the padding field to
create assembly errors on overflow no longer works. We can retain the
test simply by redirecting it to the .discard section, however.

[ v3: updated to include open-coded locations ]

Signed-off-by: H. Peter Anvin <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/include/asm/alternative.h | 7 ++++---
arch/x86/include/asm/cpufeature.h | 14 ++++++++------
arch/x86/kernel/entry_32.S | 2 +-
arch/x86/lib/clear_page_64.S | 2 +-
arch/x86/lib/copy_page_64.S | 2 +-
arch/x86/lib/memcpy_64.S | 2 +-
arch/x86/lib/memset_64.S | 2 +-
7 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 03b6bb5..bc6abb7 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,10 +45,9 @@
struct alt_instr {
u8 *instr; /* original instruction */
u8 *replacement;
- u8 cpuid; /* cpuid bit set for replacement */
+ u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
- u8 pad1;
#ifdef CONFIG_X86_64
u32 pad2;
#endif
@@ -86,9 +85,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
_ASM_ALIGN "\n" \
_ASM_PTR "661b\n" /* label */ \
_ASM_PTR "663f\n" /* new instruction */ \
- " .byte " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* sourcelen */ \
" .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
" .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
".previous\n" \
".section .altinstr_replacement, \"ax\"\n" \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4681459..e8b8896 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -291,7 +291,7 @@ extern const char * const x86_power_flags[32];
* patch the target code for additional performance.
*
*/
-static __always_inline __pure bool __static_cpu_has(u8 bit)
+static __always_inline __pure bool __static_cpu_has(u16 bit)
{
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
asm goto("1: jmp %l[t_no]\n"
@@ -300,11 +300,11 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "0\n" /* no replacement */
- " .byte %P0\n" /* feature bit */
+ " .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
- " .byte 0xff + 0 - (2b-1b)\n" /* padding */
".previous\n"
+ /* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
return true;
t_no:
@@ -318,10 +318,12 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
_ASM_ALIGN "\n"
_ASM_PTR "1b\n"
_ASM_PTR "3f\n"
- " .byte %P1\n" /* feature bit */
+ " .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */
+ ".previous\n"
+ ".section .discard,\"aw\",@progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
"3: movb $1,%0\n"
@@ -337,7 +339,7 @@ static __always_inline __pure bool __static_cpu_has(u8 bit)
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
- (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \
+ __builtin_constant_p(bit) ? \
__static_cpu_has(bit) : \
boot_cpu_has(bit) \
)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cd49141..7862cf5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -914,7 +914,7 @@ ENTRY(simd_coprocessor_error)
.balign 4
.long 661b
.long 663f
- .byte X86_FEATURE_XMM
+ .word X86_FEATURE_XMM
.byte 662b-661b
.byte 664f-663f
.previous
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index ebeafcc..aa4326b 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -52,7 +52,7 @@ ENDPROC(clear_page)
.align 8
.quad clear_page
.quad 1b
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 727a5d4..6fec2d1 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -113,7 +113,7 @@ ENDPROC(copy_page)
.align 8
.quad copy_page
.quad 1b
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index f82e884..bcbcd1e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -131,7 +131,7 @@ ENDPROC(__memcpy)
.align 8
.quad memcpy
.quad .Lmemcpy_c
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD

/*
* Replace only beginning, memcpy is used to apply alternatives,
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index e88d3b8..09d3442 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -121,7 +121,7 @@ ENDPROC(__memset)
.align 8
.quad memset
.quad .Lmemset_c
- .byte X86_FEATURE_REP_GOOD
+ .word X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
.byte .Lmemset_e - .Lmemset_c
.previous
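
Restated as pseudo-C, the selection logic in the final static_cpu_has() hunk now reads roughly as below. This is a sketch only: the real code must remain a macro so that __builtin_constant_p() sees the caller's argument directly.

/* sketch of the static_cpu_has() dispatch after this patch (kernel context) */
static inline bool static_cpu_has_sketch(u16 bit)
{
	if (__builtin_constant_p(boot_cpu_has(bit)))
		return boot_cpu_has(bit);	/* already folded at compile time */
	if (__builtin_constant_p(bit))
		return __static_cpu_has(bit);	/* self-patching path, now for
						   any 16-bit index; the old
						   "bit < 256" guard is gone */
	return boot_cpu_has(bit);		/* plain runtime test */
}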