2009-04-22 20:48:57

by Mathieu Desnoyers

[permalink] [raw]
Subject: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

http://google-perftools.googlecode.com/svn-history/r48/trunk/src/base/atomicops-internals-x86.cc

says

" // Opteron Rev E has a bug in which on very rare occasions a locked
// instruction doesn't act as a read-acquire barrier if followed by a
// non-locked read-modify-write instruction. Rev F has this bug in
// pre-release versions, but not in versions released to customers,
// so we test only for Rev E, which is family 15, model 32..63 inclusive.
if (strcmp(vendor, "AuthenticAMD") == 0 && // AMD
family == 15 &&
32 <= model && model <= 63) {
AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
} else {
AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
}
"

There is no official AMD bug ID yet, but it seems to be reported in the field
and a fix is present in Solaris code base. The following links shows the current
understanding of the issue.

http://bugzilla.kernel.org/show_bug.cgi?id=11305
http://bugs.mysql.com/bug.php?id=26081

Since I needed to put the alternative within assembly statements, I decided to
rework alternative.h and fold the multiple alternative*() definitions into the
same ALTERNATIVE() macro. It is done in the previous patch. Those were
duplicated code anyway.

AFAIK, this bug has not received any official bug ID from AMD. Solaris already
have a workaround for this issue.

One should probably audit Xen cmpxchg too.

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: Andrew Morton <[email protected]>
CC: [email protected]
CC: [email protected]
CC: 'Ingo Molnar' <[email protected]>
CC: H. Peter Anvin <[email protected]>
CC: Andi Kleen <[email protected]>
CC: Avi Kivity <[email protected]>
---
arch/x86/include/asm/alternative.h | 5 +++++
arch/x86/include/asm/cmpxchg_64.h | 21 ++++++++++++++-------
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/include/asm/futex.h | 2 ++
arch/x86/include/asm/rwsem.h | 1 +
arch/x86/include/asm/spinlock.h | 2 ++
arch/x86/kernel/cpu/amd.c | 4 ++++
7 files changed, 29 insertions(+), 7 deletions(-)

Index: linux-2.6-lttng/arch/x86/include/asm/cpufeature.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/cpufeature.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/cpufeature.h 2009-04-18 23:58:28.000000000 -0400
@@ -94,6 +94,7 @@
#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_NEED_CMPXCHG_LFENCE (3*32+26) /* Fix AMD cmpxchg lfence */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
Index: linux-2.6-lttng/arch/x86/kernel/cpu/amd.c
===================================================================
--- linux-2.6-lttng.orig/arch/x86/kernel/cpu/amd.c 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/kernel/cpu/amd.c 2009-04-18 23:58:28.000000000 -0400
@@ -416,6 +416,10 @@ static void __cpuinit init_amd(struct cp
clear_cpu_cap(c, X86_FEATURE_MCE);
#endif

+ /* Enable workaround for missing AMD cmpxchg lfence */
+ if (c->x86 == 0xf && c->x86_model >= 32 && c->x86_model <= 63)
+ set_cpu_cap(c, X86_FEATURE_NEED_CMPXCHG_LFENCE);
+
/* Enable workaround for FXSAVE leak */
if (c->x86 >= 6)
set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
Index: linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/cmpxchg_64.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/cmpxchg_64.h 2009-04-18 23:58:28.000000000 -0400
@@ -66,25 +66,29 @@ static inline unsigned long __cmpxchg(vo
unsigned long prev;
switch (size) {
case 1:
- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+ asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 2:
- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+ asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+ asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 8:
- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+ asm volatile(LOCK_PREFIX "cmpxchgq %1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
@@ -105,19 +109,22 @@ static inline unsigned long __sync_cmpxc
unsigned long prev;
switch (size) {
case 1:
- asm volatile("lock; cmpxchgb %b1,%2"
+ asm volatile("lock; cmpxchgb %b1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "q"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 2:
- asm volatile("lock; cmpxchgw %w1,%2"
+ asm volatile("lock; cmpxchgw %w1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
- asm volatile("lock; cmpxchgl %1,%2"
+ asm volatile("lock; cmpxchgl %1,%2\n\t"
+ CMPXCHG_LFENCE
: "=a"(prev)
: "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
Index: linux-2.6-lttng/arch/x86/include/asm/alternative.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/alternative.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/alternative.h 2009-04-18 23:58:28.000000000 -0400
@@ -5,6 +5,7 @@
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <asm/asm.h>
+#include <asm/nops.h>

/*
* Alternative inline assembly for SMP.
@@ -54,6 +55,10 @@ struct alt_instr {
#endif
};

+/* Needed both in UP and SMP build because of cmpxchg_sync */
+#define CMPXCHG_LFENCE ALTERNATIVE(ASM_NOP3, "lfence", \
+ X86_FEATURE_NEED_CMPXCHG_LFENCE)
+
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);

Index: linux-2.6-lttng/arch/x86/include/asm/spinlock.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/spinlock.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/spinlock.h 2009-04-18 23:58:28.000000000 -0400
@@ -86,6 +86,7 @@ static __always_inline int __ticket_spin
"leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
"jne 1f\n\t"
LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+ CMPXCHG_LFENCE "\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
@@ -139,6 +140,7 @@ static __always_inline int __ticket_spin
"leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
"jne 1f\n\t"
LOCK_PREFIX "cmpxchgl %1,%2\n\t"
+ CMPXCHG_LFENCE "\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
Index: linux-2.6-lttng/arch/x86/include/asm/rwsem.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/rwsem.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/rwsem.h 2009-04-18 23:58:28.000000000 -0400
@@ -129,6 +129,7 @@ static inline int __down_read_trylock(st
" addl %3,%2\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchgl %2,%0\n\t"
+ CMPXCHG_LFENCE "\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
Index: linux-2.6-lttng/arch/x86/include/asm/futex.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/futex.h 2009-04-18 23:58:23.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/futex.h 2009-04-18 23:58:28.000000000 -0400
@@ -26,6 +26,7 @@
"\tmovl\t%0, %3\n" \
"\t" insn "\n" \
"2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
+ CMPXCHG_LFENCE "\n" \
"\tjnz\t1b\n" \
"3:\t.section .fixup,\"ax\"\n" \
"4:\tmov\t%5, %1\n" \
@@ -123,6 +124,7 @@ static inline int futex_atomic_cmpxchg_i
return -EFAULT;

asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
+ CMPXCHG_LFENCE "\n"
"2:\t.section .fixup, \"ax\"\n"
"3:\tmov %2, %0\n"
"\tjmp 2b\n"

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


2009-04-22 21:00:25

by Alan

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

> There is no official AMD bug ID yet, but it seems to be reported in the field
> and a fix is present in Solaris code base. The following links shows the current
> understanding of the issue.

I would like to know why there isn't an official vendor Bug ID yet
because without that nobody knows what the fix actually is or when it is
needed. It's also not going to help much given the user space problem
(and mixed kernel/user stuff like futexes).

Second point - it needs to be possible to avoid compiling this stuff in
the first place. I don't see where you arrange CMPXCHNG_LFENCE to come
out as blank when people compile for processors without the bug ?

2009-04-22 22:48:18

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

* Alan Cox ([email protected]) wrote:
> > There is no official AMD bug ID yet, but it seems to be reported in the field
> > and a fix is present in Solaris code base. The following links shows the current
> > understanding of the issue.
>
> I would like to know why there isn't an official vendor Bug ID yet
> because without that nobody knows what the fix actually is or when it is
> needed.

I guess AMD should answer to this.

> It's also not going to help much given the user space problem
> (and mixed kernel/user stuff like futexes).

Yeah, ideally, unless user-space is completely audited, the kernel
should restrict multi-threaded user-space programs to run on a single
CPU unless they don't share any memory mapping when the kernel finds it
is running on such broken CPU. Or it can simply refuse to bring up more
than one CPU. That would be a very basic gross fix for a gross problem,
but at least there would be no data corruption. If we use this last
solution, then my cmpxchg lfence workaround becomes unneeded.

> Second point - it needs to be possible to avoid compiling this stuff in
> the first place. I don't see where you arrange CMPXCHNG_LFENCE to come
> out as blank when people compile for processors without the bug ?
>

Good point. I should probably configure this as "nothing" unless

#ifdef CONFIG_X86_64
#if (defined(CONFIG_GENERIC_CPU) || defined(CONFIG_MK8))
...
#endif
#endif

I doubt it's worth adding a "HAVE_MISSING_CMPXCHG_LFENCE" select.

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-04-22 22:55:30

by Alan

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

> Good point. I should probably configure this as "nothing" unless
>
> #ifdef CONFIG_X86_64
> #if (defined(CONFIG_GENERIC_CPU) || defined(CONFIG_MK8))
> ...
> #endif
> #endif

Is the erratum only present in 64bit mode ?

Really this needs someone with knowledge of the erratum to characterise
it accurately and the workarounds.

2009-04-22 23:26:48

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

* Alan Cox ([email protected]) wrote:
> > Good point. I should probably configure this as "nothing" unless
> >
> > #ifdef CONFIG_X86_64
> > #if (defined(CONFIG_GENERIC_CPU) || defined(CONFIG_MK8))
> > ...
> > #endif
> > #endif
>
> Is the erratum only present in 64bit mode ?
>

Hrm, from the info I gathered in the web pages linked in the patch
header (refering to Google and Solaris), nothing said it was limited to
64bit mode. You are right.

> Really this needs someone with knowledge of the erratum to characterise
> it accurately and the workarounds.

Yep, but all I can hear from AMD so far is... *crickets*

The last revision of their errata I can find has been updated in
February 2008, and does not talk about this issue.

http://support.amd.com/us/Processor_TechDocs/25759.pdf

Thanks,

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-04-23 08:09:07

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier


* Mathieu Desnoyers <[email protected]> wrote:

> " // Opteron Rev E has a bug in which on very rare occasions a locked
> // instruction doesn't act as a read-acquire barrier if followed by a
> // non-locked read-modify-write instruction. Rev F has this bug in
> // pre-release versions, but not in versions released to customers,
> // so we test only for Rev E, which is family 15, model 32..63 inclusive.

Dunno. The fix looks a bit intrusive (emits a NOP even on good
CPUs). Also, the text above says "not in versions released to
customers".

So unless there's an official erratum or reports in the field (not
from early prototype systems shipped to developers) i'd not rush to
apply it, just yet.

Ingo

2009-04-23 13:20:10

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

* Ingo Molnar ([email protected]) wrote:
>
> * Mathieu Desnoyers <[email protected]> wrote:
>
> > " // Opteron Rev E has a bug in which on very rare occasions a locked
> > // instruction doesn't act as a read-acquire barrier if followed by a
> > // non-locked read-modify-write instruction. Rev F has this bug in
> > // pre-release versions, but not in versions released to customers,
> > // so we test only for Rev E, which is family 15, model 32..63 inclusive.
>
> Dunno. The fix looks a bit intrusive (emits a NOP even on good
> CPUs). Also, the text above says "not in versions released to
> customers".
>
> So unless there's an official erratum or reports in the field (not
> from early prototype systems shipped to developers) i'd not rush to
> apply it, just yet.
>

Actually, Operon Rev E has this bug in the field (family 15, model
32..64). Rev F only had the bug in pre-releases.

But yes, it's bad that it drags so many code additions to something as
critical as cmpxchg. I start to think it might be better to just
disallow bringing up more than one CPU on these machines.

Mathieu

> Ingo

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-04-23 13:41:33

by Arkadiusz Miskiewicz

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

On Thursday 23 of April 2009, Mathieu Desnoyers wrote:
> * Ingo Molnar ([email protected]) wrote:
> > * Mathieu Desnoyers <[email protected]> wrote:
> > > " // Opteron Rev E has a bug in which on very rare occasions a locked
> > > // instruction doesn't act as a read-acquire barrier if followed by a
> > > // non-locked read-modify-write instruction. Rev F has this bug in
> > > // pre-release versions, but not in versions released to customers,
> > > // so we test only for Rev E, which is family 15, model 32..63
> > > inclusive.
> >
> > Dunno. The fix looks a bit intrusive (emits a NOP even on good
> > CPUs). Also, the text above says "not in versions released to
> > customers".
> >
> > So unless there's an official erratum or reports in the field (not
> > from early prototype systems shipped to developers) i'd not rush to
> > apply it, just yet.
>
> Actually, Operon Rev E has this bug in the field (family 15, model
> 32..64). Rev F only had the bug in pre-releases.
>
> But yes, it's bad that it drags so many code additions to something as
> critical as cmpxchg. I start to think it might be better to just
> disallow bringing up more than one CPU on these machines.

That probably would be even worse than what we have now. This bug doesn't
manifest too often in a noticeable way here (I have few such machines here,
mostly 2 x dual core; once per few months mysql dies) and loosing 3 of 4 cores
(or 1 cpu of 2; depends on what you mean) doesn't sound like fun.

> Mathieu


--
Arkadiusz Miśkiewicz PLD/Linux Team
arekm / maven.pl http://ftp.pld-linux.org/

2009-04-23 22:17:25

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

* Arkadiusz Miskiewicz ([email protected]) wrote:
> On Thursday 23 of April 2009, Mathieu Desnoyers wrote:
> > * Ingo Molnar ([email protected]) wrote:
> > > * Mathieu Desnoyers <[email protected]> wrote:
> > > > " // Opteron Rev E has a bug in which on very rare occasions a locked
> > > > // instruction doesn't act as a read-acquire barrier if followed by a
> > > > // non-locked read-modify-write instruction. Rev F has this bug in
> > > > // pre-release versions, but not in versions released to customers,
> > > > // so we test only for Rev E, which is family 15, model 32..63
> > > > inclusive.
> > >
> > > Dunno. The fix looks a bit intrusive (emits a NOP even on good
> > > CPUs). Also, the text above says "not in versions released to
> > > customers".
> > >
> > > So unless there's an official erratum or reports in the field (not
> > > from early prototype systems shipped to developers) i'd not rush to
> > > apply it, just yet.
> >
> > Actually, Operon Rev E has this bug in the field (family 15, model
> > 32..64). Rev F only had the bug in pre-releases.
> >
> > But yes, it's bad that it drags so many code additions to something as
> > critical as cmpxchg. I start to think it might be better to just
> > disallow bringing up more than one CPU on these machines.
>
> That probably would be even worse than what we have now. This bug doesn't
> manifest too often in a noticeable way here (I have few such machines here,
> mostly 2 x dual core; once per few months mysql dies) and loosing 3 of 4 cores
> (or 1 cpu of 2; depends on what you mean) doesn't sound like fun.
>

Having silent data corruption does not sound like fun neither. Another
alternative, when we detect those CPUs, is to printk a warning telling :

"AMD Opteron family X model Y is known to corrupt data on SMP due"
"to incorrect cmpxchg instruction memory barriers. Please contact"
"AMD for more information."

And activate the "tainted" kernel flag. This way, we won't be bothered
trying to fix AMD bugs, and it will officially become AMD's problem.

Mathieu

> > Mathieu
>
>
> --
> Arkadiusz Miśkiewicz PLD/Linux Team
> arekm / maven.pl http://ftp.pld-linux.org/
>
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-04-25 08:19:38

by Pavel Machek

[permalink] [raw]
Subject: Re: [patch 2/2] x86 amd fix cmpxchg read acquire barrier

On Wed 2009-04-22 16:18:54, Mathieu Desnoyers wrote:
> http://google-perftools.googlecode.com/svn-history/r48/trunk/src/base/atomicops-internals-x86.cc
>
> says
>
> " // Opteron Rev E has a bug in which on very rare occasions a locked
> // instruction doesn't act as a read-acquire barrier if followed by a
> // non-locked read-modify-write instruction. Rev F has this bug in
> // pre-release versions, but not in versions released to customers,
> // so we test only for Rev E, which is family 15, model 32..63 inclusive.

Could we be more careful here and catch the F pre-release versions,
too? Stepping should help there...

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html