2018-02-22 09:26:20

by Andrea Parri

Subject: [PATCH 2/2] locking/xchg/alpha: Add leading smp_mb() to xchg(), cmpxchg()

Successful RMW operations are supposed to be fully ordered, but
Alpha's xchg() and cmpxchg() do not meet this requirement.

Will reported that:

> So MP using xchg:
>
> WRITE_ONCE(x, 1)
> xchg(y, 1)
>
> smp_load_acquire(y) == 1
> READ_ONCE(x) == 0
>
> would be allowed.

(thus violating the above requirement). Amend this by adding a
leading smp_mb() to the implementations of xchg(), cmpxchg().
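
For reference, the reported scenario can be written as an MP litmus
test in the C-flavoured format used by the herd7/LKMM tools; this is
only an illustrative sketch (the test name and register names are
mine, not taken from the thread). A fully ordered xchg() must forbid
the outcome named in the "exists" clause; without the leading barrier,
Alpha could observe it:

C MP+xchg+poacquireonce

(*
 * Illustrative sketch: the "exists" clause names the outcome that a
 * fully ordered xchg() must forbid.
 *)

{}

P0(int *x, int *y)
{
	int r0;

	WRITE_ONCE(*x, 1);
	r0 = xchg(y, 1);
}

P1(int *x, int *y)
{
	int r0;
	int r1;

	r0 = smp_load_acquire(y);
	r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)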

Reported-by: Will Deacon <[email protected]>
Signed-off-by: Andrea Parri <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Alan Stern <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
arch/alpha/include/asm/xchg.h | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index e1facf6fc2446..e2b59fac5257d 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -12,6 +12,10 @@
* Atomic exchange.
* Since it can be used to implement critical sections
* it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
*/

static inline unsigned long
@@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
{
unsigned long ret, tmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" insbl %1,%4,%1\n"
@@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
{
unsigned long ret, tmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" inswl %1,%4,%1\n"
@@ -67,6 +73,7 @@ ____xchg(_u32, volatile int *m, unsigned long val)
{
unsigned long dummy;

+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -87,6 +94,7 @@ ____xchg(_u64, volatile long *m, unsigned long val)
{
unsigned long dummy;

+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -128,9 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*
- * The memory barrier is placed in SMP unconditionally, in order to
- * guarantee that dependency ordering is preserved when a dependency
- * is headed by an unsuccessful operation.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
*/

static inline unsigned long
@@ -138,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
{
unsigned long prev, tmp, cmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" insbl %1,%5,%1\n"
@@ -165,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
{
unsigned long prev, tmp, cmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" inswl %1,%5,%1\n"
@@ -192,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
{
unsigned long prev, cmp;

+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -215,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
{
unsigned long prev, cmp;

+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%5\n"
" cmpeq %0,%3,%1\n"
--
2.7.4



2018-02-22 21:48:05

by Paul E. McKenney

Subject: Re: [PATCH 2/2] locking/xchg/alpha: Add leading smp_mb() to xchg(), cmpxchg()

On Thu, Feb 22, 2018 at 10:24:48AM +0100, Andrea Parri wrote:
> Successful RMW operations are supposed to be fully ordered, but
> Alpha's xchg() and cmpxchg() do not meet this requirement.
>
> Will reported that:
>
> > So MP using xchg:
> >
> > WRITE_ONCE(x, 1)
> > xchg(y, 1)
> >
> > smp_load_acquire(y) == 1
> > READ_ONCE(x) == 0
> >
> > would be allowed.
>
> (thus violating the above requirement). Amend this by adding a
> leading smp_mb() to the implementations of xchg(), cmpxchg().
>
> Reported-by: Will Deacon <[email protected]>
> Signed-off-by: Andrea Parri <[email protected]>

Acked-by: Paul E. McKenney <[email protected]>

> Cc: Peter Zijlstra <[email protected]>
> Cc: Paul E. McKenney <[email protected]>
> Cc: Alan Stern <[email protected]>
> Cc: Ivan Kokshaysky <[email protected]>
> Cc: Matt Turner <[email protected]>
> Cc: Richard Henderson <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> ---
> arch/alpha/include/asm/xchg.h | 21 ++++++++++++++++++---
> 1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
> index e1facf6fc2446..e2b59fac5257d 100644
> --- a/arch/alpha/include/asm/xchg.h
> +++ b/arch/alpha/include/asm/xchg.h
> @@ -12,6 +12,10 @@
> * Atomic exchange.
> * Since it can be used to implement critical sections
> * it must clobber "memory" (also for interrupts in UP).
> + *
> + * The leading and the trailing memory barriers guarantee that these
> + * operations are fully ordered.
> + *
> */
>
> static inline unsigned long
> @@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
> {
> unsigned long ret, tmp, addr64;
>
> + smp_mb();
> __asm__ __volatile__(
> " andnot %4,7,%3\n"
> " insbl %1,%4,%1\n"
> @@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
> {
> unsigned long ret, tmp, addr64;
>
> + smp_mb();
> __asm__ __volatile__(
> " andnot %4,7,%3\n"
> " inswl %1,%4,%1\n"
> @@ -67,6 +73,7 @@ ____xchg(_u32, volatile int *m, unsigned long val)
> {
> unsigned long dummy;
>
> + smp_mb();
> __asm__ __volatile__(
> "1: ldl_l %0,%4\n"
> " bis $31,%3,%1\n"
> @@ -87,6 +94,7 @@ ____xchg(_u64, volatile long *m, unsigned long val)
> {
> unsigned long dummy;
>
> + smp_mb();
> __asm__ __volatile__(
> "1: ldq_l %0,%4\n"
> " bis $31,%3,%1\n"
> @@ -128,9 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
> * store NEW in MEM. Return the initial value in MEM. Success is
> * indicated by comparing RETURN with OLD.
> *
> - * The memory barrier is placed in SMP unconditionally, in order to
> - * guarantee that dependency ordering is preserved when a dependency
> - * is headed by an unsuccessful operation.
> + * The leading and the trailing memory barriers guarantee that these
> + * operations are fully ordered.
> + *
> + * The trailing memory barrier is placed in SMP unconditionally, in
> + * order to guarantee that dependency ordering is preserved when a
> + * dependency is headed by an unsuccessful operation.
> */
>
> static inline unsigned long
> @@ -138,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
> {
> unsigned long prev, tmp, cmp, addr64;
>
> + smp_mb();
> __asm__ __volatile__(
> " andnot %5,7,%4\n"
> " insbl %1,%5,%1\n"
> @@ -165,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
> {
> unsigned long prev, tmp, cmp, addr64;
>
> + smp_mb();
> __asm__ __volatile__(
> " andnot %5,7,%4\n"
> " inswl %1,%5,%1\n"
> @@ -192,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
> {
> unsigned long prev, cmp;
>
> + smp_mb();
> __asm__ __volatile__(
> "1: ldl_l %0,%5\n"
> " cmpeq %0,%3,%1\n"
> @@ -215,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
> {
> unsigned long prev, cmp;
>
> + smp_mb();
> __asm__ __volatile__(
> "1: ldq_l %0,%5\n"
> " cmpeq %0,%3,%1\n"
> --
> 2.7.4
>


Subject: [tip:locking/urgent] locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs

Commit-ID: 472e8c55cf6622d1c112dc2bc777f68bbd4189db
Gitweb: https://git.kernel.org/tip/472e8c55cf6622d1c112dc2bc777f68bbd4189db
Author: Andrea Parri <[email protected]>
AuthorDate: Thu, 22 Feb 2018 10:24:48 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 23 Feb 2018 08:38:16 +0100

locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs

Successful RMW operations are supposed to be fully ordered, but
Alpha's xchg() and cmpxchg() do not meet this requirement.

Will Deacon noticed the bug:

> So MP using xchg:
>
> WRITE_ONCE(x, 1)
> xchg(y, 1)
>
> smp_load_acquire(y) == 1
> READ_ONCE(x) == 0
>
> would be allowed.

... which thus violates the above requirement.

Fix it by adding a leading smp_mb() to the xchg() and cmpxchg() implementations.
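
Schematically, the fixed operations bracket Alpha's LL/SC retry loop
with a barrier on each side. The sketch below only illustrates that
placement; load_locked()/store_conditional() are assumed helpers
standing in for the real ldq_l/stq_c asm, and the actual code is the
____xchg()/____cmpxchg() routines in the diff below:

/* Illustrative sketch only, not the patched code. */
static inline unsigned long sketch_xchg_u64(volatile long *m, unsigned long val)
{
	unsigned long prev;

	smp_mb();	/* leading barrier (added by this patch) */
	do {
		prev = load_locked(m);			/* cf. ldq_l */
	} while (!store_conditional(m, val));		/* cf. stq_c */
	smp_mb();	/* trailing barrier (already present) */

	return prev;
}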

Reported-by: Will Deacon <[email protected]>
Signed-off-by: Andrea Parri <[email protected]>
Acked-by: Paul E. McKenney <[email protected]>
Cc: Alan Stern <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/alpha/include/asm/xchg.h | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index e1facf6fc244..e2b59fac5257 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -12,6 +12,10 @@
* Atomic exchange.
* Since it can be used to implement critical sections
* it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
*/

static inline unsigned long
@@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
{
unsigned long ret, tmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" insbl %1,%4,%1\n"
@@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
{
unsigned long ret, tmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" inswl %1,%4,%1\n"
@@ -67,6 +73,7 @@ ____xchg(_u32, volatile int *m, unsigned long val)
{
unsigned long dummy;

+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -87,6 +94,7 @@ ____xchg(_u64, volatile long *m, unsigned long val)
{
unsigned long dummy;

+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -128,9 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*
- * The memory barrier is placed in SMP unconditionally, in order to
- * guarantee that dependency ordering is preserved when a dependency
- * is headed by an unsuccessful operation.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
*/

static inline unsigned long
@@ -138,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
{
unsigned long prev, tmp, cmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" insbl %1,%5,%1\n"
@@ -165,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
{
unsigned long prev, tmp, cmp, addr64;

+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" inswl %1,%5,%1\n"
@@ -192,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
{
unsigned long prev, cmp;

+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -215,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
{
unsigned long prev, cmp;

+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%5\n"
" cmpeq %0,%3,%1\n"