2015-05-11 14:54:28

by Peter Zijlstra

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Mon, May 11, 2015 at 05:43:55AM -0700, tip-bot for Waiman Long wrote:
> - (void)xchg(&pn->state, vcpu_halted);
> + set_mb(pn->state, vcpu_halted);

Hmm, so I looked at the set_mb() definitions and I figure we want to do
something like the below, right?

---
arch/arm/include/asm/barrier.h | 2 +-
arch/arm64/include/asm/barrier.h | 2 +-
arch/ia64/include/asm/barrier.h | 2 +-
arch/metag/include/asm/barrier.h | 2 +-
arch/mips/include/asm/barrier.h | 2 +-
arch/powerpc/include/asm/barrier.h | 2 +-
arch/s390/include/asm/barrier.h | 2 +-
arch/sparc/include/asm/barrier_64.h | 2 +-
arch/x86/include/asm/barrier.h | 2 +-
include/asm-generic/barrier.h | 2 +-
10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..993150aea681 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)

-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4dc0de..ff7de78d01b8 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)

-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop");

#define smp_mb__before_atomic() smp_mb()
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb2bbf9..03117e7b2ab8 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -82,7 +82,7 @@ do { \
* acquire vs release semantics but we can't discuss this stuff with
* Linus just yet. Grrr...
*/
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e26a65..97eb018a2933 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)

-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_store_release(p, v) \
do { \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbcb9be0..cff1bbdaa74a 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -113,7 +113,7 @@
#endif

#define set_mb(var, value) \
- do { var = value; smp_mb(); } while (0)
+ do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")

diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be111ff..2a072e48780d 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")

-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..b66cd53d35fc 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()

-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

#define smp_store_release(p, v) \
do { \
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 76648941fea7..125fec7512f4 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -41,7 +41,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define dma_wmb() wmb()

#define set_mb(__var, __value) \
- do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)

#ifdef CONFIG_SMP
#define smp_mb() mb()
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b81fe2..9de5cde133a1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -40,7 +40,7 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */

#define read_barrier_depends() do { } while (0)
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index f5c40b0fadc2..3938716b44d7 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -67,7 +67,7 @@
#endif

#ifndef set_mb
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#endif

#ifndef smp_mb__before_atomic


2015-05-11 16:50:38

by Waiman Long

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On 05/11/2015 10:54 AM, Peter Zijlstra wrote:
> On Mon, May 11, 2015 at 05:43:55AM -0700, tip-bot for Waiman Long wrote:
>> - (void)xchg(&pn->state, vcpu_halted);
>> + set_mb(pn->state, vcpu_halted);
> Hmm, so I looked at the set_mb() definitions and I figure we want to do
> something like the below, right?

Yes, I think we should do that just to be safe from unexpected compiler
optimization.

[...]
> diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
> index 959e45b81fe2..9de5cde133a1 100644
> --- a/arch/x86/include/asm/barrier.h
> +++ b/arch/x86/include/asm/barrier.h
> @@ -40,7 +40,7 @@
> #define smp_mb() barrier()
> #define smp_rmb() barrier()
> #define smp_wmb() barrier()
> -#define set_mb(var, value) do { var = value; barrier(); } while (0)
> +#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)

That part is in the !CONFIG_SMP portion. I don't think we need to change it.

Cheers,
Longman

2015-05-11 17:50:49

by Linus Torvalds

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Mon, May 11, 2015 at 7:54 AM, Peter Zijlstra <[email protected]> wrote:
>
> Hmm, so I looked at the set_mb() definitions and I figure we want to do
> something like the below, right?

I don't think you need to do this for the non-smp cases. The whole
thing is about smp memory ordering, so on UP you don't even need the
WRITE_ONCE(), much less a barrier.

That said, I do wonder if we should make that "it's only an smp
barrier" more explicit. We have non-smp barriers for people who do
DMA, and while they should probably never use anything like set_mb()
anyway (they tend to want *release* semantics, not a full barrier),
from a conceptual standpoint the "set_mb()" function really is closer
to the "smp_store_release()/smp_load_acquire()" family of macros.

So I wonder if we should change the name to match.

IOW, if we are really cleaning up set_mb() and changing most of the
lines associated with it (we really have very few users, and there
seems to be more lines *defining* set_mb() than there are lines
*using* it in the kernel), maybe we should also just rename it
"smp_store_mb()" at the same time.

I dunno. Maybe the churn isn't worth it.

Linus

2015-05-12 08:45:48

by Peter Zijlstra

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Mon, May 11, 2015 at 10:50:42AM -0700, Linus Torvalds wrote:
> On Mon, May 11, 2015 at 7:54 AM, Peter Zijlstra <[email protected]> wrote:
> >
> > Hmm, so I looked at the set_mb() definitions and I figure we want to do
> > something like the below, right?
>
> I don't think you need to do this for the non-smp cases.

Well, it's the store tearing thing again, we use WRITE_ONCE() in
smp_store_release() for the same reason. We want it to be a single
store.
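
To illustrate: WRITE_ONCE() makes the store go through a volatile access
of the full width, so in practice the compiler has to emit exactly one
store of that size instead of splitting (or replaying) it. A minimal
sketch; the real macro in include/linux/compiler.h dispatches on
sizeof() via __write_once_size(), but it boils down to:

	#define WRITE_ONCE_sketch(x, val)	(*(volatile typeof(x) *)&(x) = (val))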

> The whole
> thing is about smp memory ordering, so on UP you don't even need the
> WRITE_ONCE(), much less a barrier.

No, we actually need both still on UP.

Imagine the following sequence:

	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (cond)
			break;

		schedule();
	}
	__set_current_state(TASK_RUNNING);

vs

	<IRQ>
	wake_up_process(p);

As we know, set_current_state() is set_mb(), and thus will look like:

	current->state = TASK_KILLABLE;
	smp_mb();
	if (cond)
		break;

So without the WRITE_ONCE() we can get store tearing, and suppose our
compiler is insane and translates the store into 4 byte stores.

	current->state[0] = TASK_UNINTERRUPTIBLE;
	current->state[1] = TASK_WAKEKILL >> 8;
	current->state[2] = 0;
	current->state[3] = 0;

The obvious fail here is to get the wakeup interrupt between [0] and
[1].

	current->state[0] = TASK_UNINTERRUPTIBLE;

	<IRQ>
	wake_up_process(p);
	  p->state = TASK_RUNNING;

	current->state[1] = TASK_WAKEKILL >> 8;
	current->state[2] = 0;
	current->state[3] = 0;

With the end result that ->state == TASK_WAKEKILL, from which we'll not
wake up unless killed.

Similarly, without the barrier(), our friendly compiler is allowed to
do:

	if (cond)
		break;
	current->state = TASK_KILLABLE;
	schedule();

Which we all know to be broken.

So no, set_mb() (or smp_store_mb()) very much does need the WRITE_ONCE()
and a barrier() on UP.
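
(For completeness, the barrier() relied on here is just the usual
compiler barrier, roughly as defined in include/linux/compiler-gcc.h:

	#define barrier()	__asm__ __volatile__("" : : : "memory")

The "memory" clobber tells the compiler it may not move memory accesses
across it, which is exactly what rules out the reordering shown above.)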

2015-05-12 08:54:20

by Peter Zijlstra

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Mon, May 11, 2015 at 10:50:42AM -0700, Linus Torvalds wrote:

> from a conceptual standpoint the "set_mb()" function really is closer
> to the "smp_store_release()/smp_load_acquire()" family of macros.
>
> So I wonder if we should change the name to match.
>
> IOW, if we are really cleaning up set_mb() and changing most of the
> lines associated with it (we really have very few users, and there
> seems to be more lines *defining* set_mb() than there are lines
> *using* it in the kernel), maybe we should also just rename it
> "smp_store_mb()" at the same time.
>
> I dunno. Maybe the churn isn't worth it.

Can't hurt, and its a trivial patch to generate.

git grep -l "\<set_mb\>" | while read file; do sed -i -e 's/\<set_mb\>/smp_store_mb/g' $file; done

And a manual edit later...

---
Documentation/memory-barriers.txt | 6 +++---
arch/arm/include/asm/barrier.h | 2 +-
arch/arm64/include/asm/barrier.h | 2 +-
arch/ia64/include/asm/barrier.h | 7 +------
arch/metag/include/asm/barrier.h | 2 +-
arch/mips/include/asm/barrier.h | 2 +-
arch/powerpc/include/asm/barrier.h | 2 +-
arch/s390/include/asm/barrier.h | 2 +-
arch/sh/include/asm/barrier.h | 2 +-
arch/sparc/include/asm/barrier_64.h | 2 +-
arch/x86/include/asm/barrier.h | 4 ++--
arch/x86/um/asm/barrier.h | 3 ++-
fs/select.c | 6 +++---
include/asm-generic/barrier.h | 4 ++--
include/linux/sched.h | 8 ++++----
kernel/futex.c | 2 +-
kernel/locking/qspinlock_paravirt.h | 2 +-
kernel/sched/wait.c | 4 ++--
18 files changed, 29 insertions(+), 33 deletions(-)

--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1662,7 +1662,7 @@ CPU from reordering them.

There are some more advanced barrier functions:

- (*) set_mb(var, value)
+ (*) smp_store_mb(var, value)

This assigns the value to the variable and then inserts a full memory
barrier after it, depending on the function. It isn't guaranteed to
@@ -1975,7 +1975,7 @@ A general memory barrier is interpolated
CPU 1
===============================
set_current_state();
- set_mb();
+ smp_store_mb();
STORE current->state
<general barrier>
LOAD event_indicated
@@ -2016,7 +2016,7 @@ something up. The barrier occurs before
CPU 1 CPU 2
=============================== ===============================
set_current_state(); STORE event_indicated
- set_mb(); wake_up();
+ smp_store_mb(); wake_up();
STORE current->state <write barrier>
<general barrier> STORE current->state
LOAD event_indicated
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)

-#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)

-#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop");

#define smp_mb__before_atomic() smp_mb()
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
___p1; \
})

-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet. Grrr...
- */
-#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

/*
* The group barrier in front of the rsm & ssm are necessary to ensure
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)

-#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_store_release(p, v) \
do { \
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,7 +112,7 @@
#define __WEAK_LLSC_MB " \n"
#endif

-#define set_mb(var, value) \
+#define smp_store_mb(var, value) \
do { WRITE_ONCE(var, value); smp_mb(); } while (0)

#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")

-#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()

-#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)

#define smp_store_release(p, v) \
do { \
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif

-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)

#include <asm-generic/barrier.h>

--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,7 +40,7 @@ do { __asm__ __volatile__("ba,pt %%xcc,
#define dma_rmb() rmb()
#define dma_wmb() wmb()

-#define set_mb(__var, __value) \
+#define smp_store_mb(__var, __value) \
do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)

#ifdef CONFIG_SMP
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
#define smp_mb() mb()
#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */

#define read_barrier_depends() do { } while (0)
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)

#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
--- a/fs/select.c
+++ b/fs/select.c
@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait
* doesn't imply write barrier and the users expect write
* barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
- * and is paired with set_mb() in poll_schedule_timeout.
+ * and is paired with smp_store_mb() in poll_schedule_timeout.
*/
smp_wmb();
pwq->triggered = 1;
@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wq
/*
* Prepare for the next iteration.
*
- * The following set_mb() serves two purposes. First, it's
+ * The following smp_store_mb() serves two purposes. First, it's
* the counterpart rmb of the wmb in pollwake() such that data
* written before wake up is always visible after wake up.
* Second, the full barrier guarantees that triggered clearing
@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wq
* this problem doesn't exist for the first iteration as
* add_wait_queue() has full barrier semantics.
*/
- set_mb(pwq->triggered, 0);
+ smp_store_mb(pwq->triggered, 0);

return rc;
}
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -66,8 +66,8 @@
#define smp_read_barrier_depends() do { } while (0)
#endif

-#ifndef set_mb
-#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#ifndef smp_store_mb
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#endif

#ifndef smp_mb__before_atomic
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,7 +257,7 @@ extern char ___assert_task_state[1 - 2*!
#define set_task_state(tsk, state_value) \
do { \
(tsk)->task_state_change = _THIS_IP_; \
- set_mb((tsk)->state, (state_value)); \
+ smp_store_mb((tsk)->state, (state_value)); \
} while (0)

/*
@@ -279,7 +279,7 @@ extern char ___assert_task_state[1 - 2*!
#define set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
- set_mb(current->state, (state_value)); \
+ smp_store_mb(current->state, (state_value)); \
} while (0)

#else
@@ -287,7 +287,7 @@ extern char ___assert_task_state[1 - 2*!
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
- set_mb((tsk)->state, (state_value))
+ smp_store_mb((tsk)->state, (state_value))

/*
* set_current_state() includes a barrier so that the write of current->state
@@ -303,7 +303,7 @@ extern char ___assert_task_state[1 - 2*!
#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
#define set_current_state(state_value) \
- set_mb(current->state, (state_value))
+ smp_store_mb(current->state, (state_value))

#endif

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2056,7 +2056,7 @@ static void futex_wait_queue_me(struct f
{
/*
* The task state is guaranteed to be set before another task can
- * wake it. set_current_state() is implemented using set_mb() and
+ * wake it. set_current_state() is implemented using smp_store_mb() and
* queue_me() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -175,7 +175,7 @@ static void pv_wait_node(struct mcs_spin
*
* Matches the xchg() from pv_kick_node().
*/
- set_mb(pn->state, vcpu_halted);
+ smp_store_mb(pn->state, vcpu_halted);

if (!READ_ONCE(node->locked))
pv_wait(&pn->state, vcpu_halted);
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsi
* condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
* an event.
*/
- set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+ smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */

return timeout;
}
@@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wa
* doesn't imply write barrier and the users expects write
* barrier semantics on wakeup functions. The following
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
- * and is paired with set_mb() in wait_woken().
+ * and is paired with smp_store_mb() in wait_woken().
*/
smp_wmb(); /* C */
wait->flags |= WQ_FLAG_WOKEN;

2015-05-12 13:01:03

by Peter Zijlstra

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Tue, May 12, 2015 at 10:45:29AM +0200, Peter Zijlstra wrote:
> On Mon, May 11, 2015 at 10:50:42AM -0700, Linus Torvalds wrote:
> > On Mon, May 11, 2015 at 7:54 AM, Peter Zijlstra <[email protected]> wrote:
> > >
> > > Hmm, so I looked at the set_mb() definitions and I figure we want to do
> > > something like the below, right?
> >
> > I don't think you need to do this for the non-smp cases.
>
> Well, its the store tearing thing again, we use WRITE_ONCE() in
> smp_store_release() for the same reason. We want it to be a single
> store.
>
> > The whole
> > thing is about smp memory ordering, so on UP you don't even need the
> > WRITE_ONCE(), much less a barrier.

Ah, you meant the memory barrier; indeed, a compiler barrier is
sufficient. I got somewhat confused between Waiman's email and barrier
and barrier() (again!).

2015-05-12 14:59:54

by Peter Zijlstra

Subject: Re: [tip:locking/core] locking/pvqspinlock: Replace xchg() by the more descriptive set_mb()

On Mon, May 11, 2015 at 04:54:08PM +0200, Peter Zijlstra wrote:

> +++ b/arch/arm/include/asm/barrier.h
> @@ -81,7 +81,7 @@ do { \
> #define read_barrier_depends() do { } while(0)
> #define smp_read_barrier_depends() do { } while(0)
>
> -#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
> +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
>
> #define smp_mb__before_atomic() smp_mb()
> #define smp_mb__after_atomic() smp_mb()

Which triggered a massive compile fail and requires the same union
trickery we already had for READ_ONCE().

Added the below bit to the patch to aid compiling.

---
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index a7c0941d10da..03e227ba481c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })

#define WRITE_ONCE(x, val) \
- ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
+ ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })

#endif /* __KERNEL__ */