2007-02-11 19:19:54

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 00/10] local_t : adding and standardising local atomic primitives

local_t : adding and standardising local atomic primitives

These patches extend and standardise local_t operations on each architectures,
allowing a rich set of atomic operations to be done on per-cpu data with
minimal performance impact. On architectures where there seems to be no
difference between the SMP and UP operation (same memory barriers, same
LOCKing), local.h simply includes asm-generic/local.h, which removes duplicated
code from the current kernel tree.

These patches apply on 2.6.20.
It depends on the patch "atomic.h : standardising atomic primitives"

These patches compile fine against :

arm
i686
ia64
m68k
mips
mipsel
x86_64
ppc 405
powerpc 970
s390
sparc
sparc64

Signed-off-by : Mathieu Desnoyers <[email protected]>


2007-02-11 19:18:24

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 01/10] local_t : architecture independant extension

local_t : architecture independant extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-generic/local.h
+++ b/include/asm-generic/local.h
@@ -33,6 +33,19 @@ typedef struct
#define local_add(i,l) atomic_long_add((i),(&(l)->a))
#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))

+#define local_sub_and_test(i, l) atomic_long_sub_and_test((i), (&(l)->a))
+#define local_dec_and_test(l) atomic_long_dec_and_test(&(l)->a)
+#define local_inc_and_test(l) atomic_long_inc_and_test(&(l)->a)
+#define local_add_negative(i, l) atomic_long_add_negative((i), (&(l)->a))
+#define local_add_return(i, l) atomic_long_add_return((i), (&(l)->a))
+#define local_sub_return(i, l) atomic_long_sub_return((i), (&(l)->a))
+#define local_inc_return(l) atomic_long_inc_return(&(l)->a)
+
+#define local_cmpxchg(l, o, n) atomic_long_cmpxchg((&(l)->a), (o), (n))
+#define local_xchg(l, n) atomic_long_xchg((&(l)->a), (n))
+#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u))
+#define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
+
/* Non-atomic variants, ie. preemption disabled and won't be touched
* in interrupt, etc. Some archs can optimize this case well. */
#define __local_inc(l) local_set((l), local_read(l) + 1)
@@ -44,19 +57,19 @@ typedef struct
* much more efficient than these naive implementations. Note they take
* a variable (eg. mystruct.foo), not an address.
*/
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))
-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
+#define cpu_local_read(l) local_read(&__get_cpu_var(l))
+#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i))
+#define cpu_local_inc(l) local_inc(&__get_cpu_var(l))
+#define cpu_local_dec(l) local_dec(&__get_cpu_var(l))
+#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l))
+#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l))

/* Non-atomic increments, ie. preemption disabled and won't be touched
* in interrupt, etc. Some archs can optimize this case well.
*/
-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
+#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l))
+#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l))
+#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l))
+#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l))

#endif /* _ASM_GENERIC_LOCAL_H */

2007-02-11 19:18:55

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 06/10] local_t : parisc cleanup

local_t : parisc cleanup

parisc architecture local_t cleanup : use asm-generic/local.h.

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-parisc/local.h
+++ b/include/asm-parisc/local.h
@@ -1,40 +1 @@
-#ifndef _ARCH_PARISC_LOCAL_H
-#define _ARCH_PARISC_LOCAL_H
-
-#include <linux/percpu.h>
-#include <asm/atomic.h>
-
-typedef atomic_long_t local_t;
-
-#define LOCAL_INIT(i) ATOMIC_LONG_INIT(i)
-#define local_read(v) atomic_long_read(v)
-#define local_set(v,i) atomic_long_set(v,i)
-
-#define local_inc(v) atomic_long_inc(v)
-#define local_dec(v) atomic_long_dec(v)
-#define local_add(i, v) atomic_long_add(i, v)
-#define local_sub(i, v) atomic_long_sub(i, v)
-
-#define __local_inc(v) ((v)->counter++)
-#define __local_dec(v) ((v)->counter--)
-#define __local_add(i,v) ((v)->counter+=(i))
-#define __local_sub(i,v) ((v)->counter-=(i))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- */
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))
-
-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
-
-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
-
-#endif /* _ARCH_PARISC_LOCAL_H */
+#include <asm-generic/local.h>

2007-02-11 19:18:55

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 08/10] local_t : s390 cleanup

local_t : s390 cleanup

s390 architecture local_t cleanup : use asm-generic/local.h.

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-s390/local.h
+++ b/include/asm-s390/local.h
@@ -1,58 +1 @@
-#ifndef _ASM_LOCAL_H
-#define _ASM_LOCAL_H
-
-#include <linux/percpu.h>
-#include <asm/atomic.h>
-
-#ifndef __s390x__
-
-typedef atomic_t local_t;
-
-#define LOCAL_INIT(i) ATOMIC_INIT(i)
-#define local_read(v) atomic_read(v)
-#define local_set(v,i) atomic_set(v,i)
-
-#define local_inc(v) atomic_inc(v)
-#define local_dec(v) atomic_dec(v)
-#define local_add(i, v) atomic_add(i, v)
-#define local_sub(i, v) atomic_sub(i, v)
-
-#else
-
-typedef atomic64_t local_t;
-
-#define LOCAL_INIT(i) ATOMIC64_INIT(i)
-#define local_read(v) atomic64_read(v)
-#define local_set(v,i) atomic64_set(v,i)
-
-#define local_inc(v) atomic64_inc(v)
-#define local_dec(v) atomic64_dec(v)
-#define local_add(i, v) atomic64_add(i, v)
-#define local_sub(i, v) atomic64_sub(i, v)
-
-#endif
-
-#define __local_inc(v) ((v)->counter++)
-#define __local_dec(v) ((v)->counter--)
-#define __local_add(i,v) ((v)->counter+=(i))
-#define __local_sub(i,v) ((v)->counter-=(i))
-
-/*
- * Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- */
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))
-
-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
-
-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
-
-#endif /* _ASM_LOCAL_H */
+#include <asm-generic/local.h>

2007-02-11 19:18:55

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 03/10] local_t : i386 extension

local_t : i386 extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-i386/local.h
+++ b/include/asm-i386/local.h
@@ -2,47 +2,198 @@
#define _ARCH_I386_LOCAL_H

#include <linux/percpu.h>
+#include <asm/system.h>
+#include <asm/atomic.h>

typedef struct
{
- volatile long counter;
+ atomic_long_t a;
} local_t;

-#define LOCAL_INIT(i) { (i) }
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }

-#define local_read(v) ((v)->counter)
-#define local_set(v,i) (((v)->counter) = (i))
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l,i) atomic_long_set(&(l)->a, (i))

-static __inline__ void local_inc(local_t *v)
+static __inline__ void local_inc(local_t *l)
{
__asm__ __volatile__(
"incl %0"
- :"+m" (v->counter));
+ :"+m" (l->a.counter));
}

-static __inline__ void local_dec(local_t *v)
+static __inline__ void local_dec(local_t *l)
{
__asm__ __volatile__(
"decl %0"
- :"+m" (v->counter));
+ :"+m" (l->a.counter));
}

-static __inline__ void local_add(long i, local_t *v)
+static __inline__ void local_add(long i, local_t *l)
{
__asm__ __volatile__(
"addl %1,%0"
- :"+m" (v->counter)
+ :"+m" (l->a.counter)
:"ir" (i));
}

-static __inline__ void local_sub(long i, local_t *v)
+static __inline__ void local_sub(long i, local_t *l)
{
__asm__ __volatile__(
"subl %1,%0"
- :"+m" (v->counter)
+ :"+m" (l->a.counter)
:"ir" (i));
}

+/**
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int local_sub_and_test(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "subl %2,%0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ :"ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * local_dec_and_test - decrement and test
+ * @l: pointer of type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static __inline__ int local_dec_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "decl %0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int local_inc_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "incl %0; sete %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * local_add_negative - add and test if negative
+ * @l: pointer of type local_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static __inline__ int local_add_negative(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "addl %2,%0; sets %1"
+ :"+m" (l->a.counter), "=qm" (c)
+ :"ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * local_add_return - add and return
+ * @l: pointer of type local_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static __inline__ long local_add_return(long i, local_t *l)
+{
+ long __i;
+#ifdef CONFIG_M386
+ unsigned long flags;
+ if(unlikely(boot_cpu_data.x86==3))
+ goto no_xadd;
+#endif
+ /* Modern 486+ processor */
+ __i = i;
+ __asm__ __volatile__(
+ "xaddl %0, %1;"
+ :"+r" (i), "+m" (l->a.counter)
+ : : "memory");
+ return i + __i;
+
+#ifdef CONFIG_M386
+no_xadd: /* Legacy 386 processor */
+ local_irq_save(flags);
+ __i = local_read(l);
+ local_set(l, i + __i);
+ local_irq_restore(flags);
+ return i + __i;
+#endif
+}
+
+static __inline__ long local_sub_return(long i, local_t *l)
+{
+ return local_add_return(-i,l);
+}
+
+#define local_inc_return(l) (local_add_return(1,l))
+#define local_dec_return(l) (local_sub_return(1,l))
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+/* Always has a lock prefix */
+#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ for (;;) { \
+ if (unlikely(c == (u))) \
+ break; \
+ old = local_cmpxchg((l), c, c + (a)); \
+ if (likely(old == c)) \
+ break; \
+ c = old; \
+ } \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* On x86, these are no better than the atomic variants. */
#define __local_inc(l) local_inc(l)
#define __local_dec(l) local_dec(l)
@@ -56,27 +207,27 @@ static __inline__ void local_sub(long i, local_t *v)

/* Need to disable preemption for the cpu local counters otherwise we could
still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(v) \
+#define cpu_local_wrap_v(l) \
({ local_t res__; \
preempt_disable(); \
- res__ = (v); \
+ res__ = (l); \
preempt_enable(); \
res__; })
-#define cpu_local_wrap(v) \
+#define cpu_local_wrap(l) \
({ preempt_disable(); \
- v; \
+ l; \
preempt_enable(); }) \

-#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v)))
-#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i)))
-#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v)))
-#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v)))
-#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v)))
-#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v)))
-
-#define __cpu_local_inc(v) cpu_local_inc(v)
-#define __cpu_local_dec(v) cpu_local_dec(v)
-#define __cpu_local_add(i, v) cpu_local_add((i), (v))
-#define __cpu_local_sub(i, v) cpu_local_sub((i), (v))
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
+
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))

#endif /* _ARCH_I386_LOCAL_H */
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -274,6 +274,9 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
#define sync_cmpxchg(ptr,o,n)\
((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
(unsigned long)(n),sizeof(*(ptr))))
+#define cmpxchg_local(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
#endif

static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -336,6 +339,33 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
return old;
}

+static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long old, unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
#ifndef CONFIG_X86_CMPXCHG
/*
* Building a kernel capable running on 80386. It may be necessary to
@@ -372,6 +402,17 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
+#define cmpxchg_local(ptr,o,n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ if (likely(boot_cpu_data.x86 > 3)) \
+ __ret = __cmpxchg_local((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ else \
+ __ret = cmpxchg_386((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
#endif

#ifdef CONFIG_X86_CMPXCHG64
@@ -390,10 +431,26 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
return prev;
}

+static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
+ unsigned long long old, unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__("cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
#define cmpxchg64(ptr,o,n)\
((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
(unsigned long long)(n)))
-
+#define cmpxchg64_local(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\
+ (unsigned long long)(n)))
#endif

/*

2007-02-11 19:24:06

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 09/10] local_t : sparc64 cleanup

local_t : sparc64 cleanup

sparc64 local_t cleanup : simply use asm-generic/local.h.

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-sparc64/local.h
+++ b/include/asm-sparc64/local.h
@@ -1,40 +1 @@
-#ifndef _ARCH_SPARC64_LOCAL_H
-#define _ARCH_SPARC64_LOCAL_H
-
-#include <linux/percpu.h>
-#include <asm/atomic.h>
-
-typedef atomic64_t local_t;
-
-#define LOCAL_INIT(i) ATOMIC64_INIT(i)
-#define local_read(v) atomic64_read(v)
-#define local_set(v,i) atomic64_set(v,i)
-
-#define local_inc(v) atomic64_inc(v)
-#define local_dec(v) atomic64_dec(v)
-#define local_add(i, v) atomic64_add(i, v)
-#define local_sub(i, v) atomic64_sub(i, v)
-
-#define __local_inc(v) ((v)->counter++)
-#define __local_dec(v) ((v)->counter--)
-#define __local_add(i,v) ((v)->counter+=(i))
-#define __local_sub(i,v) ((v)->counter-=(i))
-
-/* Use these for per-cpu local_t variables: on some archs they are
- * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
- */
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))
-
-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
-
-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
-
-#endif /* _ARCH_SPARC64_LOCAL_H */
+#include <asm-generic/local.h>

2007-02-11 19:24:06

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 05/10] local_t : mips extension

local_t : mips extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-mips/local.h
+++ b/include/asm-mips/local.h
@@ -1,60 +1,524 @@
-#ifndef _ASM_LOCAL_H
-#define _ASM_LOCAL_H
+#ifndef _ARCH_MIPS_LOCAL_H
+#define _ARCH_MIPS_LOCAL_H

#include <linux/percpu.h>
#include <asm/atomic.h>
+#include <asm/war.h>

-#ifdef CONFIG_32BIT
+typedef struct
+{
+ atomic_long_t a;
+} local_t;

-typedef atomic_t local_t;
+#define LOCAL_INIT(i) { local_LONG_INIT(i) }

-#define LOCAL_INIT(i) ATOMIC_INIT(i)
-#define local_read(v) atomic_read(v)
-#define local_set(v,i) atomic_set(v,i)
+#define local_read(l) local_long_read(&(l)->a)
+#define local_set(l,i) local_long_set(&(l)->a, (i))

-#define local_inc(v) atomic_inc(v)
-#define local_dec(v) atomic_dec(v)
-#define local_add(i, v) atomic_add(i, v)
-#define local_sub(i, v) atomic_sub(i, v)
+#define local_add(i,l) local_long_add((i),(&(l)->a))
+#define local_sub(i,l) local_long_sub((i),(&(l)->a))
+#define local_inc(l) local_long_inc(&(l)->a)
+#define local_dec(l) local_long_dec(&(l)->a)

-#endif

-#ifdef CONFIG_64BIT
+#ifndef CONFIG_64BIT

-typedef atomic64_t local_t;
+/*
+ * Same as above, but return the result value
+ */
+static __inline__ int local_add_return(int i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_add_return \n"
+ " addu %0, %1, %3 \n"
+ " sc %0, %2 \n"
+ " beqzl %0, 1b \n"
+ " addu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_add_return \n"
+ " addu %0, %1, %3 \n"
+ " sc %0, %2 \n"
+ " beqz %0, 1b \n"
+ " addu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ result = l->a.counter;
+ result += i;
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }
+
+ return result;
+}
+
+static __inline__ int local_sub_return(int i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_sub_return \n"
+ " subu %0, %1, %3 \n"
+ " sc %0, %2 \n"
+ " beqzl %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_sub_return \n"
+ " subu %0, %1, %3 \n"
+ " sc %0, %2 \n"
+ " beqz %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;

-#define LOCAL_INIT(i) ATOMIC64_INIT(i)
-#define local_read(v) atomic64_read(v)
-#define local_set(v,i) atomic64_set(v,i)
+ local_irq_save(flags);
+ result = l->a.counter;
+ result -= i;
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }

-#define local_inc(v) atomic64_inc(v)
-#define local_dec(v) atomic64_dec(v)
-#define local_add(i, v) atomic64_add(i, v)
-#define local_sub(i, v) atomic64_sub(i, v)
+ return result;
+}

-#endif
+/*
+ * local_sub_if_positive - conditionally subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically test @l and subtract @i if @l is greater or equal than @i.
+ * The function returns the old value of @l minus @i.
+ */
+static __inline__ int local_sub_if_positive(int i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_sub_if_positive\n"
+ " subu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+ " sc %0, %2 \n"
+ " .set noreorder \n"
+ " beqzl %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set reorder \n"
+ "1: \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: ll %1, %2 # local_sub_if_positive\n"
+ " subu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+ " sc %0, %2 \n"
+ " .set noreorder \n"
+ " beqz %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set reorder \n"
+ "1: \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ result = l->a.counter;
+ result -= i;
+ if (result >= 0)
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }
+
+ return result;
+}
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ while (c != (u) && (old = local_cmpxchg((l), c, c + (a))) != c) \
+ c = old; \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)

-#define __local_inc(v) ((v)->counter++)
-#define __local_dec(v) ((v)->counter--)
-#define __local_add(i,v) ((v)->counter+=(i))
-#define __local_sub(i,v) ((v)->counter-=(i))
+#define local_dec_return(l) local_sub_return(1,(l))
+#define local_inc_return(l) local_add_return(1,(l))

/*
- * Use these for per-cpu local_t variables: on some archs they are
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+/*
+ * local_dec_and_test - decrement by 1 and test
+ * @l: pointer of type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0)
+
+/*
+ * local_dec_if_positive - decrement by 1 if old value positive
+ * @l: pointer of type local_t
+ */
+#define local_dec_if_positive(l) local_sub_if_positive(1, l)
+
+/*
+ * local_add_negative - add and test if negative
+ * @l: pointer of type local_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#define local_add_negative(i,l) (local_add_return(i, (l)) < 0)
+
+#else /* CONFIG_64BIT */
+
+/*
+ * Same as above, but return the result value
+ */
+static __inline__ long local_add_return(long i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_add_return \n"
+ " addu %0, %1, %3 \n"
+ " scd %0, %2 \n"
+ " beqzl %0, 1b \n"
+ " addu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_add_return \n"
+ " addu %0, %1, %3 \n"
+ " scd %0, %2 \n"
+ " beqz %0, 1b \n"
+ " addu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ result = l->a.counter;
+ result += i;
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }
+
+ return result;
+}
+
+static __inline__ long local_sub_return(long i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_sub_return \n"
+ " subu %0, %1, %3 \n"
+ " scd %0, %2 \n"
+ " beqzl %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_sub_return \n"
+ " subu %0, %1, %3 \n"
+ " scd %0, %2 \n"
+ " beqz %0, 1b \n"
+ " subu %0, %1, %3 \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ result = l->a.counter;
+ result -= i;
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }
+
+ return result;
+}
+
+/*
+ * local_sub_if_positive - conditionally subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically test @l and subtract @i if @l is greater or equal than @i.
+ * The function returns the old value of @l minus @i.
+ */
+static __inline__ long local_sub_if_positive(long i, local_t * l)
+{
+ unsigned long result;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_sub_if_positive\n"
+ " dsubu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+ " scd %0, %2 \n"
+ " .set noreorder \n"
+ " beqzl %0, 1b \n"
+ " dsubu %0, %1, %3 \n"
+ " .set reorder \n"
+ "1: \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ unsigned long temp;
+
+ __asm__ __volatile__(
+ " .set mips3 \n"
+ "1: lld %1, %2 # local_sub_if_positive\n"
+ " dsubu %0, %1, %3 \n"
+ " bltz %0, 1f \n"
+ " scd %0, %2 \n"
+ " .set noreorder \n"
+ " beqz %0, 1b \n"
+ " dsubu %0, %1, %3 \n"
+ " .set reorder \n"
+ "1: \n"
+ " .set mips0 \n"
+ : "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+ : "Ir" (i), "m" (l->a.counter)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ result = l->a.counter;
+ result -= i;
+ if (result >= 0)
+ l->a.counter = result;
+ local_irq_restore(flags);
+ }
+
+ return result;
+}
+
+#define local_cmpxchg(l, o, n) \
+ ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, (n)) (xchg_local(&((l)->a.counter),(n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ while (c != (u) && (old = local_cmpxchg((l), c, c + (a))) != c) \
+ c = old; \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#define local_dec_return(l) local_sub_return(1,(l))
+#define local_inc_return(l) local_add_return(1,(l))
+
+/*
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+/*
+ * local_dec_and_test - decrement by 1 and test
+ * @l: pointer of type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0)
+
+/*
+ * local_dec_if_positive - decrement by 1 if old value positive
+ * @l: pointer of type local_t
+ */
+#define local_dec_if_positive(l) local_sub_if_positive(1, l)
+
+/*
+ * local_add_negative - add and test if negative
+ * @l: pointer of type local_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#define local_add_negative(i,l) (local_add_return(i, (l)) < 0)
+
+#endif /* !CONFIG_64BIT */
+
+
+/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))

-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
+#define __local_inc(l) ((l)->a.counter++)
+#define __local_dec(l) ((l)->a.counter++)
+#define __local_add(i,l) ((l)->a.counter+=(i))
+#define __local_sub(i,l) ((l)->a.counter-=(i))
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+ still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(l) \
+ ({ local_t res__; \
+ preempt_disable(); \
+ res__ = (l); \
+ preempt_enable(); \
+ res__; })
+#define cpu_local_wrap(l) \
+ ({ preempt_disable(); \
+ l; \
+ preempt_enable(); }) \
+
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))

-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))

-#endif /* _ASM_LOCAL_H */
+#endif /* _ARCH_MIPS_LOCAL_H */
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -253,6 +253,58 @@ static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old,
return retval;
}

+static inline unsigned long __cmpxchg_u32_local(volatile int * m,
+ unsigned long old, unsigned long new)
+{
+ __u32 retval;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set push \n"
+ " .set noat \n"
+ " .set mips3 \n"
+ "1: ll %0, %2 # __cmpxchg_u32 \n"
+ " bne %0, %z3, 2f \n"
+ " .set mips0 \n"
+ " move $1, %z4 \n"
+ " .set mips3 \n"
+ " sc $1, %1 \n"
+ " beqzl $1, 1b \n"
+ "2: \n"
+ " .set pop \n"
+ : "=&r" (retval), "=R" (*m)
+ : "R" (*m), "Jr" (old), "Jr" (new)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ __asm__ __volatile__(
+ " .set push \n"
+ " .set noat \n"
+ " .set mips3 \n"
+ "1: ll %0, %2 # __cmpxchg_u32 \n"
+ " bne %0, %z3, 2f \n"
+ " .set mips0 \n"
+ " move $1, %z4 \n"
+ " .set mips3 \n"
+ " sc $1, %1 \n"
+ " beqz $1, 1b \n"
+ "2: \n"
+ " .set pop \n"
+ : "=&r" (retval), "=R" (*m)
+ : "R" (*m), "Jr" (old), "Jr" (new)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ retval = *m;
+ if (retval == old)
+ *m = new;
+ local_irq_restore(flags); /* implies memory barrier */
+ }
+
+ return retval;
+}
+
#ifdef CONFIG_64BIT
static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old,
unsigned long new)
@@ -303,10 +355,62 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old,

return retval;
}
+
+static inline unsigned long __cmpxchg_u64_local(volatile int * m,
+ unsigned long old, unsigned long new)
+{
+ __u64 retval;
+
+ if (cpu_has_llsc && R10000_LLSC_WAR) {
+ __asm__ __volatile__(
+ " .set push \n"
+ " .set noat \n"
+ " .set mips3 \n"
+ "1: lld %0, %2 # __cmpxchg_u64 \n"
+ " bne %0, %z3, 2f \n"
+ " move $1, %z4 \n"
+ " scd $1, %1 \n"
+ " beqzl $1, 1b \n"
+ "2: \n"
+ " .set pop \n"
+ : "=&r" (retval), "=R" (*m)
+ : "R" (*m), "Jr" (old), "Jr" (new)
+ : "memory");
+ } else if (cpu_has_llsc) {
+ __asm__ __volatile__(
+ " .set push \n"
+ " .set noat \n"
+ " .set mips3 \n"
+ "1: lld %0, %2 # __cmpxchg_u64 \n"
+ " bne %0, %z3, 2f \n"
+ " move $1, %z4 \n"
+ " scd $1, %1 \n"
+ " beqz $1, 1b \n"
+ "2: \n"
+ " .set pop \n"
+ : "=&r" (retval), "=R" (*m)
+ : "R" (*m), "Jr" (old), "Jr" (new)
+ : "memory");
+ } else {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ retval = *m;
+ if (retval == old)
+ *m = new;
+ local_irq_restore(flags); /* implies memory barrier */
+ }
+
+ return retval;
+}
+
#else
extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
volatile int * m, unsigned long old, unsigned long new);
#define __cmpxchg_u64 __cmpxchg_u64_unsupported_on_32bit_kernels
+extern unsigned long __cmpxchg_u64_local_unsupported_on_32bit_kernels(
+ volatile int * m, unsigned long old, unsigned long new);
+#define __cmpxchg_u64_local __cmpxchg_u64_local_unsupported_on_32bit_kernels
#endif

/* This function doesn't exist, so you'll get a linker error
@@ -326,7 +430,26 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
return old;
}

-#define cmpxchg(ptr,old,new) ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr))))
+static inline unsigned long __cmpxchg_local(volatile void * ptr,
+ unsigned long old, unsigned long new, int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_local(ptr, old, new);
+ case 8:
+ return __cmpxchg_u64_local(ptr, old, new);
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+#define cmpxchg(ptr,old,new) \
+ ((__typeof__(*(ptr)))__cmpxchg((ptr), \
+ (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr))))
+
+#define cmpxchg_local(ptr,old,new) \
+ ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \
+ (unsigned long)(old), (unsigned long)(new),sizeof(*(ptr))))

extern void set_handler (unsigned long offset, void *addr, unsigned long len);
extern void set_uncached_handler (unsigned long offset, void *addr, unsigned long len);

2007-02-11 19:24:06

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 04/10] local_t : ia64 extension

local_t : ia64 extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-ia64/local.h
+++ b/include/asm-ia64/local.h
@@ -1,50 +1 @@
-#ifndef _ASM_IA64_LOCAL_H
-#define _ASM_IA64_LOCAL_H
-
-/*
- * Copyright (C) 2003 Hewlett-Packard Co
- * David Mosberger-Tang <[email protected]>
- */
-
-#include <linux/percpu.h>
-
-typedef struct {
- atomic64_t val;
-} local_t;
-
-#define LOCAL_INIT(i) ((local_t) { { (i) } })
-#define local_read(l) atomic64_read(&(l)->val)
-#define local_set(l, i) atomic64_set(&(l)->val, i)
-#define local_inc(l) atomic64_inc(&(l)->val)
-#define local_dec(l) atomic64_dec(&(l)->val)
-#define local_add(i, l) atomic64_add((i), &(l)->val)
-#define local_sub(i, l) atomic64_sub((i), &(l)->val)
-
-/* Non-atomic variants, i.e., preemption disabled and won't be touched in interrupt, etc. */
-
-#define __local_inc(l) (++(l)->val.counter)
-#define __local_dec(l) (--(l)->val.counter)
-#define __local_add(i,l) ((l)->val.counter += (i))
-#define __local_sub(i,l) ((l)->val.counter -= (i))
-
-/*
- * Use these for per-cpu local_t variables. Note they take a variable (eg. mystruct.foo),
- * not an address.
- */
-#define cpu_local_read(v) local_read(&__ia64_per_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__ia64_per_cpu_var(v), (i))
-#define cpu_local_inc(v) local_inc(&__ia64_per_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__ia64_per_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__ia64_per_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__ia64_per_cpu_var(v))
-
-/*
- * Non-atomic increments, i.e., preemption disabled and won't be touched in interrupt,
- * etc.
- */
-#define __cpu_local_inc(v) __local_inc(&__ia64_per_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__ia64_per_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__ia64_per_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__ia64_per_cpu_var(v))
-
-#endif /* _ASM_IA64_LOCAL_H */
+#include <asm-generic/local.h>

2007-02-11 19:24:07

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 07/10] local_t : powerpc extension

local_t : powerpc extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-powerpc/local.h
+++ b/include/asm-powerpc/local.h
@@ -1 +1,342 @@
-#include <asm-generic/local.h>
+#ifndef _ARCH_POWERPC_LOCAL_H
+#define _ARCH_POWERPC_LOCAL_H
+
+#include <linux/percpu.h>
+#include <asm/atomic.h>
+
+typedef struct
+{
+ atomic_long_t a;
+} local_t;
+
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+
+#define local_add(i,l) atomic_long_add((i),(&(l)->a))
+#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
+#define local_inc(l) atomic_long_inc(&(l)->a)
+#define local_dec(l) atomic_long_dec(&(l)->a)
+
+#ifndef __powerpc64__
+
+static __inline__ int local_add_return(int a, local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # local_add_return\n\
+ add %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+
+static __inline__ int local_sub_return(int a, local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # local_sub_return\n\
+ subf %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+static __inline__ int local_inc_return(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_inc_return\n\
+ addic %0,%0,1\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+static __inline__ int local_dec_return(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_dec_return\n\
+ addic %0,%0,-1\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, int a, int u)
+{
+ int t;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%1 # local_add_unless\n\
+ cmpw 0,%0,%3 \n\
+ beq- 2f \n\
+ add %0,%2,%0 \n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%1 \n\
+ bne- 1b \n"
+" subf %0,%2,%0 \n\
+2:"
+ : "=&r" (t)
+ : "r" (&(l->a.counter)), "r" (a), "r" (u)
+ : "cc", "memory");
+
+ return t != u;
+}
+
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+/*
+ * Atomically test *l and decrement if it is greater than 0.
+ * The function returns the old value of *l minus 1.
+ */
+static __inline__ int local_dec_if_positive(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_dec_if_positive\n\
+ cmpwi %0,1\n\
+ addi %0,%0,-1\n\
+ blt- 2f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ "\n\
+2:" : "=&b" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#else /* __powerpc64__ */
+
+static __inline__ long local_add_return(long a, local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 # local_add_return\n\
+ add %0,%1,%0\n\
+ stdcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+
+static __inline__ long local_sub_return(long a, local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 # local_sub_return\n\
+ subf %0,%1,%0\n\
+ stdcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+static __inline__ long local_inc_return(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_inc_return\n\
+ addic %0,%0,1\n\
+ stdcx. %0,0,%1 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+static __inline__ long local_dec_return(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_dec_return\n\
+ addic %0,%0,-1\n\
+ stdcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+/*
+ * Atomically test *l and decrement if it is greater than 0.
+ * The function returns the old value of *l minus 1.
+ */
+static __inline__ long local_dec_if_positive(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_dec_if_positive\n\
+ addic. %0,%0,-1\n\
+ blt- 2f\n\
+ stdcx. %0,0,%1\n\
+ bne- 1b"
+ "\n\
+2:" : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_cmpxchg(l, o, n) \
+ ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, (n)) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, long a, long u)
+{
+ long t;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%1 # local_add_unless\n\
+ cmpd 0,%0,%3 \n\
+ beq- 2f \n\
+ add %0,%2,%0 \n"
+" stdcx. %0,0,%1 \n\
+ bne- 1b \n"
+" subf %0,%2,%0 \n\
+2:"
+ : "=&r" (t)
+ : "r" (&(l->a.counter)), "r" (a), "r" (u)
+ : "cc", "memory");
+
+ return t != u;
+}
+
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#endif /* !__powerpc64__ */
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations. Note they take
+ * a variable, not an address.
+ */
+
+#define __local_inc(l) ((l)->a.counter++)
+#define __local_dec(l) ((l)->a.counter++)
+#define __local_add(i,l) ((l)->a.counter+=(i))
+#define __local_sub(i,l) ((l)->a.counter-=(i))
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+ still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(l) \
+ ({ local_t res__; \
+ preempt_disable(); \
+ res__ = (l); \
+ preempt_enable(); \
+ res__; })
+#define cpu_local_wrap(l) \
+ ({ preempt_disable(); \
+ l; \
+ preempt_enable(); }) \
+
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
+
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
+
+#endif /* _ARCH_POWERPC_LOCAL_H */
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -7,7 +7,6 @@
#include <linux/kernel.h>

#include <asm/hw_irq.h>
-#include <asm/atomic.h>

/*
* Memory barrier.
@@ -226,6 +225,29 @@ __xchg_u32(volatile void *p, unsigned long val)
return prev;
}

+/*
+ * Atomic exchange
+ *
+ * Changes the memory location '*ptr' to be val and returns
+ * the previous value stored there.
+ */
+static __inline__ unsigned long
+__xchg_u32_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stwcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
#ifdef CONFIG_PPC64
static __inline__ unsigned long
__xchg_u64(volatile void *p, unsigned long val)
@@ -245,6 +267,23 @@ __xchg_u64(volatile void *p, unsigned long val)

return prev;
}
+
+static __inline__ unsigned long
+__xchg_u64_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stdcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
#endif

/*
@@ -268,12 +307,33 @@ __xchg(volatile void *ptr, unsigned long x, unsigned int size)
return x;
}

+static __inline__ unsigned long
+__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __xchg_u32_local(ptr, x);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __xchg_u64_local(ptr, x);
+#endif
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+}
#define xchg(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
})

+#define xchg_local(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_local((ptr), \
+ (unsigned long)_x_, sizeof(*(ptr))); \
+ })
+
#define tas(ptr) (xchg((ptr),1))

/*
@@ -305,6 +365,28 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
return prev;
}

+static __inline__ unsigned long
+__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
#ifdef CONFIG_PPC64
static __inline__ unsigned long
__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
@@ -327,6 +409,27 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)

return prev;
}
+
+static __inline__ unsigned long
+__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
+ cmpd 0,%0,%3\n\
+ bne- 2f\n\
+ stdcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
#endif

/* This function doesn't exist, so you'll get a linker error
@@ -349,6 +452,22 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
return old;
}

+static __inline__ unsigned long
+__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_local(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_local(ptr, old, new);
+#endif
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
#define cmpxchg(ptr,o,n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
@@ -357,6 +476,15 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})

+
+#define cmpxchg_local(ptr,o,n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, sizeof(*(ptr))); \
+ })
+
#ifdef CONFIG_PPC64
/*
* We handle most unaligned accesses in hardware. On the other hand

2007-02-11 19:26:07

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 10/10] local_t : x86_64 extension

local_t : x86_64 extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-x86_64/local.h
+++ b/include/asm-x86_64/local.h
@@ -2,49 +2,183 @@
#define _ARCH_X8664_LOCAL_H

#include <linux/percpu.h>
+#include <asm/atomic.h>

typedef struct
{
- volatile long counter;
+ atomic_long_t a;
} local_t;

-#define LOCAL_INIT(i) { (i) }
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }

-#define local_read(v) ((v)->counter)
-#define local_set(v,i) (((v)->counter) = (i))
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l,i) atomic_long_set(&(l)->a, (i))

-static inline void local_inc(local_t *v)
+static inline void local_inc(local_t *l)
{
__asm__ __volatile__(
"incq %0"
- :"=m" (v->counter)
- :"m" (v->counter));
+ :"=m" (l->a.counter)
+ :"m" (l->a.counter));
}

-static inline void local_dec(local_t *v)
+static inline void local_dec(local_t *l)
{
__asm__ __volatile__(
"decq %0"
- :"=m" (v->counter)
- :"m" (v->counter));
+ :"=m" (l->a.counter)
+ :"m" (l->a.counter));
}

-static inline void local_add(long i, local_t *v)
+static inline void local_add(long i, local_t *l)
{
__asm__ __volatile__(
"addq %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
+ :"=m" (l->a.counter)
+ :"ir" (i), "m" (l->a.counter));
}

-static inline void local_sub(long i, local_t *v)
+static inline void local_sub(long i, local_t *l)
{
__asm__ __volatile__(
"subq %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
+ :"=m" (l->a.counter)
+ :"ir" (i), "m" (l->a.counter));
}

+/**
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer to type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int local_sub_and_test(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "subq %2,%0; sete %1"
+ :"=m" (l->a.counter), "=qm" (c)
+ :"ir" (i), "m" (l->a.counter) : "memory");
+ return c;
+}
+
+/**
+ * local_dec_and_test - decrement and test
+ * @l: pointer to type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static __inline__ int local_dec_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "decq %0; sete %1"
+ :"=m" (l->a.counter), "=qm" (c)
+ :"m" (l->a.counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * local_inc_and_test - increment and test
+ * @l: pointer to type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int local_inc_and_test(local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "incq %0; sete %1"
+ :"=m" (l->a.counter), "=qm" (c)
+ :"m" (l->a.counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * local_add_negative - add and test if negative
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static __inline__ int local_add_negative(long i, local_t *l)
+{
+ unsigned char c;
+
+ __asm__ __volatile__(
+ "addq %2,%0; sets %1"
+ :"=m" (l->a.counter), "=qm" (c)
+ :"ir" (i), "m" (l->a.counter) : "memory");
+ return c;
+}
+
+/**
+ * local_add_return - add and return
+ * @i: integer value to add
+ * @l: pointer to type local_t
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static __inline__ long local_add_return(long i, local_t *l)
+{
+ long __i = i;
+ __asm__ __volatile__(
+ "xaddq %0, %1;"
+ :"+r" (i), "+m" (l->a.counter)
+ : : "memory");
+ return i + __i;
+}
+
+static __inline__ long local_sub_return(long i, local_t *l)
+{
+ return local_add_return(-i,l);
+}
+
+#define local_inc_return(l) (local_add_return(1,l))
+#define local_dec_return(l) (local_sub_return(1,l))
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+/* Always has a lock prefix */
+#define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
+
+/**
+ * atomic_up_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ for (;;) { \
+ if (unlikely(c == (u))) \
+ break; \
+ old = local_cmpxchg((l), c, c + (a)); \
+ if (likely(old == c)) \
+ break; \
+ c = old; \
+ } \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* On x86-64 these are better than the atomic variants on SMP kernels
because they dont use a lock prefix. */
#define __local_inc(l) local_inc(l)
@@ -62,27 +196,27 @@ static inline void local_sub(long i, local_t *v)

/* Need to disable preemption for the cpu local counters otherwise we could
still access a variable of a previous CPU in a non atomic way. */
-#define cpu_local_wrap_v(v) \
+#define cpu_local_wrap_v(l) \
({ local_t res__; \
preempt_disable(); \
- res__ = (v); \
+ res__ = (l); \
preempt_enable(); \
res__; })
-#define cpu_local_wrap(v) \
+#define cpu_local_wrap(l) \
({ preempt_disable(); \
- v; \
+ l; \
preempt_enable(); }) \

-#define cpu_local_read(v) cpu_local_wrap_v(local_read(&__get_cpu_var(v)))
-#define cpu_local_set(v, i) cpu_local_wrap(local_set(&__get_cpu_var(v), (i)))
-#define cpu_local_inc(v) cpu_local_wrap(local_inc(&__get_cpu_var(v)))
-#define cpu_local_dec(v) cpu_local_wrap(local_dec(&__get_cpu_var(v)))
-#define cpu_local_add(i, v) cpu_local_wrap(local_add((i), &__get_cpu_var(v)))
-#define cpu_local_sub(i, v) cpu_local_wrap(local_sub((i), &__get_cpu_var(v)))
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))

-#define __cpu_local_inc(v) cpu_local_inc(v)
-#define __cpu_local_dec(v) cpu_local_dec(v)
-#define __cpu_local_add(i, v) cpu_local_add((i), (v))
-#define __cpu_local_sub(i, v) cpu_local_sub((i), (v))
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))

-#endif /* _ARCH_I386_LOCAL_H */
+#endif /* _ARCH_X8664_LOCAL_H */
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -209,9 +209,45 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old;
}

+static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long old, unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ __volatile__("cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
#define cmpxchg(ptr,o,n)\
((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
(unsigned long)(n),sizeof(*(ptr))))
+#define cmpxchg_local(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))

#ifdef CONFIG_SMP
#define smp_mb() mb()

2007-02-11 19:28:32

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH 02/10] local_t : alpha extension

local_t : alpha extension

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-alpha/local.h
+++ b/include/asm-alpha/local.h
@@ -4,37 +4,115 @@
#include <linux/percpu.h>
#include <asm/atomic.h>

-typedef atomic64_t local_t;
+typedef struct
+{
+ atomic_long_t a;
+} local_t;

-#define LOCAL_INIT(i) ATOMIC64_INIT(i)
-#define local_read(v) atomic64_read(v)
-#define local_set(v,i) atomic64_set(v,i)
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+#define local_inc(l) atomic_long_inc(&(l)->a)
+#define local_dec(l) atomic_long_dec(&(l)->a)
+#define local_add(i,l) atomic_long_add((i),(&(l)->a))
+#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))

-#define local_inc(v) atomic64_inc(v)
-#define local_dec(v) atomic64_dec(v)
-#define local_add(i, v) atomic64_add(i, v)
-#define local_sub(i, v) atomic64_sub(i, v)
+static __inline__ long local_add_return(long i, local_t * l)
+{
+ long temp, result;
+ __asm__ __volatile__(
+ "1: ldq_l %0,%1\n"
+ " addq %0,%3,%2\n"
+ " addq %0,%3,%0\n"
+ " stq_c %0,%1\n"
+ " beq %0,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ :"=&r" (temp), "=m" (l->a.counter), "=&r" (result)
+ :"Ir" (i), "m" (l->a.counter) : "memory");
+ return result;
+}

-#define __local_inc(v) ((v)->counter++)
-#define __local_dec(v) ((v)->counter++)
-#define __local_add(i,v) ((v)->counter+=(i))
-#define __local_sub(i,v) ((v)->counter-=(i))
+static __inline__ long local_sub_return(long i, local_t * l)
+{
+ long temp, result;
+ __asm__ __volatile__(
+ "1: ldq_l %0,%1\n"
+ " subq %0,%3,%2\n"
+ " subq %0,%3,%0\n"
+ " stq_c %0,%1\n"
+ " beq %0,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ :"=&r" (temp), "=m" (l->a.counter), "=&r" (result)
+ :"Ir" (i), "m" (l->a.counter) : "memory");
+ return result;
+}
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u) \
+({ \
+ long c, old; \
+ c = local_read(l); \
+ for (;;) { \
+ if (unlikely(c == (u))) \
+ break; \
+ old = local_cmpxchg((l), c, c + (a)); \
+ if (likely(old == c)) \
+ break; \
+ c = old; \
+ } \
+ c != (u); \
+})
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+
+#define local_dec_return(l) local_sub_return(1,(l))
+
+#define local_inc_return(l) local_add_return(1,(l))
+
+#define local_sub_and_test(i,l) (local_sub_return((i), (l)) == 0)
+
+#define local_inc_and_test(l) (local_add_return(1, (l)) == 0)
+
+#define local_dec_and_test(l) (local_sub_return(1, (l)) == 0)
+
+/* Verify if faster than atomic ops */
+#define __local_inc(l) ((l)->a.counter++)
+#define __local_dec(l) ((l)->a.counter++)
+#define __local_add(i,l) ((l)->a.counter+=(i))
+#define __local_sub(i,l) ((l)->a.counter-=(i))

/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define cpu_local_read(v) local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i))
-
-#define cpu_local_inc(v) local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v) local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v))
-
-#define __cpu_local_inc(v) __local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v) __local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v) __local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v) __local_sub((i), &__get_cpu_var(v))
+#define cpu_local_read(l) local_read(&__get_cpu_var(l))
+#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i))
+
+#define cpu_local_inc(l) local_inc(&__get_cpu_var(l))
+#define cpu_local_dec(l) local_dec(&__get_cpu_var(l))
+#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l))
+#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l))
+
+#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l))
+#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l))
+#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l))
+#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l))

#endif /* _ALPHA_LOCAL_H */
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -443,6 +443,111 @@ extern void __xchg_called_with_bad_pointer(void);
(__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
})

+static inline unsigned long
+__xchg_u8_local(volatile char *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " insbl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extbl %2,%4,%0\n"
+ " mskbl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+__xchg_u16_local(volatile short *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " inswl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extwl %2,%4,%0\n"
+ " mskwl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+__xchg_u32_local(volatile int *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+static inline unsigned long
+__xchg_u64_local(volatile long *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,2f\n"
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+#define __xchg_local(ptr, x, size) \
+({ \
+ unsigned long __xchg__res; \
+ volatile void *__xchg__ptr = (ptr); \
+ switch (size) { \
+ case 1: __xchg__res = __xchg_u8_local(__xchg__ptr, x); break; \
+ case 2: __xchg__res = __xchg_u16_local(__xchg__ptr, x); break; \
+ case 4: __xchg__res = __xchg_u32_local(__xchg__ptr, x); break; \
+ case 8: __xchg__res = __xchg_u64_local(__xchg__ptr, x); break; \
+ default: __xchg_called_with_bad_pointer(); __xchg__res = x; \
+ } \
+ __xchg__res; \
+})
+
+#define xchg_local(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \
+ sizeof(*(ptr))); \
+ })
+
#define tas(ptr) (xchg((ptr),1))


@@ -596,6 +701,128 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
(unsigned long)_n_, sizeof(*(ptr))); \
})

+static inline unsigned long
+__cmpxchg_u8_local(volatile char *m, long old, long new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " insbl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extbl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskbl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+__cmpxchg_u16_local(volatile short *m, long old, long new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " inswl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extwl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskwl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+__cmpxchg_u32_local(volatile int *m, int old, int new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+__cmpxchg_u64_local(volatile long *m, unsigned long old, unsigned long new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,3f\n"
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+ int size)
+{
+ switch (size) {
+ case 1:
+ return __cmpxchg_u8_local(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_local(ptr, old, new);
+ case 4:
+ return __cmpxchg_u32_local(ptr, old, new);
+ case 8:
+ return __cmpxchg_u64_local(ptr, old, new);
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+#define cmpxchg_local(ptr,o,n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, sizeof(*(ptr))); \
+ })
+
#endif /* __ASSEMBLY__ */

#define arch_align_stack(x) (x)

2007-02-15 06:57:06

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 07/10] local_t : powerpc extension

On Sun, 11 Feb 2007 14:18:12 -0500 Mathieu Desnoyers <[email protected]> wrote:

> local_t : powerpc extension

This diff contains changes which are also present in "[PATCH 07/10]
atomic.h : Add atomic64 cmpxchg, xchg and add_unless to powerpc", resulting
in rather a mess.

I dropped the duplicated hunks from the later patch rather than this one,
but I haven't tried to compile the result yet.

2007-02-15 07:20:28

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 07/10] local_t : powerpc extension

* Andrew Morton ([email protected]) wrote:
> On Sun, 11 Feb 2007 14:18:12 -0500 Mathieu Desnoyers <[email protected]> wrote:
>
> > local_t : powerpc extension
>
> This diff contains changes which are also present in "[PATCH 07/10]
> atomic.h : Add atomic64 cmpxchg, xchg and add_unless to powerpc", resulting
> in rather a mess.
>
> I dropped the duplicated hunks from the later patch rather than this one,
> but I haven't tried to compile the result yet.
>

Yes, sorry about that : the atomic.h diff has been duplicated in both
the local and atomic changes.

The change to system.h for local.h is to add the xchg_local and
cmpxchg_local, while the change for atomic.h is to simply remove the
#include <asm/atomic.h> from system.h.

Here is a proper local.h diff, if it happens to become necessary, but
simply removing the duplicated asm-powerpc/system.h hunks will be ok.

Thanks,

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-powerpc/local.h
+++ b/include/asm-powerpc/local.h
@@ -1 +1,342 @@
-#include <asm-generic/local.h>
+#ifndef _ARCH_POWERPC_LOCAL_H
+#define _ARCH_POWERPC_LOCAL_H
+
+#include <linux/percpu.h>
+#include <asm/atomic.h>
+
+typedef struct
+{
+ atomic_long_t a;
+} local_t;
+
+#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+#define local_read(l) atomic_long_read(&(l)->a)
+#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+
+#define local_add(i,l) atomic_long_add((i),(&(l)->a))
+#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
+#define local_inc(l) atomic_long_inc(&(l)->a)
+#define local_dec(l) atomic_long_dec(&(l)->a)
+
+#ifndef __powerpc64__
+
+static __inline__ int local_add_return(int a, local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # local_add_return\n\
+ add %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+
+static __inline__ int local_sub_return(int a, local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # local_sub_return\n\
+ subf %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+static __inline__ int local_inc_return(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_inc_return\n\
+ addic %0,%0,1\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+static __inline__ int local_dec_return(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_dec_return\n\
+ addic %0,%0,-1\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_cmpxchg(l, o, n) \
+ (cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, int a, int u)
+{
+ int t;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%1 # local_add_unless\n\
+ cmpw 0,%0,%3 \n\
+ beq- 2f \n\
+ add %0,%2,%0 \n"
+ PPC405_ERR77(0,%2)
+" stwcx. %0,0,%1 \n\
+ bne- 1b \n"
+" subf %0,%2,%0 \n\
+2:"
+ : "=&r" (t)
+ : "r" (&(l->a.counter)), "r" (a), "r" (u)
+ : "cc", "memory");
+
+ return t != u;
+}
+
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+/*
+ * Atomically test *l and decrement if it is greater than 0.
+ * The function returns the old value of *l minus 1.
+ */
+static __inline__ int local_dec_if_positive(local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # local_dec_if_positive\n\
+ cmpwi %0,1\n\
+ addi %0,%0,-1\n\
+ blt- 2f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ "\n\
+2:" : "=&b" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#else /* __powerpc64__ */
+
+static __inline__ long local_add_return(long a, local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 # local_add_return\n\
+ add %0,%1,%0\n\
+ stdcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+
+static __inline__ long local_sub_return(long a, local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 # local_sub_return\n\
+ subf %0,%1,%0\n\
+ stdcx. %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+static __inline__ long local_inc_return(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_inc_return\n\
+ addic %0,%0,1\n\
+ stdcx. %0,0,%1 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
+
+static __inline__ long local_dec_return(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_dec_return\n\
+ addic %0,%0,-1\n\
+ stdcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+/*
+ * Atomically test *l and decrement if it is greater than 0.
+ * The function returns the old value of *l minus 1.
+ */
+static __inline__ long local_dec_if_positive(local_t *l)
+{
+ long t;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%1 # local_dec_if_positive\n\
+ addic. %0,%0,-1\n\
+ blt- 2f\n\
+ stdcx. %0,0,%1\n\
+ bne- 1b"
+ "\n\
+2:" : "=&r" (t)
+ : "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}
+
+#define local_cmpxchg(l, o, n) \
+ ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, (n)) (xchg_local(&((l)->a.counter), (n)))
+
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+static __inline__ int local_add_unless(local_t *l, long a, long u)
+{
+ long t;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%1 # local_add_unless\n\
+ cmpd 0,%0,%3 \n\
+ beq- 2f \n\
+ add %0,%2,%0 \n"
+" stdcx. %0,0,%1 \n\
+ bne- 1b \n"
+" subf %0,%2,%0 \n\
+2:"
+ : "=&r" (t)
+ : "r" (&(l->a.counter)), "r" (a), "r" (u)
+ : "cc", "memory");
+
+ return t != u;
+}
+
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
+#endif /* !__powerpc64__ */
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations. Note they take
+ * a variable, not an address.
+ */
+
+#define __local_inc(l) ((l)->a.counter++)
+#define __local_dec(l) ((l)->a.counter++)
+#define __local_add(i,l) ((l)->a.counter+=(i))
+#define __local_sub(i,l) ((l)->a.counter-=(i))
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+ still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(l) \
+ ({ local_t res__; \
+ preempt_disable(); \
+ res__ = (l); \
+ preempt_enable(); \
+ res__; })
+#define cpu_local_wrap(l) \
+ ({ preempt_disable(); \
+ l; \
+ preempt_enable(); }) \
+
+#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l)))
+#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i)))
+#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l)))
+#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l)))
+#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l)))
+#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l)))
+
+#define __cpu_local_inc(l) cpu_local_inc(l)
+#define __cpu_local_dec(l) cpu_local_dec(l)
+#define __cpu_local_add(i, l) cpu_local_add((i), (l))
+#define __cpu_local_sub(i, l) cpu_local_sub((i), (l))
+
+#endif /* _ARCH_POWERPC_LOCAL_H */
--
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, ?cole Polytechnique de Montr?al
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2007-02-15 14:51:16

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH 07/10] local_t : powerpc extension


On Feb 15, 2007, at 1:20 AM, Mathieu Desnoyers wrote:

> * Andrew Morton ([email protected]) wrote:
>> On Sun, 11 Feb 2007 14:18:12 -0500 Mathieu Desnoyers
>> <[email protected]> wrote:
>>
>>> local_t : powerpc extension
>>
>> This diff contains changes which are also present in "[PATCH 07/10]
>> atomic.h : Add atomic64 cmpxchg, xchg and add_unless to powerpc",
>> resulting
>> in rather a mess.
>>
>> I dropped the duplicated hunks from the later patch rather than
>> this one,
>> but I haven't tried to compile the result yet.
>>
>
> Yes, sorry about that : the atomic.h diff has been duplicated in both
> the local and atomic changes.
>
> The change to system.h for local.h is to add the xchg_local and
> cmpxchg_local, while the change for atomic.h is to simply remove the
> #include <asm/atomic.h> from system.h.
>
> Here is a proper local.h diff, if it happens to become necessary, but
> simply removing the duplicated asm-powerpc/system.h hunks will be ok.
>
> Thanks,
>
> Signed-off-by: Mathieu Desnoyers <[email protected]>
>
> --- a/include/asm-powerpc/local.h
> +++ b/include/asm-powerpc/local.h
> @@ -1 +1,342 @@
> -#include <asm-generic/local.h>
> +#ifndef _ARCH_POWERPC_LOCAL_H
> +#define _ARCH_POWERPC_LOCAL_H
> +
> +#include <linux/percpu.h>
> +#include <asm/atomic.h>
> +
> +typedef struct
> +{
> + atomic_long_t a;
> +} local_t;
> +
> +#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
> +
> +#define local_read(l) atomic_long_read(&(l)->a)
> +#define local_set(l,i) atomic_long_set(&(l)->a, (i))
> +
> +#define local_add(i,l) atomic_long_add((i),(&(l)->a))
> +#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
> +#define local_inc(l) atomic_long_inc(&(l)->a)
> +#define local_dec(l) atomic_long_dec(&(l)->a)
> +
> +#ifndef __powerpc64__
> +
> +static __inline__ int local_add_return(int a, local_t *l)
> +{

is there a reason this isn't local_add_return(long a, local_t *l) on
ppc32?

(same comment for other functions)

- k



2007-02-15 20:02:49

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - use long for powerpc32

* Kumar Gala ([email protected]) wrote:
> is there a reason this isn't local_add_return(long a, local_t *l) on
> ppc32?
>
> (same comment for other functions)
>
> - k

no, except that we use the code is taken from atomic.h and used an
int parameter. However, due to the semantics of local_t, we should
manipulate longs instead of ints, even if they are the same size on a
given architecture (ppc32 and mips32 are affectec by this).

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-powerpc/local.h
+++ b/include/asm-powerpc/local.h
@@ -21,9 +21,9 @@ typedef struct

#ifndef __powerpc64__

-static __inline__ int local_add_return(int a, local_t *l)
+static __inline__ long local_add_return(long a, local_t *l)
{
- int t;
+ long t;

__asm__ __volatile__(
"1: lwarx %0,0,%2 # local_add_return\n\
@@ -40,9 +40,9 @@ static __inline__ int local_add_return(int a, local_t *l)

#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)

-static __inline__ int local_sub_return(int a, local_t *l)
+static __inline__ long local_sub_return(long a, local_t *l)
{
- int t;
+ long t;

__asm__ __volatile__(
"1: lwarx %0,0,%2 # local_sub_return\n\
@@ -57,9 +57,9 @@ static __inline__ int local_sub_return(int a, local_t *l)
return t;
}

-static __inline__ int local_inc_return(local_t *l)
+static __inline__ long local_inc_return(local_t *l)
{
- int t;
+ long t;

__asm__ __volatile__(
"1: lwarx %0,0,%1 # local_inc_return\n\
@@ -84,9 +84,9 @@ static __inline__ int local_inc_return(local_t *l)
*/
#define local_inc_and_test(l) (local_inc_return(l) == 0)

-static __inline__ int local_dec_return(local_t *l)
+static __inline__ long local_dec_return(local_t *l)
{
- int t;
+ long t;

__asm__ __volatile__(
"1: lwarx %0,0,%1 # local_dec_return\n\
@@ -114,9 +114,9 @@ static __inline__ int local_dec_return(local_t *l)
* Atomically adds @a to @l, so long as it was not @u.
* Returns non-zero if @l was not @u, and zero otherwise.
*/
-static __inline__ int local_add_unless(local_t *l, int a, int u)
+static __inline__ int local_add_unless(local_t *l, long a, long u)
{
- int t;
+ long t;

__asm__ __volatile__ (
"1: lwarx %0,0,%1 # local_add_unless\n\
@@ -144,9 +144,9 @@ static __inline__ int local_add_unless(local_t *l, int a, int u)
* Atomically test *l and decrement if it is greater than 0.
* The function returns the old value of *l minus 1.
*/
-static __inline__ int local_dec_if_positive(local_t *l)
+static __inline__ long local_dec_if_positive(local_t *l)
{
- int t;
+ long t;

__asm__ __volatile__(
"1: lwarx %0,0,%1 # local_dec_if_positive\n\
--
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, ?cole Polytechnique de Montr?al
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2007-02-15 20:36:19

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - use long for powerpc32


On Feb 15, 2007, at 2:02 PM, Mathieu Desnoyers wrote:

> * Kumar Gala ([email protected]) wrote:
>> is there a reason this isn't local_add_return(long a, local_t *l) on
>> ppc32?
>>
>> (same comment for other functions)
>>
>> - k
>
> no, except that we use the code is taken from atomic.h and used an
> int parameter. However, due to the semantics of local_t, we should
> manipulate longs instead of ints, even if they are the same size on a
> given architecture (ppc32 and mips32 are affectec by this).
>
> Signed-off-by: Mathieu Desnoyers <[email protected]>

Right, this means we can simply for ppc version down to:

+static __inline__ int local_add_return(int a, local_t *l)
+{
+ int t;
+
+ __asm__ __volatile__(
+"1: PPC_LLARX %0,0,%2 # local_add_return\n\
+ add %0,%1,%0\n"
+ PPC405_ERR77(0,%2)
+" PPC_STLCX %0,0,%2 \n\
+ bne- 1b"
+ : "=&r" (t)
+ : "r" (a), "r" (&(l->a.counter))
+ : "cc", "memory");
+
+ return t;
+}

and that should work for both ppc32 & ppc64 and removes the
duplication between the two.

- k

2007-02-15 21:23:04

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - use long for powerpc32

* Kumar Gala ([email protected]) wrote:
>
> On Feb 15, 2007, at 2:02 PM, Mathieu Desnoyers wrote:
>
> >* Kumar Gala ([email protected]) wrote:
> >>is there a reason this isn't local_add_return(long a, local_t *l) on
> >>ppc32?
> >>
> >>(same comment for other functions)
> >>
> >>- k
> >
> >no, except that we use the code is taken from atomic.h and used an
> >int parameter. However, due to the semantics of local_t, we should
> >manipulate longs instead of ints, even if they are the same size on a
> >given architecture (ppc32 and mips32 are affectec by this).
> >
> >Signed-off-by: Mathieu Desnoyers <[email protected]>
>
> Right, this means we can simply for ppc version down to:
>
> +static __inline__ int local_add_return(int a, local_t *l)
> +{
> + int t;
> +
> + __asm__ __volatile__(
> +"1: PPC_LLARX %0,0,%2 # local_add_return\n\
> + add %0,%1,%0\n"
> + PPC405_ERR77(0,%2)
> +" PPC_STLCX %0,0,%2 \n\
what about the dot after stwcx ? (stwcx.) ? Is it no longer required ?

> + bne- 1b"
> + : "=&r" (t)
> + : "r" (a), "r" (&(l->a.counter))
> + : "cc", "memory");
> +
> + return t;
> +}
>
> and that should work for both ppc32 & ppc64 and removes the
> duplication between the two.
>
> - k
>

--
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, ?cole Polytechnique de Montr?al
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2007-02-15 21:30:55

by Kumar Gala

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - use long for powerpc32


On Feb 15, 2007, at 3:23 PM, Mathieu Desnoyers wrote:

> * Kumar Gala ([email protected]) wrote:
>>
>> On Feb 15, 2007, at 2:02 PM, Mathieu Desnoyers wrote:
>>
>>> * Kumar Gala ([email protected]) wrote:
>>>> is there a reason this isn't local_add_return(long a, local_t
>>>> *l) on
>>>> ppc32?
>>>>
>>>> (same comment for other functions)
>>>>
>>>> - k
>>>
>>> no, except that we use the code is taken from atomic.h and used an
>>> int parameter. However, due to the semantics of local_t, we should
>>> manipulate longs instead of ints, even if they are the same size
>>> on a
>>> given architecture (ppc32 and mips32 are affectec by this).
>>>
>>> Signed-off-by: Mathieu Desnoyers <[email protected]>
>>
>> Right, this means we can simply for ppc version down to:
>>
>> +static __inline__ int local_add_return(int a, local_t *l)
>> +{
>> + int t;
>> +
>> + __asm__ __volatile__(
>> +"1: PPC_LLARX %0,0,%2 # local_add_return\n\
>> + add %0,%1,%0\n"
>> + PPC405_ERR77(0,%2)
>> +" PPC_STLCX %0,0,%2 \n\
> what about the dot after stwcx ? (stwcx.) ? Is it no longer required ?

Its implied, PPC_LLARX and PPC_STLCW are #defines that expand to
proper instructions.

(look in include/asm-powerpc/asm-compat.h) for the defines.

Also, the PPC405_ERR77 will preprocess out on ppc64 if you were
concerned.

- k

2007-02-15 21:32:50

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - use long for powerpc32

Please forget about my last email :

#define PPC_STLCX stringify_in_c(stdcx.)
and
#define PPC_STLCX stringify_in_c(stwcx.)

are self explaining. :)

Mathieu

* Kumar Gala ([email protected]) wrote:
>
> On Feb 15, 2007, at 2:02 PM, Mathieu Desnoyers wrote:
>
> >* Kumar Gala ([email protected]) wrote:
> >>is there a reason this isn't local_add_return(long a, local_t *l) on
> >>ppc32?
> >>
> >>(same comment for other functions)
> >>
> >>- k
> >
> >no, except that we use the code is taken from atomic.h and used an
> >int parameter. However, due to the semantics of local_t, we should
> >manipulate longs instead of ints, even if they are the same size on a
> >given architecture (ppc32 and mips32 are affectec by this).
> >
> >Signed-off-by: Mathieu Desnoyers <[email protected]>
>
> Right, this means we can simply for ppc version down to:
>
> +static __inline__ int local_add_return(int a, local_t *l)
> +{
> + int t;
> +
> + __asm__ __volatile__(
> +"1: PPC_LLARX %0,0,%2 # local_add_return\n\
> + add %0,%1,%0\n"
> + PPC405_ERR77(0,%2)
> +" PPC_STLCX %0,0,%2 \n\
> + bne- 1b"
> + : "=&r" (t)
> + : "r" (a), "r" (&(l->a.counter))
> + : "cc", "memory");
> +
> + return t;
> +}
>
> and that should work for both ppc32 & ppc64 and removes the
> duplication between the two.
>
> - k
>

--
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, ?cole Polytechnique de Montr?al
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2007-02-15 21:43:58

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] local_t : powerpc extension - shrink powerpc local.h

local_t : powerpc extension - shrink powerpc local.h

By using PPC_LLARX and PPC_STLCX, we can cut in half the size of powerpc
local.h.

In applies on top of the local_t : powerpc extension - use long for powerpc32

Signed-off-by: Mathieu Desnoyers <[email protected]>

--- a/include/asm-powerpc/local.h
+++ b/include/asm-powerpc/local.h
@@ -19,17 +19,15 @@ typedef struct
#define local_inc(l) atomic_long_inc(&(l)->a)
#define local_dec(l) atomic_long_dec(&(l)->a)

-#ifndef __powerpc64__
-
static __inline__ long local_add_return(long a, local_t *l)
{
long t;

__asm__ __volatile__(
-"1: lwarx %0,0,%2 # local_add_return\n\
+"1:" PPC_LLARX "%0,0,%2 # local_add_return\n\
add %0,%1,%0\n"
PPC405_ERR77(0,%2)
-" stwcx. %0,0,%2 \n\
+ PPC_STLCX "%0,0,%2 \n\
bne- 1b"
: "=&r" (t)
: "r" (a), "r" (&(l->a.counter))
@@ -45,10 +43,10 @@ static __inline__ long local_sub_return(long a, local_t *l)
long t;

__asm__ __volatile__(
-"1: lwarx %0,0,%2 # local_sub_return\n\
+"1:" PPC_LLARX "%0,0,%2 # local_sub_return\n\
subf %0,%1,%0\n"
PPC405_ERR77(0,%2)
-" stwcx. %0,0,%2 \n\
+ PPC_STLCX "%0,0,%2 \n\
bne- 1b"
: "=&r" (t)
: "r" (a), "r" (&(l->a.counter))
@@ -62,10 +60,10 @@ static __inline__ long local_inc_return(local_t *l)
long t;

__asm__ __volatile__(
-"1: lwarx %0,0,%1 # local_inc_return\n\
+"1:" PPC_LLARX "%0,0,%1 # local_inc_return\n\
addic %0,%0,1\n"
PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
+ PPC_STLCX "%0,0,%1 \n\
bne- 1b"
: "=&r" (t)
: "r" (&(l->a.counter))
@@ -89,10 +87,10 @@ static __inline__ long local_dec_return(local_t *l)
long t;

__asm__ __volatile__(
-"1: lwarx %0,0,%1 # local_dec_return\n\
+"1:" PPC_LLARX "%0,0,%1 # local_dec_return\n\
addic %0,%0,-1\n"
PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
+ PPC_STLCX "%0,0,%1\n\
bne- 1b"
: "=&r" (t)
: "r" (&(l->a.counter))
@@ -119,12 +117,12 @@ static __inline__ int local_add_unless(local_t *l, long a, long u)
long t;

__asm__ __volatile__ (
-"1: lwarx %0,0,%1 # local_add_unless\n\
+"1:" PPC_LLARX "%0,0,%1 # local_add_unless\n\
cmpw 0,%0,%3 \n\
beq- 2f \n\
add %0,%2,%0 \n"
PPC405_ERR77(0,%2)
-" stwcx. %0,0,%1 \n\
+ PPC_STLCX "%0,0,%1 \n\
bne- 1b \n"
" subf %0,%2,%0 \n\
2:"
@@ -149,12 +147,12 @@ static __inline__ long local_dec_if_positive(local_t *l)
long t;

__asm__ __volatile__(
-"1: lwarx %0,0,%1 # local_dec_if_positive\n\
+"1:" PPC_LLARX "%0,0,%1 # local_dec_if_positive\n\
cmpwi %0,1\n\
addi %0,%0,-1\n\
blt- 2f\n"
PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
+ PPC_STLCX "%0,0,%1\n\
bne- 1b"
"\n\
2:" : "=&b" (t)
@@ -164,146 +162,6 @@ static __inline__ long local_dec_if_positive(local_t *l)
return t;
}

-#else /* __powerpc64__ */
-
-static __inline__ long local_add_return(long a, local_t *l)
-{
- long t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # local_add_return\n\
- add %0,%1,%0\n\
- stdcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
-}
-
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
-{
- long t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 # local_sub_return\n\
- subf %0,%1,%0\n\
- stdcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
-}
-
-static __inline__ long local_inc_return(local_t *l)
-{
- long t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%1 # local_inc_return\n\
- addic %0,%0,1\n\
- stdcx. %0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
-}
-
-/*
- * local_inc_and_test - increment and test
- * @l: pointer of type local_t
- *
- * Atomically increments @l by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
-
-static __inline__ long local_dec_return(local_t *l)
-{
- long t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%1 # local_dec_return\n\
- addic %0,%0,-1\n\
- stdcx. %0,0,%1\n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
-}
-
-#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l) (local_dec_return((l)) == 0)
-
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
- */
-static __inline__ long local_dec_if_positive(local_t *l)
-{
- long t;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%1 # local_dec_if_positive\n\
- addic. %0,%0,-1\n\
- blt- 2f\n\
- stdcx. %0,0,%1\n\
- bne- 1b"
- "\n\
-2:" : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
-}
-
-#define local_cmpxchg(l, o, n) \
- ((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, (n)) (xchg_local(&((l)->a.counter), (n)))
-
-/**
- * local_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to l...
- * @u: ...unless l is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-static __inline__ int local_add_unless(local_t *l, long a, long u)
-{
- long t;
-
- __asm__ __volatile__ (
-"1: ldarx %0,0,%1 # local_add_unless\n\
- cmpd 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
-" stdcx. %0,0,%1 \n\
- bne- 1b \n"
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&(l->a.counter)), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t != u;
-}
-
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#endif /* !__powerpc64__ */
-
/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
--
Mathieu Desnoyers
Computer Engineering Ph.D. Candidate, ?cole Polytechnique de Montr?al
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68