2010-01-05 22:05:47

by Christoph Lameter

[permalink] [raw]
Subject: [RFC local_t removal V1 3/4] Optimized add_local()

Use XADD to implement add_local().

Signed-off-by: Christoph Lameter <[email protected]>

---
arch/x86/include/asm/add-local.h | 56 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/include/asm/add-local.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/add-local.h 2010-01-05 15:29:11.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/add-local.h 2010-01-05 15:33:59.000000000 -0600
@@ -1,2 +1,56 @@
-#include <asm-generic/add-local.h>
+#ifndef __ASM_X86_ADD_LOCAL_H
+#define __ASM_X86_ADD_LOCAL_H
+
+#include <linux/types.h>
+#include <asm-generic/add-local-generic.h>
+
+static inline unsigned long __add_return_local(volatile void *ptr,
+ unsigned long value, int size)
+{
+ unsigned long r;
+
+#ifdef CONFIG_M386
+ if (unlikely(boot_cpu_data.x86 <= 3))
+ return __add_return_local_generic(ptr, value, size);
+#endif
+
+ /*
+ * Sanity checking, compile-time.
+ */
+ if (size == 8 && sizeof(unsigned long) != 8)
+ wrong_size_add_local(ptr);
+
+ r = value;
+ switch (size) {
+ case 1:
+ asm volatile("xaddb %0, %1;": "+r" (r), "+m" (*((u8 *)ptr)):
+ : "memory");
+ break;
+ case 2:
+ asm volatile("xaddw %0, %1;": "+r" (r), "+m" (*((u16 *)ptr)):
+ : "memory");
+ break;
+ case 4:
+ asm volatile("xaddl %0, %1;": "+r" (r), "+m" (*((u32 *)ptr)):
+ : "memory");
+ break;
+ case 8:
+ asm volatile("xaddq %0, %1;": "+r" (r), "+m" (*((u64 *)ptr)):
+ : "memory");
+ break;
+ default:
+ wrong_size_add_local(ptr);
+ }
+ return r + value;
+}
+
+#define add_return_local(ptr, v) \
+ ((__typeof__(*(ptr)))__add_return_local((ptr), (unsigned long)(v), \
+ sizeof(*(ptr))))
+
+#define add_local(ptr, v) (void)__add_return_local((ptr), (unsigned long)(v), \
+ sizeof(*(ptr)))
+
+
+#endif


--


2010-01-05 23:04:36

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [RFC local_t removal V1 3/4] Optimized add_local()

* Christoph Lameter ([email protected]) wrote:
> Use XADD to implement add_local().

xadd should only be used to implement add_return_local, not add_local.

add_local can be implemented with the "add" instruction, which is
significantly faster if my memory serves me correctly.

Thanks,

Mathieu

>
> Signed-off-by: Christoph Lameter <[email protected]>
>
> ---
> arch/x86/include/asm/add-local.h | 56 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 55 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/arch/x86/include/asm/add-local.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/add-local.h 2010-01-05 15:29:11.000000000 -0600
> +++ linux-2.6/arch/x86/include/asm/add-local.h 2010-01-05 15:33:59.000000000 -0600
> @@ -1,2 +1,56 @@
> -#include <asm-generic/add-local.h>
> +#ifndef __ASM_X86_ADD_LOCAL_H
> +#define __ASM_X86_ADD_LOCAL_H
> +
> +#include <linux/types.h>
> +#include <asm-generic/add-local-generic.h>
> +
> +static inline unsigned long __add_return_local(volatile void *ptr,
> + unsigned long value, int size)
> +{
> + unsigned long r;
> +
> +#ifdef CONFIG_M386
> + if (unlikely(boot_cpu_data.x86 <= 3))
> + return __add_return_local_generic(ptr, value, size);
> +#endif
> +
> + /*
> + * Sanity checking, compile-time.
> + */
> + if (size == 8 && sizeof(unsigned long) != 8)
> + wrong_size_add_local(ptr);
> +
> + r = value;
> + switch (size) {
> + case 1:
> + asm volatile("xaddb %0, %1;": "+r" (r), "+m" (*((u8 *)ptr)):
> + : "memory");
> + break;
> + case 2:
> + asm volatile("xaddw %0, %1;": "+r" (r), "+m" (*((u16 *)ptr)):
> + : "memory");
> + break;
> + case 4:
> + asm volatile("xaddl %0, %1;": "+r" (r), "+m" (*((u32 *)ptr)):
> + : "memory");
> + break;
> + case 8:
> + asm volatile("xaddq %0, %1;": "+r" (r), "+m" (*((u64 *)ptr)):
> + : "memory");
> + break;
> + default:
> + wrong_size_add_local(ptr);
> + }
> + return r + value;
> +}
> +
> +#define add_return_local(ptr, v) \
> + ((__typeof__(*(ptr)))__add_return_local((ptr), (unsigned long)(v), \
> + sizeof(*(ptr))))
> +
> +#define add_local(ptr, v) (void)__add_return_local((ptr), (unsigned long)(v), \
> + sizeof(*(ptr)))
> +
> +
> +#endif
>
>
> --

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2010-01-07 17:17:19

by Christoph Lameter

[permalink] [raw]
Subject: Re: [RFC local_t removal V1 3/4] Optimized add_local()

On Tue, 5 Jan 2010, Mathieu Desnoyers wrote:

> add_local can be implemented with the "add" instruction, which is
> significantly faster if my memory serves me correctly.

Yes, a full patchset would do that. Let's first see if we can come to an
agreement to go down this road before I put the effort in.