2009-12-19 00:27:06

by Christoph Lameter

[permalink] [raw]
Subject: [this_cpu_xx V8 10/16] Support generating inc/dec for this_cpu_inc/dec

Support generating inc/dec instructions. Currently we generate an
add-1 instruction instead; using inc/dec saves one byte per use of this_cpu_xx.

Signed-off-by: Christoph Lameter <[email protected]>

---
arch/x86/include/asm/percpu.h | 47 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2009-12-18 12:45:31.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2009-12-18 14:22:40.000000000 -0600
@@ -133,6 +133,29 @@ do { \
pfo_ret__; \
})

+#define percpu_var_op(op, var) /* apply one-operand insn "op" to var in place */ \
+({ \
+ switch (sizeof(var)) { /* operand-size suffix is selected from sizeof(var) */ \
+ case 1: \
+ asm(op "b "__percpu_arg(0) \
+ : "+m" (var) :); /* "+m": var is a read-write memory operand; no inputs */ \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_arg(0) \
+ : "+m" (var) :); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_arg(0) \
+ : "+m" (var) :); \
+ break; \
+ case 8: \
+ asm(op "q "__percpu_arg(0) \
+ : "+m" (var) :); \
+ break; \
+ default: __bad_percpu_size(); /* reached for any size other than 1, 2, 4 or 8 */ \
+ } \
+})
+
/*
* percpu_read() makes gcc load the percpu variable every time it is
* accessed while percpu_read_stable() allows the value to be cached.
@@ -163,6 +186,12 @@ do { \
#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_inc_1(pcp) percpu_var_op("inc", (pcp))
+#define __this_cpu_inc_2(pcp) percpu_var_op("inc", (pcp))
+#define __this_cpu_inc_4(pcp) percpu_var_op("inc", (pcp))
+#define __this_cpu_dec_1(pcp) percpu_var_op("dec", (pcp))
+#define __this_cpu_dec_2(pcp) percpu_var_op("dec", (pcp))
+#define __this_cpu_dec_4(pcp) percpu_var_op("dec", (pcp))
#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -182,6 +211,12 @@ do { \
#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_inc_1(pcp) percpu_var_op("inc", (pcp))
+#define this_cpu_inc_2(pcp) percpu_var_op("inc", (pcp))
+#define this_cpu_inc_4(pcp) percpu_var_op("inc", (pcp))
+#define this_cpu_dec_1(pcp) percpu_var_op("dec", (pcp))
+#define this_cpu_dec_2(pcp) percpu_var_op("dec", (pcp))
+#define this_cpu_dec_4(pcp) percpu_var_op("dec", (pcp))
#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -195,6 +230,12 @@ do { \
#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_inc_1(pcp) percpu_var_op("inc", (pcp))
+#define irqsafe_cpu_inc_2(pcp) percpu_var_op("inc", (pcp))
+#define irqsafe_cpu_inc_4(pcp) percpu_var_op("inc", (pcp))
+#define irqsafe_cpu_dec_1(pcp) percpu_var_op("dec", (pcp))
+#define irqsafe_cpu_dec_2(pcp) percpu_var_op("dec", (pcp))
+#define irqsafe_cpu_dec_4(pcp) percpu_var_op("dec", (pcp))
#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -213,6 +254,8 @@ do { \
#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_inc_8(pcp) percpu_var_op("inc", (pcp))
+#define __this_cpu_dec_8(pcp) percpu_var_op("dec", (pcp))
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
@@ -220,11 +263,15 @@ do { \
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_inc_8(pcp) percpu_var_op("inc", (pcp))
+#define this_cpu_dec_8(pcp) percpu_var_op("dec", (pcp))
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)

#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_inc_8(pcp) percpu_var_op("inc", (pcp))
+#define irqsafe_cpu_dec_8(pcp) percpu_var_op("dec", (pcp))
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)

--