2006-05-31 01:06:12

by Chuck Ebbert

[permalink] [raw]
Subject: [patch 2.6.17-rc5 2/2] i386 memcpy: minor optimization

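When the memcpy size is divisible by four, skip one more instruction: the
"jz" now branches past the reload of %ecx as well as the trailing
"rep ; movsb", instead of past the "rep ; movsb" alone.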

Signed-off-by: Chuck Ebbert <[email protected]>

--- 2.6.17-rc5-32.orig/include/asm-i386/string.h
+++ 2.6.17-rc5-32/include/asm-i386/string.h
@@ -203,20 +203,20 @@ return __res;

static __always_inline void * __memcpy(void * to, const void * from, size_t n)
{
-int d0, d1, d2;
+int d0, d1, d2, d3;
__asm__ __volatile__(
"rep ; movsl\n\t"
- "movl %4,%%ecx\n\t"
- "andl $3,%%ecx\n\t"
-#if 1 /* want to pay 2 byte penalty for a chance to skip microcoded rep? */
+ "andl $3,%3\n\t"
+#if 1 /* want to pay 2 byte penalty for a chance to skip a mov and a microcoded rep? */
"jz 1f\n\t"
#endif
+ "movl %3,%%ecx\n\t"
"rep ; movsb\n\t"
"1:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from)
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2), "=&g" (d3)
+ : "0" (n/4), "1" ((long) to), "2" ((long) from), "3" (n)
: "memory");
-return (to);
+return to;
}

/*
--
Chuck