Hi
Here is another update (against BK current) for crc32(). A kind soul pointed
out the optimizations below.
lib/crc32defs.h:
- Make it possible to define new values for CRC_LE_BITS/CRC_BE_BITS without
modifying the source.
lib/crc32.c:
- Eliminate the need for ENDIAN_SHIFT. This saves a 24-bit shift in the byte
loops.
- Swap the XOR expression in DO_CRC. gcc for x86 cannot do that simple
optimization itself (tested with gcc 3.2.2 and RH gcc 2.96). This improves
performance by 20-25% on x86.
Joakim Tjernlund
--- lib/crc32defs.h Wed Jan 8 01:35:35 2003
+++ lib/new.crc32defs.h Tue Feb 18 09:38:08 2003
@@ -8,8 +8,12 @@
/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */
/* For less performance-sensitive, use 4 */
-#define CRC_LE_BITS 8
-#define CRC_BE_BITS 8
+#ifndef CRC_LE_BITS
+# define CRC_LE_BITS 8
+#endif
+#ifndef CRC_BE_BITS
+# define CRC_BE_BITS 8
+#endif
/*
* Little-endian CRC computation. Used with serial bit streams sent
--- lib/crc32.c Tue Feb 18 09:29:46 2003
+++ lib/new.crc32.c Tue Feb 18 09:43:15 2003
@@ -90,19 +90,16 @@
const u32 *tab = crc32table_le;
# ifdef __LITTLE_ENDIAN
-# define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
-# define ENDIAN_SHIFT 0
+# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
# else
-# define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
-# define ENDIAN_SHIFT 24
+# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
# endif
crc = __cpu_to_le32(crc);
/* Align it */
if(unlikely(((long)b)&3 && len)){
do {
- crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
- DO_CRC;
+ DO_CRC(*((u8 *)b)++);
} while ((--len) && ((long)b)&3 );
}
if(likely(len >= 4)){
@@ -112,10 +109,10 @@
--b; /* use pre increment below(*++b) for speed */
do {
crc ^= *++b;
- DO_CRC;
- DO_CRC;
- DO_CRC;
- DO_CRC;
+ DO_CRC(0);
+ DO_CRC(0);
+ DO_CRC(0);
+ DO_CRC(0);
} while (--len);
b++; /* point to next byte(s) */
len = save_len;
@@ -123,8 +120,7 @@
/* And the last few bytes */
if(len){
do {
- crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
- DO_CRC;
+ DO_CRC(*((u8 *)b)++);
} while (--len);
}
@@ -195,19 +191,16 @@
const u32 *tab = crc32table_be;
# ifdef __LITTLE_ENDIAN
-# define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
-# define ENDIAN_SHIFT 24
+# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
# else
-# define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
-# define ENDIAN_SHIFT 0
+# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
# endif
crc = __cpu_to_be32(crc);
/* Align it */
if(unlikely(((long)b)&3 && len)){
do {
- crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
- DO_CRC;
+ DO_CRC(*((u8 *)b)++);
} while ((--len) && ((long)b)&3 );
}
if(likely(len >= 4)){
@@ -217,10 +210,10 @@
--b; /* use pre increment below(*++b) for speed */
do {
crc ^= *++b;
- DO_CRC;
- DO_CRC;
- DO_CRC;
- DO_CRC;
+ DO_CRC(0);
+ DO_CRC(0);
+ DO_CRC(0);
+ DO_CRC(0);
} while (--len);
b++; /* point to next byte(s) */
len = save_len;
@@ -228,8 +221,7 @@
/* And the last few bytes */
if(len){
do {
- crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
- DO_CRC;
+ DO_CRC(*((u8 *)b)++);
} while (--len);
}
return __be32_to_cpu(crc);