2009-11-08 10:02:43

by Joakim Tjernlund

[permalink] [raw]
Subject: [PATCH] zlib: Optimize inffast when copying direct from output

JFFS2 uses a lower compression ratio, and inflate always
ends up in the "copy direct from output" case.
This patch tries to optimize the copy procedure for
architectures that have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined.
On my MPC8321, inflating my JFFS2 root FS with this patch is
about 14% faster than with the original code.

Signed-off-by: Joakim Tjernlund <[email protected]>
---
lib/zlib_inflate/inffast.c | 35 +++++++++++++++++++++++++++++++++++
1 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 8550b0c..0588fbf 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -240,6 +240,40 @@ void inflate_fast(z_streamp strm, unsigned start)
}
else {
from = out - dist; /* copy direct from output */
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ /* minimum length is three */
+ if (dist > 2 ) {
+ unsigned short *sout = (unsigned short *)(out - OFF);
+ unsigned short *sfrom = (unsigned short *)(from - OFF);
+ unsigned long loops = len >> 1;
+
+ do
+ PUP(sout) = PUP(sfrom);
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ from = (unsigned char *)sfrom + OFF;
+ if (len & 1)
+ PUP(out) = PUP(from);
+ } else if (dist == 2) {
+ unsigned short *sout = (unsigned short *)(out - OFF);
+ unsigned short pat16;
+ unsigned long loops = len >> 1;
+
+ pat16 = *(sout-2+2*OFF);
+ do
+ PUP(sout) = pat16;
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ if (len & 1)
+ PUP(out) = PUP(from);
+ } else {
+ unsigned char pat8 = *(out - 1 + OFF);
+
+ do {
+ PUP(out) = pat8;
+ } while (--len);
+ }
+#else
do { /* minimum length is three */
PUP(out) = PUP(from);
PUP(out) = PUP(from);
@@ -251,6 +285,7 @@ void inflate_fast(z_streamp strm, unsigned start)
if (len > 1)
PUP(out) = PUP(from);
}
+#endif
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
--
1.6.4.4