From: Sebastian Siewior Subject: [PATCH 1/1] Crypto: [xp ]cbc: use 64bit regs on 64bit machines Date: Thu, 14 Jun 2007 00:20:06 +0200 Message-ID: <20070613222006.GA4585@Chamillionaire.breakpoint.cc> Mime-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 To: linux-crypto@vger.kernel.org Return-path: Received: from Chamillionaire.breakpoint.cc ([85.10.199.196]:45668 "EHLO Chamillionaire.breakpoint.cc" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751038AbXFMWUL (ORCPT ); Wed, 13 Jun 2007 18:20:11 -0400 Received: id: bigeasy by Chamillionaire.breakpoint.cc with local (easymta 1.00 BETA 1) id 1HybCA-0001Mo-Cb for linux-crypto@vger.kernel.org; Thu, 14 Jun 2007 00:20:06 +0200 Content-Disposition: inline Sender: linux-crypto-owner@vger.kernel.org List-Id: linux-crypto.vger.kernel.org Currently on 64bit machines, the xor operation takes two 32bit registers for an 8byte xor instead of one single 64bit register. This patch fixes it for the cbc, pcbc and xcbc template. A quick speed test with the tcrypt module showed for aes+cbc+dec: old: test 14 (256 bit key, 8192 byte blocks): 1 operation in 183138 cycles (8192 bytes) new: test 14 (256 bit key, 8192 byte blocks): 1 operation in 181419 cycles (8192 bytes) Maybe my computer is just as tired as I am atm. In general I think 64bit registers should be preferred. 
Signed-off-by: Sebastian Siewior Index: b/linux/crypto/cbc.c =================================================================== --- a/linux/crypto/cbc.c +++ b/linux/crypto/cbc.c @@ -17,6 +17,7 @@ #include #include #include +#include struct crypto_cbc_ctx { struct crypto_cipher *child; @@ -226,16 +227,18 @@ static void xor_quad(u8 *dst, const u8 * static void xor_64(u8 *a, const u8 *b, unsigned int bs) { +#if BITS_PER_LONG == 64 + ((u64 *)a)[0] ^= ((u64 *)b)[0]; +#else ((u32 *)a)[0] ^= ((u32 *)b)[0]; ((u32 *)a)[1] ^= ((u32 *)b)[1]; +#endif } static void xor_128(u8 *a, const u8 *b, unsigned int bs) { - ((u32 *)a)[0] ^= ((u32 *)b)[0]; - ((u32 *)a)[1] ^= ((u32 *)b)[1]; - ((u32 *)a)[2] ^= ((u32 *)b)[2]; - ((u32 *)a)[3] ^= ((u32 *)b)[3]; + xor_64(&a[0], &b[0], bs); + xor_64(&a[8], &b[8], bs); } static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) Index: b/linux/crypto/pcbc.c =================================================================== --- a/linux/crypto/pcbc.c +++ b/linux/crypto/pcbc.c @@ -21,6 +21,7 @@ #include #include #include +#include struct crypto_pcbc_ctx { struct crypto_cipher *child; @@ -230,16 +231,18 @@ static void xor_quad(u8 *dst, const u8 * static void xor_64(u8 *a, const u8 *b, unsigned int bs) { +#if BITS_PER_LONG == 64 + ((u64 *)a)[0] ^= ((u64 *)b)[0]; +#else ((u32 *)a)[0] ^= ((u32 *)b)[0]; ((u32 *)a)[1] ^= ((u32 *)b)[1]; +#endif } static void xor_128(u8 *a, const u8 *b, unsigned int bs) { - ((u32 *)a)[0] ^= ((u32 *)b)[0]; - ((u32 *)a)[1] ^= ((u32 *)b)[1]; - ((u32 *)a)[2] ^= ((u32 *)b)[2]; - ((u32 *)a)[3] ^= ((u32 *)b)[3]; + xor_64(&a[0], &b[0], bs); + xor_64(&a[8], &b[8], bs); } static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm) Index: b/linux/crypto/xcbc.c =================================================================== --- a/linux/crypto/xcbc.c +++ b/linux/crypto/xcbc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "internal.h" static u_int32_t ks[12] = {0x01010101, 0x01010101, 0x01010101, 0x01010101, @@ -60,10 
+61,15 @@ struct crypto_xcbc_ctx { static void xor_128(u8 *a, const u8 *b, unsigned int bs) { +#if BITS_PER_LONG == 64 + ((u64 *)a)[0] ^= ((u64 *)b)[0]; + ((u64 *)a)[1] ^= ((u64 *)b)[1]; +#else ((u32 *)a)[0] ^= ((u32 *)b)[0]; ((u32 *)a)[1] ^= ((u32 *)b)[1]; ((u32 *)a)[2] ^= ((u32 *)b)[2]; ((u32 *)a)[3] ^= ((u32 *)b)[3]; +#endif } static int _crypto_xcbc_digest_setkey(struct crypto_hash *parent,