Subject: [PATCH 1/1] Crypto: [xp ]cbc: use 64bit regs on 64bit machines

Currently on 64bit machines, the xor operation uses two 32bit registers
for an 8byte xor instead of a single 64bit register. This patch fixes that
for the cbc, pcbc and xcbc templates.

A quick speed test with the tcrypt module showed for aes+cbc+dec:
old:
test 14 (256 bit key, 8192 byte blocks): 1 operation in 183138 cycles
(8192 bytes)
new:
test 14 (256 bit key, 8192 byte blocks): 1 operation in 181419 cycles
(8192 bytes)
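
That is only (183138 - 181419) / 183138, roughly 0.9% fewer cycles in
this particular run.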

Maybe my computer is just as tired as I am atm. In general I think 64bit
registers should be preferred.

Signed-off-by: Sebastian Siewior <[email protected]>
Index: b/linux/crypto/cbc.c
===================================================================
--- a/linux/crypto/cbc.c
+++ b/linux/crypto/cbc.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/types.h>

struct crypto_cbc_ctx {
struct crypto_cipher *child;
@@ -226,16 +227,18 @@ static void xor_quad(u8 *dst, const u8 *

static void xor_64(u8 *a, const u8 *b, unsigned int bs)
{
+#if BITS_PER_LONG == 64
+ ((u64 *)a)[0] ^= ((u64 *)b)[0];
+#else
((u32 *)a)[0] ^= ((u32 *)b)[0];
((u32 *)a)[1] ^= ((u32 *)b)[1];
+#endif
}

static void xor_128(u8 *a, const u8 *b, unsigned int bs)
{
- ((u32 *)a)[0] ^= ((u32 *)b)[0];
- ((u32 *)a)[1] ^= ((u32 *)b)[1];
- ((u32 *)a)[2] ^= ((u32 *)b)[2];
- ((u32 *)a)[3] ^= ((u32 *)b)[3];
+ xor_64(&a[0], &b[0], bs);
+ xor_64(&a[8], &b[8], bs);
}

static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
Index: b/linux/crypto/pcbc.c
===================================================================
--- a/linux/crypto/pcbc.c
+++ b/linux/crypto/pcbc.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/types.h>

struct crypto_pcbc_ctx {
struct crypto_cipher *child;
@@ -230,16 +231,18 @@ static void xor_quad(u8 *dst, const u8 *

static void xor_64(u8 *a, const u8 *b, unsigned int bs)
{
+#if BITS_PER_LONG == 64
+ ((u64 *)a)[0] ^= ((u64 *)b)[0];
+#else
((u32 *)a)[0] ^= ((u32 *)b)[0];
((u32 *)a)[1] ^= ((u32 *)b)[1];
+#endif
}

static void xor_128(u8 *a, const u8 *b, unsigned int bs)
{
- ((u32 *)a)[0] ^= ((u32 *)b)[0];
- ((u32 *)a)[1] ^= ((u32 *)b)[1];
- ((u32 *)a)[2] ^= ((u32 *)b)[2];
- ((u32 *)a)[3] ^= ((u32 *)b)[3];
+ xor_64(&a[0], &b[0], bs);
+ xor_64(&a[8], &b[8], bs);
}

static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
Index: b/linux/crypto/xcbc.c
===================================================================
--- a/linux/crypto/xcbc.c
+++ b/linux/crypto/xcbc.c
@@ -27,6 +27,7 @@
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
+#include <linux/types.h>
#include "internal.h"

static u_int32_t ks[12] = {0x01010101, 0x01010101, 0x01010101, 0x01010101,
@@ -60,10 +61,15 @@ struct crypto_xcbc_ctx {

static void xor_128(u8 *a, const u8 *b, unsigned int bs)
{
+#if BITS_PER_LONG == 64
+ ((u64 *)a)[0] ^= ((u64 *)b)[0];
+ ((u64 *)a)[1] ^= ((u64 *)b)[1];
+#else
((u32 *)a)[0] ^= ((u32 *)b)[0];
((u32 *)a)[1] ^= ((u32 *)b)[1];
((u32 *)a)[2] ^= ((u32 *)b)[2];
((u32 *)a)[3] ^= ((u32 *)b)[3];
+#endif
}

static int _crypto_xcbc_digest_setkey(struct crypto_hash *parent,


2007-06-14 10:58:06

by Evgeniy Polyakov

Subject: Re: [PATCH 1/1] Crypto: [xp ]cbc: use 64bit regs on 64bit machines

On Thu, Jun 14, 2007 at 12:20:06AM +0200, Sebastian Siewior ([email protected]) wrote:
> static void xor_64(u8 *a, const u8 *b, unsigned int bs)
> {
> +#if BITS_PER_LONG == 64
> + ((u64 *)a)[0] ^= ((u64 *)b)[0];
> +#else
> ((u32 *)a)[0] ^= ((u32 *)b)[0];
> ((u32 *)a)[1] ^= ((u32 *)b)[1];
> +#endif
> }

What about endianness?

--
Evgeniy Polyakov

Subject: Re: [PATCH 1/1] Crypto: [xp ]cbc: use 64bit regs on 64bit machines

* Evgeniy Polyakov | 2007-06-14 14:57:45 [+0400]:

>On Thu, Jun 14, 2007 at 12:20:06AM +0200, Sebastian Siewior ([email protected]) wrote:
>> static void xor_64(u8 *a, const u8 *b, unsigned int bs)
>> {
>> +#if BITS_PER_LONG == 64
>> + ((u64 *)a)[0] ^= ((u64 *)b)[0];
>> +#else
>> ((u32 *)a)[0] ^= ((u32 *)b)[0];
>> ((u32 *)a)[1] ^= ((u32 *)b)[1];
>> +#endif
>> }
>
>What about endianness?

It doesn't affect the xor operation. Either I load both LE, xor, store
LE or load both BE, xor, store BE. The result is always the same. An
additional endian conversion would just make things slower.
Since you confused me a little, I tested it on ppc and amd64 :)
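
Roughly what the check looks like as a standalone userspace sketch (not
the exact test I ran on those boxes, just the idea; the buffer contents
are arbitrary):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t a[16], b[16], byte_wise[16], word_wise[16];
	unsigned int i;

	/* arbitrary test data */
	for (i = 0; i < 16; i++) {
		a[i] = (uint8_t)(i * 17 + 3);
		b[i] = (uint8_t)(i * 29 + 7);
	}

	/* reference: xor byte by byte, byte order never enters the picture */
	for (i = 0; i < 16; i++)
		byte_wise[i] = a[i] ^ b[i];

	/* same xor done on 64bit words, as the patched xor_128() does;
	 * memcpy is used here only to dodge alignment trouble in the demo */
	for (i = 0; i < 16; i += 8) {
		uint64_t wa, wb;

		memcpy(&wa, a + i, 8);
		memcpy(&wb, b + i, 8);
		wa ^= wb;
		memcpy(word_wise + i, &wa, 8);
	}

	/* prints "match" on LE and BE alike: whatever byte swap happens on
	 * load is undone on store, and xor works bit by bit in between */
	printf("%s\n", memcmp(byte_wise, word_wise, 16) ? "differ" : "match");
	return 0;
}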

>
>--
> Evgeniy Polyakov

Sebastian

2007-06-15 06:10:49

by Evgeniy Polyakov

Subject: Re: [PATCH 1/1] Crypto: [xp ]cbc: use 64bit regs on 64bit machines

On Thu, Jun 14, 2007 at 01:47:19PM +0200, Sebastian Siewior ([email protected]) wrote:
> * Evgeniy Polyakov | 2007-06-14 14:57:45 [+0400]:
>
> >On Thu, Jun 14, 2007 at 12:20:06AM +0200, Sebastian Siewior ([email protected]) wrote:
> >> static void xor_64(u8 *a, const u8 *b, unsigned int bs)
> >> {
> >> +#if BITS_PER_LONG == 64
> >> + ((u64 *)a)[0] ^= ((u64 *)b)[0];
> >> +#else
> >> ((u32 *)a)[0] ^= ((u32 *)b)[0];
> >> ((u32 *)a)[1] ^= ((u32 *)b)[1];
> >> +#endif
> >> }
> >
> >What about endianness?
>
> It doesn't affect the xor operation. Either I load both LE, xor, store
> LE or load both BE, xor, store BE. The result is always the same. An
> additional endian conversion would just make things slower.
> Since you confused me a little, I tested it on ppc and amd64 :)

Yeah, the logic is correct for either endianness.

--
Evgeniy Polyakov