Signed-off-by: Harvey Harrison <[email protected]>
---
Depends on the unaligned access work in -mm.
crypto/salsa20_generic.c | 37 ++++++++++++++++---------------------
1 files changed, 16 insertions(+), 21 deletions(-)
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index b07d559..dbc9e4a 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/byteorder.h>
+#include <asm/unaligned.h>
#define SALSA20_IV_SIZE 8U
#define SALSA20_MIN_KEY_SIZE 16U
@@ -46,12 +47,6 @@ Public domain.
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
-#define U32TO8_LITTLE(p, v) \
- { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \
- (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; }
-#define U8TO32_LITTLE(p) \
- (((u32)((p)[0]) ) | ((u32)((p)[1]) << 8) | \
- ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24) )
struct salsa20_ctx
{
@@ -101,7 +96,7 @@ static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
for (i = 0; i < 16; ++i)
x[i] = PLUS(x[i],input[i]);
for (i = 0; i < 16; ++i)
- U32TO8_LITTLE(output + 4 * i,x[i]);
+ store_le32_noalign((__le32 *)(output + 4 * i), x[i]);
}
static const char sigma[16] = "expand 32-byte k";
@@ -111,30 +106,30 @@ static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes)
{
const char *constants;
- ctx->input[1] = U8TO32_LITTLE(k + 0);
- ctx->input[2] = U8TO32_LITTLE(k + 4);
- ctx->input[3] = U8TO32_LITTLE(k + 8);
- ctx->input[4] = U8TO32_LITTLE(k + 12);
+ ctx->input[1] = load_le32_noalign((__le32 *)(k + 0));
+ ctx->input[2] = load_le32_noalign((__le32 *)(k + 4));
+ ctx->input[3] = load_le32_noalign((__le32 *)(k + 8));
+ ctx->input[4] = load_le32_noalign((__le32 *)(k + 12));
if (kbytes == 32) { /* recommended */
k += 16;
constants = sigma;
} else { /* kbytes == 16 */
constants = tau;
}
- ctx->input[11] = U8TO32_LITTLE(k + 0);
- ctx->input[12] = U8TO32_LITTLE(k + 4);
- ctx->input[13] = U8TO32_LITTLE(k + 8);
- ctx->input[14] = U8TO32_LITTLE(k + 12);
- ctx->input[0] = U8TO32_LITTLE(constants + 0);
- ctx->input[5] = U8TO32_LITTLE(constants + 4);
- ctx->input[10] = U8TO32_LITTLE(constants + 8);
- ctx->input[15] = U8TO32_LITTLE(constants + 12);
+ ctx->input[11] = load_le32_noalign((__le32 *)(k + 0));
+ ctx->input[12] = load_le32_noalign((__le32 *)(k + 4));
+ ctx->input[13] = load_le32_noalign((__le32 *)(k + 8));
+ ctx->input[14] = load_le32_noalign((__le32 *)(k + 12));
+ ctx->input[0] = load_le32_noalign((__le32 *)(constants + 0));
+ ctx->input[5] = load_le32_noalign((__le32 *)(constants + 4));
+ ctx->input[10] = load_le32_noalign((__le32 *)(constants + 8));
+ ctx->input[15] = load_le32_noalign((__le32 *)(constants + 12));
}
static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv)
{
- ctx->input[6] = U8TO32_LITTLE(iv + 0);
- ctx->input[7] = U8TO32_LITTLE(iv + 4);
+ ctx->input[6] = load_le32_noalign((__le32 *)(iv + 0));
+ ctx->input[7] = load_le32_noalign((__le32 *)(iv + 4));
ctx->input[8] = 0;
ctx->input[9] = 0;
}
--
1.6.1.rc1.262.gb6810
On Wed, Dec 03, 2008 at 10:13:39AM -0800, Harvey Harrison wrote:
>
> @@ -101,7 +96,7 @@ static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
> for (i = 0; i < 16; ++i)
> x[i] = PLUS(x[i],input[i]);
> for (i = 0; i < 16; ++i)
> - U32TO8_LITTLE(output + 4 * i,x[i]);
> + store_le32_noalign((__le32 *)(output + 4 * i), x[i]);
We can easily make "output" 4-byte aligned since it's our buffer
on the stack.
> }
>
> static const char sigma[16] = "expand 32-byte k";
> @@ -111,30 +106,30 @@ static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes)
> {
> const char *constants;
>
> - ctx->input[1] = U8TO32_LITTLE(k + 0);
> - ctx->input[2] = U8TO32_LITTLE(k + 4);
> - ctx->input[3] = U8TO32_LITTLE(k + 8);
> - ctx->input[4] = U8TO32_LITTLE(k + 12);
> + ctx->input[1] = load_le32_noalign((__le32 *)(k + 0));
> + ctx->input[2] = load_le32_noalign((__le32 *)(k + 4));
> + ctx->input[3] = load_le32_noalign((__le32 *)(k + 8));
> + ctx->input[4] = load_le32_noalign((__le32 *)(k + 12));
Since salsa sets alignmask to 3, k should already be aligned.
> + ctx->input[11] = load_le32_noalign((__le32 *)(k + 0));
> + ctx->input[12] = load_le32_noalign((__le32 *)(k + 4));
> + ctx->input[13] = load_le32_noalign((__le32 *)(k + 8));
> + ctx->input[14] = load_le32_noalign((__le32 *)(k + 12));
Ditto.
> + ctx->input[0] = load_le32_noalign((__le32 *)(constants + 0));
> + ctx->input[5] = load_le32_noalign((__le32 *)(constants + 4));
> + ctx->input[10] = load_le32_noalign((__le32 *)(constants + 8));
> + ctx->input[15] = load_le32_noalign((__le32 *)(constants + 12));
We should mark "constants" with the proper alignment.
> static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv)
> {
> - ctx->input[6] = U8TO32_LITTLE(iv + 0);
> - ctx->input[7] = U8TO32_LITTLE(iv + 4);
> + ctx->input[6] = load_le32_noalign((__le32 *)(iv + 0));
> + ctx->input[7] = load_le32_noalign((__le32 *)(iv + 4));
The IV should already be aligned.
So please simply get rid of all the unaligned ops and add the
appropriate alignment marking on constants and the buffer on
the stack.
Thanks,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt