2013-01-19 11:38:48

by Jussi Kivilinna

Subject: [PATCH 00/12] crypto: x86 assembler cleanups

Change x86 crypto implementations to use ENTRY()/ENDPROC() as linux/linkage.h
recommends use of ENDPROC() for assembler functions.

While at it, also change jump targets to local labels for some implementations.
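
For reference, the conversion pattern looks roughly like this (a minimal sketch; example_func and .Ldone are made-up names, not taken from any of the patches below). linux/linkage.h must be included near the top of the file for ENTRY()/ENDPROC().

Old style:

	.align 8
	.global example_func
	.type example_func,@function;
example_func:
	/* ... function body ... */
	jnz done;
	/* ... */
done:
	ret;

New style:

ENTRY(example_func)
	/* ... function body ... */
	jnz .Ldone;
	/* ... */
.Ldone:
	ret;
ENDPROC(example_func)

ENTRY() supplies the .globl and alignment boilerplate, ENDPROC() marks the symbol as a function and records its size for debugging/tracing, and the .L prefix keeps the local jump targets out of the symbol table.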

---

Jussi Kivilinna (12):
crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets
crypto: aesni-intel - add ENDPROC statements for assembler functions
crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions
crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC
crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assembler functions
crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assembler functions and rename ECRYPT_* to salsa20_*
crypto: x86/serpent - use ENTRY/ENDPROC for assembler functions and localize jump targets
crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC
crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels


arch/x86/crypto/aes-i586-asm_32.S | 15 +++-----
arch/x86/crypto/aes-x86_64-asm_64.S | 30 ++++++++--------
arch/x86/crypto/aesni-intel_asm.S | 23 +++++++++++-
arch/x86/crypto/blowfish-x86_64-asm_64.S | 39 +++++++-------------
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38 +++++++-------------
arch/x86/crypto/camellia-x86_64-asm_64.S | 50 +++++++++++---------------
arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48 +++++++++----------------
arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35 ++++++------------
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8 +++-
arch/x86/crypto/ghash-clmulni-intel_asm.S | 4 ++
arch/x86/crypto/salsa20-i586-asm_32.S | 28 +++++++--------
arch/x86/crypto/salsa20-x86_64-asm_64.S | 28 +++++++--------
arch/x86/crypto/salsa20_glue.c | 5 ---
arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35 ++++++------------
arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20 ++++------
arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20 ++++------
arch/x86/crypto/sha1_ssse3_asm.S | 10 +++--
arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35 ++++++------------
arch/x86/crypto/twofish-i586-asm_32.S | 11 +++---
arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20 ++++------
arch/x86/crypto/twofish-x86_64-asm_64.S | 11 +++---
21 files changed, 219 insertions(+), 294 deletions(-)

--


2013-01-19 11:38:53

by Jussi Kivilinna

Subject: [PATCH 01/12] crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/aes-i586-asm_32.S | 15 +++++----------
arch/x86/crypto/aes-x86_64-asm_64.S | 30 +++++++++++++++---------------
2 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index b949ec2..2849dbc 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -36,6 +36,7 @@
.file "aes-i586-asm.S"
.text

+#include <linux/linkage.h>
#include <asm/asm-offsets.h>

#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
@@ -219,14 +220,10 @@
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */

-.global aes_enc_blk
-
.extern crypto_ft_tab
.extern crypto_fl_tab

-.align 4
-
-aes_enc_blk:
+ENTRY(aes_enc_blk)
push %ebp
mov ctx(%esp),%ebp

@@ -290,18 +287,15 @@ aes_enc_blk:
mov %r0,(%ebp)
pop %ebp
ret
+ENDPROC(aes_enc_blk)

// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */

-.global aes_dec_blk
-
.extern crypto_it_tab
.extern crypto_il_tab

-.align 4
-
-aes_dec_blk:
+ENTRY(aes_dec_blk)
push %ebp
mov ctx(%esp),%ebp

@@ -365,3 +359,4 @@ aes_dec_blk:
mov %r0,(%ebp)
pop %ebp
ret
+ENDPROC(aes_dec_blk)
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 5b577d5..9105655 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -15,6 +15,7 @@

.text

+#include <linux/linkage.h>
#include <asm/asm-offsets.h>

#define R1 %rax
@@ -49,10 +50,8 @@
#define R11 %r11

#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
- .global FUNC; \
- .type FUNC,@function; \
- .align 8; \
-FUNC: movq r1,r2; \
+ ENTRY(FUNC); \
+ movq r1,r2; \
movq r3,r4; \
leaq KEY+48(r8),r9; \
movq r10,r11; \
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \
je B192; \
leaq 32(r9),r9;

-#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
movq r1,r2; \
movq r3,r4; \
movl r5 ## E,(r9); \
movl r6 ## E,4(r9); \
movl r7 ## E,8(r9); \
movl r8 ## E,12(r9); \
- ret;
+ ret; \
+ ENDPROC(FUNC);

#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \
#define entry(FUNC,KEY,B128,B192) \
prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)

#define encrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \

/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */

- entry(aes_enc_blk,0,enc128,enc192)
+ entry(aes_enc_blk,0,.Le128,.Le192)
encrypt_round(crypto_ft_tab,-96)
encrypt_round(crypto_ft_tab,-80)
-enc192: encrypt_round(crypto_ft_tab,-64)
+.Le192: encrypt_round(crypto_ft_tab,-64)
encrypt_round(crypto_ft_tab,-48)
-enc128: encrypt_round(crypto_ft_tab,-32)
+.Le128: encrypt_round(crypto_ft_tab,-32)
encrypt_round(crypto_ft_tab,-16)
encrypt_round(crypto_ft_tab, 0)
encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32)
encrypt_round(crypto_ft_tab, 80)
encrypt_round(crypto_ft_tab, 96)
encrypt_final(crypto_fl_tab,112)
- return
+ return(aes_enc_blk)

/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */

- entry(aes_dec_blk,240,dec128,dec192)
+ entry(aes_dec_blk,240,.Ld128,.Ld192)
decrypt_round(crypto_it_tab,-96)
decrypt_round(crypto_it_tab,-80)
-dec192: decrypt_round(crypto_it_tab,-64)
+.Ld192: decrypt_round(crypto_it_tab,-64)
decrypt_round(crypto_it_tab,-48)
-dec128: decrypt_round(crypto_it_tab,-32)
+.Ld128: decrypt_round(crypto_it_tab,-32)
decrypt_round(crypto_it_tab,-16)
decrypt_round(crypto_it_tab, 0)
decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32)
decrypt_round(crypto_it_tab, 80)
decrypt_round(crypto_it_tab, 96)
decrypt_final(crypto_il_tab,112)
- return
+ return(aes_dec_blk)

2013-01-19 11:39:03

by Jussi Kivilinna

Subject: [PATCH 03/12] crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/blowfish-x86_64-asm_64.S | 39 +++++++++++-------------------
1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 391d245..246c670 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -20,6 +20,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "blowfish-x86_64-asm.S"
.text

@@ -116,11 +118,7 @@
bswapq RX0; \
xorq RX0, (RIO);

-.align 8
-.global __blowfish_enc_blk
-.type __blowfish_enc_blk,@function;
-
-__blowfish_enc_blk:
+ENTRY(__blowfish_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:

movq %r10, RIO;
test %cl, %cl;
- jnz __enc_xor;
+ jnz .L__enc_xor;

write_block();
ret;
-__enc_xor:
+.L__enc_xor:
xor_block();
ret;
+ENDPROC(__blowfish_enc_blk)

-.align 8
-.global blowfish_dec_blk
-.type blowfish_dec_blk,@function;
-
-blowfish_dec_blk:
+ENTRY(blowfish_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
movq %r11, %rbp;

ret;
+ENDPROC(blowfish_dec_blk)

/**********************************************************************
4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
bswapq RX3; \
xorq RX3, 24(RIO);

-.align 8
-.global __blowfish_enc_blk_4way
-.type __blowfish_enc_blk_4way,@function;
-
-__blowfish_enc_blk_4way:
+ENTRY(__blowfish_enc_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
movq %r11, RIO;

test %bpl, %bpl;
- jnz __enc_xor4;
+ jnz .L__enc_xor4;

write_block4();

@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
popq %rbp;
ret;

-__enc_xor4:
+.L__enc_xor4:
xor_block4();

popq %rbx;
popq %rbp;
ret;
+ENDPROC(__blowfish_enc_blk_4way)

-.align 8
-.global blowfish_dec_blk_4way
-.type blowfish_dec_blk_4way,@function;
-
-blowfish_dec_blk_4way:
+ENTRY(blowfish_dec_blk_4way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
popq %rbp;

ret;
-
+ENDPROC(blowfish_dec_blk_4way)

2013-01-19 11:38:58

by Jussi Kivilinna

Subject: [PATCH 02/12] crypto: aesni-intel - add ENDPROC statements for assembler functions

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/aesni-intel_asm.S | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3470624..04b7977 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
* poly = x^128 + x^127 + x^126 + x^121 + 1
*
*****************************************************************************/
-
ENTRY(aesni_gcm_dec)
push %r12
push %r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
pop %r13
pop %r12
ret
+ENDPROC(aesni_gcm_dec)


/*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
pop %r13
pop %r12
ret
+ENDPROC(aesni_gcm_enc)

#endif


+.align 4
_key_expansion_128:
_key_expansion_256a:
pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
+ENDPROC(_key_expansion_128)
+ENDPROC(_key_expansion_256a)

.align 4
_key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
movaps %xmm1, 0x10(TKEYP)
add $0x20, TKEYP
ret
+ENDPROC(_key_expansion_192a)

.align 4
_key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
+ENDPROC(_key_expansion_192b)

.align 4
_key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
ret
+ENDPROC(_key_expansion_256b)

/*
* int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
popl KEYP
#endif
ret
+ENDPROC(aesni_set_key)

/*
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
popl KEYP
#endif
ret
+ENDPROC(aesni_enc)

/*
* _aesni_enc1: internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
movaps 0x70(TKEYP), KEY
AESENCLAST KEY STATE
ret
+ENDPROC(_aesni_enc1)

/*
* _aesni_enc4: internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
AESENCLAST KEY STATE3
AESENCLAST KEY STATE4
ret
+ENDPROC(_aesni_enc4)

/*
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
popl KEYP
#endif
ret
+ENDPROC(aesni_dec)

/*
* _aesni_dec1: internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
movaps 0x70(TKEYP), KEY
AESDECLAST KEY STATE
ret
+ENDPROC(_aesni_dec1)

/*
* _aesni_dec4: internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
AESDECLAST KEY STATE3
AESDECLAST KEY STATE4
ret
+ENDPROC(_aesni_dec4)

/*
* void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
popl LEN
#endif
ret
+ENDPROC(aesni_ecb_enc)

/*
* void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
popl LEN
#endif
ret
+ENDPROC(aesni_ecb_dec)

/*
* void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
popl IVP
#endif
ret
+ENDPROC(aesni_cbc_enc)

/*
* void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
popl IVP
#endif
ret
+ENDPROC(aesni_cbc_dec)

#ifdef __x86_64__
.align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
MOVQ_R64_XMM TCTR_LOW INC
MOVQ_R64_XMM CTR TCTR_LOW
ret
+ENDPROC(_aesni_inc_init)

/*
* _aesni_inc: internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
movaps CTR, IV
PSHUFB_XMM BSWAP_MASK IV
ret
+ENDPROC(_aesni_inc)

/*
* void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
movups IV, (IVP)
.Lctr_enc_just_ret:
ret
+ENDPROC(aesni_ctr_enc)
#endif

2013-01-19 11:39:09

by Jussi Kivilinna

Subject: [PATCH 04/12] crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38 ++++++++-------------
arch/x86/crypto/camellia-x86_64-asm_64.S | 50 ++++++++++++---------------
2 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2306d2e..cfc1634 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -15,6 +15,8 @@
* http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
*/

+#include <linux/linkage.h>
+
#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
ret;
+ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)

.align 8
roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
ret;
+ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)

/*
* IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
.text

.align 8
-.type __camellia_enc_blk16,@function;
-
__camellia_enc_blk16:
/* input:
* %rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
%xmm15, %rax, %rcx, 24);

jmp .Lenc_done;
+ENDPROC(__camellia_enc_blk16)

.align 8
-.type __camellia_dec_blk16,@function;
-
__camellia_dec_blk16:
/* input:
* %rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
((key_table + (24) * 8) + 4)(CTX));

jmp .Ldec_max24;
+ENDPROC(__camellia_dec_blk16)

-.align 8
-.global camellia_ecb_enc_16way
-.type camellia_ecb_enc_16way,@function;
-
-camellia_ecb_enc_16way:
+ENTRY(camellia_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
%xmm8, %rsi);

ret;
+ENDPROC(camellia_ecb_enc_16way)

-.align 8
-.global camellia_ecb_dec_16way
-.type camellia_ecb_dec_16way,@function;
-
-camellia_ecb_dec_16way:
+ENTRY(camellia_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
%xmm8, %rsi);

ret;
+ENDPROC(camellia_ecb_dec_16way)

-.align 8
-.global camellia_cbc_dec_16way
-.type camellia_cbc_dec_16way,@function;
-
-camellia_cbc_dec_16way:
+ENTRY(camellia_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
%xmm8, %rsi);

ret;
+ENDPROC(camellia_cbc_dec_16way)

#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;

-.align 8
-.global camellia_ctr_16way
-.type camellia_ctr_16way,@function;
-
-camellia_ctr_16way:
+ENTRY(camellia_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
%xmm8, %rsi);

ret;
+ENDPROC(camellia_ctr_16way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 0b33743..310319c 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -20,6 +20,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "camellia-x86_64-asm_64.S"
.text

@@ -188,10 +190,7 @@
bswapq RAB0; \
movq RAB0, 4*2(RIO);

-.global __camellia_enc_blk;
-.type __camellia_enc_blk,@function;
-
-__camellia_enc_blk:
+ENTRY(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
movl $24, RT1d; /* max */

cmpb $16, key_length(CTX);
- je __enc_done;
+ je .L__enc_done;

enc_fls(24);
enc_rounds(24);
movl $32, RT1d; /* max */

-__enc_done:
+.L__enc_done:
testb RXORbl, RXORbl;
movq RDST, RIO;

- jnz __enc_xor;
+ jnz .L__enc_xor;

enc_outunpack(mov, RT1);

movq RRBP, %rbp;
ret;

-__enc_xor:
+.L__enc_xor:
enc_outunpack(xor, RT1);

movq RRBP, %rbp;
ret;
+ENDPROC(__camellia_enc_blk)

-.global camellia_dec_blk;
-.type camellia_dec_blk,@function;
-
-camellia_dec_blk:
+ENTRY(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
dec_inpack(RT2);

cmpb $24, RT2bl;
- je __dec_rounds16;
+ je .L__dec_rounds16;

dec_rounds(24);
dec_fls(24);

-__dec_rounds16:
+.L__dec_rounds16:
dec_rounds(16);
dec_fls(16);
dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:

movq RRBP, %rbp;
ret;
+ENDPROC(camellia_dec_blk)

/**********************************************************************
2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
bswapq RAB1; \
movq RAB1, 12*2(RIO);

-.global __camellia_enc_blk_2way;
-.type __camellia_enc_blk_2way,@function;
-
-__camellia_enc_blk_2way:
+ENTRY(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
movl $24, RT2d; /* max */

cmpb $16, key_length(CTX);
- je __enc2_done;
+ je .L__enc2_done;

enc_fls2(24);
enc_rounds2(24);
movl $32, RT2d; /* max */

-__enc2_done:
+.L__enc2_done:
test RXORbl, RXORbl;
movq RDST, RIO;
- jnz __enc2_xor;
+ jnz .L__enc2_xor;

enc_outunpack2(mov, RT2);

@@ -470,17 +465,15 @@ __enc2_done:
popq %rbx;
ret;

-__enc2_xor:
+.L__enc2_xor:
enc_outunpack2(xor, RT2);

movq RRBP, %rbp;
popq %rbx;
ret;
+ENDPROC(__camellia_enc_blk_2way)

-.global camellia_dec_blk_2way;
-.type camellia_dec_blk_2way,@function;
-
-camellia_dec_blk_2way:
+ENTRY(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
dec_inpack2(RT2);

cmpb $24, RT2bl;
- je __dec2_rounds16;
+ je .L__dec2_rounds16;

dec_rounds2(24);
dec_fls2(24);

-__dec2_rounds16:
+.L__dec2_rounds16:
dec_rounds2(16);
dec_fls2(16);
dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
movq RRBP, %rbp;
movq RXOR, %rbx;
ret;
+ENDPROC(camellia_dec_blk_2way)

2013-01-19 11:39:14

by Jussi Kivilinna

Subject: [PATCH 05/12] crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48 +++++++++++------------------
1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 15b00ac..c35fd5d 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -23,6 +23,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "cast5-avx-x86_64-asm_64.S"

.extern cast_s1
@@ -211,8 +213,6 @@
.text

.align 16
-.type __cast5_enc_blk16,@function;
-
__cast5_enc_blk16:
/* input:
* %rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:

movzbl rr(CTX), %eax;
testl %eax, %eax;
- jnz __skip_enc;
+ jnz .L__skip_enc;

round(RL, RR, 12, 1);
round(RR, RL, 13, 2);
round(RL, RR, 14, 3);
round(RR, RL, 15, 1);

-__skip_enc:
+.L__skip_enc:
popq %rbx;
popq %rbp;

@@ -282,10 +282,9 @@ __skip_enc:
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);

ret;
+ENDPROC(__cast5_enc_blk16)

.align 16
-.type __cast5_dec_blk16,@function;
-
__cast5_dec_blk16:
/* input:
* %rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:

movzbl rr(CTX), %eax;
testl %eax, %eax;
- jnz __skip_dec;
+ jnz .L__skip_dec;

round(RL, RR, 15, 1);
round(RR, RL, 14, 3);
round(RL, RR, 13, 2);
round(RR, RL, 12, 1);

-__dec_tail:
+.L__dec_tail:
round(RL, RR, 11, 3);
round(RR, RL, 10, 2);
round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:

ret;

-__skip_dec:
+.L__skip_dec:
vpsrldq $4, RKR, RKR;
- jmp __dec_tail;
+ jmp .L__dec_tail;
+ENDPROC(__cast5_dec_blk16)

-.align 16
-.global cast5_ecb_enc_16way
-.type cast5_ecb_enc_16way,@function;
-
-cast5_ecb_enc_16way:
+ENTRY(cast5_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
vmovdqu RL4, (7*4*4)(%r11);

ret;
+ENDPROC(cast5_ecb_enc_16way)

-.align 16
-.global cast5_ecb_dec_16way
-.type cast5_ecb_dec_16way,@function;
-
-cast5_ecb_dec_16way:
+ENTRY(cast5_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
vmovdqu RL4, (7*4*4)(%r11);

ret;
+ENDPROC(cast5_ecb_dec_16way)

-.align 16
-.global cast5_cbc_dec_16way
-.type cast5_cbc_dec_16way,@function;
-
-cast5_cbc_dec_16way:
+ENTRY(cast5_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
popq %r12;

ret;
+ENDPROC(cast5_cbc_dec_16way)

-.align 16
-.global cast5_ctr_16way
-.type cast5_ctr_16way,@function;
-
-cast5_ctr_16way:
+ENTRY(cast5_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
popq %r12;

ret;
+ENDPROC(cast5_ctr_16way)

2013-01-19 11:39:18

by Jussi Kivilinna

Subject: [PATCH 06/12] crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35 +++++++++--------------------
1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 2569d0d..f93b610 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
*
*/

+#include <linux/linkage.h>
#include "glue_helper-asm-avx.S"

.file "cast6-avx-x86_64-asm_64.S"
@@ -250,8 +251,6 @@
.text

.align 8
-.type __cast6_enc_blk8,@function;
-
__cast6_enc_blk8:
/* input:
* %rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

ret;
+ENDPROC(__cast6_enc_blk8)

.align 8
-.type __cast6_dec_blk8,@function;
-
__cast6_dec_blk8:
/* input:
* %rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

ret;
+ENDPROC(__cast6_dec_blk8)

-.align 8
-.global cast6_ecb_enc_8way
-.type cast6_ecb_enc_8way,@function;
-
-cast6_ecb_enc_8way:
+ENTRY(cast6_ecb_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
+ENDPROC(cast6_ecb_enc_8way)

-.align 8
-.global cast6_ecb_dec_8way
-.type cast6_ecb_dec_8way,@function;
-
-cast6_ecb_dec_8way:
+ENTRY(cast6_ecb_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
+ENDPROC(cast6_ecb_dec_8way)

-.align 8
-.global cast6_cbc_dec_8way
-.type cast6_cbc_dec_8way,@function;
-
-cast6_cbc_dec_8way:
+ENTRY(cast6_cbc_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
popq %r12;

ret;
+ENDPROC(cast6_cbc_dec_8way)

-.align 8
-.global cast6_ctr_8way
-.type cast6_ctr_8way,@function;
-
-cast6_ctr_8way:
+ENTRY(cast6_ctr_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
popq %r12;

ret;
+ENDPROC(cast6_ctr_8way)

2013-01-19 11:39:23

by Jussi Kivilinna

Subject: [PATCH 07/12] crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 93c6d39..cf1a7ec 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -42,6 +42,8 @@
* SOFTWARE.
*/

+#include <linux/linkage.h>
+
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction

.macro LABEL prefix n
@@ -68,8 +70,7 @@

# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);

-.global crc_pcl
-crc_pcl:
+ENTRY(crc_pcl)
#define bufp %rdi
#define bufp_dw %edi
#define bufp_w %di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
.noaltmacro
i=i+1
.endr
+
+ENDPROC(crc_pcl)
+
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 quad words each

2013-01-19 11:39:28

by Jussi Kivilinna

Subject: [PATCH 08/12] crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assembler functions

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/ghash-clmulni-intel_asm.S | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 1eb7f90..586f41a 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
pxor T2, T1
pxor T1, DATA
ret
+ENDPROC(__clmul_gf128mul_ble)

/* void clmul_ghash_mul(char *dst, const be128 *shash) */
ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
PSHUFB_XMM BSWAP DATA
movups DATA, (%rdi)
ret
+ENDPROC(clmul_ghash_mul)

/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
movups DATA, (%rdi)
.Lupdate_just_ret:
ret
+ENDPROC(clmul_ghash_update)

/*
* void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
pxor %xmm1, %xmm0
movups %xmm0, (%rdi)
ret
+ENDPROC(clmul_ghash_setkey)

2013-01-19 11:39:33

by Jussi Kivilinna

Subject: [PATCH 09/12] crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assembler functions and rename ECRYPT_* to salsa20_*

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/salsa20-i586-asm_32.S | 28 ++++++++++++++--------------
arch/x86/crypto/salsa20-x86_64-asm_64.S | 28 +++++++++++++---------------
arch/x86/crypto/salsa20_glue.c | 5 -----
3 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
index 72eb306..329452b 100644
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -2,11 +2,12 @@
# D. J. Bernstein
# Public domain.

-# enter ECRYPT_encrypt_bytes
+#include <linux/linkage.h>
+
.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
mov %esp,%eax
and $31,%eax
add $256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
add $64,%esi
# goto bytesatleast1
jmp ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
mov %esp,%eax
and $31,%eax
add $256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
# leave
add %eax,%esp
ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
mov %esp,%eax
and $31,%eax
add $256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
# leave
add %eax,%esp
ret
+ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
index 6214a9b..9279e0b 100644
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -1,8 +1,7 @@
-# enter ECRYPT_encrypt_bytes
-.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+#include <linux/linkage.h>
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
mov %rsp,%r11
and $31,%r11
add $256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
# comment:fp stack unchanged by jump
# goto bytesatleast1
jmp ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
mov %rsp,%r11
and $31,%r11
add $256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
mov %rdi,%rax
mov %rsi,%rdx
ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
mov %rsp,%r11
and $31,%r11
add $256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
mov %rdi,%rax
mov %rsi,%rdx
ret
+ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index a3a3c02..5e8e677 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -26,11 +26,6 @@
#define SALSA20_MIN_KEY_SIZE 16U
#define SALSA20_MAX_KEY_SIZE 32U

-// use the ECRYPT_* function names
-#define salsa20_keysetup ECRYPT_keysetup
-#define salsa20_ivsetup ECRYPT_ivsetup
-#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
-
struct salsa20_ctx
{
u32 input[16];

2013-01-19 11:39:40

by Jussi Kivilinna

Subject: [PATCH 10/12] crypto: x86/serpent - use ENTRY/ENDPROC for assembler functions and localize jump targets

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35 ++++++++------------------
arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20 ++++++---------
arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20 ++++++---------
3 files changed, 27 insertions(+), 48 deletions(-)

diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 02b0e9f..43c9386 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,6 +24,7 @@
*
*/

+#include <linux/linkage.h>
#include "glue_helper-asm-avx.S"

.file "serpent-avx-x86_64-asm_64.S"
@@ -566,8 +567,6 @@
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

.align 8
-.type __serpent_enc_blk8_avx,@function;
-
__serpent_enc_blk8_avx:
/* input:
* %rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);

ret;
+ENDPROC(__serpent_enc_blk8_avx)

.align 8
-.type __serpent_dec_blk8_avx,@function;
-
__serpent_dec_blk8_avx:
/* input:
* %rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);

ret;
+ENDPROC(__serpent_dec_blk8_avx)

-.align 8
-.global serpent_ecb_enc_8way_avx
-.type serpent_ecb_enc_8way_avx,@function;
-
-serpent_ecb_enc_8way_avx:
+ENTRY(serpent_ecb_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
+ENDPROC(serpent_ecb_enc_8way_avx)

-.align 8
-.global serpent_ecb_dec_8way_avx
-.type serpent_ecb_dec_8way_avx,@function;
-
-serpent_ecb_dec_8way_avx:
+ENTRY(serpent_ecb_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);

ret;
+ENDPROC(serpent_ecb_dec_8way_avx)

-.align 8
-.global serpent_cbc_dec_8way_avx
-.type serpent_cbc_dec_8way_avx,@function;
-
-serpent_cbc_dec_8way_avx:
+ENTRY(serpent_cbc_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);

ret;
+ENDPROC(serpent_cbc_dec_8way_avx)

-.align 8
-.global serpent_ctr_8way_avx
-.type serpent_ctr_8way_avx,@function;
-
-serpent_ctr_8way_avx:
+ENTRY(serpent_ctr_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
+ENDPROC(serpent_ctr_8way_avx)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index c00053d..d348f15 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -24,6 +24,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "serpent-sse2-i586-asm_32.S"
.text

@@ -510,11 +512,7 @@
pxor t0, x3; \
movdqu x3, (3*4*4)(out);

-.align 8
-.global __serpent_enc_blk_4way
-.type __serpent_enc_blk_4way,@function;
-
-__serpent_enc_blk_4way:
+ENTRY(__serpent_enc_blk_4way)
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
movl arg_dst(%esp), %eax;

cmpb $0, arg_xor(%esp);
- jnz __enc_xor4;
+ jnz .L__enc_xor4;

write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

ret;

-__enc_xor4:
+.L__enc_xor4:
xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

ret;
+ENDPROC(__serpent_enc_blk_4way)

-.align 8
-.global serpent_dec_blk_4way
-.type serpent_dec_blk_4way,@function;
-
-serpent_dec_blk_4way:
+ENTRY(serpent_dec_blk_4way)
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);

ret;
+ENDPROC(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 3ee1ff0..acc066c 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -24,6 +24,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "serpent-sse2-x86_64-asm_64.S"
.text

@@ -632,11 +634,7 @@
pxor t0, x3; \
movdqu x3, (3*4*4)(out);

-.align 8
-.global __serpent_enc_blk_8way
-.type __serpent_enc_blk_8way,@function;
-
-__serpent_enc_blk_8way:
+ENTRY(__serpent_enc_blk_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
leaq (4*4*4)(%rsi), %rax;

testb %cl, %cl;
- jnz __enc_xor8;
+ jnz .L__enc_xor8;

write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

ret;

-__enc_xor8:
+.L__enc_xor8:
xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

ret;
+ENDPROC(__serpent_enc_blk_8way)

-.align 8
-.global serpent_dec_blk_8way
-.type serpent_dec_blk_8way,@function;
-
-serpent_dec_blk_8way:
+ENTRY(serpent_dec_blk_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);

ret;
+ENDPROC(serpent_dec_blk_8way)

2013-01-19 11:39:49

by Jussi Kivilinna

Subject: [PATCH 12/12] crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35 ++++++++------------------
arch/x86/crypto/twofish-i586-asm_32.S | 11 ++++----
arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20 ++++++---------
arch/x86/crypto/twofish-x86_64-asm_64.S | 11 ++++----
4 files changed, 29 insertions(+), 48 deletions(-)

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index ebac16b..8d3e113 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
*
*/

+#include <linux/linkage.h>
#include "glue_helper-asm-avx.S"

.file "twofish-avx-x86_64-asm_64.S"
@@ -243,8 +244,6 @@
vpxor x3, wkey, x3;

.align 8
-.type __twofish_enc_blk8,@function;
-
__twofish_enc_blk8:
/* input:
* %rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);

ret;
+ENDPROC(__twofish_enc_blk8)

.align 8
-.type __twofish_dec_blk8,@function;
-
__twofish_dec_blk8:
/* input:
* %rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);

ret;
+ENDPROC(__twofish_dec_blk8)

-.align 8
-.global twofish_ecb_enc_8way
-.type twofish_ecb_enc_8way,@function;
-
-twofish_ecb_enc_8way:
+ENTRY(twofish_ecb_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);

ret;
+ENDPROC(twofish_ecb_enc_8way)

-.align 8
-.global twofish_ecb_dec_8way
-.type twofish_ecb_dec_8way,@function;
-
-twofish_ecb_dec_8way:
+ENTRY(twofish_ecb_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

ret;
+ENDPROC(twofish_ecb_dec_8way)

-.align 8
-.global twofish_cbc_dec_8way
-.type twofish_cbc_dec_8way,@function;
-
-twofish_cbc_dec_8way:
+ENTRY(twofish_cbc_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
popq %r12;

ret;
+ENDPROC(twofish_cbc_dec_8way)

-.align 8
-.global twofish_ctr_8way
-.type twofish_ctr_8way,@function;
-
-twofish_ctr_8way:
+ENTRY(twofish_ctr_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
popq %r12;

ret;
+ENDPROC(twofish_ctr_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 658af4b..694ea45 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -20,6 +20,7 @@
.file "twofish-i586-asm.S"
.text

+#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/* return address at 0 */
@@ -219,11 +220,7 @@
xor %esi, d ## D;\
ror $1, d ## D;

-.align 4
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
pop %ebp
mov $1, %eax
ret
+ENDPROC(twofish_enc_blk)

-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
pop %ebp
mov $1, %eax
ret
+ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 5b012a2..1c3b7ce 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -20,6 +20,8 @@
*
*/

+#include <linux/linkage.h>
+
.file "twofish-x86_64-asm-3way.S"
.text

@@ -214,11 +216,7 @@
rorq $32, RAB2; \
outunpack3(mov, RIO, 2, RAB, 2);

-.align 8
-.global __twofish_enc_blk_3way
-.type __twofish_enc_blk_3way,@function;
-
-__twofish_enc_blk_3way:
+ENTRY(__twofish_enc_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
popq %rbp; /* bool xor */

testb %bpl, %bpl;
- jnz __enc_xor3;
+ jnz .L__enc_xor3;

outunpack_enc3(mov);

@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
popq %r15;
ret;

-__enc_xor3:
+.L__enc_xor3:
outunpack_enc3(xor);

popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
popq %r14;
popq %r15;
ret;
+ENDPROC(__twofish_enc_blk_3way)

-.global twofish_dec_blk_3way
-.type twofish_dec_blk_3way,@function;
-
-twofish_dec_blk_3way:
+ENTRY(twofish_dec_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
popq %r14;
popq %r15;
ret;
-
+ENDPROC(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 7bcf3fc..a039d21 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -20,6 +20,7 @@
.file "twofish-x86_64-asm.S"
.text

+#include <linux/linkage.h>
#include <asm/asm-offsets.h>

#define a_offset 0
@@ -214,11 +215,7 @@
xor %r8d, d ## D;\
ror $1, d ## D;

-.align 8
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
pushq R1

/* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
popq R1
movq $1,%rax
ret
+ENDPROC(twofish_enc_blk)

-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
pushq R1

/* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
popq R1
movq $1,%rax
ret
+ENDPROC(twofish_dec_blk)

2013-01-19 11:39:44

by Jussi Kivilinna

Subject: [PATCH 11/12] crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC

Signed-off-by: Jussi Kivilinna <[email protected]>
---
arch/x86/crypto/sha1_ssse3_asm.S | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 49d6987..a410950 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -28,6 +28,8 @@
* (at your option) any later version.
*/

+#include <linux/linkage.h>
+
#define CTX %rdi // arg1
#define BUF %rsi // arg2
#define CNT %rdx // arg3
@@ -69,10 +71,8 @@
* param: function's name
*/
.macro SHA1_VECTOR_ASM name
- .global \name
- .type \name, @function
- .align 32
-\name:
+ ENTRY(\name)
+
push %rbx
push %rbp
push %r12
@@ -106,7 +106,7 @@
pop %rbx
ret

- .size \name, .-\name
+ ENDPROC(\name)
.endm

/*

2013-01-19 15:39:52

by David Miller

Subject: Re: [PATCH 00/12] crypto: x86 assembler cleanups

From: Jussi Kivilinna <[email protected]>
Date: Sat, 19 Jan 2013 13:38:45 +0200

> Change x86 crypto implementations to use ENTRY()/ENDPROC() as linux/linkage.h
> recommends use of ENDPROC() for assembler functions.
>
> While at it, also change jump targets to local labels for some implementations.

For series:

Acked-by: David S. Miller <[email protected]>

2013-01-20 00:13:52

by Herbert Xu

Subject: Re: [PATCH 00/12] crypto: x86 assembler cleanups

On Sat, Jan 19, 2013 at 10:39:50AM -0500, David Miller wrote:
> From: Jussi Kivilinna <[email protected]>
> Date: Sat, 19 Jan 2013 13:38:45 +0200
>
> > Change x86 crypto implementations to use ENTRY()/ENDPROC() as linux/linkage.h
> > recommends use of ENDPROC() for assembler functions.
> >
> > While at it, also change jump targets to local labels for some implementations.
>
> For series:
>
> Acked-by: David S. Miller <[email protected]>

All applied. Thanks!
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt