2020-08-27 17:39:07

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

x86_64 zero-extends the result of 32-bit operations, so for 64-bit operands,
XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
a REX prefix byte when legacy registers are used.

Signed-off-by: Uros Bizjak <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: "David S. Miller" <[email protected]>
---
arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 137edcf038cb..7d568012cc15 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
___
&declare_function("poly1305_init_x86_64", 32, 3);
$code.=<<___;
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -2853,7 +2853,7 @@ $code.=<<___;
.type poly1305_init_base2_44,\@function,3
.align 32
poly1305_init_base2_44:
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
mov \$16,$len
sub %r10,$len
xor %eax,%eax
- xor %r11,%r11
+ xor %r11d,%r11d
.Loop_dec_byte:
mov ($inp,$otp),%r11b
mov ($otp),%al
@@ -4085,7 +4085,7 @@ avx_handler:
.long 0xa548f3fc # cld; rep movsq

mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
mov 8(%rsi),%rdx # arg2, disp->ImageBase
mov 0(%rsi),%r8 # arg3, disp->ControlPc
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
--
2.26.2


2020-09-01 19:16:53

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

Hi Uros,

Any benchmarks for this? Seems like it's all in initialization code,
right? I'm CC'ing Andy into this.

Jason

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
>
> Signed-off-by: Uros Bizjak <[email protected]>
> Cc: Herbert Xu <[email protected]>
> Cc: "David S. Miller" <[email protected]>
> ---
> arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
> ___
> &declare_function("poly1305_init_x86_64", 32, 3);
> $code.=<<___;
> - xor %rax,%rax
> + xor %eax,%eax
> mov %rax,0($ctx) # initialize hash value
> mov %rax,8($ctx)
> mov %rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
> .type poly1305_init_base2_44,\@function,3
> .align 32
> poly1305_init_base2_44:
> - xor %rax,%rax
> + xor %eax,%eax
> mov %rax,0($ctx) # initialize hash value
> mov %rax,8($ctx)
> mov %rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
> mov \$16,$len
> sub %r10,$len
> xor %eax,%eax
> - xor %r11,%r11
> + xor %r11d,%r11d
> .Loop_dec_byte:
> mov ($inp,$otp),%r11b
> mov ($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
> .long 0xa548f3fc # cld; rep movsq
>
> mov $disp,%rsi
> - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
> + xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
> mov 8(%rsi),%rdx # arg2, disp->ImageBase
> mov 0(%rsi),%r8 # arg3, disp->ControlPc
> mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
> --
> 2.26.2
>

--
Jason A. Donenfeld
Deep Space Explorer
fr: +33 6 51 90 82 66
us: +1 513 476 1200
http://www.jasondonenfeld.com
http://www.zx2c4.com
zx2c4.com/keys/AB9942E6D4A4CFC3412620A749FC7012A5DE03AE.asc

2020-09-02 05:53:05

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

On Tue, Sep 1, 2020 at 9:16 PM Jason A. Donenfeld <[email protected]> wrote:
>
> Hi Uros,
>
> Any benchmarks for this? Seems like it's all in initialization code,
> right? I'm CC'ing Andy into this.

This patch should have no performance effect; it saves a REX prefix byte
when the optimization is applied to legacy registers.

Uros.

> Jason
>
> On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> > x86_64 zero extends 32bit operations, so for 64bit operands,
> > XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> > a REX prefix byte when legacy registers are used.
> >
> > Signed-off-by: Uros Bizjak <[email protected]>
> > Cc: Herbert Xu <[email protected]>
> > Cc: "David S. Miller" <[email protected]>
> > ---
> > arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> > 1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > index 137edcf038cb..7d568012cc15 100644
> > --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
> > ___
> > &declare_function("poly1305_init_x86_64", 32, 3);
> > $code.=<<___;
> > - xor %rax,%rax
> > + xor %eax,%eax
> > mov %rax,0($ctx) # initialize hash value
> > mov %rax,8($ctx)
> > mov %rax,16($ctx)
> > @@ -2853,7 +2853,7 @@ $code.=<<___;
> > .type poly1305_init_base2_44,\@function,3
> > .align 32
> > poly1305_init_base2_44:
> > - xor %rax,%rax
> > + xor %eax,%eax
> > mov %rax,0($ctx) # initialize hash value
> > mov %rax,8($ctx)
> > mov %rax,16($ctx)
> > @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
> > mov \$16,$len
> > sub %r10,$len
> > xor %eax,%eax
> > - xor %r11,%r11
> > + xor %r11d,%r11d
> > .Loop_dec_byte:
> > mov ($inp,$otp),%r11b
> > mov ($otp),%al
> > @@ -4085,7 +4085,7 @@ avx_handler:
> > .long 0xa548f3fc # cld; rep movsq
> >
> > mov $disp,%rsi
> > - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
> > + xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
> > mov 8(%rsi),%rdx # arg2, disp->ImageBase
> > mov 0(%rsi),%r8 # arg3, disp->ControlPc
> > mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
> > --
> > 2.26.2
> >
>
> --
> Jason A. Donenfeld
> Deep Space Explorer
> fr: +33 6 51 90 82 66
> us: +1 513 476 1200
> http://www.jasondonenfeld.com
> http://www.zx2c4.com
> zx2c4.com/keys/AB9942E6D4A4CFC3412620A749FC7012A5DE03AE.asc

2020-09-07 17:46:51

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

Hi Uros, Herbert,

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
>
> Signed-off-by: Uros Bizjak <[email protected]>
> Cc: Herbert Xu <[email protected]>
> Cc: "David S. Miller" <[email protected]>
> ---
> arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
> ___
> &declare_function("poly1305_init_x86_64", 32, 3);
> $code.=<<___;
> - xor %rax,%rax
> + xor %eax,%eax
> mov %rax,0($ctx) # initialize hash value
> mov %rax,8($ctx)
> mov %rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
> .type poly1305_init_base2_44,\@function,3
> .align 32
> poly1305_init_base2_44:
> - xor %rax,%rax
> + xor %eax,%eax
> mov %rax,0($ctx) # initialize hash value
> mov %rax,8($ctx)
> mov %rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
> mov \$16,$len
> sub %r10,$len
> xor %eax,%eax
> - xor %r11,%r11
> + xor %r11d,%r11d
> .Loop_dec_byte:
> mov ($inp,$otp),%r11b
> mov ($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
> .long 0xa548f3fc # cld; rep movsq
>
> mov $disp,%rsi
> - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
> + xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
> mov 8(%rsi),%rdx # arg2, disp->ImageBase
> mov 0(%rsi),%r8 # arg3, disp->ControlPc
> mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
> --
> 2.26.2
>

Per the discussion elsewhere,

Acked-by: Jason A. Donenfeld <[email protected]>

for cryptodev-2.6.git, rather than crypto-2.6.git

Thanks,
Jason

2020-09-11 06:56:49

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
>
> Signed-off-by: Uros Bizjak <[email protected]>
> Cc: Herbert Xu <[email protected]>
> Cc: "David S. Miller" <[email protected]>
> ---
> arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)

Patch applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt