2017-08-02 00:26:34

by Megha Dey

[permalink] [raw]
Subject: [Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed

It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
reading ahead beyond its intended data, and causing a crash if the next
block is beyond page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash

Jan, can you verify this fix?
Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
revert commit b82ce24426a4071da9529d726057e4e642948667 ?

Originally-by: Ilya Albrekht <[email protected]>
Signed-off-by: Megha Dey <[email protected]>
Reported-by: Jan Stancek <[email protected]>
---
arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++++++----------------
1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792d..1eab79c 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
.set T1, REG_T1
.endm

-#define K_BASE %r8
#define HASH_PTR %r9
+#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13
-#define BUFFER_END %r11

#define PRECALC_BUF %r14
#define WK_BUF %r15
@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds
*/
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP
.elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7)

@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)

PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
vpsrld $30, WY, WY
vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)

PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@

.endm

+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
/*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/
@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF

# Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0
.rept 160
PRECALC i
.set i, i + 1
.endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF

.align 32
@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE
*/
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
.align 32
jmp _end
.align 32
@@ -512,10 +524,10 @@ _loop0:
.set j, j+2
.endr

- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/*
* rounds
* 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E

- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop

mov TB, B

@@ -575,10 +587,10 @@ _loop2:
.set j, j+2
.endr

- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128

jmp _loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:

avx2_zeroupper

- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END

- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR

xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

--
1.9.1


2017-08-02 02:30:07

by Herbert Xu

[permalink] [raw]
Subject: Re: [Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed

On Tue, Aug 01, 2017 at 05:38:32PM -0700, Megha Dey wrote:
> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
> reading ahead beyond its intended data, and causing a crash if the next
> block is beyond page boundary:
> http://marc.info/?l=linux-crypto-vger&m=149373371023377
>
> This patch makes sure that there is no overflow for any buffer length.
>
> It passes the tests written by Jan Stancek that revealed this problem:
> https://github.com/jstancek/sha1-avx2-crash
>
> Jan, can you verify this fix?
> Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
> revert commit b82ce24426a4071da9529d726057e4e642948667 ?

Can you please include the hunk to actually reenable sha1-avx2
in your patch? Thanks!
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2017-08-02 10:39:59

by Jan Stancek

[permalink] [raw]
Subject: Re: [Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed

On 08/02/2017 02:38 AM, Megha Dey wrote:
> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
> reading ahead beyond its intended data, and causing a crash if the next
> block is beyond page boundary:
> http://marc.info/?l=linux-crypto-vger&m=149373371023377
>
> This patch makes sure that there is no overflow for any buffer length.
>
> It passes the tests written by Jan Stancek that revealed this problem:
> https://github.com/jstancek/sha1-avx2-crash
>
> Jan, can you verify this fix?

Hi,

Looks good, my tests (below) PASS-ed.

I updated reproducer at [1] to try more than just single scenario. It
now tries thousands of combinations with different starting offset within
page and sizes of data chunks.

(without patch) v4.13-rc3-102-g26c5ceb with avx2 enabled:
[ 106.455175] sha_test module loaded
[ 106.460458] data is at 0xffff8c88e58f8000, page_after_data: 0xffff8c88e58fa000
[ 106.468625] sha_test testing hash: sha1-generic, maxlen: 8192
[ 127.091237] sha_test hash: sha1-generic calculated 1764032 hashes
[ 127.098038] sha_test testing hash: sha1-ni, maxlen: 8192
[ 127.108805] failed to alloc sha1-ni
[ 127.112703] sha_test testing hash: sha1-avx, maxlen: 8192
[ 141.166611] sha_test hash: sha1-avx calculated 1764032 hashes
[ 141.173023] sha_test testing hash: sha1-avx2, maxlen: 8192
[ 141.180046] BUG: unable to handle kernel paging request at ffff8c88e58fa000
[ 141.187817] IP: _begin+0x28/0x187
[ 141.191512] PGD 89c578067
[ 141.191513] P4D 89c578067
[ 141.194529] PUD c61f0a063
[ 141.197545] PMD c64cb1063
[ 141.200561] PTE 8000000c658fa062
[ 141.203577]
[ 141.208832] Oops: 0000 [#1] SMP
...

With patch "[Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed":
[ 406.323781] sha_test module loaded
[ 406.329062] data is at 0xffff93ab97108000, page_after_data: 0xffff93ab9710a000
[ 406.337185] sha_test testing hash: sha1-generic, maxlen: 8192
[ 426.157242] sha_test hash: sha1-generic calculated 1764032 hashes
[ 426.164043] sha_test testing hash: sha1-ni, maxlen: 8192
[ 426.174244] failed to alloc sha1-ni
[ 426.178139] sha_test testing hash: sha1-avx, maxlen: 8192
[ 440.226240] sha_test hash: sha1-avx calculated 1764032 hashes
[ 440.232651] sha_test testing hash: sha1-avx2, maxlen: 8192
[ 452.497988] sha_test hash: sha1-avx2 calculated 1764032 hashes
[ 452.504495] sha_test testing hash: sha1-ssse3, maxlen: 8192
[ 467.319316] sha_test hash: sha1-ssse3 calculated 1764032 hashes
[ 467.325922] sha_test done
[ 471.827083] sha_test module unloaded

I also ran a test at [2], which is comparing hashes produced by
kernel with hashes produced by user-space utilities (e.g. 'sha1sum').
This test also PASS-ed.

Regards,
Jan

[1] https://github.com/jstancek/sha1-avx2-crash
[2] https://github.com/jstancek/kernel_sha_test/tree/sha1-avx2-4.13

> Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
> revert commit b82ce24426a4071da9529d726057e4e642948667 ?
>
> Originally-by: Ilya Albrekht <[email protected]>
> Signed-off-by: Megha Dey <[email protected]>
> Reported-by: Jan Stancek <[email protected]>


2017-08-02 16:05:19

by Tim Chen

[permalink] [raw]
Subject: Re: [Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed

On 08/02/2017 03:39 AM, Jan Stancek wrote:
> On 08/02/2017 02:38 AM, Megha Dey wrote:
>> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
>> reading ahead beyond its intended data, and causing a crash if the next
>> block is beyond page boundary:
>> http://marc.info/?l=linux-crypto-vger&m=149373371023377
>>
>> This patch makes sure that there is no overflow for any buffer length.
>>
>> It passes the tests written by Jan Stancek that revealed this problem:
>> https://github.com/jstancek/sha1-avx2-crash
>>
>> Jan, can you verify this fix?
>
> Hi,
>
> Looks good, my tests (below) PASS-ed.
>
> I updated reproducer at [1] to try more than just single scenario. It
> now tries thousands of combinations with different starting offset within
> page and sizes of data chunks.
>
> (without patch) v4.13-rc3-102-g26c5ceb with avx2 enabled:
> [ 106.455175] sha_test module loaded
> [ 106.460458] data is at 0xffff8c88e58f8000, page_after_data: 0xffff8c88e58fa000
> [ 106.468625] sha_test testing hash: sha1-generic, maxlen: 8192
> [ 127.091237] sha_test hash: sha1-generic calculated 1764032 hashes
> [ 127.098038] sha_test testing hash: sha1-ni, maxlen: 8192
> [ 127.108805] failed to alloc sha1-ni
> [ 127.112703] sha_test testing hash: sha1-avx, maxlen: 8192
> [ 141.166611] sha_test hash: sha1-avx calculated 1764032 hashes
> [ 141.173023] sha_test testing hash: sha1-avx2, maxlen: 8192
> [ 141.180046] BUG: unable to handle kernel paging request at ffff8c88e58fa000
> [ 141.187817] IP: _begin+0x28/0x187
> [ 141.191512] PGD 89c578067
> [ 141.191513] P4D 89c578067
> [ 141.194529] PUD c61f0a063
> [ 141.197545] PMD c64cb1063
> [ 141.200561] PTE 8000000c658fa062
> [ 141.203577]
> [ 141.208832] Oops: 0000 [#1] SMP
> ...
>
> With patch "[Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed":
> [ 406.323781] sha_test module loaded
> [ 406.329062] data is at 0xffff93ab97108000, page_after_data: 0xffff93ab9710a000
> [ 406.337185] sha_test testing hash: sha1-generic, maxlen: 8192
> [ 426.157242] sha_test hash: sha1-generic calculated 1764032 hashes
> [ 426.164043] sha_test testing hash: sha1-ni, maxlen: 8192
> [ 426.174244] failed to alloc sha1-ni
> [ 426.178139] sha_test testing hash: sha1-avx, maxlen: 8192
> [ 440.226240] sha_test hash: sha1-avx calculated 1764032 hashes
> [ 440.232651] sha_test testing hash: sha1-avx2, maxlen: 8192
> [ 452.497988] sha_test hash: sha1-avx2 calculated 1764032 hashes
> [ 452.504495] sha_test testing hash: sha1-ssse3, maxlen: 8192
> [ 467.319316] sha_test hash: sha1-ssse3 calculated 1764032 hashes
> [ 467.325922] sha_test done
> [ 471.827083] sha_test module unloaded
>
> I also ran a test at [2], which is comparing hashes produced by
> kernel with hashes produced by user-space utilities (e.g. 'sha1sum').
> This test also PASS-ed.

Great. Should the fix be picked up also in the stable branches since
it was disabled in the stable releases? Or the changes are too
much for stable?

Tim