2020-06-30 21:00:34

by Oliver Swede

[permalink] [raw]
Subject: [PATCH v4 14/14] arm64: Improve accuracy of fixup for UAO cases

This accounts for variations in the number of bytes copied to the
destination buffer that can arise when each STP instruction is
replaced by two unprivileged STTR instructions, which happens if
UAO is supported and enabled. In that case the fixup rounds the
fault offset down to a multiple of 8 bytes instead of 16.

Rather than duplicating the store fixups to carry the modified
mask, the UAO and non-UAO variants are selected in-line with
alternative_if ARM64_HAS_UAO.

Signed-off-by: Oliver Swede <[email protected]>
---
arch/arm64/lib/copy_user_fixup.S | 47 ++++++++++++++++++++++++++++----
1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/lib/copy_user_fixup.S b/arch/arm64/lib/copy_user_fixup.S
index 37ca3d99a02a..2d413f9ba5d3 100644
--- a/arch/arm64/lib/copy_user_fixup.S
+++ b/arch/arm64/lib/copy_user_fixup.S
@@ -205,7 +205,12 @@ addr .req x15
/* 32 < count < 128 -> count - ((addr-dst)&15) */
cmp count, 128
sub x0, addr, dst // relative fault offset
+ /* fault offset within dest. buffer */
+ alternative_if ARM64_HAS_UAO
+ bic x0, x0, 7 // stp subst. for 2x sttr
+ alternative_else
bic x0, x0, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
sub x0, count, x0 // bytes yet to copy
b.le L(end_fixup)
/* 128 < count -> count */
@@ -265,7 +270,12 @@ addr .req x15
sub tmp1, count, tmp1 // remaining bytes after non-overlapping section
sub x0, dstend, 64
sub x0, addr, x0
- bic x0, x0, 15 // fault offset within dest. buffer
+ /* fault offset within dest. buffer */
+ alternative_if ARM64_HAS_UAO
+ bic x0, x0, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic x0, x0, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
add x0, dstend, x0
sub x0, x0, 64
sub x0, dstend, x0 // remaining bytes in final (overlapping) 64B
@@ -295,7 +305,12 @@ addr .req x15
*/
sub tmp1, dstend, 32
sub tmp1, addr, tmp1
- bic tmp1, tmp1, 15
+ /* fault offset */
+ alternative_if ARM64_HAS_UAO
+ bic tmp1, tmp1, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
mov x0, 32
sub tmp1, x0, tmp1
sub x0, count, 32
@@ -309,7 +324,12 @@ addr .req x15
*/
sub tmp1, dstend, 32
sub tmp1, addr, tmp1
- bic tmp1, tmp1, 15
+ /* fault offset */
+ alternative_if ARM64_HAS_UAO
+ bic tmp1, tmp1, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
mov x0, 32
sub tmp1, x0, tmp1
sub x0, count, 64
@@ -324,7 +344,12 @@ addr .req x15
*/
sub tmp1, dstend, 64
sub tmp1, addr, tmp1
- bic tmp1, tmp1, 15
+ /* fault offset */
+ alternative_if ARM64_HAS_UAO
+ bic tmp1, tmp1, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic tmp1, tmp1, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
mov x0, 64
sub tmp1, x0, tmp1
cmp count, 128
@@ -378,10 +403,20 @@ addr .req x15
/* Take the min from {16,(fault_addr&15)-(dst&15)}
* and subtract from count to obtain the return value */
bic tmp1, dst, 15 // aligned dst
- bic x0, addr, 15
+ /* fault offset */
+ alternative_if ARM64_HAS_UAO
+ bic x0, addr, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic x0, addr, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
sub x0, x0, tmp1 // relative fault offset
cmp x0, 16
- bic x0, addr, 15
+ /* fault offset */
+ alternative_if ARM64_HAS_UAO
+ bic x0, addr, 7 // stp subst. for 2x sttr
+ alternative_else
+ bic x0, addr, 15 // bytes already copied (steps of 16B stores)
+ alternative_endif
sub x0, x0, dst
sub x0, count, x0
b.gt L(end_fixup)
--
2.17.1