The label .Llast_fixup\@ is jumped to on a page fault within the final
byte-set loop of memset (on pre-MIPSR6 architectures). For no obvious
reason, this fault handler sets the v1 register to a2 & STORMASK,
which clobbers v1 for the calling function. This can be observed with
the following test code:
static int __init __attribute__((optimize("O0"))) test_clear_user(void)
{
        register int t asm("v1");
        char *test;
        int j, k;

        pr_info("\n\n\nTesting clear_user\n");
        test = vmalloc(PAGE_SIZE);

        for (j = 256; j < 512; j++) {
                t = 0xa5a5a5a5;
                if ((k = clear_user(test + PAGE_SIZE - 256, j)) != j - 256) {
                        pr_err("clear_user (%px %d) returned %d\n", test + PAGE_SIZE - 256, j, k);
                }
                if (t != 0xa5a5a5a5) {
                        pr_err("v1 was clobbered to 0x%x!\n", t);
                }
        }

        return 0;
}
late_initcall(test_clear_user);
Which demonstrates that v1 is indeed clobbered (MIPS64):
Testing clear_user
v1 was clobbered to 0x1!
v1 was clobbered to 0x2!
v1 was clobbered to 0x3!
v1 was clobbered to 0x4!
v1 was clobbered to 0x5!
v1 was clobbered to 0x6!
v1 was clobbered to 0x7!
Since the number of bytes that could not be set is already contained in
a2, the andi that places a value in v1 is unnecessary and actively
harmful, since it clobbers v1.
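For context, clear_user() returns the number of bytes that could not be
zeroed, and in this implementation that residual count travels back in
a2. A minimal caller-side sketch of how the return value is normally
consumed (ubuf and len are placeholder names, not a specific call site):

        /* nonzero return means some bytes could not be cleared */
        if (clear_user(ubuf, len))
                return -EFAULT;

Nothing in that contract involves v1, which is why the stray andi can
simply be dropped.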
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: [email protected]
Reported-by: James Hogan <[email protected]>
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v2: None
arch/mips/lib/memset.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 184819c1d5c8..f7327979a8f8 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -258,7 +258,7 @@
.Llast_fixup\@:
jr ra
- andi v1, a2, STORMASK
+ nop
.Lsmall_fixup\@:
PTR_SUBU a2, t1, a0
--
2.7.4
The microMIPS implementation of bzero additionally clobbers registers t7
& t8. Specify this in the clobber list when invoking bzero.
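The rule being relied on here is GCC's extended asm contract: any
register written by the asm (or by code it calls, such as bzero) must
appear in the clobber list, otherwise the compiler may keep live data
in it across the statement. A minimal sketch of the hazard, assuming a
MIPS GCC toolchain (the instruction and function name are made up for
illustration, not kernel code):

        static void clobber_demo(void)
        {
                /* Broken: t8 ($24) is written but not declared, so the
                 * compiler may still assume it holds live data. */
                __asm__ __volatile__("ori\t$24, $0, 1");

                /* Correct: declaring the clobber makes the compiler treat
                 * $24 as trashed, as this patch does for the microMIPS
                 * bzero call. */
                __asm__ __volatile__("ori\t$24, $0, 1" : : : "$24");
        }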
Reported-by: James Hogan <[email protected]>
Fixes: 26c5e07d1478 ("MIPS: microMIPS: Optimise 'memset' core library function.")
Cc: [email protected]
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v2: None
arch/mips/include/asm/uaccess.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index b71306947290..06629011a434 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -654,6 +654,13 @@ __clear_user(void __user *addr, __kernel_size_t size)
{
__kernel_size_t res;
+#ifdef CONFIG_CPU_MICROMIPS
+/* micromips memset / bzero also clobbers t7 & t8 */
+#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$15", "$24", "$31"
+#else
+#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"
+#endif /* CONFIG_CPU_MICROMIPS */
+
if (eva_kernel_access()) {
__asm__ __volatile__(
"move\t$4, %1\n\t"
@@ -663,7 +670,7 @@ __clear_user(void __user *addr, __kernel_size_t size)
"move\t%0, $6"
: "=r" (res)
: "r" (addr), "r" (size)
- : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+ : bzero_clobbers);
} else {
might_fault();
__asm__ __volatile__(
@@ -674,7 +681,7 @@ __clear_user(void __user *addr, __kernel_size_t size)
"move\t%0, $6"
: "=r" (res)
: "r" (addr), "r" (size)
- : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+ : bzero_clobbers);
}
return res;
--
2.7.4
Assembly code within the MIPS kernel conventionally indents
instructions which are in a branch delay slot to make them easier to
spot. Commit 8483b14aaa81 ("MIPS: lib: memset: Whitespace fixes") rather
inexplicably removed all of these indentations from memset.S. Reinstate
the convention for all instructions in a branch delay slot. This
effectively reverts the above commit and additionally covers the
delay-slot instructions introduced with MIPSR6 support.
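Concretely, the convention is one extra leading space on the
instruction occupying the branch delay slot, so that it visually hangs
off the branch above it. Taking the first hunk below as an example, the
intended layout is:

        sltiu           t0, a2, STORSIZE        /* very small region? */
        bnez            t0, .Lsmall_memset\@
         andi           t0, a0, STORMASK        /* aligned? */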
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v2:
- Rebase delay slot indentation on v3 of "MIPS: memset.S: Fix return of
__clear_user from Lpartial_fixup"
arch/mips/lib/memset.S | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index f7327979a8f8..1cc306520a55 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -95,7 +95,7 @@
sltiu t0, a2, STORSIZE /* very small region? */
bnez t0, .Lsmall_memset\@
- andi t0, a0, STORMASK /* aligned? */
+ andi t0, a0, STORMASK /* aligned? */
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
@@ -103,12 +103,12 @@
#endif
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f
- PTR_SUBU t0, STORSIZE /* alignment in bytes */
+ PTR_SUBU t0, STORSIZE /* alignment in bytes */
#else
.set noat
li AT, STORSIZE
beqz t0, 1f
- PTR_SUBU t0, AT /* alignment in bytes */
+ PTR_SUBU t0, AT /* alignment in bytes */
.set at
#endif
@@ -149,7 +149,7 @@
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
beqz t1, .Lmemset_partial\@ /* no block to fill */
- andi t0, a2, 0x40-STORSIZE
+ andi t0, a2, 0x40-STORSIZE
PTR_ADDU t1, a0 /* end address */
.set reorder
@@ -174,7 +174,7 @@
.set at
#endif
jr t1
- PTR_ADDU a0, t0 /* dest ptr */
+ PTR_ADDU a0, t0 /* dest ptr */
.set push
.set noreorder
@@ -186,7 +186,7 @@
beqz a2, 1f
#ifndef CONFIG_CPU_MIPSR6
- PTR_ADDU a0, a2 /* What's left */
+ PTR_ADDU a0, a2 /* What's left */
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -194,7 +194,7 @@
EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
#endif
#else
- PTR_SUBU t0, $0, a2
+ PTR_SUBU t0, $0, a2
PTR_ADDIU t0, 1
STORE_BYTE(0)
STORE_BYTE(1)
@@ -210,11 +210,11 @@
0:
#endif
1: jr ra
- move a2, zero
+ move a2, zero
.Lsmall_memset\@:
beqz a2, 2f
- PTR_ADDU t1, a0, a2
+ PTR_ADDU t1, a0, a2
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
@@ -222,7 +222,7 @@
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
2: jr ra /* done */
- move a2, zero
+ move a2, zero
.if __memset == 1
END(memset)
.set __memset, 0
@@ -238,7 +238,7 @@
.Lfirst_fixup\@:
jr ra
- nop
+ nop
.Lfwd_fixup\@:
PTR_L t0, TI_TASK($28)
@@ -246,7 +246,7 @@
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1
jr ra
- LONG_SUBU a2, t0
+ LONG_SUBU a2, t0
.Lpartial_fixup\@:
PTR_L t0, TI_TASK($28)
@@ -254,7 +254,7 @@
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0
jr ra
- LONG_SUBU a2, t0
+ LONG_SUBU a2, t0
.Llast_fixup\@:
jr ra
@@ -278,7 +278,7 @@
LEAF(memset)
EXPORT_SYMBOL(memset)
beqz a1, 1f
- move v0, a0 /* result */
+ move v0, a0 /* result */
andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8
--
2.7.4
It is not immediately obvious what the expected inputs to these fault
handlers are, nor how they calculate the number of unset bytes. Having
stared deeply at this in order to fix some corner cases, add some
comments to assist those who follow.
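As a worked example of the sort of calculation these comments describe
(offered as an explanatory sketch of the .Lsmall_fixup\@ path): the
bytewise loop in small_memset advances a0 before each
EX(sb, a1, -1(a0), .Lsmall_fixup\@) store, so when a store faults the
bytes still unset run from a0 - 1 up to the end address t1:

        unset_bytes = t1 - (a0 - 1) = t1 - a0 + 1

which is exactly what the PTR_SUBU / PTR_ADDIU pair under
.Lsmall_fixup\@ computes.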
Signed-off-by: Matt Redfearn <[email protected]>
---
Changes in v2:
- Add comments to fault handlers in new, separate, patch.
arch/mips/lib/memset.S | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 1cc306520a55..a06dabe99d4b 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -231,16 +231,25 @@
#ifdef CONFIG_CPU_MIPSR6
.Lbyte_fixup\@:
+ /*
+ * unset_bytes = current_addr + 1
+ * a2 = t0 + 1
+ */
PTR_SUBU a2, $0, t0
jr ra
PTR_ADDIU a2, 1
#endif /* CONFIG_CPU_MIPSR6 */
.Lfirst_fixup\@:
+ /* unset_bytes already in a2 */
jr ra
nop
.Lfwd_fixup\@:
+ /*
+ * unset_bytes = partial_start_addr + #bytes - fault_addr
+ * a2 = t1 + (a2 & 3f) - $28->task->BUADDR
+ */
PTR_L t0, TI_TASK($28)
andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0)
@@ -249,6 +258,10 @@
LONG_SUBU a2, t0
.Lpartial_fixup\@:
+ /*
+ * unset_bytes = partial_end_addr + #bytes - fault_addr
+ * a2 = a0 + (a2 & STORMASK) - $28->task->BUADDR
+ */
PTR_L t0, TI_TASK($28)
andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0)
@@ -257,10 +270,15 @@
LONG_SUBU a2, t0
.Llast_fixup\@:
+ /* unset_bytes already in a2 */
jr ra
nop
.Lsmall_fixup\@:
+ /*
+ * unset_bytes = end_addr - current_addr + 1
+ * a2 = t1 - a0 + 1
+ */
PTR_SUBU a2, t1, a0
jr ra
PTR_ADDIU a2, 1
--
2.7.4
On Tue, Apr 17, 2018 at 04:40:00PM +0100, Matt Redfearn wrote:
> The label .Llast_fixup\@ is jumped to on page fault within the final
> byte set loop of memset (on < MIPSR6 architectures). For some reason, in
> this fault handler, the v1 register is randomly set to a2 & STORMASK.
> This clobbers v1 for the calling function. This can be observed with the
> following test code:
>
> static int __init __attribute__((optimize("O0"))) test_clear_user(void)
> {
>         register int t asm("v1");
>         char *test;
>         int j, k;
>
>         pr_info("\n\n\nTesting clear_user\n");
>         test = vmalloc(PAGE_SIZE);
>
>         for (j = 256; j < 512; j++) {
>                 t = 0xa5a5a5a5;
>                 if ((k = clear_user(test + PAGE_SIZE - 256, j)) != j - 256) {
>                         pr_err("clear_user (%px %d) returned %d\n", test + PAGE_SIZE - 256, j, k);
>                 }
>                 if (t != 0xa5a5a5a5) {
>                         pr_err("v1 was clobbered to 0x%x!\n", t);
>                 }
>         }
>
>         return 0;
> }
> late_initcall(test_clear_user);
>
> Which demonstrates that v1 is indeed clobbered (MIPS64):
>
> Testing clear_user
> v1 was clobbered to 0x1!
> v1 was clobbered to 0x2!
> v1 was clobbered to 0x3!
> v1 was clobbered to 0x4!
> v1 was clobbered to 0x5!
> v1 was clobbered to 0x6!
> v1 was clobbered to 0x7!
>
> Since the number of bytes that could not be set is already contained in
> a2, the andi placing a value in v1 is not necessary and actively
> harmful in clobbering v1.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Cc: [email protected]
> Reported-by: James Hogan <[email protected]>
> Signed-off-by: Matt Redfearn <[email protected]>
Thanks, Patches 1 & 2 applied to my fixes branch.
Cheers
James
On Tue, Apr 17, 2018 at 04:40:03PM +0100, Matt Redfearn wrote:
> diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
> index 1cc306520a55..a06dabe99d4b 100644
> --- a/arch/mips/lib/memset.S
> +++ b/arch/mips/lib/memset.S
> @@ -231,16 +231,25 @@
>
> #ifdef CONFIG_CPU_MIPSR6
> .Lbyte_fixup\@:
> + /*
> + * unset_bytes = current_addr + 1
> + * a2 = t0 + 1
The code looks more like a2 = 1 - t0 to me:
> + */
> PTR_SUBU a2, $0, t0
> jr ra
> PTR_ADDIU a2, 1
I.e. t0 counts up to 1 and then stops.
Anyway I've applied patch 3 for 4.18.
Cheers
James
Hi James,
On 21/05/18 17:14, James Hogan wrote:
> On Tue, Apr 17, 2018 at 04:40:03PM +0100, Matt Redfearn wrote:
>> diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
>> index 1cc306520a55..a06dabe99d4b 100644
>> --- a/arch/mips/lib/memset.S
>> +++ b/arch/mips/lib/memset.S
>> @@ -231,16 +231,25 @@
>>
>> #ifdef CONFIG_CPU_MIPSR6
>> .Lbyte_fixup\@:
>> + /*
>> + * unset_bytes = current_addr + 1
>> + * a2 = t0 + 1
>
> The code looks more like a2 = 1 - t0 to me:
>
>> + */
>> PTR_SUBU a2, $0, t0
>> jr ra
>> PTR_ADDIU a2, 1
>
> I.e. t0 counts up to 1 and then stops.
Well spotted - that means this code is also wrong... :-/
We have the count of bytes to zero in a2, but that gets clobbered in the
exception handler, so we always return a number of uncleared bytes
within STORSIZE.
This test code:
static int __init __attribute__((optimize("O0"))) test_clear_user(void)
{
        int j, k;

        pr_info("\n\n\nTesting clear_user\n");

        for (j = 0; j < 512; j++) {
                if ((k = clear_user(NULL+3, j)) != j) {
                        pr_err("clear_user (NULL %d) returned %d\n", j, k);
                }
        }

        return 0;
}
late_initcall(test_clear_user);
on a 64r6el kernel results in:
[ 3.965439] Testing clear_user
[ 3.973169] clear_user (NULL 8) returned 6
[ 3.976782] clear_user (NULL 9) returned 6
[ 3.980390] clear_user (NULL 10) returned 6
[ 3.984052] clear_user (NULL 11) returned 6
[ 3.987524] clear_user (NULL 12) returned 6
[ 3.991179] clear_user (NULL 13) returned 6
[ 3.994841] clear_user (NULL 14) returned 6
[ 3.998500] clear_user (NULL 15) returned 6
[ 4.002160] clear_user (NULL 16) returned 6
[ 4.005820] clear_user (NULL 17) returned 6
[ 4.009480] clear_user (NULL 18) returned 6
[ 4.013140] clear_user (NULL 19) returned 6
[ 4.016797] clear_user (NULL 20) returned 6
[ 4.020456] clear_user (NULL 21) returned 6
I'll post another fix soon, and update this comment to reflect the
corrected behavior.
Thanks,
Matt
>
> Anyway I've applied patch 3 for 4.18.
>
> Cheers
> James
>