2020-04-15 22:47:32

by Christophe Leroy

[permalink] [raw]
Subject: [PATCH] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

At the time being, __put_user()/__get_user() and friends only use
register indirect with immediate index addressing, with the index
set to 0. Ex:

lwz reg1, 0(reg2)

Give the compiler the opportunity to use other adressing modes
whenever possible, to get more optimised code.

Hereunder is a small exemple:

struct test {
u32 item1;
u16 item2;
u8 item3;
u64 item4;
};

int set_test_user(struct test __user *from, struct test __user *to, int idx)
{
int err;
u32 item1;
u16 item2;
u8 item3;
u64 item4;

err = __get_user(item1, &from->item1);
err |= __get_user(item2, &from->item2);
err |= __get_user(item3, &from->item3);
err |= __get_user(item4, &from->item4);

err |= __put_user(item1, &to->item1);
err |= __put_user(item2, &to->item2);
err |= __put_user(item3, &to->item3);
err |= __put_user(item4, &to->item4);

return err;
}

Before the patch:

00000df0 <set_test_user>:
df0: 94 21 ff f0 stwu r1,-16(r1)
df4: 39 40 00 00 li r10,0
df8: 93 c1 00 08 stw r30,8(r1)
dfc: 93 e1 00 0c stw r31,12(r1)
e00: 7d 49 53 78 mr r9,r10
e04: 80 a3 00 00 lwz r5,0(r3)
e08: 38 e3 00 04 addi r7,r3,4
e0c: 7d 46 53 78 mr r6,r10
e10: a0 e7 00 00 lhz r7,0(r7)
e14: 7d 29 33 78 or r9,r9,r6
e18: 39 03 00 06 addi r8,r3,6
e1c: 7d 46 53 78 mr r6,r10
e20: 89 08 00 00 lbz r8,0(r8)
e24: 7d 29 33 78 or r9,r9,r6
e28: 38 63 00 08 addi r3,r3,8
e2c: 7d 46 53 78 mr r6,r10
e30: 83 c3 00 00 lwz r30,0(r3)
e34: 83 e3 00 04 lwz r31,4(r3)
e38: 7d 29 33 78 or r9,r9,r6
e3c: 7d 43 53 78 mr r3,r10
e40: 90 a4 00 00 stw r5,0(r4)
e44: 7d 29 1b 78 or r9,r9,r3
e48: 38 c4 00 04 addi r6,r4,4
e4c: 7d 43 53 78 mr r3,r10
e50: b0 e6 00 00 sth r7,0(r6)
e54: 7d 29 1b 78 or r9,r9,r3
e58: 38 e4 00 06 addi r7,r4,6
e5c: 7d 43 53 78 mr r3,r10
e60: 99 07 00 00 stb r8,0(r7)
e64: 7d 23 1b 78 or r3,r9,r3
e68: 38 84 00 08 addi r4,r4,8
e6c: 93 c4 00 00 stw r30,0(r4)
e70: 93 e4 00 04 stw r31,4(r4)
e74: 7c 63 53 78 or r3,r3,r10
e78: 83 c1 00 08 lwz r30,8(r1)
e7c: 83 e1 00 0c lwz r31,12(r1)
e80: 38 21 00 10 addi r1,r1,16
e84: 4e 80 00 20 blr

After the patch:

00000dbc <set_test_user>:
dbc: 39 40 00 00 li r10,0
dc0: 7d 49 53 78 mr r9,r10
dc4: 80 03 00 00 lwz r0,0(r3)
dc8: 7d 48 53 78 mr r8,r10
dcc: a1 63 00 04 lhz r11,4(r3)
dd0: 7d 29 43 78 or r9,r9,r8
dd4: 7d 48 53 78 mr r8,r10
dd8: 88 a3 00 06 lbz r5,6(r3)
ddc: 7d 29 43 78 or r9,r9,r8
de0: 7d 48 53 78 mr r8,r10
de4: 80 c3 00 08 lwz r6,8(r3)
de8: 80 e3 00 0c lwz r7,12(r3)
dec: 7d 29 43 78 or r9,r9,r8
df0: 7d 43 53 78 mr r3,r10
df4: 90 04 00 00 stw r0,0(r4)
df8: 7d 29 1b 78 or r9,r9,r3
dfc: 7d 43 53 78 mr r3,r10
e00: b1 64 00 04 sth r11,4(r4)
e04: 7d 29 1b 78 or r9,r9,r3
e08: 7d 43 53 78 mr r3,r10
e0c: 98 a4 00 06 stb r5,6(r4)
e10: 7d 23 1b 78 or r3,r9,r3
e14: 90 c4 00 08 stw r6,8(r4)
e18: 90 e4 00 0c stw r7,12(r4)
e1c: 7c 63 53 78 or r3,r3,r10
e20: 4e 80 00 20 blr

Signed-off-by: Christophe Leroy <[email protected]>
---
arch/powerpc/include/asm/uaccess.h | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 2f500debae21..dee71e9c7618 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -114,7 +114,7 @@ extern long __put_user_bad(void);
*/
#define __put_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
- "1: " op " %1,0(%2) # put_user\n" \
+ "1: " op "%U2%X2 %1,%2 # put_user\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
@@ -122,7 +122,7 @@ extern long __put_user_bad(void);
".previous\n" \
EX_TABLE(1b, 3b) \
: "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+ : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))

#ifdef __powerpc64__
#define __put_user_asm2(x, ptr, retval) \
@@ -130,8 +130,8 @@ extern long __put_user_bad(void);
#else /* __powerpc64__ */
#define __put_user_asm2(x, addr, err) \
__asm__ __volatile__( \
- "1: stw %1,0(%2)\n" \
- "2: stw %1+1,4(%2)\n" \
+ "1: stw%U2%X2 %1,%2\n" \
+ "2: stw%U2%X2 %L1,%L2\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: li %0,%3\n" \
@@ -140,7 +140,7 @@ extern long __put_user_bad(void);
EX_TABLE(1b, 4b) \
EX_TABLE(2b, 4b) \
: "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+ : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */

#define __put_user_size_allowed(x, ptr, size, retval) \
@@ -217,7 +217,7 @@ extern long __get_user_bad(void);

#define __get_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
- "1: "op" %1,0(%2) # get_user\n" \
+ "1: "op"%U2%X2 %1, %2 # get_user\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
@@ -226,7 +226,7 @@ extern long __get_user_bad(void);
".previous\n" \
EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m" (*addr), "i" (-EFAULT), "0" (err))

#ifdef __powerpc64__
#define __get_user_asm2(x, addr, err) \
@@ -234,8 +234,8 @@ extern long __get_user_bad(void);
#else /* __powerpc64__ */
#define __get_user_asm2(x, addr, err) \
__asm__ __volatile__( \
- "1: lwz %1,0(%2)\n" \
- "2: lwz %1+1,4(%2)\n" \
+ "1: lwz%U2%X2 %1, %2\n" \
+ "2: lwz%U2%X2 %L1, %L2\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: li %0,%3\n" \
@@ -246,7 +246,7 @@ extern long __get_user_bad(void);
EX_TABLE(1b, 4b) \
EX_TABLE(2b, 4b) \
: "=r" (err), "=&r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m" (*addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */

#define __get_user_size_allowed(x, ptr, size, retval) \
@@ -256,10 +256,10 @@ do { \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
switch (size) { \
- case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
- case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
- case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); break; \
+ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \
+ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \
+ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \
+ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
--
2.25.0


2020-04-16 01:13:55

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

Hi!

On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
> At the time being, __put_user()/__get_user() and friends only use
> register indirect with immediate index addressing, with the index
> set to 0. Ex:
>
> lwz reg1, 0(reg2)

This is called a "D-form" instruction, or sometimes "offset addressing".
Don't talk about an "index", it confuses things, because the *other*
kind is called "indexed" already, also in the ISA docs! (X-form, aka
indexed addressing, [reg+reg], where D-form does [reg+imm], and both
forms can do [reg]).

> Give the compiler the opportunity to use other adressing modes
> whenever possible, to get more optimised code.

Great :-)

> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -114,7 +114,7 @@ extern long __put_user_bad(void);
> */
> #define __put_user_asm(x, addr, err, op) \
> __asm__ __volatile__( \
> - "1: " op " %1,0(%2) # put_user\n" \
> + "1: " op "%U2%X2 %1,%2 # put_user\n" \
> "2:\n" \
> ".section .fixup,\"ax\"\n" \
> "3: li %0,%3\n" \
> @@ -122,7 +122,7 @@ extern long __put_user_bad(void);
> ".previous\n" \
> EX_TABLE(1b, 3b) \
> : "=r" (err) \
> - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))

%Un on an "m" operand doesn't do much: you need to make it "m<>" if you
want pre-modify ("update") insns to be generated. (You then will want
to make sure that operand is used in a way GCC can understand; since it
is used only once here, that works fine).

> @@ -130,8 +130,8 @@ extern long __put_user_bad(void);
> #else /* __powerpc64__ */
> #define __put_user_asm2(x, addr, err) \
> __asm__ __volatile__( \
> - "1: stw %1,0(%2)\n" \
> - "2: stw %1+1,4(%2)\n" \
> + "1: stw%U2%X2 %1,%2\n" \
> + "2: stw%U2%X2 %L1,%L2\n" \
> "3:\n" \
> ".section .fixup,\"ax\"\n" \
> "4: li %0,%3\n" \
> @@ -140,7 +140,7 @@ extern long __put_user_bad(void);
> EX_TABLE(1b, 4b) \
> EX_TABLE(2b, 4b) \
> : "=r" (err) \
> - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))

Here, it doesn't work. You don't want two consecutive update insns in
any case. Easiest is to just not use "m<>", and then, don't use %Un
(which won't do anything, but it is confusing).

Same for the reads.

Rest looks fine, and update should be good with that fixed as said.

Reviewed-by: Segher Boessenkool <[email protected]>


Segher

2020-04-16 05:51:44

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

Hi,

Le 16/04/2020 à 00:06, Segher Boessenkool a écrit :
> Hi!
>
> On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
>> At the time being, __put_user()/__get_user() and friends only use
>> register indirect with immediate index addressing, with the index
>> set to 0. Ex:
>>
>> lwz reg1, 0(reg2)
>
> This is called a "D-form" instruction, or sometimes "offset addressing".
> Don't talk about an "index", it confuses things, because the *other*
> kind is called "indexed" already, also in the ISA docs! (X-form, aka
> indexed addressing, [reg+reg], where D-form does [reg+imm], and both
> forms can do [reg]).

In the "Programming Environments Manual for 32-Bit Implementations of
the PowerPC™ Architecture", they list the following addressing modes:

Load and store operations have three categories of effective address
generation that depend on the
operands specified:
• Register indirect with immediate index mode
• Register indirect with index mode
• Register indirect mode


>
>> Give the compiler the opportunity to use other adressing modes
>> whenever possible, to get more optimised code.
>
> Great :-)
>
>> --- a/arch/powerpc/include/asm/uaccess.h
>> +++ b/arch/powerpc/include/asm/uaccess.h
>> @@ -114,7 +114,7 @@ extern long __put_user_bad(void);
>> */
>> #define __put_user_asm(x, addr, err, op) \
>> __asm__ __volatile__( \
>> - "1: " op " %1,0(%2) # put_user\n" \
>> + "1: " op "%U2%X2 %1,%2 # put_user\n" \
>> "2:\n" \
>> ".section .fixup,\"ax\"\n" \
>> "3: li %0,%3\n" \
>> @@ -122,7 +122,7 @@ extern long __put_user_bad(void);
>> ".previous\n" \
>> EX_TABLE(1b, 3b) \
>> : "=r" (err) \
>> - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
>> + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
>
> %Un on an "m" operand doesn't do much: you need to make it "m<>" if you
> want pre-modify ("update") insns to be generated. (You then will want
> to make sure that operand is used in a way GCC can understand; since it
> is used only once here, that works fine).

Ah ? Indeed I got the idea from include/asm/io.h where there is:

#define DEF_MMIO_IN_D(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
u##size ret; \
__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
: "=r" (ret) : "m" (*addr) : "memory"); \
return ret; \
}

It should be "m<>" there as well ?

>
>> @@ -130,8 +130,8 @@ extern long __put_user_bad(void);
>> #else /* __powerpc64__ */
>> #define __put_user_asm2(x, addr, err) \
>> __asm__ __volatile__( \
>> - "1: stw %1,0(%2)\n" \
>> - "2: stw %1+1,4(%2)\n" \
>> + "1: stw%U2%X2 %1,%2\n" \
>> + "2: stw%U2%X2 %L1,%L2\n" \
>> "3:\n" \
>> ".section .fixup,\"ax\"\n" \
>> "4: li %0,%3\n" \
>> @@ -140,7 +140,7 @@ extern long __put_user_bad(void);
>> EX_TABLE(1b, 4b) \
>> EX_TABLE(2b, 4b) \
>> : "=r" (err) \
>> - : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
>> + : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
>
> Here, it doesn't work. You don't want two consecutive update insns in
> any case. Easiest is to just not use "m<>", and then, don't use %Un
> (which won't do anything, but it is confusing).

Can't we leave the Un on the second stw ?

>
> Same for the reads.
>
> Rest looks fine, and update should be good with that fixed as said.
>
> Reviewed-by: Segher Boessenkool <[email protected]>
>
>
> Segher
>

Thanks for the review
Christophe

2020-04-17 01:43:23

by Segher Boessenkool

[permalink] [raw]
Subject: Re: [PATCH] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()

Hi!

On Thu, Apr 16, 2020 at 07:50:00AM +0200, Christophe Leroy wrote:
> Le 16/04/2020 à 00:06, Segher Boessenkool a écrit :
> >On Wed, Apr 15, 2020 at 09:20:26AM +0000, Christophe Leroy wrote:
> >>At the time being, __put_user()/__get_user() and friends only use
> >>register indirect with immediate index addressing, with the index
> >>set to 0. Ex:
> >>
> >> lwz reg1, 0(reg2)
> >
> >This is called a "D-form" instruction, or sometimes "offset addressing".
> >Don't talk about an "index", it confuses things, because the *other*
> >kind is called "indexed" already, also in the ISA docs! (X-form, aka
> >indexed addressing, [reg+reg], where D-form does [reg+imm], and both
> >forms can do [reg]).
>
> In the "Programming Environments Manual for 32-Bit Implementations of
> the PowerPC™ Architecture", they list the following addressing modes:
>
> Load and store operations have three categories of effective address
> generation that depend on the
> operands specified:
> • Register indirect with immediate index mode
> • Register indirect with index mode
> • Register indirect mode

Huh. I must have pushed all that confusing terminology to the back of
my head :-)

> >%Un on an "m" operand doesn't do much: you need to make it "m<>" if you
> >want pre-modify ("update") insns to be generated. (You then will want
> >to make sure that operand is used in a way GCC can understand; since it
> >is used only once here, that works fine).
>
> Ah ? Indeed I got the idea from include/asm/io.h where there is:
>
> #define DEF_MMIO_IN_D(name, size, insn) \
> static inline u##size name(const volatile u##size __iomem *addr) \
> { \
> u##size ret; \
> __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
> : "=r" (ret) : "m" (*addr) : "memory"); \
> return ret; \
> }
>
> It should be "m<>" there as well ?

Yes, that will work here.

Long ago, "m" in inline assembler code did the same as "m<>" now does
(and "m" internal in GCC still does). To get a memory without pre-modify
addressing, you used "es".

Since people kept getting that wrong (it *is* surprising), it was changed
to the current scheme. But the kernel uses weren't updated (and no one
seems to have missed it).

> >> #else /* __powerpc64__ */
> >> #define __put_user_asm2(x, addr, err) \
> >> __asm__ __volatile__( \
> >>- "1: stw %1,0(%2)\n" \
> >>- "2: stw %1+1,4(%2)\n" \
> >>+ "1: stw%U2%X2 %1,%2\n" \
> >>+ "2: stw%U2%X2 %L1,%L2\n" \
> >> "3:\n" \
> >> ".section .fixup,\"ax\"\n" \
> >> "4: li %0,%3\n" \
> >>@@ -140,7 +140,7 @@ extern long __put_user_bad(void);
> >> EX_TABLE(1b, 4b) \
> >> EX_TABLE(2b, 4b) \
> >> : "=r" (err) \
> >>- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
> >>+ : "r" (x), "m" (*addr), "i" (-EFAULT), "0" (err))
> >
> >Here, it doesn't work. You don't want two consecutive update insns in
> >any case. Easiest is to just not use "m<>", and then, don't use %Un
> >(which won't do anything, but it is confusing).
>
> Can't we leave the Un on the second stw ?

That cannot work. You can use it on only the *first* though :-)

And this doesn't work on LE I think? (But the asm doesn't anyway?)

Or, you can decide this is all way too tricky, and not worth it.


Segher