2024-02-16 07:33:29

by Guenter Roeck

[permalink] [raw]
Subject: [PATCH] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

Convert to use real temp variables instead of clobbering processor
registers. This aligns the 64-bit inline assembly code with the 32-bit
assembly code which was rewritten with commit 427c1073a2a1
("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").

While at it, fix comment in 32-bit rewrite code. Temporary variables are
now used for both 32-bit and 64-bit code, so move their declarations
to the function header.

No functional change intended.

Signed-off-by: Guenter Roeck <[email protected]>
---
Implemented while analyzing a bug. I am not really sure if it is worth
the effort, but I figured that I might as well submit it.

arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index c520e551a165..622c7b549fb8 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
{
unsigned long saddr = regs->ior;
- __u64 val = 0;
+ unsigned long shift;
+ __u64 val = 0, temp1;
ASM_EXCEPTIONTABLE_VAR(ret);

DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
@@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)

#ifdef CONFIG_64BIT
__asm__ __volatile__ (
-" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */
-" mtsp %4, %%sr1\n"
-" depd %%r0,63,3,%3\n"
-"1: ldd 0(%%sr1,%3),%0\n"
-"2: ldd 8(%%sr1,%3),%%r20\n"
-" subi 64,%%r19,%%r19\n"
-" mtsar %%r19\n"
-" shrpd %0,%%r20,%%sar,%0\n"
+" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */
+" mtsp %5, %%sr1\n"
+" depd %%r0,63,3,%4\n"
+"1: ldd 0(%%sr1,%4),%0\n"
+"2: ldd 8(%%sr1,%4),%3\n"
+" subi 64,%2,%2\n"
+" mtsar %2\n"
+" shrpd %0,%3,%%sar,%0\n"
"3: \n"
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
- : "=r" (val), "+r" (ret)
- : "0" (val), "r" (saddr), "r" (regs->isr)
- : "r19", "r20" );
+ : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
+ : "r" (saddr), "r" (regs->isr) );
#else
- {
- unsigned long shift, temp1;
__asm__ __volatile__ (
-" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */
+" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */
" mtsp %5, %%sr1\n"
" dep %%r0,31,2,%2\n"
"1: ldw 0(%%sr1,%2),%0\n"
@@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
: "r" (regs->isr) );
- }
#endif

DPRINTF("val = 0x%llx\n", val);
--
2.39.2



2024-02-16 13:35:36

by Helge Deller

[permalink] [raw]
Subject: Re: [PATCH] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

On 2/16/24 08:33, Guenter Roeck wrote:
> Convert to use real temp variables instead of clobbering processor
> registers.

Thanks for doing this.
It has been on my todo list for quite some time :-)

> This aligns the 64-bit inline assembly code with the 32-bit
> assembly code which was rewritten with commit 427c1073a2a1
> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>
> While at it, fix comment in 32-bit rewrite code. Temporary variables are
> now used for both 32-bit and 64-bit code, so move their declarations
> to the function header.
>
> No functional change intended.
>
> Signed-off-by: Guenter Roeck <[email protected]>
> ---
> Implemented while analyzing a bug. I am not really sure of it is worth
> the effort, but I figured that I might as well submit it.
>
> arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
> 1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index c520e551a165..622c7b549fb8 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
> static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
> {
> unsigned long saddr = regs->ior;
> - __u64 val = 0;
> + unsigned long shift;
> + __u64 val = 0, temp1;

temp1 is ok to be "long".

> ASM_EXCEPTIONTABLE_VAR(ret);
>
> DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>
> #ifdef CONFIG_64BIT
> __asm__ __volatile__ (
> -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */
> -" mtsp %4, %%sr1\n"
> -" depd %%r0,63,3,%3\n"
> -"1: ldd 0(%%sr1,%3),%0\n"
> -"2: ldd 8(%%sr1,%3),%%r20\n"
> -" subi 64,%%r19,%%r19\n"
> -" mtsar %%r19\n"
> -" shrpd %0,%%r20,%%sar,%0\n"
> +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */
> +" mtsp %5, %%sr1\n"
> +" depd %%r0,63,3,%4\n"
> +"1: ldd 0(%%sr1,%4),%0\n"
> +"2: ldd 8(%%sr1,%4),%3\n"
> +" subi 64,%2,%2\n"
> +" mtsar %2\n"
> +" shrpd %0,%3,%%sar,%0\n"
> "3: \n"
> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
> - : "=r" (val), "+r" (ret)
> - : "0" (val), "r" (saddr), "r" (regs->isr)
> - : "r19", "r20" );
> + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
> + : "r" (saddr), "r" (regs->isr) );

saddr is actually being modified.
That's why I moved it into the output registers and
moved shift and temp1 back by one position, so that the registers
are now in the same order as on the 32-bit path.

I've pushed the modified patch here:
https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914

Please double-check!

Thanks!
Helge


> #else
> - {
> - unsigned long shift, temp1;
> __asm__ __volatile__ (
> -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */
> +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */
> " mtsp %5, %%sr1\n"
> " dep %%r0,31,2,%2\n"
> "1: ldw 0(%%sr1,%2),%0\n"
> @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
> : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
> : "r" (regs->isr) );
> - }
> #endif
>
> DPRINTF("val = 0x%llx\n", val);


2024-02-16 15:16:20

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

On 2/16/24 05:48, Helge Deller wrote:
> On 2/16/24 08:33, Guenter Roeck wrote:
>> Convert to use real temp variables instead of clobbering processor
>> registers.
>
> Thanks for doing this.
> It was on my todo list since quite some time :-)
>
>> This aligns the 64-bit inline assembly code with the 32-bit
>> assembly code which was rewritten with commit 427c1073a2a1
>> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>>
>> While at it, fix comment in 32-bit rewrite code. Temporary variables are
>> now used for both 32-bit and 64-bit code, so move their declarations
>> to the function header.
>>
>> No functional change intended.
>>
>> Signed-off-by: Guenter Roeck <[email protected]>
>> ---
>> Implemented while analyzing a bug. I am not really sure of it is worth
>> the effort, but I figured that I might as well submit it.
>>
>>   arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>>   1 file changed, 13 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
>> index c520e551a165..622c7b549fb8 100644
>> --- a/arch/parisc/kernel/unaligned.c
>> +++ b/arch/parisc/kernel/unaligned.c
>> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>>   static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>   {
>>       unsigned long saddr = regs->ior;
>> -    __u64 val = 0;
>> +    unsigned long shift;
>> +    __u64 val = 0, temp1;
>
> temp1 is ok to be "long".
>
>>       ASM_EXCEPTIONTABLE_VAR(ret);
>>
>>       DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
>> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>
>>   #ifdef CONFIG_64BIT
>>       __asm__ __volatile__  (
>> -"    depd,z    %3,60,3,%%r19\n"        /* r19=(ofs&7)*8 */
>> -"    mtsp    %4, %%sr1\n"
>> -"    depd    %%r0,63,3,%3\n"
>> -"1:    ldd    0(%%sr1,%3),%0\n"
>> -"2:    ldd    8(%%sr1,%3),%%r20\n"
>> -"    subi    64,%%r19,%%r19\n"
>> -"    mtsar    %%r19\n"
>> -"    shrpd    %0,%%r20,%%sar,%0\n"
>> +"    depd,z    %4,60,3,%2\n"        /* shift=(ofs&7)*8 */
>> +"    mtsp    %5, %%sr1\n"
>> +"    depd    %%r0,63,3,%4\n"
>> +"1:    ldd    0(%%sr1,%4),%0\n"
>> +"2:    ldd    8(%%sr1,%4),%3\n"
>> +"    subi    64,%2,%2\n"
>> +"    mtsar    %2\n"
>> +"    shrpd    %0,%3,%%sar,%0\n"
>>   "3:    \n"
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
>> -    : "=r" (val), "+r" (ret)
>> -    : "0" (val), "r" (saddr), "r" (regs->isr)
>> -    : "r19", "r20" );
>> +    : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
>> +    : "r" (saddr), "r" (regs->isr) );
>
> addr is actually being modified.
> That's why I moved it into the output registers and
> shuffled shift and temp1 one backwards, so that the registers
> are now in the same ordering as on the 32-bit path.
>
> I've pushed the modified patch here:
> https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914
>
> Please double-check!
>
Confirmed working.

Thanks,
Guenter

> Thanks!
> Helge
>
>
>>   #else
>> -    {
>> -    unsigned long shift, temp1;
>>       __asm__ __volatile__  (
>> -"    zdep    %2,29,2,%3\n"        /* r19=(ofs&3)*8 */
>> +"    zdep    %2,29,2,%3\n"        /* shift=(ofs&3)*8 */
>>   "    mtsp    %5, %%sr1\n"
>>   "    dep    %%r0,31,2,%2\n"
>>   "1:    ldw    0(%%sr1,%2),%0\n"
>> @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
>>       : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
>>       : "r" (regs->isr) );
>> -    }
>>   #endif
>>
>>       DPRINTF("val = 0x%llx\n", val);
>


2024-02-26 19:54:27

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

Hi Helge,

On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote:
> Convert to use real temp variables instead of clobbering processor
> registers. This aligns the 64-bit inline assembly code with the 32-bit
> assembly code which was rewritten with commit 427c1073a2a1
> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>
> While at it, fix comment in 32-bit rewrite code. Temporary variables are
> now used for both 32-bit and 64-bit code, so move their declarations
> to the function header.
>
> No functional change intended.
>
> Signed-off-by: Guenter Roeck <[email protected]>
> ---
> Implemented while analyzing a bug. I am not really sure of it is worth
> the effort, but I figured that I might as well submit it.
>
> arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
> 1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index c520e551a165..622c7b549fb8 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
> static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
> {
> unsigned long saddr = regs->ior;
> - __u64 val = 0;
> + unsigned long shift;
> + __u64 val = 0, temp1;
> ASM_EXCEPTIONTABLE_VAR(ret);
>
> DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>
> #ifdef CONFIG_64BIT
> __asm__ __volatile__ (
> -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */
> -" mtsp %4, %%sr1\n"
> -" depd %%r0,63,3,%3\n"
> -"1: ldd 0(%%sr1,%3),%0\n"
> -"2: ldd 8(%%sr1,%3),%%r20\n"
> -" subi 64,%%r19,%%r19\n"
> -" mtsar %%r19\n"
> -" shrpd %0,%%r20,%%sar,%0\n"
> +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */
> +" mtsp %5, %%sr1\n"
> +" depd %%r0,63,3,%4\n"
> +"1: ldd 0(%%sr1,%4),%0\n"
> +"2: ldd 8(%%sr1,%4),%3\n"
> +" subi 64,%2,%2\n"
> +" mtsar %2\n"
> +" shrpd %0,%3,%%sar,%0\n"
> "3: \n"
> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
> - : "=r" (val), "+r" (ret)
> - : "0" (val), "r" (saddr), "r" (regs->isr)
> - : "r19", "r20" );
> + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
> + : "r" (saddr), "r" (regs->isr) );

It looks like something went wrong when this patch was applied. It is now

+" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */
..
+ : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+ : "r" (regs->isr) );

meaning saddr is now %2, but the depd,z instruction
still assumes it is %4. Unfortunately this results in a crash
when trying to boot linux-next on parisc64.

The patch below on top of linux-next fixes the problem for me.

Guenter

---
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 31974eddedc9..a8e75e5b884a 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)

#ifdef CONFIG_64BIT
__asm__ __volatile__ (
-" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */
+" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */
" mtsp %5, %%sr1\n"
" depd %%r0,63,3,%2\n"
"1: ldd 0(%%sr1,%2),%0\n"

2024-02-26 20:18:29

by Helge Deller

[permalink] [raw]
Subject: Re: [PATCH] parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

On 2/26/24 20:29, Guenter Roeck wrote:
> Hi Helge,
>
> On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote:
>> Convert to use real temp variables instead of clobbering processor
>> registers. This aligns the 64-bit inline assembly code with the 32-bit
>> assembly code which was rewritten with commit 427c1073a2a1
>> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>>
>> While at it, fix comment in 32-bit rewrite code. Temporary variables are
>> now used for both 32-bit and 64-bit code, so move their declarations
>> to the function header.
>>
>> No functional change intended.
>>
>> Signed-off-by: Guenter Roeck <[email protected]>
>> ---
>> Implemented while analyzing a bug. I am not really sure of it is worth
>> the effort, but I figured that I might as well submit it.
>>
>> arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>> 1 file changed, 13 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
>> index c520e551a165..622c7b549fb8 100644
>> --- a/arch/parisc/kernel/unaligned.c
>> +++ b/arch/parisc/kernel/unaligned.c
>> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>> static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>> {
>> unsigned long saddr = regs->ior;
>> - __u64 val = 0;
>> + unsigned long shift;
>> + __u64 val = 0, temp1;
>> ASM_EXCEPTIONTABLE_VAR(ret);
>>
>> DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
>> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>
>> #ifdef CONFIG_64BIT
>> __asm__ __volatile__ (
>> -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */
>> -" mtsp %4, %%sr1\n"
>> -" depd %%r0,63,3,%3\n"
>> -"1: ldd 0(%%sr1,%3),%0\n"
>> -"2: ldd 8(%%sr1,%3),%%r20\n"
>> -" subi 64,%%r19,%%r19\n"
>> -" mtsar %%r19\n"
>> -" shrpd %0,%%r20,%%sar,%0\n"
>> +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */
>> +" mtsp %5, %%sr1\n"
>> +" depd %%r0,63,3,%4\n"
>> +"1: ldd 0(%%sr1,%4),%0\n"
>> +"2: ldd 8(%%sr1,%4),%3\n"
>> +" subi 64,%2,%2\n"
>> +" mtsar %2\n"
>> +" shrpd %0,%3,%%sar,%0\n"
>> "3: \n"
>> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
>> - : "=r" (val), "+r" (ret)
>> - : "0" (val), "r" (saddr), "r" (regs->isr)
>> - : "r19", "r20" );
>> + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
>> + : "r" (saddr), "r" (regs->isr) );
>
> It looks like something went wrong when this patch was applied.

I think this was my fault when I tried to reshuffle the input vars :-(

> It is now
>
> +" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */
> ...
> + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
> + : "r" (regs->isr) );
>
> meaning saddr is now %2, but the depd,z instruction
> still assumes it is %4. Unfortunately this results in a crash
> when trying to boot linux-next on parisc64.
>
> The patch below on top of linux-next fixes the problem for me.

I fixed it up with your hunk below in the parisc for-next branch, so it should be
fixed in linux-next soon.

THANKS!

Helge



> Guenter
>
> ---
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index 31974eddedc9..a8e75e5b884a 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>
> #ifdef CONFIG_64BIT
> __asm__ __volatile__ (
> -" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */
> +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */
> " mtsp %5, %%sr1\n"
> " depd %%r0,63,3,%2\n"
> "1: ldd 0(%%sr1,%2),%0\n"