2019-07-28 11:52:37

by Alexey Dobriyan

[permalink] [raw]
Subject: [PATCH] x86: drop REG_OUT macro from hweight functions

Output register is always RAX/EAX.

Signed-off-by: Alexey Dobriyan <[email protected]>
---

arch/x86/include/asm/arch_hweight.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -6,10 +6,8 @@

#ifdef CONFIG_64BIT
#define REG_IN "D"
-#define REG_OUT "a"
#else
#define REG_IN "a"
-#define REG_OUT "a"
#endif

static __always_inline unsigned int __arch_hweight32(unsigned int w)
@@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
unsigned int res;

asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));

return res;
@@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;

asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));

return res;


2019-07-29 09:46:46

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] x86: drop REG_OUT macro from hweight functions

On Sun, Jul 28, 2019 at 02:51:40PM +0300, Alexey Dobriyan wrote:
> Output register is always RAX/EAX.
>
> Signed-off-by: Alexey Dobriyan <[email protected]>
> ---
>
> arch/x86/include/asm/arch_hweight.h | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -6,10 +6,8 @@
>
> #ifdef CONFIG_64BIT
> #define REG_IN "D"
> -#define REG_OUT "a"
> #else
> #define REG_IN "a"
> -#define REG_OUT "a"
> #endif
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> @@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;
> @@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
> unsigned long res;
>
> asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;

I _think_ something like the below should also work:

(fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)

---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..88704612b2f7 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H

#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>

static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));

return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;

asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));

return res;
}

2019-07-29 10:07:23

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] x86: drop REG_OUT macro from hweight functions

On Mon, Jul 29, 2019 at 11:43:29AM +0200, Peter Zijlstra wrote:

> I _think_ something like the below should also work:
>
> (fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)

Fixed that, because

> ---
> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
> index ba88edd0d58b..88704612b2f7 100644
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -3,22 +3,15 @@
> #define _ASM_X86_HWEIGHT_H
>
> #include <asm/cpufeatures.h>
> -
> -#ifdef CONFIG_64BIT
> -#define REG_IN "D"
> -#define REG_OUT "a"
> -#else
> -#define REG_IN "a"
> -#define REG_OUT "a"
> -#endif
> +#include <asm/asm.h>
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> {
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> - : REG_IN (w));
> + : "=a" (res)
> + : _ASM_ARG1 (w));

That needs _ASM_ARG1L because popcntl..

---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3cab7f382a43 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H

#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>

static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1L (w));

return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;

asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));

return res;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..0bb0bbcd4551 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -53,17 +53,17 @@
#define _ASM_ARG2 _ASM_DX
#define _ASM_ARG3 _ASM_CX

-#define _ASM_ARG1L eax
-#define _ASM_ARG2L edx
-#define _ASM_ARG3L ecx
+#define _ASM_ARG1L _ASM_ARG1
+#define _ASM_ARG2L _ASM_ARG2
+#define _ASM_ARG3L _ASM_ARG3

-#define _ASM_ARG1W ax
-#define _ASM_ARG2W dx
-#define _ASM_ARG3W cx
+#define _ASM_ARG1W __ASM_FORM_RAW(ax)
+#define _ASM_ARG2W __ASM_FORM_RAW(dx)
+#define _ASM_ARG3W __ASM_FORM_RAW(cx)

-#define _ASM_ARG1B al
-#define _ASM_ARG2B dl
-#define _ASM_ARG3B cl
+#define _ASM_ARG1B __ASM_FORM_RAW(al)
+#define _ASM_ARG2B __ASM_FORM_RAW(dl)
+#define _ASM_ARG3B __ASM_FORM_RAW(cl)

#else
/* 64 bit */
@@ -72,36 +72,29 @@
#define _ASM_ARG2 _ASM_SI
#define _ASM_ARG3 _ASM_DX
#define _ASM_ARG4 _ASM_CX
-#define _ASM_ARG5 r8
-#define _ASM_ARG6 r9
-
-#define _ASM_ARG1Q rdi
-#define _ASM_ARG2Q rsi
-#define _ASM_ARG3Q rdx
-#define _ASM_ARG4Q rcx
-#define _ASM_ARG5Q r8
-#define _ASM_ARG6Q r9
-
-#define _ASM_ARG1L edi
-#define _ASM_ARG2L esi
-#define _ASM_ARG3L edx
-#define _ASM_ARG4L ecx
-#define _ASM_ARG5L r8d
-#define _ASM_ARG6L r9d
-
-#define _ASM_ARG1W di
-#define _ASM_ARG2W si
-#define _ASM_ARG3W dx
-#define _ASM_ARG4W cx
-#define _ASM_ARG5W r8w
-#define _ASM_ARG6W r9w
-
-#define _ASM_ARG1B dil
-#define _ASM_ARG2B sil
-#define _ASM_ARG3B dl
-#define _ASM_ARG4B cl
-#define _ASM_ARG5B r8b
-#define _ASM_ARG6B r9b
+#define _ASM_ARG5 __ASM_REG(8)
+#define _ASM_ARG6 __ASM_REG(9)
+
+#define _ASM_ARG1L __ASM_FORM_RAW(edi)
+#define _ASM_ARG2L __ASM_FORM_RAW(esi)
+#define _ASM_ARG3L __ASM_FORM_RAW(edx)
+#define _ASM_ARG4L __ASM_FORM_RAW(ecx)
+#define _ASM_ARG5L __ASM_FORM_RAW(r8d)
+#define _ASM_ARG6L __ASM_FORM_RAW(r9d)
+
+#define _ASM_ARG1W __ASM_FORM_RAW(di)
+#define _ASM_ARG2W __ASM_FORM_RAW(si)
+#define _ASM_ARG3W __ASM_FORM_RAW(dx)
+#define _ASM_ARG4W __ASM_FORM_RAW(cx)
+#define _ASM_ARG5W __ASM_FORM_RAW(r8w)
+#define _ASM_ARG6W __ASM_FORM_RAW(r9w)
+
+#define _ASM_ARG1B __ASM_FORM_RAW(dil)
+#define _ASM_ARG2B __ASM_FORM_RAW(sil)
+#define _ASM_ARG3B __ASM_FORM_RAW(dl)
+#define _ASM_ARG4B __ASM_FORM_RAW(cl)
+#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
+#define _ASM_ARG6B __ASM_FORM_RAW(r9b)

#endif

2019-07-29 20:45:34

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH] x86: drop REG_OUT macro from hweight functions

On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)

I preprocessed percpu code once to see what precisely it does because
it was easier than wading through forest of macroes.

Hopefully x86 assembly won't get to that level.

2019-07-30 11:07:37

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] x86: drop REG_OUT macro from hweight functions

On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>
> I preprocessed percpu code once to see what precisely it does because
> it was easier than wading through forest of macroes.

Per cpu is easy, try reading the tracepoint code ;-)

2019-07-30 16:17:58

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] x86: drop REG_OUT macro from hweight functions

On July 30, 2019 1:08:43 AM PDT, Peter Zijlstra <[email protected]> wrote:
>On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
>> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
>> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
>> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
>> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
>> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
>> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
>> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>>
>> I preprocessed percpu code once to see what precisely it does because
>> it was easier than wading through forest of macroes.
>
>Per cpu is easy, try reading the tracepoint code ;-)

Sometimes it's the .s file one ends up wanting to check out...
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.