Output register is always RAX/EAX.
Signed-off-by: Alexey Dobriyan <[email protected]>
---
arch/x86/include/asm/arch_hweight.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -6,10 +6,8 @@
#ifdef CONFIG_64BIT
#define REG_IN "D"
-#define REG_OUT "a"
#else
#define REG_IN "a"
-#define REG_OUT "a"
#endif
static __always_inline unsigned int __arch_hweight32(unsigned int w)
@@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));
return res;
@@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));
return res;
On Sun, Jul 28, 2019 at 02:51:40PM +0300, Alexey Dobriyan wrote:
> Output register is always RAX/EAX.
>
> Signed-off-by: Alexey Dobriyan <[email protected]>
> ---
>
> arch/x86/include/asm/arch_hweight.h | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -6,10 +6,8 @@
>
> #ifdef CONFIG_64BIT
> #define REG_IN "D"
> -#define REG_OUT "a"
> #else
> #define REG_IN "a"
> -#define REG_OUT "a"
> #endif
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> @@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;
> @@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
> unsigned long res;
>
> asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;
I _think_ something like the below should also work:
(fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)
---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..88704612b2f7 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
On Mon, Jul 29, 2019 at 11:43:29AM +0200, Peter Zijlstra wrote:
> I _think_ something like the below should also work:
>
> (fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)
Fixed that, because
> ---
> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
> index ba88edd0d58b..88704612b2f7 100644
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -3,22 +3,15 @@
> #define _ASM_X86_HWEIGHT_H
>
> #include <asm/cpufeatures.h>
> -
> -#ifdef CONFIG_64BIT
> -#define REG_IN "D"
> -#define REG_OUT "a"
> -#else
> -#define REG_IN "a"
> -#define REG_OUT "a"
> -#endif
> +#include <asm/asm.h>
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> {
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> - : REG_IN (w));
> + : "=a" (res)
> + : _ASM_ARG1 (w));
That needs _ASM_ARG1L because popcntl..
---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3cab7f382a43 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1L (w));
return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..0bb0bbcd4551 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -53,17 +53,17 @@
#define _ASM_ARG2 _ASM_DX
#define _ASM_ARG3 _ASM_CX
-#define _ASM_ARG1L eax
-#define _ASM_ARG2L edx
-#define _ASM_ARG3L ecx
+#define _ASM_ARG1L _ASM_ARG1
+#define _ASM_ARG2L _ASM_ARG2
+#define _ASM_ARG3L _ASM_ARG3
-#define _ASM_ARG1W ax
-#define _ASM_ARG2W dx
-#define _ASM_ARG3W cx
+#define _ASM_ARG1W __ASM_FORM_RAW(ax)
+#define _ASM_ARG2W __ASM_FORM_RAW(dx)
+#define _ASM_ARG3W __ASM_FORM_RAW(cx)
-#define _ASM_ARG1B al
-#define _ASM_ARG2B dl
-#define _ASM_ARG3B cl
+#define _ASM_ARG1B __ASM_FORM_RAW(al)
+#define _ASM_ARG2B __ASM_FORM_RAW(dl)
+#define _ASM_ARG3B __ASM_FORM_RAW(cl)
#else
/* 64 bit */
@@ -72,36 +72,29 @@
#define _ASM_ARG2 _ASM_SI
#define _ASM_ARG3 _ASM_DX
#define _ASM_ARG4 _ASM_CX
-#define _ASM_ARG5 r8
-#define _ASM_ARG6 r9
-
-#define _ASM_ARG1Q rdi
-#define _ASM_ARG2Q rsi
-#define _ASM_ARG3Q rdx
-#define _ASM_ARG4Q rcx
-#define _ASM_ARG5Q r8
-#define _ASM_ARG6Q r9
-
-#define _ASM_ARG1L edi
-#define _ASM_ARG2L esi
-#define _ASM_ARG3L edx
-#define _ASM_ARG4L ecx
-#define _ASM_ARG5L r8d
-#define _ASM_ARG6L r9d
-
-#define _ASM_ARG1W di
-#define _ASM_ARG2W si
-#define _ASM_ARG3W dx
-#define _ASM_ARG4W cx
-#define _ASM_ARG5W r8w
-#define _ASM_ARG6W r9w
-
-#define _ASM_ARG1B dil
-#define _ASM_ARG2B sil
-#define _ASM_ARG3B dl
-#define _ASM_ARG4B cl
-#define _ASM_ARG5B r8b
-#define _ASM_ARG6B r9b
+#define _ASM_ARG5 __ASM_REG(8)
+#define _ASM_ARG6 __ASM_REG(9)
+
+#define _ASM_ARG1L __ASM_FORM_RAW(edi)
+#define _ASM_ARG2L __ASM_FORM_RAW(esi)
+#define _ASM_ARG3L __ASM_FORM_RAW(edx)
+#define _ASM_ARG4L __ASM_FORM_RAW(ecx)
+#define _ASM_ARG5L __ASM_FORM_RAW(r8d)
+#define _ASM_ARG6L __ASM_FORM_RAW(r9d)
+
+#define _ASM_ARG1W __ASM_FORM_RAW(di)
+#define _ASM_ARG2W __ASM_FORM_RAW(si)
+#define _ASM_ARG3W __ASM_FORM_RAW(dx)
+#define _ASM_ARG4W __ASM_FORM_RAW(cx)
+#define _ASM_ARG5W __ASM_FORM_RAW(r8w)
+#define _ASM_ARG6W __ASM_FORM_RAW(r9w)
+
+#define _ASM_ARG1B __ASM_FORM_RAW(dil)
+#define _ASM_ARG2B __ASM_FORM_RAW(sil)
+#define _ASM_ARG3B __ASM_FORM_RAW(dl)
+#define _ASM_ARG4B __ASM_FORM_RAW(cl)
+#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
+#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
#endif
On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
I preprocessed percpu code once to see what precisely it does because
it was easier than wading through forest of macroes.
Hopefully x86 assembly won't get to that level.
On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>
> I preprocessed percpu code once to see what precisely it does because
> it was easier than wading through forest of macroes.
Per cpu is easy, try reading the tracepoint code ;-)
On July 30, 2019 1:08:43 AM PDT, Peter Zijlstra <[email protected]> wrote:
>On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
>> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
>> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
>> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
>> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
>> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
>> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
>> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>>
>> I preprocessed percpu code once to see what precisely it does because
>> it was easier than wading through forest of macroes.
>
>Per cpu is easy, try reading the tracepoint code ;-)
Sometimes it's the .s file one ends up wanting to check out...
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.