2024-03-14 16:57:46

by Alexey Dobriyan

[permalink] [raw]
Subject: [PATCH 1/1] x86_64: cleanup memset16(), memset32(), memset64()

* use "+" constraint modifier,
simplify inputs and output lists,
delete dummy variables with meaningless names,

"&" only makes sense in complex assembly creating constraints on
intermediate registers. But single-instruction assembly doesn't have an
inner body, so to speak.

* write "rep stos*" on one line:
The rep prefix is an integral part of an x86 instruction. I'm not sure why
people separate "rep" with a newline or semicolon, pretending rep is a
separate instruction. It is not and never was.

* use __auto_type for maximum copy pasta experience,

* reformat a bit to make everything look nicer.

Note that "memory" clobber is too much if "n" is known at compile time.
However,

"=m" (*(T(*)[n])s)

doesn't work because it triggers -Wvla even if "n" is a compile-time constant:

if (BCP(n)) {
rep stos
: "=m" (*(T(*)[n])s)
} else {
rep stosw
: "memory"
}

The above doesn't work.

Signed-off-by: Alexey Dobriyan <[email protected]>
---
arch/x86/include/asm/string_64.h | 45 +++++++++++++++++---------------
1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 857d364b9888..9d0b324eab21 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosw"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosw"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}

#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosl"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosl"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}

#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosq"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosq"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#endif

--
2.43.2



2024-03-22 10:37:54

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 1/1] x86_64: cleanup memset16(), memset32(), memset64()

On Fri, Mar 22, 2024 at 11:05 AM Alexey Dobriyan <[email protected]> wrote:
>
> * use "+" constraint modifier,
> simplify inputs and output lists,
> delete dummy variables with meaningless names,
>
> "&" only makes sense in complex assembly creating constraints on
> intermediate registers. But 1 instruction assemblies don't have
> inner body so to speak.
>
> * write "rep stos*" on one line:
> Rep prefix is integral part of x86 instruction. I'm not sure why
> people separate "rep" with newline or semicolon pretending rep is
> separate instruction. It is not and never was.

The reason for this was that some archaic assemblers rejected "rep
insn" on one line. I have checked that the minimum required
binutils-2.25 assembles this without problems.

> * use __auto_type for maximum copy pasta experience,
>
> * reformat a bit to make everything looks nicer.
>
> Note that "memory" clobber is too much if "n" is known at compile time.
> However,
>
> "=m" (*(T(*)[n])s)
>
> doesn't work because -Wvla even if "n" is compile time constant:
>
> if (BCP(n)) {
> rep stos
> : "=m" (*(T(*)[n])s)
> } else {
> rep stosw
> : "memory"
> }
>
> The above doesn't work.
>
> Signed-off-by: Alexey Dobriyan <[email protected]>

Reviewed-by: Uros Bizjak <[email protected]>

> ---
> arch/x86/include/asm/string_64.h | 45 +++++++++++++++++---------------
> 1 file changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
> index 857d364b9888..9d0b324eab21 100644
> --- a/arch/x86/include/asm/string_64.h
> +++ b/arch/x86/include/asm/string_64.h
> @@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n);
> #define __HAVE_ARCH_MEMSET16
> static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
> {
> - long d0, d1;
> - asm volatile("rep\n\t"
> - "stosw"
> - : "=&c" (d0), "=&D" (d1)
> - : "a" (v), "1" (s), "0" (n)
> - : "memory");
> - return s;
> + const __auto_type s0 = s;
> + asm volatile (
> + "rep stosw"
> + : "+D" (s), "+c" (n)
> + : "a" (v)
> + : "memory"
> + );
> + return s0;
> }
>
> #define __HAVE_ARCH_MEMSET32
> static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
> {
> - long d0, d1;
> - asm volatile("rep\n\t"
> - "stosl"
> - : "=&c" (d0), "=&D" (d1)
> - : "a" (v), "1" (s), "0" (n)
> - : "memory");
> - return s;
> + const __auto_type s0 = s;
> + asm volatile (
> + "rep stosl"
> + : "+D" (s), "+c" (n)
> + : "a" (v)
> + : "memory"
> + );
> + return s0;
> }
>
> #define __HAVE_ARCH_MEMSET64
> static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
> {
> - long d0, d1;
> - asm volatile("rep\n\t"
> - "stosq"
> - : "=&c" (d0), "=&D" (d1)
> - : "a" (v), "1" (s), "0" (n)
> - : "memory");
> - return s;
> + const __auto_type s0 = s;
> + asm volatile (
> + "rep stosq"
> + : "+D" (s), "+c" (n)
> + : "a" (v)
> + : "memory"
> + );
> + return s0;
> }
> #endif
>
> --
> 2.43.2
>

2024-03-22 11:08:07

by tip-bot2 for Alexey Dobriyan

[permalink] [raw]
Subject: [tip: x86/asm] x86/asm/64: Clean up memset16(), memset32(), memset64() assembly constraints in <asm/string_64.h>

The following commit has been merged into the x86/asm branch of tip:

Commit-ID: 4c9a93800121e90484cd07c8e5bde70e31cdb996
Gitweb: https://git.kernel.org/tip/4c9a93800121e90484cd07c8e5bde70e31cdb996
Author: Alexey Dobriyan <[email protected]>
AuthorDate: Thu, 14 Mar 2024 19:57:15 +03:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 22 Mar 2024 11:47:34 +01:00

x86/asm/64: Clean up memset16(), memset32(), memset64() assembly constraints in <asm/string_64.h>

- Use "+" constraint modifier,
simplify inputs and output lists,
delete dummy variables with meaningless names,

"&" only makes sense in complex assembly creating constraints on
intermediate registers. But single-instruction assembly doesn't have an
inner body, so to speak.

- Write "rep stos*" on one line:
The rep prefix is an integral part of an x86 instruction. I'm not sure why
people separate "rep" with a newline.

Uros Bizjak adds context: "some archaic assemblers rejected 'rep
insn' on one line. I have checked that the minimum required
binutils-2.25 assembles this without problems."

- Use __auto_type for maximum copy pasta experience,

- Reformat a bit to make everything look nicer.

Note that "memory" clobber is too much if "n" is known at compile time.
However,

"=m" (*(T(*)[n])s)

doesn't work because it triggers -Wvla even if "n" is a compile-time constant:

if (BCP(n)) {
rep stos
: "=m" (*(T(*)[n])s)
} else {
rep stosw
: "memory"
}

The above doesn't work.

Signed-off-by: Alexey Dobriyan <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Uros Bizjak <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/include/asm/string_64.h | 45 ++++++++++++++++---------------
1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 857d364..9d0b324 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosw"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosw"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}

#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosl"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosl"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}

#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosq"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosq"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#endif