2021-05-07 22:13:42

by Arnd Bergmann

[permalink] [raw]
Subject: [RFC 06/12] asm-generic: unaligned: remove byteshift helpers

From: Arnd Bergmann <[email protected]>

In theory, compilers should be able to work this out themselves so we
can use a simpler version based on the swab() helpers.

I have verified that this works on all supported compiler versions
(gcc-4.9 and up, clang-10 and up). Looking at the object code produced by
gcc-11, I found that the impact is mostly a change in inlining decisions
that lead to slightly larger code.

In other cases, this version produces explicit byte swaps in place of
separate byte access, or comparing against pre-swapped constants.

While the source code is clearly simpler, I have not seen an indication
of the new version actually producing better code on Arm, so maybe
we want to skip this after all. From what I can tell, gcc recognizes
the byteswap pattern in the byteshift.h header and can turn it into
explicit instructions, but it does not turn a __builtin_bswap32() back
into individual bytes when that would result in better output, e.g.
when storing a byte-reversed constant.

Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Arnd Bergmann <[email protected]>
---
I've included this patch in the series because Linus asked about
removing the byteshift version, but after trying it out, I'd
prefer to drop this and use the byteshift version for the
generic code as well.
---
arch/arm/include/asm/unaligned.h | 2 -
include/asm-generic/unaligned.h | 2 -
include/linux/unaligned/be_byteshift.h | 71 --------------------------
include/linux/unaligned/be_struct.h | 30 +++++++++++
include/linux/unaligned/le_byteshift.h | 71 --------------------------
include/linux/unaligned/le_struct.h | 30 +++++++++++
6 files changed, 60 insertions(+), 146 deletions(-)
delete mode 100644 include/linux/unaligned/be_byteshift.h
delete mode 100644 include/linux/unaligned/le_byteshift.h

diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
index ab905ffcf193..3c5248fb4cdc 100644
--- a/arch/arm/include/asm/unaligned.h
+++ b/arch/arm/include/asm/unaligned.h
@@ -10,13 +10,11 @@

#if defined(__LITTLE_ENDIAN)
# include <linux/unaligned/le_struct.h>
-# include <linux/unaligned/be_byteshift.h>
# include <linux/unaligned/generic.h>
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#elif defined(__BIG_ENDIAN)
# include <linux/unaligned/be_struct.h>
-# include <linux/unaligned/le_byteshift.h>
# include <linux/unaligned/generic.h>
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
index 374c940e9be1..d79df721ae60 100644
--- a/include/asm-generic/unaligned.h
+++ b/include/asm-generic/unaligned.h
@@ -16,7 +16,6 @@
#if defined(__LITTLE_ENDIAN)
# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
# include <linux/unaligned/le_struct.h>
-# include <linux/unaligned/be_byteshift.h>
# endif
# include <linux/unaligned/generic.h>
# define get_unaligned __get_unaligned_le
@@ -24,7 +23,6 @@
#elif defined(__BIG_ENDIAN)
# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
# include <linux/unaligned/be_struct.h>
-# include <linux/unaligned/le_byteshift.h>
# endif
# include <linux/unaligned/generic.h>
# define get_unaligned __get_unaligned_be
diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h
deleted file mode 100644
index c43ff5918c8a..000000000000
--- a/include/linux/unaligned/be_byteshift.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
-#define _LINUX_UNALIGNED_BE_BYTESHIFT_H
-
-#include <linux/types.h>
-
-static inline u16 __get_unaligned_be16(const u8 *p)
-{
- return p[0] << 8 | p[1];
-}
-
-static inline u32 __get_unaligned_be32(const u8 *p)
-{
- return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
-}
-
-static inline u64 __get_unaligned_be64(const u8 *p)
-{
- return (u64)__get_unaligned_be32(p) << 32 |
- __get_unaligned_be32(p + 4);
-}
-
-static inline void __put_unaligned_be16(u16 val, u8 *p)
-{
- *p++ = val >> 8;
- *p++ = val;
-}
-
-static inline void __put_unaligned_be32(u32 val, u8 *p)
-{
- __put_unaligned_be16(val >> 16, p);
- __put_unaligned_be16(val, p + 2);
-}
-
-static inline void __put_unaligned_be64(u64 val, u8 *p)
-{
- __put_unaligned_be32(val >> 32, p);
- __put_unaligned_be32(val, p + 4);
-}
-
-static inline u16 get_unaligned_be16(const void *p)
-{
- return __get_unaligned_be16(p);
-}
-
-static inline u32 get_unaligned_be32(const void *p)
-{
- return __get_unaligned_be32(p);
-}
-
-static inline u64 get_unaligned_be64(const void *p)
-{
- return __get_unaligned_be64(p);
-}
-
-static inline void put_unaligned_be16(u16 val, void *p)
-{
- __put_unaligned_be16(val, p);
-}
-
-static inline void put_unaligned_be32(u32 val, void *p)
-{
- __put_unaligned_be32(val, p);
-}
-
-static inline void put_unaligned_be64(u64 val, void *p)
-{
- __put_unaligned_be64(val, p);
-}
-
-#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */
diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h
index 15ea503a13fc..76d9fe297c33 100644
--- a/include/linux/unaligned/be_struct.h
+++ b/include/linux/unaligned/be_struct.h
@@ -34,4 +34,34 @@ static inline void put_unaligned_be64(u64 val, void *p)
__put_unaligned_cpu64(val, p);
}

+static inline u16 get_unaligned_le16(const void *p)
+{
+ return swab16(__get_unaligned_cpu16((const u8 *)p));
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return swab32(__get_unaligned_cpu32((const u8 *)p));
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return swab64(__get_unaligned_cpu64((const u8 *)p));
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_cpu16(swab16(val), p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_cpu32(swab32(val), p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_cpu64(swab64(val), p);
+}
+
#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */
diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h
deleted file mode 100644
index 2248dcb0df76..000000000000
--- a/include/linux/unaligned/le_byteshift.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
-#define _LINUX_UNALIGNED_LE_BYTESHIFT_H
-
-#include <linux/types.h>
-
-static inline u16 __get_unaligned_le16(const u8 *p)
-{
- return p[0] | p[1] << 8;
-}
-
-static inline u32 __get_unaligned_le32(const u8 *p)
-{
- return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
-}
-
-static inline u64 __get_unaligned_le64(const u8 *p)
-{
- return (u64)__get_unaligned_le32(p + 4) << 32 |
- __get_unaligned_le32(p);
-}
-
-static inline void __put_unaligned_le16(u16 val, u8 *p)
-{
- *p++ = val;
- *p++ = val >> 8;
-}
-
-static inline void __put_unaligned_le32(u32 val, u8 *p)
-{
- __put_unaligned_le16(val >> 16, p + 2);
- __put_unaligned_le16(val, p);
-}
-
-static inline void __put_unaligned_le64(u64 val, u8 *p)
-{
- __put_unaligned_le32(val >> 32, p + 4);
- __put_unaligned_le32(val, p);
-}
-
-static inline u16 get_unaligned_le16(const void *p)
-{
- return __get_unaligned_le16(p);
-}
-
-static inline u32 get_unaligned_le32(const void *p)
-{
- return __get_unaligned_le32(p);
-}
-
-static inline u64 get_unaligned_le64(const void *p)
-{
- return __get_unaligned_le64(p);
-}
-
-static inline void put_unaligned_le16(u16 val, void *p)
-{
- __put_unaligned_le16(val, p);
-}
-
-static inline void put_unaligned_le32(u32 val, void *p)
-{
- __put_unaligned_le32(val, p);
-}
-
-static inline void put_unaligned_le64(u64 val, void *p)
-{
- __put_unaligned_le64(val, p);
-}
-
-#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */
diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h
index 9977987883a6..22f90a4afaa5 100644
--- a/include/linux/unaligned/le_struct.h
+++ b/include/linux/unaligned/le_struct.h
@@ -34,4 +34,34 @@ static inline void put_unaligned_le64(u64 val, void *p)
__put_unaligned_cpu64(val, p);
}

+static inline u16 get_unaligned_be16(const void *p)
+{
+ return swab16(__get_unaligned_cpu16((const u8 *)p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return swab32(__get_unaligned_cpu32((const u8 *)p));
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return swab64(__get_unaligned_cpu64((const u8 *)p));
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_cpu16(swab16(val), p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_cpu32(swab32(val), p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_cpu64(swab64(val), p);
+}
+
#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */
--
2.29.2


2021-05-08 11:40:09

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC 06/12] asm-generic: unaligned: remove byteshift helpers

On Sat, May 8, 2021 at 12:11 AM Arnd Bergmann <[email protected]> wrote:
>
> Suggested-by: Linus Torvalds <[email protected]>
> Signed-off-by: Arnd Bergmann <[email protected]>
> ---
> I've included this patch in the series because Linus asked about
> removing the byteshift version, but after trying it out, I'd
> prefer to drop this and use the byteshift version for the
> generic code as well.

Update: I've tried to create a small test case that illustrates the problem
with using the swab() based version, however all cases combinations
I tried showed either both producing identical output, or the shift
version being worse, see https://godbolt.org/z/bTdsjnjfT

I'll rewrite the patch description accordingly and leave this in place.

Arnd