2021-05-07 22:13:59

by Arnd Bergmann

[permalink] [raw]
Subject: [RFC 12/12] asm-generic: simplify asm/unaligned.h

From: Arnd Bergmann <[email protected]>

The get_unaligned()/put_unaligned() implementations are much more complex
than necessary, now that all architectures use the same code.

Move everything into one file and use a much more compact way to express
the same logic.

I've compared the binary output using gcc-11 across defconfig builds for
all architectures and found this patch to make no difference, except for
a single function on powerpc that needs two additional register moves
because of random differences in register allocation.

There are a handful of callers of the low-level __get_unaligned_cpu32,
so leave that in place for the time being even though the common code
no longer uses it.

This adds a warning for any caller of get_unaligned()/put_unaligned()
that passes in a single-byte pointer, but I've sent patches for all
instances that show up in x86 and randconfig builds. It would be nice
to change the arguments of the endian-specific accessors to take the
matching __be16/__be32/__be64/__le16/__le32/__le64 arguments instead of
a void pointer, but that requires more changes to the rest of the kernel.

Signed-off-by: Arnd Bergmann <[email protected]>
---
include/asm-generic/unaligned.h | 138 +++++++++++++++++++++++++---
include/linux/unaligned/be_struct.h | 67 --------------
include/linux/unaligned/generic.h | 115 -----------------------
include/linux/unaligned/le_struct.h | 67 --------------
4 files changed, 125 insertions(+), 262 deletions(-)
delete mode 100644 include/linux/unaligned/be_struct.h
delete mode 100644 include/linux/unaligned/generic.h
delete mode 100644 include/linux/unaligned/le_struct.h

diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
index 36bf03aaa674..9ed952fba718 100644
--- a/include/asm-generic/unaligned.h
+++ b/include/asm-generic/unaligned.h
@@ -6,20 +6,132 @@
* This is the most generic implementation of unaligned accesses
* and should work almost anywhere.
*/
+#include <linux/unaligned/packed_struct.h>
#include <asm/byteorder.h>

-#if defined(__LITTLE_ENDIAN)
-# include <linux/unaligned/le_struct.h>
-# include <linux/unaligned/generic.h>
-# define get_unaligned __get_unaligned_le
-# define put_unaligned __put_unaligned_le
-#elif defined(__BIG_ENDIAN)
-# include <linux/unaligned/be_struct.h>
-# include <linux/unaligned/generic.h>
-# define get_unaligned __get_unaligned_be
-# define put_unaligned __put_unaligned_be
-#else
-# error need to define endianess
-#endif
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) ({ \
+ __auto_type __ptr = (ptr); \
+ __get_unaligned_t(typeof(*__ptr), __ptr); \
+})
+
+#define __put_unaligned_t(type, val, ptr) ({ \
+ struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x = (val); \
+})
+
+#define put_unaligned(val, ptr) do { \
+ __auto_type __ptr = (ptr); \
+ __put_unaligned_t(typeof(*__ptr), (val), __ptr); \
+} while (0)
+
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return le16_to_cpu(__get_unaligned_t(__le16, p));
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return le64_to_cpu(__get_unaligned_t(__le64, p));
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_t(__le16, cpu_to_le16(val), p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_t(__le32, cpu_to_le32(val), p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_t(__le64, cpu_to_le64(val), p);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return be16_to_cpu(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return be32_to_cpu(__get_unaligned_t(__be32, p));
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return be64_to_cpu(__get_unaligned_t(__be64, p));
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_t(__be16, cpu_to_be16(val), p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_t(__be32, cpu_to_be32(val), p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_t(__be64, cpu_to_be64(val), p);
+}
+
+static inline u32 __get_unaligned_be24(const u8 *p)
+{
+ return p[0] << 16 | p[1] << 8 | p[2];
+}
+
+static inline u32 get_unaligned_be24(const void *p)
+{
+ return __get_unaligned_be24(p);
+}
+
+static inline u32 __get_unaligned_le24(const u8 *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16;
+}
+
+static inline u32 get_unaligned_le24(const void *p)
+{
+ return __get_unaligned_le24(p);
+}
+
+static inline void __put_unaligned_be24(const u32 val, u8 *p)
+{
+ *p++ = val >> 16;
+ *p++ = val >> 8;
+ *p++ = val;
+}
+
+static inline void put_unaligned_be24(const u32 val, void *p)
+{
+ __put_unaligned_be24(val, p);
+}
+
+static inline void __put_unaligned_le24(const u32 val, u8 *p)
+{
+ *p++ = val;
+ *p++ = val >> 8;
+ *p++ = val >> 16;
+}
+
+static inline void put_unaligned_le24(const u32 val, void *p)
+{
+ __put_unaligned_le24(val, p);
+}

#endif /* __ASM_GENERIC_UNALIGNED_H */
diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h
deleted file mode 100644
index 76d9fe297c33..000000000000
--- a/include/linux/unaligned/be_struct.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_UNALIGNED_BE_STRUCT_H
-#define _LINUX_UNALIGNED_BE_STRUCT_H
-
-#include <linux/unaligned/packed_struct.h>
-
-static inline u16 get_unaligned_be16(const void *p)
-{
- return __get_unaligned_cpu16((const u8 *)p);
-}
-
-static inline u32 get_unaligned_be32(const void *p)
-{
- return __get_unaligned_cpu32((const u8 *)p);
-}
-
-static inline u64 get_unaligned_be64(const void *p)
-{
- return __get_unaligned_cpu64((const u8 *)p);
-}
-
-static inline void put_unaligned_be16(u16 val, void *p)
-{
- __put_unaligned_cpu16(val, p);
-}
-
-static inline void put_unaligned_be32(u32 val, void *p)
-{
- __put_unaligned_cpu32(val, p);
-}
-
-static inline void put_unaligned_be64(u64 val, void *p)
-{
- __put_unaligned_cpu64(val, p);
-}
-
-static inline u16 get_unaligned_le16(const void *p)
-{
- return swab16(__get_unaligned_cpu16((const u8 *)p));
-}
-
-static inline u32 get_unaligned_le32(const void *p)
-{
- return swab32(__get_unaligned_cpu32((const u8 *)p));
-}
-
-static inline u64 get_unaligned_le64(const void *p)
-{
- return swab64(__get_unaligned_cpu64((const u8 *)p));
-}
-
-static inline void put_unaligned_le16(u16 val, void *p)
-{
- __put_unaligned_cpu16(swab16(val), p);
-}
-
-static inline void put_unaligned_le32(u32 val, void *p)
-{
- __put_unaligned_cpu32(swab32(val), p);
-}
-
-static inline void put_unaligned_le64(u64 val, void *p)
-{
- __put_unaligned_cpu64(swab64(val), p);
-}
-
-#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */
diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h
deleted file mode 100644
index 303289492859..000000000000
--- a/include/linux/unaligned/generic.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_UNALIGNED_GENERIC_H
-#define _LINUX_UNALIGNED_GENERIC_H
-
-#include <linux/types.h>
-
-/*
- * Cause a link-time error if we try an unaligned access other than
- * 1,2,4 or 8 bytes long
- */
-extern void __bad_unaligned_access_size(void);
-
-#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \
- __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
- __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \
- __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \
- __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \
- __bad_unaligned_access_size())))); \
- }))
-
-#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \
- __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
- __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \
- __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \
- __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \
- __bad_unaligned_access_size())))); \
- }))
-
-#define __put_unaligned_le(val, ptr) ({ \
- void *__gu_p = (ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- *(u8 *)__gu_p = (__force u8)(val); \
- break; \
- case 2: \
- put_unaligned_le16((__force u16)(val), __gu_p); \
- break; \
- case 4: \
- put_unaligned_le32((__force u32)(val), __gu_p); \
- break; \
- case 8: \
- put_unaligned_le64((__force u64)(val), __gu_p); \
- break; \
- default: \
- __bad_unaligned_access_size(); \
- break; \
- } \
- (void)0; })
-
-#define __put_unaligned_be(val, ptr) ({ \
- void *__gu_p = (ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- *(u8 *)__gu_p = (__force u8)(val); \
- break; \
- case 2: \
- put_unaligned_be16((__force u16)(val), __gu_p); \
- break; \
- case 4: \
- put_unaligned_be32((__force u32)(val), __gu_p); \
- break; \
- case 8: \
- put_unaligned_be64((__force u64)(val), __gu_p); \
- break; \
- default: \
- __bad_unaligned_access_size(); \
- break; \
- } \
- (void)0; })
-
-static inline u32 __get_unaligned_be24(const u8 *p)
-{
- return p[0] << 16 | p[1] << 8 | p[2];
-}
-
-static inline u32 get_unaligned_be24(const void *p)
-{
- return __get_unaligned_be24(p);
-}
-
-static inline u32 __get_unaligned_le24(const u8 *p)
-{
- return p[0] | p[1] << 8 | p[2] << 16;
-}
-
-static inline u32 get_unaligned_le24(const void *p)
-{
- return __get_unaligned_le24(p);
-}
-
-static inline void __put_unaligned_be24(const u32 val, u8 *p)
-{
- *p++ = val >> 16;
- *p++ = val >> 8;
- *p++ = val;
-}
-
-static inline void put_unaligned_be24(const u32 val, void *p)
-{
- __put_unaligned_be24(val, p);
-}
-
-static inline void __put_unaligned_le24(const u32 val, u8 *p)
-{
- *p++ = val;
- *p++ = val >> 8;
- *p++ = val >> 16;
-}
-
-static inline void put_unaligned_le24(const u32 val, void *p)
-{
- __put_unaligned_le24(val, p);
-}
-
-#endif /* _LINUX_UNALIGNED_GENERIC_H */
diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h
deleted file mode 100644
index 22f90a4afaa5..000000000000
--- a/include/linux/unaligned/le_struct.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_UNALIGNED_LE_STRUCT_H
-#define _LINUX_UNALIGNED_LE_STRUCT_H
-
-#include <linux/unaligned/packed_struct.h>
-
-static inline u16 get_unaligned_le16(const void *p)
-{
- return __get_unaligned_cpu16((const u8 *)p);
-}
-
-static inline u32 get_unaligned_le32(const void *p)
-{
- return __get_unaligned_cpu32((const u8 *)p);
-}
-
-static inline u64 get_unaligned_le64(const void *p)
-{
- return __get_unaligned_cpu64((const u8 *)p);
-}
-
-static inline void put_unaligned_le16(u16 val, void *p)
-{
- __put_unaligned_cpu16(val, p);
-}
-
-static inline void put_unaligned_le32(u32 val, void *p)
-{
- __put_unaligned_cpu32(val, p);
-}
-
-static inline void put_unaligned_le64(u64 val, void *p)
-{
- __put_unaligned_cpu64(val, p);
-}
-
-static inline u16 get_unaligned_be16(const void *p)
-{
- return swab16(__get_unaligned_cpu16((const u8 *)p));
-}
-
-static inline u32 get_unaligned_be32(const void *p)
-{
- return swab32(__get_unaligned_cpu32((const u8 *)p));
-}
-
-static inline u64 get_unaligned_be64(const void *p)
-{
- return swab64(__get_unaligned_cpu64((const u8 *)p));
-}
-
-static inline void put_unaligned_be16(u16 val, void *p)
-{
- __put_unaligned_cpu16(swab16(val), p);
-}
-
-static inline void put_unaligned_be32(u32 val, void *p)
-{
- __put_unaligned_cpu32(swab32(val), p);
-}
-
-static inline void put_unaligned_be64(u64 val, void *p)
-{
- __put_unaligned_cpu64(swab64(val), p);
-}
-
-#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */
--
2.29.2


2021-05-07 23:56:27

by Linus Torvalds

[permalink] [raw]
Subject: Re: [RFC 12/12] asm-generic: simplify asm/unaligned.h

On Fri, May 7, 2021 at 3:12 PM Arnd Bergmann <[email protected]> wrote:
>
> From: Arnd Bergmann <[email protected]>
>
> The get_unaligned()/put_unaligned() implementations are much more complex
> than necessary, now that all architectures use the same code.

Thanks for doing this, it looks good to me.

I suspect it's still slightly unnecessarily complicated - why is that
get_unaligned() not just

#define get_unaligned(ptr) \
__get_unaligned_t(typeof(*__ptr), __ptr)

Because I'm not seeing the reason for doing that "__auto_type __ptr"
thing - the argument to a "typeof()" isn't actually evaluated.

Maybe I'm just nit-picking, this certainly is a huge improvement regardless.

Linus

2021-05-08 09:31:20

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [RFC 12/12] asm-generic: simplify asm/unaligned.h

On Sat, May 8, 2021 at 1:54 AM Linus Torvalds
<[email protected]> wrote:
> On Fri, May 7, 2021 at 3:12 PM Arnd Bergmann <[email protected]> wrote:
> >
> > From: Arnd Bergmann <[email protected]>
> >
> > The get_unaligned()/put_unaligned() implementations are much more complex
> > than necessary, now that all architectures use the same code.
>
> Thanks for doing this, it looks good to me.
>
> I suspect it's still slightly unnecessarily complicated - why is that
> get_unaligned() not just
>
> #define get_unaligned(ptr) \
> __get_unaligned_t(typeof(*__ptr), __ptr)
>
> Because I'm not seeing the reason for doing that "__auto_type __ptr"
> thing - the argument to a "typeof()" isn't actually evaluated.

Both versions are equally correct, I picked the __auto_type version
because this tends to produce smaller preprocessor output when you have
multiple layers of nested macros with 'ptr' expanding to something
complicated, and the get_unaligned() itself being expanded multiple
times again.

When I recently experimented with possible changes to cmpxchg() and
get_user(), it had a measurable impact on compile time with clang on
those macros.

get_unaligned() doesn't appear to be used much in nested macros
at all, so it probably won't actually help here, and I can just do the
simpler version instead.

I forgot to mention in the changelog that this version does not actually
require the argument to be a scalar, not sure if this is something
we want or not. It does allow developers to write something like

__be32 get_ip_saddr(struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
return get_unaligned(iph).saddr;
}

and get the expected result. While this seems handy, it also makes it
harder to change the macro back to one that only works on scalars
after such usage becomes widespread.

Arnd

2021-05-08 11:05:37

by David Laight

[permalink] [raw]
Subject: RE: [RFC 12/12] asm-generic: simplify asm/unaligned.h

From: Arnd Bergmann
> Sent: 07 May 2021 23:08
>
> The get_unaligned()/put_unaligned() implementations are much more complex
> than necessary, now that all architectures use the same code.
>
...
> +#define __get_unaligned_t(type, ptr) ({ \
> + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
> + __pptr->x; \
> +})

I thought gcc was likely to track through the alignment of the
variable holding the source pointer (through any (void *) casts
implied by inlined function calls) through to the pointer used
for the actual access - so it would tend to issue a single
instruction that assumed an aligned address.

I know that has caused grief trying to copy unaligned data
to an aligned structure.

Possibly adding:
asm ("" :: "+r" (__pptr)) );
in the middle stops that assumption without generating any code.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2021-05-08 14:21:39

by David Laight

[permalink] [raw]
Subject: RE: [RFC 12/12] asm-generic: simplify asm/unaligned.h

From: David Laight
> Sent: 08 May 2021 12:03
>
> From: Arnd Bergmann
> > Sent: 07 May 2021 23:08
> >
> > The get_unaligned()/put_unaligned() implementations are much more complex
> > than necessary, now that all architectures use the same code.
> >
> ...
> > +#define __get_unaligned_t(type, ptr) ({ \
> > + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
> > + __pptr->x; \
> > +})
>
> I thought gcc was likely to track through the alignment of the
> variable holding the source pointer (through any (void *) casts
> implied by inlined function calls) through to the pointer used
> for the actual access - so it would tend to issue a single
> instruction that assumed an aligned address.
>
> I know that has caused grief trying to copy unaligned data
> to an aligned structure.
>
> Possibly adding:
> asm ("" :: "+r" (__pptr)) );
> in the middle stops that assumption without generating any code.

That is the wrong asm.
You need the one where an input operand and output operand
share the same register and use it for the assignment.

I've been trying to get godbolt to do something useful.
But it seems to be stuck in C++ mode and is missing something like
sparc which definitely doesn't do misaligned accesses.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2021-05-08 15:25:20

by Linus Torvalds

[permalink] [raw]
Subject: Re: [RFC 12/12] asm-generic: simplify asm/unaligned.h

On Sat, May 8, 2021 at 2:29 AM Arnd Bergmann <[email protected]> wrote:
>
> On Sat, May 8, 2021 at 1:54 AM Linus Torvalds
> <[email protected]> wrote:
> >
> > #define get_unaligned(ptr) \
> > __get_unaligned_t(typeof(*__ptr), __ptr)

That's missing a set of parentheses around that __ptr thing ('*__ptr'
should be '*(__ptr)'), btw, so don't use that as-is.

> Both versions are equally correct, I picked the __auto_type version
> because this tends to produce smaller preprocessor output when you have
> multiple layers of nested macros with 'ptr' expanding to something
> complicated,

Ahh.

Yeah, that's probably not a problem for get_unaligned(), but it might
happen in other situations.

Linus

2021-05-10 06:41:38

by Geert Uytterhoeven

[permalink] [raw]
Subject: Re: [RFC 12/12] asm-generic: simplify asm/unaligned.h

Hi Arnd,

On Sat, May 8, 2021 at 12:12 AM Arnd Bergmann <[email protected]> wrote:
> The get_unaligned()/put_unaligned() implementations are much more complex
> than necessary, now that all architectures use the same code.
>
> Move everything into one file and use a much more compact way to express
> the same logic.
>
> I've compared the binary output using gcc-11 across defconfig builds for
> all architectures and found this patch to make no difference, except for
> a single function on powerpc that needs two additional register moves
> because of random differences in register allocation.
>
> There are a handful of callers of the low-level __get_unaligned_cpu32,
> so leave that in place for the time being even though the common code
> no longer uses it.
>
> This adds a warning for any caller of get_unaligned()/put_unaligned()
> that passes in a single-byte pointer, but I've sent patches for all
> instances that show up in x86 and randconfig builds. It would be nice
> to change the arguments of the endian-specific accessors to take the
> matching __be16/__be32/__be64/__le16/__le32/__le64 arguments instead of
> a void pointer, but that requires more changes to the rest of the kernel.
>
> Signed-off-by: Arnd Bergmann <[email protected]>

Thanks for your patch!

> @@ -6,20 +6,132 @@
> * This is the most generic implementation of unaligned accesses
> * and should work almost anywhere.
> */
> +#include <linux/unaligned/packed_struct.h>
> #include <asm/byteorder.h>
>
> -#if defined(__LITTLE_ENDIAN)
> -# include <linux/unaligned/le_struct.h>
> -# include <linux/unaligned/generic.h>
> -# define get_unaligned __get_unaligned_le
> -# define put_unaligned __put_unaligned_le
> -#elif defined(__BIG_ENDIAN)
> -# include <linux/unaligned/be_struct.h>
> -# include <linux/unaligned/generic.h>
> -# define get_unaligned __get_unaligned_be
> -# define put_unaligned __put_unaligned_be
> -#else
> -# error need to define endianess
> -#endif
> +#define __get_unaligned_t(type, ptr) ({ \
> + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
> + __pptr->x; \

Space before tab (cfr. checkpatch).

> +})
> +
> +#define get_unaligned(ptr) ({ \
> + __auto_type __ptr = (ptr); \
> + __get_unaligned_t(typeof(*__ptr), __ptr); \
> +})
> +
> +#define __put_unaligned_t(type, val, ptr) ({ \
> + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
> + __pptr->x = (val); \

Likewise

> +})
>
Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds